├── .github ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ ├── bug.yml │ ├── documentation.yml │ └── features.yml └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── Dockerfile ├── LEGAL.md ├── LICENSE.md ├── README.md ├── README_en.md ├── configs ├── default_config.py ├── model_config.py.example ├── server_config.py.example └── utils.py ├── docker_build.sh ├── env_start.sh ├── examples ├── agent_examples │ ├── baseGroupPhase_example.py │ ├── baseTaskPhase_example.py │ ├── codeChatPhaseLocal_example.py │ ├── codeChatPhase_example.py │ ├── codeGenDoc_example.py │ ├── codeGenTestCases_example.py │ ├── codeReactPhase_example.py │ ├── codeRetrieval_example.py │ ├── codeToolReactPhase_example.py │ ├── docChatPhase_example.py │ ├── metagpt_phase_example.py │ ├── searchChatPhase_example.py │ └── toolReactPhase_example.py ├── api.py ├── auto_examples │ ├── agentchat_RetrievalChat.py │ ├── agentchat_function_call.py │ ├── agentchat_teachability.py │ ├── agentchat_teaching.py │ ├── agentchat_web_info.py │ └── auto_feedback_from_code_execution.py ├── gptq.py ├── llm_api.py ├── model_workers │ ├── SparkApi.py │ ├── __init__.py │ ├── azure.py │ ├── baichuan.py │ ├── base.py │ ├── fangzhou.py │ ├── minimax.py │ ├── openai.py │ ├── qianfan.py │ ├── qwen.py │ ├── tiangong.py │ ├── xinghuo.py │ └── zhipu.py ├── sdfile_api.py ├── start.py ├── start.sh ├── stop.py ├── utils.py ├── webui.py ├── webui │ ├── __init__.py │ ├── code.py │ ├── dialogue.py │ ├── document.py │ ├── prompt.py │ ├── utils.py │ └── yamls │ │ ├── webui_en.yaml │ │ └── webui_zh.yaml └── webui_config.py ├── nltk_data ├── corpora │ └── cmudict │ │ ├── README │ │ └── cmudict ├── taggers │ └── averaged_perceptron_tagger │ │ └── averaged_perceptron_tagger.pickle └── tokenizers │ └── punkt │ ├── PY3 │ ├── README │ ├── czech.pickle │ ├── danish.pickle │ ├── dutch.pickle │ ├── english.pickle │ ├── estonian.pickle │ ├── finnish.pickle │ ├── french.pickle │ ├── german.pickle │ ├── greek.pickle │ ├── italian.pickle │ ├── malayalam.pickle │ ├── norwegian.pickle │ ├── polish.pickle │ ├── portuguese.pickle │ ├── russian.pickle │ ├── slovene.pickle │ ├── spanish.pickle │ ├── swedish.pickle │ └── turkish.pickle │ ├── README │ ├── czech.pickle │ ├── danish.pickle │ ├── dutch.pickle │ ├── english.pickle │ ├── estonian.pickle │ ├── finnish.pickle │ ├── french.pickle │ ├── german.pickle │ ├── greek.pickle │ ├── italian.pickle │ ├── malayalam.pickle │ ├── norwegian.pickle │ ├── polish.pickle │ ├── portuguese.pickle │ ├── russian.pickle │ ├── slovene.pickle │ ├── spanish.pickle │ ├── swedish.pickle │ └── turkish.pickle ├── requirements.txt ├── sources ├── docs │ ├── langchain_asia.jsonl │ ├── langchain_text_10.jsonl │ └── python_langchain_com_docs_get_started_introduction_text.jsonl ├── docs_imgs │ ├── BaseAgent.png │ ├── agent-flow.png │ ├── devops-chatbot-module-v2.png │ ├── devops-chatbot-module.png │ ├── devopsgpt_example.png │ ├── devopsgpt_example2.png │ ├── luban.png │ ├── objective.png │ ├── objective_v4.png │ ├── roadmap.png │ ├── roadmap2.png │ ├── webui_config.png │ └── wechat.png ├── imgs │ ├── devops-chatbot.png │ ├── devops-chatbot2.png │ ├── docker_logs.png │ └── fastapi_docs_020_0.png ├── readme_docs │ ├── coagent │ │ ├── agent-flow-en.md │ │ ├── agent-flow.md │ │ ├── coagent-en.md │ │ ├── coagent.md │ │ ├── connector │ │ │ ├── connector_agent.md │ │ │ ├── connector_chain.md │ │ │ ├── connector_memory.md │ │ │ ├── connector_phase.md │ │ │ ├── connector_prompt.md │ │ │ └── customed_examples.md │ │ ├── quick-start-en.md │ │ └── quick-start.md │ ├── 
contribution │ │ ├── contribute_guide.md │ │ └── contribute_guide_en.md │ ├── fastchat-en.md │ ├── fastchat.md │ ├── roadmap-en.md │ ├── roadmap.md │ ├── start-en.md │ └── start.md └── tool_datas │ └── stock.json ├── tests ├── file_test.py └── torch_test.py └── web_crawler ├── data ├── html │ └── test_langchain_html.jsonl └── text │ └── test_langchain_text.jsonl ├── main_test.py └── utils ├── DocTokenizer.py ├── Html2Text.py ├── WebCrawler.py └── WebHtmlExtractor.py /.github/ISSUE_TEMPLATE/bug.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F41B Bug Report" 2 | description: Report a bug in Codefuse. To report a security issue, please instead use the security option below. 3 | labels: ["01 Bug Report"] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: > 8 | Thank you for taking the time to file a bug report. 9 | 10 | Use this to report bugs in Codefuse. 11 | 12 | If you're not certain that your issue is due to a bug in Codefuse, please use [GitHub Discussions](https://github.com/codefuse-ai/codefuse-chatbot/discussions) 13 | to ask for help with your issue. 14 | 15 | We warmly welcome any suggestions, opinions (including criticisms), comments, and contributions to the Codefuse project. 16 | 17 | Relevant links to check before filing a bug report to see if your issue has already been reported, fixed, or 18 | if there's another way to solve your problem: 19 | 20 | [API Reference](https://codefuse-ai.github.io/), 21 | [GitHub search](https://github.com/codefuse-ai/codefuse-chatbot), 22 | [Chatbot Github Discussions](https://github.com/codefuse-ai/codefuse-chatbot/discussions), 23 | [Chatbot Github Issues](https://github.com/codefuse-ai/codefuse-chatbot/issues) 24 | 25 | - type: checkboxes 26 | id: checks 27 | attributes: 28 | label: Checked other resources 29 | description: Please confirm and check all the following options. 30 | options: 31 | - label: I searched the Codefuse documentation with the integrated search. 32 | required: true 33 | - label: I used the GitHub search to find a similar question and didn't find it. 34 | required: true 35 | - label: I am sure that this is a bug in Codefuse-Repos rather than my code. 36 | required: true 37 | - label: I added a very descriptive title to this issue. 38 | required: true 39 | 40 | - type: dropdown 41 | id: system-info 42 | attributes: 43 | label: System Info 44 | description: > 45 | Please select the operating system you were using to run codefuse-ai/repos when this problem occurred. 46 | options: 47 | - Windows 48 | - Linux 49 | - MacOS 50 | - Docker 51 | - Devcontainer / Codespace 52 | - Windows Subsystem for Linux (WSL) 53 | - Other 54 | validations: 55 | required: true 56 | nested_fields: 57 | - type: text 58 | attributes: 59 | label: Specify the system 60 | description: Please specify the system you are working on. 61 | 62 | - type: dropdown 63 | attributes: 64 | label: Code Version 65 | description: | 66 | Please select which version of Codefuse-Repos you were using when this issue occurred. 67 | **If you weren't using the latest version, please try reproducing the issue with it first**. 68 | If installed with git you can run `git branch` to see which version of codefuse-ai you are running. 69 | options: 70 | - Latest Release 71 | - Stable (branch) 72 | - Master (branch) 73 | validations: 74 | required: true 75 | 76 | - type: textarea 77 | id: description 78 | attributes: 79 | label: Description 80 | description: | 81 | What is the problem, question, or error?
82 | 83 | Write a short description telling what you are doing, what you expect to happen, and what is currently happening. 84 | placeholder: | 85 | * I'm trying to use the `coagent` library to do X. 86 | * I expect to see Y. 87 | * Instead, it does Z. 88 | validations: 89 | required: true 90 | 91 | - type: textarea 92 | id: reproduction 93 | validations: 94 | required: true 95 | attributes: 96 | label: Example Code 97 | description: | 98 | Please add a self-contained, [minimal, reproducible, example](https://stackoverflow.com/help/minimal-reproducible-example) with your use case. 99 | 100 | If a maintainer can copy it, run it, and see it right away, there's a much higher chance that you'll be able to get help. 101 | 102 | **Important!** 103 | 104 | * Use code tags (e.g., ```python ... ```) to correctly [format your code](https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting). 105 | * INCLUDE the language label (e.g. `python`) after the first three backticks to enable syntax highlighting. (e.g., ```python rather than ```). 106 | * Reduce your code to the minimum required to reproduce the issue if possible. This makes it much easier for others to help you. 107 | * Avoid screenshots when possible, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code. 108 | 109 | placeholder: | 110 | The following code: 111 | 112 | ```python 113 | from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS 114 | from coagent.connector.phase import BasePhase 115 | from coagent.connector.schema import Message 116 | 117 | phase_name = "baseGroupPhase" 118 | phase = BasePhase( 119 | phase_name, embed_config=embed_config, llm_config=llm_config, 120 | ) 121 | 122 | query_content = "确认本地是否存在employee_data.csv,并查看它有哪些列和数据类型;然后画柱状图" 123 | query = Message( 124 | role_name="human", role_type="user", tools=[], 125 | role_content=query_content, input_query=query_content, origin_query=query_content, 126 | ) 127 | 128 | output_message, output_memory = phase.step(query) 129 | ``` 130 | 131 | - type: textarea 132 | id: error 133 | validations: 134 | required: false 135 | attributes: 136 | label: Error Message and Stack Trace (if applicable) 137 | description: | 138 | If you are reporting an error, please include the full error message and stack trace. 139 | placeholder: | 140 | Exception + full stack trace 141 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation.yml: -------------------------------------------------------------------------------- 1 | name: Documentation 2 | description: Report an issue related to the Codefuse documentation. 3 | title: "DOC: " 4 | labels: [02 - Documentation] 5 | 6 | body: 7 | - type: textarea 8 | attributes: 9 | label: "Issue with current documentation:" 10 | description: > 11 | Please make sure to leave a reference to the document/code you're 12 | referring to. 13 | 14 | - type: textarea 15 | attributes: 16 | label: "Idea or request for content:" 17 | description: > 18 | Please describe as clearly as possible what topics you think are missing 19 | from the current documentation. -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/features.yml: -------------------------------------------------------------------------------- 1 | name: Feature request 🚀 2 | description: Suggest a new idea for Codefuse! 
3 | labels: ['03 New Features'] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | First, check out our [wiki page on Contributing](https://github.com/Significant-Gravitas/Nexus/wiki/Contributing) 9 | Please provide a searchable summary of the issue in the title above ⬆️. 10 | 11 | - type: checkboxes 12 | id: checks 13 | attributes: 14 | label: Checked other resources 15 | description: Please confirm and check all the following options. 16 | options: 17 | - label: I searched the Codefuse documentation with the integrated search. 18 | required: true 19 | - label: I used the GitHub search to find a similar question and didn't find it. 20 | required: true 21 | 22 | - type: textarea 23 | attributes: 24 | label: Summary 💡 25 | description: Describe how it should work. 26 | 27 | - type: textarea 28 | attributes: 29 | label: Examples 🌈 30 | description: Provide a link to other implementations, or screenshots of the expected behavior. 31 | 32 | - type: textarea 33 | attributes: 34 | label: Motivation 🔦 35 | description: What are you trying to accomplish? How has the lack of this feature affected you? Providing context helps us come up with a solution that is more useful in the real world. -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__ 2 | knowledge_base 3 | logs 4 | llm_models 5 | embedding_models 6 | jupyter_work 7 | model_config.py 8 | server_config.py 9 | internal_start.py 10 | code_base 11 | .DS_Store 12 | .idea 13 | data 14 | .pyc 15 | tests 16 | *egg-info 17 | build 18 | dist 19 | package.sh 20 | local_config.json 21 | muagent* -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.18-bookworm 2 | 3 | WORKDIR /home/user 4 | 5 | COPY ./requirements.txt /home/user/docker_requirements.txt 6 | 7 | 8 | # RUN apt-get update 9 | # RUN apt-get install -y iputils-ping telnetd net-tools vim tcpdump 10 | # RUN echo telnet stream tcp nowait telnetd /usr/sbin/tcpd /usr/sbin/in.telnetd /etc/inetd.conf 11 | # RUN service inetutils-inetd start 12 | # service inetutils-inetd status 13 | 14 | RUN wget https://oss-cdn.nebula-graph.com.cn/package/3.6.0/nebula-graph-3.6.0.ubuntu1804.amd64.deb 15 | RUN dpkg -i nebula-graph-3.6.0.ubuntu1804.amd64.deb 16 | 17 | RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple 18 | RUN pip install -r /home/user/docker_requirements.txt 19 | 20 | CMD ["bash"] 21 | -------------------------------------------------------------------------------- /LEGAL.md: -------------------------------------------------------------------------------- 1 | Legal Disclaimer 2 | 3 | Within this source code, the comments in Chinese shall be the original, governing version. Any comments in other languages are for reference only. In the event of any conflict between the Chinese language version comments and other language version comments, the Chinese language version shall prevail.
4 | 5 | 法律免责声明 6 | 7 | 关于代码注释部分,中文注释为官方版本,其它语言注释仅做参考。中文注释可能与其它语言注释存在不一致,当中文注释与其它语言注释存在不一致时,请以中文注释为准。 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 中文  |  English  3 |

4 | 5 | #

CodeFuse-ChatBot: Development by Private Knowledge Augmentation

6 | 7 |

8 | ZH doc 9 | EN doc 10 | License 11 | 12 | Open Issues 13 | 14 |

15 |

16 | 17 | CodeFuse-ChatBot is an open-source AI assistant developed by the Ant Group CodeFuse team, dedicated to simplifying and optimizing every stage of the software development lifecycle. The project combines a Multi-Agent cooperative scheduling mechanism with a rich set of tool libraries, code bases, knowledge bases, and a sandbox environment, enabling LLMs to effectively execute and handle complex tasks in the DevOps domain. 18 | 19 | 20 | ## 🔔 Updates 21 | - [2024.01.29] Released the configurable multi-agent framework codefuse-muAgent; for details see the [usage guide](https://codefuse-ai.github.io/zh-CN/docs/api-docs/MuAgent/overview/multi-agent) 22 | - [2023.12.26] Opened up support for private open-source LLMs and LLM APIs via FastChat 23 | - [2023.12.14] Featured in a QbitAI (量子位) WeChat article: [article link](https://mp.weixin.qq.com/s/MuPfayYTk9ZW6lcqgMpqKA) 24 | - [2023.12.01] Released Multi-Agent and codebase retrieval 25 | - [2023.11.15] Added a Q&A enhancement mode based on local codebases 26 | - [2023.09.15] Released the local/isolated-environment sandbox, plus crawler-based knowledge retrieval for specified URLs 27 | 28 | ## 📜 Table of Contents 29 | - [🤝 Introduction](#-introduction) 30 | - [🎥 Demo Videos](#-demo-videos) 31 | - [🧭 Technical Roadmap](#-technical-roadmap) 32 | - [🌐 Model Support](#-model-support) 33 | - [🚀 Quick Start](#-quick-start) 34 | - [🤗 Acknowledgements](#-acknowledgements) 35 | - [🗂 Miscellaneous](#-miscellaneous) 36 | - [📱 Contact Us](#-contact-us) 37 | - [✨ Star History](#-star-history) 38 | 39 | ## 🤝 Introduction 40 | 41 | 💡 This project aims to build an AI assistant for the whole software development lifecycle, covering design, coding, testing, deployment, and operations, through Retrieval Augmented Generation (RAG), Tool Learning, and sandbox environments. It is meant to gradually move people from the traditional dev-ops routine of looking up scattered references and operating separate, isolated platforms toward an LLM-driven, conversational mode of development and operations, changing day-to-day development and operations habits. 42 | 43 | The project's core differentiating technologies and features: 44 | - **🧠 Intelligent Scheduling Core:** A scheduling core with a complete, well-structured pipeline that supports one-click multi-mode configuration, simplifying workflows. [Usage guide](https://codefuse-ai.github.io/zh-CN/docs/api-docs/MuAgent/overview/multi-agent) 45 | - **💻 Whole-Codebase Analysis:** Repository-level code understanding, plus project-file-level code writing and generation, improving development efficiency. 46 | - **📄 Enhanced Document Analysis:** Combines document knowledge bases with knowledge graphs, giving document analysis deeper support through retrieval and reasoning enhancement. 47 | - **🔧 Vertical-Domain Knowledge:** A knowledge base tailored to the DevOps domain, with convenient one-click self-service construction of vertical knowledge bases. 48 | - **🤖 Vertical-Model Compatibility:** Small models tailored to the DevOps domain, kept compatible with DevOps-related platforms to promote integration of the technology ecosystem. 49 | 50 | 🌍 Relying on open-source LLM and Embedding models, this project supports offline private deployment based on open-source models. Calling the OpenAI API is also supported. [Integration demo](https://codefuse-ai.github.io/zh-CN/docs/developer-docs/CodeFuse-ChatBot/master/fastchat) 51 | 52 | 👥 The core development team has long focused on research in the AIOps + NLP field. We launched the Codefuse-ai project in the hope that the community will broadly contribute high-quality development and operations documents and jointly improve this solution, working toward the goal of "making development effortless for everyone." 53 | 54 |
55 | (image) 56 |
57 | 58 | 59 | ## 🎥 Demo Videos 60 | 61 | To help you get a more intuitive view of the features and usage of Codefuse-ChatBot, we have recorded a series of demo videos. Watching them is a quick way to learn the project's main capabilities and workflows. 62 | 63 | 64 | - Knowledge base import and Q&A: [demo video](https://www.youtube.com/watch?v=UGJdTGaVnNY&t=2s&ab_channel=HaotianZhu) 65 | - Local codebase import and Q&A: [demo video](https://www.youtube.com/watch?v=ex5sbwGs3Kg) 66 | 67 | 68 | ## 🧭 Technical Roadmap 69 |
70 | (image) 71 |
72 | 73 | - 🧠 **Multi-Agent Schedule Core:** A multi-agent scheduling core; interactive agents can be built with simple configuration. 74 | - 🕷️ **Multi Source Web Crawl:** A multi-source web crawler that fetches specified URLs to collect the information you need. 75 | - 🗂️ **Data Processor:** A data processor that makes document loading, data cleaning, and text splitting easy, integrating data from different sources. 76 | - 🔤 **Text Embedding & Index:** Text embedding and indexing; users can easily upload files for document retrieval, optimizing the document analysis process. 77 | - 🗄️ **Vector Database & Graph Database:** Vector and graph databases, providing a flexible and powerful data management solution. 78 | - 📝 **Prompt Control & Management:** Prompt control and management, precisely defining the context for agents. 79 | - 🚧 **SandBox:** A sandbox environment that safely executes code builds and actions. 80 | - 💬 **LLM:** The agent brain, supporting a variety of open-source models and LLM APIs. 81 | - 🛠️ **API Management:** An API management tool that enables rapid integration of open-source components and operations platforms. 82 | 83 | For implementation details, see: [technical roadmap details](https://codefuse-ai.github.io/zh-CN/docs/developer-docs/CodeFuse-ChatBot/master/roadmap) 84 | For project planning, see: [Projects](https://github.com/orgs/codefuse-ai/projects/1) 85 | 86 | 87 | ## 🌐 Model Support 88 | 89 | If you need a specific model integrated, please let us know your requirements by filing an issue. 90 | 91 | | model_name | model_size | gpu_memory | quantize | HFhub | ModelScope | 92 | | ------------------ | ---------- | ---------- | -------- | ----- | ---------- | 93 | | chatgpt | - | - | - | - | - | 94 | | codellama-34b-int4 | 34b | 20g | int4 | coming soon| [link](https://modelscope.cn/models/codefuse-ai/CodeFuse-CodeLlama-34B-4bits/summary) | 95 | 96 | 97 | 98 | ## 🚀 Quick Start 99 | ### muagent-py 100 | For the full documentation, see: [CodeFuse-muAgent](https://codefuse-ai.github.io/zh-CN/docs/api-docs/MuAgent/overview/multi-agent) 101 | ``` 102 | pip install codefuse-muagent 103 | ``` 104 | 105 | ### Using the ChatBot 106 | Please install the NVIDIA driver yourself. This project has been tested with Python 3.9.18 and CUDA 11.7, on Windows and on x86-architecture macOS. 107 | 108 | For Docker installation, private LLM integration, and related startup issues, see: [quick start details](https://codefuse-ai.github.io/zh-CN/docs/developer-docs/CodeFuse-ChatBot/master/quickstart) 109 | 110 | **On Apple Silicon (Apple M-series chips), you may first need to run `brew install qpdf`.** 111 | 112 | 1. Prepare the Python environment 113 | 114 | - Using conda to manage the Python environment is recommended (optional) 115 | ```bash 116 | # prepare the conda environment 117 | conda create --name devopsgpt python=3.9 118 | conda activate devopsgpt 119 | ``` 120 | 121 | - Install the dependencies 122 | ```bash 123 | cd codefuse-chatbot 124 | # with python=3.9 the latest notebook is fine; with python=3.8 use notebook=6.5.6 125 | pip install -r requirements.txt 126 | ``` 127 | 128 | 2. Start the services 129 | ```bash 130 | # after filling in server_config.py, everything can be started with one command 131 | cd examples 132 | bash start.sh 133 | # then do the relevant configuration in the web page and click `启动对话服务` (start the chat service) 134 | ```
136 | 图片 137 |
137 | 138 | 139 | 140 | Alternatively, start things via `start.py`, [the legacy startup method](https://codefuse-ai.github.io/zh-CN/docs/developer-docs/CodeFuse-ChatBot/master/start-detail) 141 | For more ways to integrate LLMs, see [more details...](https://codefuse-ai.github.io/zh-CN/docs/developer-docs/CodeFuse-ChatBot/master/fastchat) 142 |
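For a programmatic quick start, the scripts under `examples/agent_examples` in this repository all follow the same pattern: build an `LLMConfig` and an `EmbedConfig`, wrap them in a `BasePhase`, and drive the phase with a `Message`. Below is a minimal sketch of a single Q&A round; it assumes `OPENAI_API_KEY` and `API_BASE_URL` are set in the environment and that a local `text2vec-base-chinese` model is available under `embedding_models/`.

```python
import os

from coagent.llm_models.llm_config import EmbedConfig, LLMConfig
from coagent.connector.phase import BasePhase
from coagent.connector.schema import Message

# LLM and embedding configuration, mirroring examples/agent_examples
llm_config = LLMConfig(
    model_name="gpt-3.5-turbo", api_key=os.environ["OPENAI_API_KEY"],
    api_base_url=os.environ["API_BASE_URL"], temperature=0.3,
)
embed_config = EmbedConfig(
    embed_engine="model", embed_model="text2vec-base-chinese",
    embed_model_path="embedding_models/text2vec-base-chinese",
)

# "baseTaskPhase" plans a task and then executes it step by step
phase = BasePhase("baseTaskPhase", embed_config=embed_config, llm_config=llm_config)

query_content = "确认本地是否存在employee_data.csv,并查看它有哪些列和数据类型"
query = Message(
    role_name="human", role_type="user",
    role_content=query_content, input_query=query_content, origin_query=query_content,
)

# step() runs the phase end-to-end and returns the final message plus the full memory
output_message, output_memory = phase.step(query)
print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list"))
```

The full example scripts additionally pass `sandbox_server`, `jupyter_work_path`, and `kb_root_path` to `BasePhase`; see the scripts under `examples/agent_examples` later in this listing for phase-specific options.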
143 | 144 | 145 | ## Contribution Guide 146 | Thank you for your interest in the Codefuse project. We warmly welcome any suggestions, opinions (including criticisms), comments, and contributions to it. 147 | 148 | You can raise your suggestions, opinions, and comments directly through GitHub Issues. 149 | 150 | There are many ways to participate in and contribute to the Codefuse project: code implementation, test writing, process tooling improvements, documentation refinement, and more. We welcome any contribution and will add you to our list of contributors. See the [Contribution Guide...](https://codefuse-ai.github.io/zh-CN/contribution/contribution) 151 | 152 | ## 🤗 Acknowledgements 153 | 154 | This project is based on [langchain-chatchat](https://github.com/chatchat-space/Langchain-Chatchat) and [codebox-api](https://github.com/shroominic/codebox-api); our deepest thanks for their open-source contributions! 155 | 156 | ## 🗂 Miscellaneous 157 | 158 | ### 📱 Contact Us 159 |
160 | (image) 161 |
162 | 163 | ### ✨ Star History 164 | [![Star History Chart](https://api.star-history.com/svg?repos=codefuse-ai/codefuse-chatbot&type=Date)](https://star-history.com/#codefuse-ai/codefuse-chatbot&Date) 165 | -------------------------------------------------------------------------------- /configs/default_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | 4 | 5 | # 6 | system_name = platform.system() 7 | 8 | 9 | # Log storage path 10 | LOG_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "logs") 11 | # Default storage path for source data 12 | SOURCE_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "sources") 13 | # Default storage path for the knowledge base 14 | KB_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "knowledge_base") 15 | # Default storage path for the code base 16 | CB_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "code_base") 17 | # Storage path for nltk models 18 | NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "nltk_data") 19 | # Code storage path 20 | JUPYTER_WORK_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "jupyter_work") 21 | # WEB_CRAWL storage path 22 | WEB_CRAWL_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "knowledge_base") 23 | # NEBULA_DATA storage path 24 | NEBULA_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data/nebula_data") 25 | # Storage path for language models 26 | LOCAL_LLM_MODEL_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "llm_models") 27 | # Storage path for embedding models 28 | LOCAL_EM_MODEL_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "embedding_models") 29 | # CHROMA storage path 30 | CHROMA_PERSISTENT_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data/chroma_data") 31 | 32 | for _path in [LOG_PATH, SOURCE_PATH, KB_ROOT_PATH, CB_ROOT_PATH, NLTK_DATA_PATH, JUPYTER_WORK_PATH, WEB_CRAWL_PATH, NEBULA_PATH, CHROMA_PERSISTENT_PATH, LOCAL_LLM_MODEL_DIR, LOCAL_EM_MODEL_DIR]: 33 | if not os.path.exists(_path): 34 | os.makedirs(_path, exist_ok=True) 35 | 36 | path_envt_dict = { 37 | "LOG_PATH": LOG_PATH, "SOURCE_PATH": SOURCE_PATH, "KB_ROOT_PATH": KB_ROOT_PATH, 38 | "NLTK_DATA_PATH":NLTK_DATA_PATH, "JUPYTER_WORK_PATH": JUPYTER_WORK_PATH, 39 | "WEB_CRAWL_PATH": WEB_CRAWL_PATH, "NEBULA_PATH": NEBULA_PATH, 40 | "CHROMA_PERSISTENT_PATH": CHROMA_PERSISTENT_PATH 41 | } 42 | for path_name, _path in path_envt_dict.items(): 43 | os.environ[path_name] = _path 44 | 45 | 46 | # Default database storage path. 47 | # If you use sqlite, you can modify DB_ROOT_PATH directly; for other databases, modify SQLALCHEMY_DATABASE_URI directly. 48 | DB_ROOT_PATH = os.path.join(KB_ROOT_PATH, "info.db") 49 | SQLALCHEMY_DATABASE_URI = f"sqlite:///{DB_ROOT_PATH}" 50 | 51 | # Available vector store types and their configurations 52 | kbs_config = { 53 | "faiss": { 54 | }, 55 | # "milvus": { 56 | # "host": "127.0.0.1", 57 | # "port": "19530", 58 | # "user": "", 59 | # "password": "", 60 | # "secure": False, 61 | # }, 62 | # "pg": { 63 | # "connection_uri": "postgresql://postgres:postgres@127.0.0.1:5432/langchain_chatchat", 64 | # } 65 | } 66 | 67 | # Default vector store type. Options: faiss, milvus, pg.
68 | DEFAULT_VS_TYPE = "faiss" 69 | 70 | # Number of cached vector stores 71 | CACHED_VS_NUM = 1 72 | 73 | # Length of a single text chunk in the knowledge base 74 | CHUNK_SIZE = 500 75 | 76 | # Overlap length between adjacent text chunks in the knowledge base 77 | OVERLAP_SIZE = 50 78 | 79 | # Number of matched vectors in knowledge base retrieval 80 | VECTOR_SEARCH_TOP_K = 5 81 | 82 | # Relevance threshold for knowledge base retrieval, in the range 0-1; the smaller the SCORE, the higher the relevance, and 1 is equivalent to no filtering. Around 0.5 is recommended. 83 | # On Mac, normalized_L2 may be unavailable, so SCORE_THRESHOLD is adjusted to the 0~1100 range 84 | FAISS_NORMALIZE_L2 = True if system_name in ["Linux", "Windows"] else False 85 | SCORE_THRESHOLD = 1 if system_name in ["Linux", "Windows"] else 1100 86 | 87 | # Number of matched search engine results 88 | SEARCH_ENGINE_TOP_K = 5 89 | 90 | # Number of matched code engine results 91 | CODE_SEARCH_TOP_K = 1 92 | 93 | 94 | # Whether the API enables cross-origin requests; defaults to False, set to True if you need it 95 | # is open cross domain 96 | OPEN_CROSS_DOMAIN = False 97 | 98 | # Required variables for Bing search 99 | # Bing search requires a Bing Subscription Key; apply for a bing search trial in the Azure portal 100 | # For how to apply, see 101 | # https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/create-bing-search-service-resource 102 | # For creating a bing api search instance with python, see: 103 | # https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/quickstarts/rest/python 104 | BING_SEARCH_URL = "https://api.bing.microsoft.com/v7.0/search" 105 | # Note: this is not the api key of bing Webmaster Tools, 106 | 107 | # Also, if you are on a server and get "Failed to establish a new connection: [Errno 110] Connection timed out", 108 | # it is because the server has a firewall; ask the administrator to whitelist the address. On a company server, don't get your hopes up. 109 | BING_SUBSCRIPTION_KEY = "" 110 | 111 | # Whether to enable Chinese title enhancement, plus the related configuration 112 | # Adds title detection to decide which texts are titles and marks them in the metadata; 113 | # then merges each text with its parent-level title, enriching the text information. 114 | ZH_TITLE_ENHANCE = False 115 | 116 | log_verbose = False -------------------------------------------------------------------------------- /configs/server_config.py.example: -------------------------------------------------------------------------------- 1 | from .model_config import LLM_MODEL, LLM_DEVICE 2 | import os, json 3 | 4 | try: 5 | cur_dir = os.path.join(os.path.dirname(os.path.abspath(__file__))) 6 | with open(os.path.join(cur_dir, "local_config.json"), "r") as f: 7 | update_config = json.load(f) 8 | except: 9 | update_config = {} 10 | 11 | # Whether the API enables cross-origin requests; defaults to False, set to True if you need it 12 | # is open cross domain 13 | OPEN_CROSS_DOMAIN = False 14 | # Whether to start the services in Docker containers 15 | try: 16 | DOCKER_SERVICE = json.loads(os.environ["DOCKER_SERVICE"]) or update_config.get("DOCKER_SERVICE") or False 17 | except: 18 | DOCKER_SERVICE = True 19 | # Whether to use the container sandbox 20 | try: 21 | SANDBOX_DO_REMOTE = json.loads(os.environ["SANDBOX_DO_REMOTE"]) or update_config.get("SANDBOX_DO_REMOTE") or False 22 | except: 23 | SANDBOX_DO_REMOTE = True 24 | # Whether to serve through the api service 25 | NO_REMOTE_API = True 26 | # Default bind host for each server 27 | DEFAULT_BIND_HOST = "127.0.0.1" 28 | os.environ["DEFAULT_BIND_HOST"] = DEFAULT_BIND_HOST 29 | 30 | # 31 | CONTRAINER_NAME = "devopsgpt_webui" 32 | IMAGE_NAME = "devopsgpt:latest" 33 | 34 | # webui.py server 35 | WEBUI_SERVER = { 36 | "host": DEFAULT_BIND_HOST, 37 | "port": 8501, 38 | "docker_port": 8501 39 | } 40 | 41 | # api.py server 42 | API_SERVER = { 43 | "host": DEFAULT_BIND_HOST, 44 | "port": 7861, 45 | "docker_port": 7861 46 | } 47 | 48 | # sdfile_api.py server 49 | SDFILE_API_SERVER = { 50 | "host": DEFAULT_BIND_HOST, 51 | "port": 7862, 52 | "docker_port": 7862 53 | } 54 | 55 | # fastchat openai_api server 56 | FSCHAT_OPENAI_API = { 57 | "host": DEFAULT_BIND_HOST, 58 | "port": 8888, # the api_base_url of the model config in model_config.llm_model_dict must match this. 59 | "docker_port": 8888, # the api_base_url of the model config in model_config.llm_model_dict must match this. 60 | } 61 | 62 | # nebula conf 63 | NEBULA_HOST = DEFAULT_BIND_HOST 64 | NEBULA_PORT = 9669 65 | NEBULA_STORAGED_PORT = 9779
66 | NEBULA_USER = 'root' 67 | NEBULA_PASSWORD = '' 68 | NEBULA_GRAPH_SERVER = { 69 | "host": DEFAULT_BIND_HOST, 70 | "port": NEBULA_PORT, 71 | "docker_port": NEBULA_PORT 72 | } 73 | 74 | # sandbox api server 75 | SANDBOX_CONTRAINER_NAME = "devopsgpt_sandbox" 76 | SANDBOX_IMAGE_NAME = "devopsgpt:latest" 77 | SANDBOX_HOST = os.environ.get("SANDBOX_HOST") or update_config.get("SANDBOX_HOST") or DEFAULT_BIND_HOST # "172.25.0.3" 78 | SANDBOX_SERVER = { 79 | "host": f"http://{SANDBOX_HOST}", 80 | "port": 5050, 81 | "docker_port": 5050, 82 | "url": f"http://{SANDBOX_HOST}:5050", 83 | "do_remote": SANDBOX_DO_REMOTE, 84 | } 85 | 86 | # fastchat model_worker server 87 | # These models must be correctly configured in model_config.llm_model_dict. 88 | # When launching startup.py, a model can be specified via `--model-worker --model-name xxxx`; if unspecified, LLM_MODEL is used 89 | # Chat models are recommended; do not use base models, as correct output cannot be obtained from them 90 | FSCHAT_MODEL_WORKERS = json.loads(os.environ.get("FSCHAT_MODEL_WORKERS")) if os.environ.get("FSCHAT_MODEL_WORKERS") else {} 91 | FSCHAT_MODEL_WORKERS = FSCHAT_MODEL_WORKERS or update_config.get("FSCHAT_MODEL_WORKERS") 92 | FSCHAT_MODEL_WORKERS = FSCHAT_MODEL_WORKERS or { 93 | "default": { 94 | "host": DEFAULT_BIND_HOST, 95 | "port": 20002, 96 | "device": LLM_DEVICE, 97 | # todo: parameters that must be configured for multi-GPU loading 98 | "gpus": None, 99 | "numgpus": 1, 100 | # The following are less commonly used parameters; configure them as needed 101 | # "max_gpu_memory": "20GiB", 102 | # "load_8bit": False, 103 | # "cpu_offloading": None, 104 | # "gptq_ckpt": None, 105 | # "gptq_wbits": 16, 106 | # "gptq_groupsize": -1, 107 | # "gptq_act_order": False, 108 | # "awq_ckpt": None, 109 | # "awq_wbits": 16, 110 | # "awq_groupsize": -1, 111 | # "model_names": [LLM_MODEL], 112 | # "conv_template": None, 113 | # "limit_worker_concurrency": 5, 114 | # "stream_interval": 2, 115 | # "no_register": False, 116 | }, 117 | 'codellama_34b': {'host': DEFAULT_BIND_HOST, 'port': 20002}, 118 | 'Baichuan2-13B-Base': {'host': DEFAULT_BIND_HOST, 'port': 20003}, 119 | 'Baichuan2-13B-Chat': {'host': DEFAULT_BIND_HOST, 'port': 20004}, 120 | 'baichuan2-7b-base': {'host': DEFAULT_BIND_HOST, 'port': 20005}, 121 | 'baichuan2-7b-chat': {'host': DEFAULT_BIND_HOST, 'port': 20006}, 122 | 'internlm-7b-base': {'host': DEFAULT_BIND_HOST, 'port': 20007}, 123 | 'internlm-chat-7b': {'host': DEFAULT_BIND_HOST, 'port': 20008}, 124 | 'chatglm2-6b': {'host': DEFAULT_BIND_HOST, 'port': 20009}, 125 | 'qwen-14b-base': {'host': DEFAULT_BIND_HOST, 'port': 20010}, 126 | 'qwen-14b-chat': {'host': DEFAULT_BIND_HOST, 'port': 20011}, 127 | 'qwen-1-8B-Chat': {'host': DEFAULT_BIND_HOST, 'port': 20012}, 128 | 'Qwen-7B': {'host': DEFAULT_BIND_HOST, 'port': 20013}, 129 | 'Qwen-7B-Chat': {'host': DEFAULT_BIND_HOST, 'port': 20014}, 130 | 'qwen-7b-base-v1.1': {'host': DEFAULT_BIND_HOST, 'port': 20015}, 131 | 'qwen-7b-chat-v1.1': {'host': DEFAULT_BIND_HOST, 'port': 20016}, 132 | 'chatglm3-6b': {'host': DEFAULT_BIND_HOST, 'port': 20017}, 133 | 'chatglm3-6b-32k': {'host': DEFAULT_BIND_HOST, 'port': 20018}, 134 | 'chatglm3-6b-base': {'host': DEFAULT_BIND_HOST, 'port': 20019}, 135 | 'Qwen-72B-Chat-Int4': {'host': DEFAULT_BIND_HOST, 'port': 20020}, 136 | 'gpt-3.5-turbo': {'host': DEFAULT_BIND_HOST, 'port': 20021}, 137 | 'example': {'host': DEFAULT_BIND_HOST, 'port': 20022}, 138 | 'openai-api': {'host': DEFAULT_BIND_HOST, 'port': 20023} 139 | } 140 | # fastchat multi model worker server 141 | FSCHAT_MULTI_MODEL_WORKERS = { 142 | # todo 143 | } 144 | 145 | # fastchat controller server 146 | FSCHAT_CONTROLLER = { 147 | "host": DEFAULT_BIND_HOST, 148 | "port": 20001, 149 | "dispatch_method": "shortest_queue", 150 | } 151 | 152 | 153 
| # Do not change anything below 154 | def fschat_controller_address() -> str: 155 | host = FSCHAT_CONTROLLER["host"] 156 | port = FSCHAT_CONTROLLER["port"] 157 | return f"http://{host}:{port}" 158 | 159 | 160 | def fschat_model_worker_address(model_name: str = LLM_MODEL) -> str: 161 | if model := FSCHAT_MODEL_WORKERS.get(model_name): 162 | host = model["host"] 163 | port = model["port"] 164 | return f"http://{host}:{port}" 165 | 166 | 167 | def fschat_openai_api_address() -> str: 168 | host = FSCHAT_OPENAI_API["host"] 169 | port = FSCHAT_OPENAI_API["port"] 170 | return f"http://{host}:{port}" 171 | 172 | 173 | def api_address() -> str: 174 | host = API_SERVER["host"] 175 | port = API_SERVER["port"] 176 | return f"http://{host}:{port}" 177 | 178 | 179 | def webui_address() -> str: 180 | host = WEBUI_SERVER["host"] 181 | port = WEBUI_SERVER["port"] 182 | return f"http://{host}:{port}" 183 | -------------------------------------------------------------------------------- /configs/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | def is_running_in_docker(): 4 | """ 5 | Check whether the current code is running inside a Docker container 6 | """ 7 | # Check whether the /.dockerenv file exists 8 | if os.path.exists('/.dockerenv'): 9 | return True 10 | 11 | # Check whether the cgroup filesystem entries contain /docker/ 12 | if os.path.exists("/proc/1/cgroup"): 13 | with open('/proc/1/cgroup', 'rt') as f: 14 | return '/docker/' in f.read() 15 | return False -------------------------------------------------------------------------------- /docker_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker build -t devopsgpt:latest . -------------------------------------------------------------------------------- /env_start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pip install -r requirements.txt 4 | # torch-gpu installation depends on your specific setup 5 | # pip install torch==2.0.1+cu118 cudatoolkit --index-url https://download.pytorch.org/whl/cu118 6 | 7 | # pip3 uninstall crypto 8 | # pip3 uninstall pycrypto 9 | # pip3 install pycryptodome -------------------------------------------------------------------------------- /examples/agent_examples/baseGroupPhase_example.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH, LLM_MODEL 9 | from configs.server_config import SANDBOX_SERVER 10 | from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS 11 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 12 | from coagent.connector.phase import BasePhase 13 | from coagent.connector.schema import Message 14 | 15 | # 16 | tools = toLangchainTools([TOOL_DICT[i] for i in TOOL_SETS if i in TOOL_DICT]) 17 | # log level: print prompt and llm predict 18 | os.environ["log_verbose"] = "2" 19 | 20 | phase_name = "baseGroupPhase" 21 | llm_config = LLMConfig( 22 | model_name=LLM_MODEL, api_key=os.environ["OPENAI_API_KEY"], 23 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 24 | ) 25 | embed_config = EmbedConfig( 26 | embed_engine="model", embed_model="text2vec-base-chinese", 27 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 28 | ) 29 | 30 | phase = BasePhase( 31 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 32 
| embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 33 | ) 34 | # round-1 35 | query_content = "确认本地是否存在employee_data.csv,并查看它有哪些列和数据类型;然后画柱状图" 36 | # query_content = "帮我确认下127.0.0.1这个服务器的在10点是否存在异常,请帮我判断一下" 37 | query = Message( 38 | role_name="human", role_type="user", tools=[], 39 | role_content=query_content, input_query=query_content, origin_query=query_content, 40 | ) 41 | # phase.pre_print(query) 42 | output_message, output_memory = phase.step(query) 43 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) -------------------------------------------------------------------------------- /examples/agent_examples/baseTaskPhase_example.py: -------------------------------------------------------------------------------- 1 | import os, sys, requests 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH 9 | from configs.server_config import SANDBOX_SERVER 10 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 11 | 12 | from coagent.connector.phase import BasePhase 13 | from coagent.connector.schema import Message 14 | 15 | # log-level,print prompt or llm predict 16 | os.environ["log_verbose"] = "2" 17 | 18 | phase_name = "baseTaskPhase" 19 | llm_config = LLMConfig( 20 | model_name="gpt-3.5-turbo", api_key=os.environ["OPENAI_API_KEY"], 21 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 22 | ) 23 | embed_config = EmbedConfig( 24 | embed_engine="model", embed_model="text2vec-base-chinese", 25 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 26 | ) 27 | phase = BasePhase( 28 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 29 | embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 30 | ) 31 | # round-1 32 | query_content = "确认本地是否存在employee_data.csv,并查看它有哪些列和数据类型;然后画柱状图" 33 | query = Message( 34 | role_name="human", role_type="user", 35 | role_content=query_content, input_query=query_content, origin_query=query_content, 36 | ) 37 | 38 | output_message, output_memory = phase.step(query) 39 | 40 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) -------------------------------------------------------------------------------- /examples/agent_examples/codeChatPhaseLocal_example.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | ''' 3 | @author: 温进 4 | @file: codeChatPhaseLocal_example.py 5 | @time: 2024/1/31 下午4:32 6 | @desc: 7 | ''' 8 | import os, sys, requests 9 | from concurrent.futures import ThreadPoolExecutor 10 | from tqdm import tqdm 11 | 12 | import requests 13 | from typing import List 14 | 15 | src_dir = os.path.join( 16 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 17 | ) 18 | sys.path.append(src_dir) 19 | 20 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH, CB_ROOT_PATH 21 | from configs.server_config import SANDBOX_SERVER 22 | from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS 23 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 24 | from coagent.connector.phase import BasePhase 25 | from coagent.connector.schema import Message, Memory 26 | from coagent.codechat.codebase_handler.codebase_handler import CodeBaseHandler 27 | 28 | 29 | 30 | # log-level,print prompt和llm 
predict 31 | os.environ["log_verbose"] = "1" 32 | 33 | llm_config = LLMConfig( 34 | model_name="gpt-3.5-turbo", model_device="cpu", api_key=os.environ["OPENAI_API_KEY"], 35 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 36 | ) 37 | embed_config = EmbedConfig( 38 | embed_engine="model", embed_model="text2vec-base-chinese", 39 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 40 | ) 41 | 42 | 43 | # delete codebase 44 | codebase_name = 'client_nebula' 45 | code_path = '/Users/bingxu/Desktop/工作/大模型/chatbot/test_code_repo/client' 46 | code_path = "D://chromeDownloads/devopschat-bot/client_v2/client" 47 | use_nh = True 48 | do_interpret = False 49 | cbh = CodeBaseHandler(codebase_name, code_path, crawl_type='dir', use_nh=use_nh, local_graph_path=CB_ROOT_PATH, 50 | llm_config=llm_config, embed_config=embed_config) 51 | cbh.delete_codebase(codebase_name=codebase_name) 52 | 53 | # initialize codebase 54 | cbh = CodeBaseHandler(codebase_name, code_path, crawl_type='dir', use_nh=use_nh, local_graph_path=CB_ROOT_PATH, 55 | llm_config=llm_config, embed_config=embed_config) 56 | cbh.import_code(do_interpret=do_interpret) 57 | 58 | 59 | 60 | # chat with codebase 61 | phase_name = "codeChatPhase" 62 | phase = BasePhase( 63 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 64 | embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 65 | ) 66 | 67 | # "remove 这个函数是做什么的" => tag-based retrieval 68 | # "有没有函数已经实现了从字符串删除指定字符串的功能,使用的话可以怎么使用,写个java代码" => description-based retrieval 69 | # "有根据我以下的需求用 java 开发一个方法:输入为字符串,将输入中的 .java 字符串给删除掉,然后返回新的字符串" => description-based retrieval 70 | 71 | ## Requires nebula running in the container; build the codebase with use_nh=True, and then cypher queries become available 72 | # round-1 73 | query_content = "代码一共有多少类" 74 | query = Message( 75 | role_name="human", role_type="user", 76 | role_content=query_content, input_query=query_content, origin_query=query_content, 77 | code_engine_name="client_1", score_threshold=1.0, top_k=3, cb_search_type="cypher" 78 | ) 79 | 80 | output_message1, _ = phase.step(query) 81 | print(output_message1) 82 | 83 | # round-2 84 | query_content = "代码库里有哪些函数,返回5个就行" 85 | query = Message( 86 | role_name="human", role_type="user", 87 | role_content=query_content, input_query=query_content, origin_query=query_content, 88 | code_engine_name="client_1", score_threshold=1.0, top_k=3, cb_search_type="cypher" 89 | ) 90 | output_message2, _ = phase.step(query) 91 | print(output_message2) 92 | 93 | 94 | # round-3 95 | query_content = "remove 这个函数是做什么的" 96 | query = Message( 97 | role_name="user", role_type="human", 98 | role_content=query_content, input_query=query_content, origin_query=query_content, 99 | code_engine_name=codebase_name, score_threshold=1.0, top_k=3, cb_search_type="tag", 100 | use_nh=False, local_graph_path=CB_ROOT_PATH 101 | ) 102 | output_message3, output_memory3 = phase.step(query) 103 | print(output_memory3.to_str_messages(return_all=True, content_key="parsed_output_list")) 104 | 105 | # 106 | # # round-4 107 | query_content = "有没有函数已经实现了从字符串删除指定字符串的功能,使用的话可以怎么使用,写个java代码" 108 | query = Message( 109 | role_name="human", role_type="user", 110 | role_content=query_content, input_query=query_content, origin_query=query_content, 111 | code_engine_name=codebase_name, score_threshold=1.0, top_k=3, cb_search_type="description", 112 | use_nh=False, local_graph_path=CB_ROOT_PATH 113 | ) 114 | output_message4, output_memory4 = phase.step(query) 115 | print(output_memory4.to_str_messages(return_all=True, content_key="parsed_output_list")) 116 | 117 | 118 | # # round-5 
query_content = "有根据我以下的需求用 java 开发一个方法:输入为字符串,将输入中的 .java 字符串给删除掉,然后返回新的字符串" 120 | query = Message( 121 | role_name="human", role_type="user", 122 | role_content=query_content, input_query=query_content, origin_query=query_content, 123 | code_engine_name=codebase_name, score_threshold=1.0, top_k=3, cb_search_type="description", 124 | use_nh=False, local_graph_path=CB_ROOT_PATH 125 | ) 126 | output_message5, output_memory5 = phase.step(query) 127 | print(output_memory5.to_str_messages(return_all=True, content_key="parsed_output_list")) 128 | -------------------------------------------------------------------------------- /examples/agent_examples/codeChatPhase_example.py: -------------------------------------------------------------------------------- 1 | import os, sys, requests 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH 9 | from configs.server_config import SANDBOX_SERVER 10 | from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS 11 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 12 | from coagent.connector.phase import BasePhase 13 | from coagent.connector.schema import Message, Memory 14 | 15 | # log level: print prompt and llm predict 16 | os.environ["log_verbose"] = "2" 17 | 18 | phase_name = "codeChatPhase" 19 | llm_config = LLMConfig( 20 | model_name="gpt-3.5-turbo", api_key=os.environ["OPENAI_API_KEY"], 21 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 22 | ) 23 | embed_config = EmbedConfig( 24 | embed_engine="model", embed_model="text2vec-base-chinese", 25 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 26 | ) 27 | 28 | phase = BasePhase( 29 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 30 | embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 31 | ) 32 | # "代码一共有多少类" => cypher-based retrieval 33 | # "代码库里有哪些函数,返回5个就行" => cypher-based retrieval 34 | # "remove 这个函数是做什么的" => tag-based retrieval 35 | # "有没有函数已经实现了从字符串删除指定字符串的功能,使用的话可以怎么使用,写个java代码" => description-based retrieval 36 | # "有根据我以下的需求用 java 开发一个方法:输入为字符串,将输入中的 .java 字符串给删除掉,然后返回新的字符串" => description-based retrieval 37 | 38 | # round-1 39 | # query_content = "代码一共有多少类" 40 | # query = Message( 41 | # role_name="human", role_type="user", 42 | # role_content=query_content, input_query=query_content, origin_query=query_content, 43 | # code_engine_name="client_1", score_threshold=1.0, top_k=3, cb_search_type="cypher" 44 | # ) 45 | # 46 | # output_message1, _ = phase.step(query) 47 | # print(output_message1) 48 | 49 | # round-2 50 | # query_content = "代码库里有哪些函数,返回5个就行" 51 | # query = Message( 52 | # role_name="human", role_type="user", 53 | # role_content=query_content, input_query=query_content, origin_query=query_content, 54 | # code_engine_name="client_1", score_threshold=1.0, top_k=3, cb_search_type="cypher" 55 | # ) 56 | # output_message2, _ = phase.step(query) 57 | # print(output_message2) 58 | 59 | # 60 | # # round-3 61 | query_content = "remove 这个函数是做什么的" 62 | query = Message( 63 | role_name="user", role_type="human", 64 | role_content=query_content, input_query=query_content, origin_query=query_content, 65 | code_engine_name="client", score_threshold=1.0, top_k=3, cb_search_type="tag" 66 | ) 67 | output_message3, _ = phase.step(query) 68 | print(output_message3) 69 | 70 | # 71 | # # round-4 72 | # query_content = "有没有函数已经实现了从字符串删除指定字符串的功能,使用的话可以怎么使用,写个java代码" 73 | # query = Message( 74 | # role_name="human", 
role_type="user", 75 | # role_content=query_content, input_query=query_content, origin_query=query_content, 76 | # code_engine_name="client_1", score_threshold=1.0, top_k=3, cb_search_type="description" 77 | # ) 78 | # output_message4, _ = phase.step(query) 79 | # print(output_message4) 80 | # 81 | # # round-5 82 | # query_content = "有根据我以下的需求用 java 开发一个方法:输入为字符串,将输入中的 .java 字符串给删除掉,然后返回新的字符串" 83 | # query = Message( 84 | # role_name="human", role_type="user", 85 | # role_content=query_content, input_query=query_content, origin_query=query_content, 86 | # code_engine_name="client_1", score_threshold=1.0, top_k=3, cb_search_type="description" 87 | # ) 88 | # output_message5, output_memory5 = phase.step(query) 89 | # print(output_message5) 90 | # 91 | # print(output_memory5.to_str_messages(return_all=True, content_key="parsed_output_list")) -------------------------------------------------------------------------------- /examples/agent_examples/codeReactPhase_example.py: -------------------------------------------------------------------------------- 1 | import os, sys, requests 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH 9 | from configs.server_config import SANDBOX_SERVER 10 | from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS 11 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 12 | from coagent.connector.phase import BasePhase 13 | from coagent.connector.schema import Message 14 | 15 | # log-level,print prompt和llm predict 16 | os.environ["log_verbose"] = "2" 17 | 18 | phase_name = "codeReactPhase" 19 | llm_config = LLMConfig( 20 | model_name="gpt-3.5-turbo",api_key=os.environ["OPENAI_API_KEY"], 21 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 22 | ) 23 | embed_config = EmbedConfig( 24 | embed_engine="model", embed_model="text2vec-base-chinese", 25 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 26 | ) 27 | phase = BasePhase( 28 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 29 | embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 30 | ) 31 | # round-1 32 | query_content = "确认本地是否存在book_data.csv,并查看它有哪些列和数据类型;然后画柱状图" 33 | query = Message( 34 | role_name="human", role_type="user", 35 | role_content=query_content, input_query=query_content, origin_query=query_content, 36 | ) 37 | 38 | output_message, output_memory = phase.step(query) 39 | 40 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) -------------------------------------------------------------------------------- /examples/agent_examples/codeToolReactPhase_example.py: -------------------------------------------------------------------------------- 1 | import os, sys, requests 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH, LLM_MODEL 9 | from configs.server_config import SANDBOX_SERVER 10 | from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS 11 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 12 | 13 | from coagent.connector.phase import BasePhase 14 | from coagent.connector.schema import Message 15 | 16 | 17 | TOOL_SETS = [ 18 | "StockName", "StockInfo", 19 | ] 20 | tools = 
toLangchainTools([TOOL_DICT[i] for i in TOOL_SETS if i in TOOL_DICT]) 21 | 22 | # log-level,print prompt和llm predict 23 | os.environ["log_verbose"] = "2" 24 | 25 | phase_name = "codeToolReactPhase" 26 | llm_config = LLMConfig( 27 | model_name="gpt-3.5-turbo-0613", api_key=os.environ["OPENAI_API_KEY"], 28 | api_base_url=os.environ["API_BASE_URL"], temperature=0.7 29 | ) 30 | embed_config = EmbedConfig( 31 | embed_engine="model", embed_model="text2vec-base-chinese", 32 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 33 | ) 34 | phase = BasePhase( 35 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 36 | embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 37 | ) 38 | 39 | query_content = "查询贵州茅台的股票代码,并查询截止到当前日期(2023年12月24日)的最近10天的每日时序数据,然后用代码画出折线图并分析" 40 | 41 | query = Message(role_name="human", role_type="user", input_query=query_content, role_content=query_content, origin_query=query_content, tools=tools) 42 | 43 | output_message, output_memory = phase.step(query) 44 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) -------------------------------------------------------------------------------- /examples/agent_examples/docChatPhase_example.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | sys.path.append(os.path.join(src_dir, "examples")) 8 | 9 | from configs.model_config import EMBEDDING_MODEL, CB_ROOT_PATH 10 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH 11 | from configs.server_config import SANDBOX_SERVER 12 | from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS 13 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 14 | from coagent.connector.phase import BasePhase 15 | from coagent.connector.schema import Message, Memory 16 | 17 | 18 | tools = toLangchainTools([TOOL_DICT[i] for i in TOOL_SETS if i in TOOL_DICT]) 19 | llm_config = LLMConfig( 20 | model_name="gpt-3.5-turbo",api_key=os.environ["OPENAI_API_KEY"], 21 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 22 | ) 23 | embed_config = EmbedConfig( 24 | embed_engine="model", embed_model="text2vec-base-chinese", 25 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 26 | ) 27 | 28 | 29 | 30 | 31 | # create your knowledge base 32 | from io import BytesIO 33 | from pathlib import Path 34 | 35 | from coagent.service.kb_api import create_kb, upload_doc 36 | from coagent.service.service_factory import get_kb_details 37 | from coagent.utils.server_utils import run_async 38 | kb_list = {x["kb_name"]: x for x in get_kb_details(KB_ROOT_PATH)} 39 | 40 | # create a knowledge base 41 | kb_name = "example_test" 42 | data = { 43 | "knowledge_base_name": kb_name, 44 | "vector_store_type": "faiss", # default 45 | "kb_root_path": KB_ROOT_PATH, 46 | "embed_model": embed_config.embed_model, 47 | "embed_engine": embed_config.embed_engine, 48 | "embed_model_path": embed_config.embed_model_path, 49 | "model_device": embed_config.model_device, 50 | } 51 | run_async(create_kb(**data)) 52 | 53 | # add doc to knowledge base 54 | file = os.path.join("D://project/gitlab/llm/external/ant_code/Codefuse-chatbot/sources/docs/langchain_text_10.jsonl") 55 | files = [file] 56 | # if embedding init failed, you can use override = True 57 | data = 
[{"override": True, "file": f, 58 | "knowledge_base_name": kb_name, "not_refresh_vs_cache": False, 59 | "kb_root_path": KB_ROOT_PATH, "embed_model": embed_config.embed_model, 60 | "embed_engine": embed_config.embed_engine, "embed_model_path": embed_config.embed_model_path, 61 | "model_device": embed_config.model_device, 62 | } 63 | for f in files] 64 | 65 | for k in data: 66 | file = Path(file).absolute().open("rb") 67 | filename = file.name 68 | 69 | from fastapi import UploadFile 70 | from tempfile import SpooledTemporaryFile 71 | 72 | temp_file = SpooledTemporaryFile(max_size=10 * 1024 * 1024) 73 | temp_file.write(file.read()) 74 | temp_file.seek(0) 75 | 76 | k.update({"file": UploadFile(file=temp_file, filename=filename),}) 77 | run_async(upload_doc(**k)) 78 | 79 | 80 | 81 | ## start to chat with knowledge base 82 | 83 | # log-level,print prompt和llm predict 84 | os.environ["log_verbose"] = "2" 85 | 86 | # set chat phase 87 | phase_name = "docChatPhase" 88 | phase = BasePhase( 89 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 90 | embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 91 | ) 92 | # round-1 93 | query_content = "langchain有哪些模块" 94 | query = Message( 95 | role_name="human", role_type="user", 96 | origin_query=query_content, 97 | doc_engine_name=kb_name, score_threshold=1.0, top_k=3 98 | ) 99 | 100 | output_message, output_memory = phase.step(query) 101 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) 102 | 103 | # round-2 104 | query_content = "提示(prompts)有什么用?" 105 | query = Message( 106 | role_name="human", role_type="user", 107 | origin_query=query_content, 108 | doc_engine_name=kb_name, score_threshold=1.0, top_k=3 109 | ) 110 | output_message, output_memory = phase.step(query) 111 | 112 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) -------------------------------------------------------------------------------- /examples/agent_examples/metagpt_phase_example.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH 9 | from configs.server_config import SANDBOX_SERVER 10 | from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS 11 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 12 | 13 | from coagent.connector.phase import BasePhase 14 | from coagent.connector.schema import Message 15 | 16 | # log-level,print prompt和llm predict 17 | os.environ["log_verbose"] = "0" 18 | 19 | phase_name = "metagpt_code_devlop" 20 | llm_config = LLMConfig( 21 | model_name="gpt-3.5-turbo", api_key=os.environ["OPENAI_API_KEY"], 22 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 23 | ) 24 | embed_config = EmbedConfig( 25 | embed_engine="model", embed_model="text2vec-base-chinese", 26 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 27 | ) 28 | phase = BasePhase( 29 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 30 | embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 31 | ) 32 | 33 | query_content = "create a snake game" 34 | query = Message(role_name="human", role_type="user", input_query=query_content, role_content=query_content, origin_query=query_content) 
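# As in the other example scripts in this repo, step() drives the phase's agent chains
# to completion for this query and returns the final Message together with a Memory
# that records every intermediate parsed output.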
35 | 36 | output_message, output_memory = phase.step(query) 37 | 38 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) -------------------------------------------------------------------------------- /examples/agent_examples/searchChatPhase_example.py: -------------------------------------------------------------------------------- 1 | import os, sys, requests 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH 9 | from configs.server_config import SANDBOX_SERVER 10 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 11 | 12 | from coagent.connector.phase import BasePhase 13 | from coagent.connector.schema import Message, Memory 14 | 15 | 16 | 17 | # log-level,print prompt和llm predict 18 | os.environ["log_verbose"] = "2" 19 | 20 | phase_name = "searchChatPhase" 21 | llm_config = LLMConfig( 22 | model_name="gpt-3.5-turbo", api_key=os.environ["OPENAI_API_KEY"], 23 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 24 | ) 25 | embed_config = EmbedConfig( 26 | embed_engine="model", embed_model="text2vec-base-chinese", 27 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 28 | ) 29 | phase = BasePhase( 30 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 31 | embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 32 | ) 33 | 34 | # round-1 35 | query_content1 = "美国当前总统是谁?" 36 | query = Message( 37 | role_name="human", role_type="user", 38 | role_content=query_content1, input_query=query_content1, origin_query=query_content1, 39 | search_engine_name="duckduckgo", score_threshold=1.0, top_k=3 40 | ) 41 | 42 | output_message, output_memory = phase.step(query) 43 | 44 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) 45 | 46 | # round-2 47 | query_content2 = "美国上一任总统是谁,两个人有什么关系没?" 
48 | query = Message( 49 | role_name="human", role_type="user", 50 | role_content=query_content2, input_query=query_content2, origin_query=query_content2, 51 | search_engine_name="duckduckgo", score_threshold=1.0, top_k=3 52 | ) 53 | output_message, output_memory = phase.step(query) 54 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) -------------------------------------------------------------------------------- /examples/agent_examples/toolReactPhase_example.py: -------------------------------------------------------------------------------- 1 | import os, sys, requests 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH 9 | from configs.server_config import SANDBOX_SERVER 10 | from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS 11 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 12 | 13 | from coagent.connector.phase import BasePhase 14 | from coagent.connector.schema import Message 15 | 16 | # log-level,print prompt和llm predict 17 | os.environ["log_verbose"] = "2" 18 | 19 | phase_name = "toolReactPhase" 20 | llm_config = LLMConfig( 21 | model_name="gpt-3.5-turbo",api_key=os.environ["OPENAI_API_KEY"], 22 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 23 | ) 24 | embed_config = EmbedConfig( 25 | embed_engine="model", embed_model="text2vec-base-chinese", 26 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 27 | ) 28 | phase = BasePhase( 29 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 30 | embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 31 | ) 32 | 33 | 34 | # round-1 35 | tools = toLangchainTools([TOOL_DICT[i] for i in TOOL_SETS if i in TOOL_DICT]) 36 | query_content = "帮我确认下127.0.0.1这个服务器的在10点是否存在异常,请帮我判断一下" 37 | query = Message( 38 | role_name="human", role_type="user", tools=tools, 39 | role_content=query_content, input_query=query_content, origin_query=query_content 40 | ) 41 | 42 | phase.pre_print(query) 43 | # output_message, output_memory = phase.step(query) 44 | 45 | # print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) -------------------------------------------------------------------------------- /examples/auto_examples/agentchat_RetrievalChat.py: -------------------------------------------------------------------------------- 1 | # more use cases see ~/examples/agent_examples/docChatPhase_example.py 2 | -------------------------------------------------------------------------------- /examples/auto_examples/agentchat_function_call.py: -------------------------------------------------------------------------------- 1 | import os, sys, requests 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | # from configs.model_config import * 9 | from coagent.connector.phase import BasePhase 10 | from coagent.connector.agents import BaseAgent 11 | from coagent.connector.chains import BaseChain 12 | from coagent.connector.schema import ( 13 | Message, Memory, load_role_configs, load_phase_configs, load_chain_configs 14 | ) 15 | from coagent.connector.configs import AGETN_CONFIGS, CHAIN_CONFIGS, PHASE_CONFIGS 16 | from coagent.connector.utils import parse_section 17 | import importlib 18 | 19 | 20 | # update 
new agent configs 21 | # tool learning 实现参考 ~/examples/agent_examples/toolReactPhase_example.py -------------------------------------------------------------------------------- /examples/auto_examples/agentchat_teachability.py: -------------------------------------------------------------------------------- 1 | # 暂未实现memory management相关操作 -------------------------------------------------------------------------------- /examples/auto_examples/agentchat_teaching.py: -------------------------------------------------------------------------------- 1 | import os, sys, requests 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | # from configs.model_config import * 9 | from coagent.connector.phase import BasePhase 10 | from coagent.connector.agents import BaseAgent 11 | from coagent.connector.chains import BaseChain 12 | from coagent.connector.schema import ( 13 | Message, Memory, load_role_configs, load_phase_configs, load_chain_configs 14 | ) 15 | from coagent.connector.configs import AGETN_CONFIGS, CHAIN_CONFIGS, PHASE_CONFIGS 16 | from coagent.connector.utils import parse_section 17 | import importlib 18 | 19 | 20 | # update new agent configs 21 | auto_feedback_from_code_execution_PROMPT = """#### Code React Assistance Guidance 22 | 23 | You are a helpful AI assistant. Solve tasks using your coding and language skills. 24 | In the following cases, suggest python code (in a python coding block) or shell script (in a sh coding block) for the user to execute. 25 | 1. When you need to collect info, use the code to output the info you need, for example, browse or search the web, download/read a file, print the content of a webpage or a file, get the current date/time, check the operating system. After sufficient info is printed and the task is ready to be solved based on your language skill, you can solve the task by yourself. 26 | 2. When you need to perform some task with code, use the code to perform the task and output the result. Finish the task smartly. 27 | Solve the task step by step if you need to. If a plan is not provided, explain your plan first. Be clear which step uses code, and which step uses your language skill. 28 | When using code, you must indicate the script type in the code block. The user cannot provide any other feedback or perform any other action beyond executing the code you suggest. The user can't modify your code. So do not suggest incomplete code which requires users to modify. Don't use a code block if it's not intended to be executed by the user. 29 | If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try. 30 | When you find an answer, verify the answer carefully. Include verifiable evidence in your response if possible. 31 | Reply "stopped" in the end when everything is done. 32 | 33 | #### Response Process 34 | 35 | **Question:** First, clarify the problem to be solved. 36 | 37 | **Thoughts:** Based on the question and observations above, provide the plan for executing this step. 38 | 39 | **Action Status:** Set to 'stopped' or 'code_executing'. If it's 'stopped', the action is to provide the final answer to the original question. 
If it's 'code_executing', the action is to write the code. 40 | 41 | **Action:** 42 | ```python 43 | # Write your code here 44 | import os 45 | ... 46 | ``` 47 | 48 | **Observation:** Check the results and effects of the executed code. 49 | 50 | ... (Repeat this Thoughts/Action/Observation cycle as needed) 51 | 52 | **Thoughts:** I now know the final answer 53 | 54 | **Action Status:** stopped 55 | 56 | **Action:** The final answer to the original input question 57 | 58 | 59 | """ 60 | 61 | 62 | AGETN_CONFIGS.update({ 63 | "auto_feedback_from_code_execution": { 64 | "role": { 65 | "role_prompt": auto_feedback_from_code_execution_PROMPT, 66 | "role_type": "assistant", 67 | "role_name": "auto_feedback_from_code_execution", 68 | "role_desc": "", 69 | "agent_type": "ReactAgent" 70 | # "agent_type": "BaseAgent" 71 | }, 72 | "chat_turn": 5, 73 | "stop": "\n**Observation:**", 74 | "focus_agents": [], 75 | "focus_message_keys": [], 76 | }, 77 | }) 78 | # update new chain configs 79 | CHAIN_CONFIGS.update({ 80 | "auto_feedback_from_code_executionChain": { 81 | "chain_name": "auto_feedback_from_code_executionChain", 82 | "chain_type": "BaseChain", 83 | "agents": ["auto_feedback_from_code_execution"], 84 | "chat_turn": 1, 85 | "do_checker": False, 86 | "chain_prompt": "" 87 | } 88 | }) 89 | 90 | # update phase configs 91 | PHASE_CONFIGS.update({ 92 | "auto_feedback_from_code_executionPhase": { 93 | "phase_name": "auto_feedback_from_code_executionPhase", 94 | "phase_type": "BasePhase", 95 | "chains": ["auto_feedback_from_code_executionChain"], 96 | "do_summary": False, 97 | "do_search": False, 98 | "do_doc_retrieval": False, 99 | "do_code_retrieval": False, 100 | "do_tool_retrieval": False, 101 | "do_using_tool": False 102 | }, 103 | }) 104 | 105 | 106 | 107 | 108 | role_configs = load_role_configs(AGETN_CONFIGS) 109 | chain_configs = load_chain_configs(CHAIN_CONFIGS) 110 | phase_configs = load_phase_configs(PHASE_CONFIGS) 111 | 112 | agent_module = importlib.import_module("coagent.connector.agents") 113 | 114 | # 115 | phase_name = "auto_feedback_from_code_executionPhase" 116 | phase = BasePhase(phase_name, task = None, 117 | base_phase_config= PHASE_CONFIGS, 118 | base_chain_config= CHAIN_CONFIGS, 119 | base_role_config= AGETN_CONFIGS, 120 | ) 121 | 122 | # round-1 123 | query_content = """ 124 | Find arxiv papers that show how are people studying trust calibration in AI based systems 125 | """ 126 | query = Message( 127 | role_name="human", role_type="user", 128 | role_content=query_content, input_query=query_content, origin_query=query_content, 129 | code_engine_name="client", score_threshold=1.0, top_k=3, cb_search_type="cypher" 130 | ) 131 | 132 | output_message1, _ = phase.step(query) 133 | 134 | 135 | # 重复auto_gen 其余task即可 136 | task2 = "analyze the above the results to list the application domains studied by these papers " 137 | 138 | task3 = """Use this data to generate a bar chart of domains and number of papers in that domain and save to a file 139 | """ 140 | 141 | task4 = """Reflect on the sequence and create a recipe containing all the steps 142 | necessary and name for it. Suggest well-documented, generalized python function(s) 143 | to perform similar tasks for coding steps in future. Make sure coding steps and 144 | non-coding steps are never mixed in one function. In the docstr of the function(s), 145 | clarify what non-coding steps are needed to use the language skill of the assistant. 
146 | """ -------------------------------------------------------------------------------- /examples/auto_examples/agentchat_web_info.py: -------------------------------------------------------------------------------- 1 | import os, sys, requests 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | # from configs.model_config import * 9 | from coagent.connector.phase import BasePhase 10 | from coagent.connector.agents import BaseAgent 11 | from coagent.connector.chains import BaseChain 12 | from coagent.connector.schema import ( 13 | Message, Memory, load_role_configs, load_phase_configs, load_chain_configs 14 | ) 15 | from coagent.connector.configs import AGETN_CONFIGS, CHAIN_CONFIGS, PHASE_CONFIGS 16 | from coagent.connector.utils import parse_section 17 | import importlib 18 | 19 | 20 | # update new agent configs 21 | auto_feedback_from_code_execution_PROMPT = """#### Code React Assistance Guidance 22 | 23 | You are a helpful AI assistant. Solve tasks using your coding and language skills. 24 | In the following cases, suggest python code (in a python coding block) or shell script (in a sh coding block) for the user to execute. 25 | 1. When you need to collect info, use the code to output the info you need, for example, browse or search the web, download/read a file, print the content of a webpage or a file, get the current date/time, check the operating system. After sufficient info is printed and the task is ready to be solved based on your language skill, you can solve the task by yourself. 26 | 2. When you need to perform some task with code, use the code to perform the task and output the result. Finish the task smartly. 27 | Solve the task step by step if you need to. If a plan is not provided, explain your plan first. Be clear which step uses code, and which step uses your language skill. 28 | When using code, you must indicate the script type in the code block. The user cannot provide any other feedback or perform any other action beyond executing the code you suggest. The user can't modify your code. So do not suggest incomplete code which requires users to modify. Don't use a code block if it's not intended to be executed by the user. 29 | If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try. 30 | When you find an answer, verify the answer carefully. Include verifiable evidence in your response if possible. 31 | Reply "stopped" in the end when everything is done. 32 | 33 | #### Response Process 34 | 35 | **Question:** First, clarify the problem to be solved. 36 | 37 | **Thoughts:** Based on the question and observations above, provide the plan for executing this step. 38 | 39 | **Action Status:** Set to 'stopped' or 'code_executing'. If it's 'stopped', the action is to provide the final answer to the original question. If it's 'code_executing', the action is to write the code. 40 | 41 | **Action:** 42 | ```python 43 | # Write your code here 44 | import os 45 | ... 46 | ``` 47 | 48 | **Observation:** Check the results and effects of the executed code. 49 | 50 | ... 
(Repeat this Thoughts/Action/Observation cycle as needed) 51 | 52 | **Thoughts:** I now know the final answer 53 | 54 | **Action Status:** stopped 55 | 56 | **Action:** The final answer to the original input question 57 | 58 | 59 | """ 60 | 61 | 62 | AGETN_CONFIGS.update({ 63 | "auto_feedback_from_code_execution": { 64 | "role": { 65 | "role_prompt": auto_feedback_from_code_execution_PROMPT, 66 | "role_type": "assistant", 67 | "role_name": "auto_feedback_from_code_execution", 68 | "role_desc": "", 69 | "agent_type": "ReactAgent" 70 | # "agent_type": "BaseAgent" 71 | }, 72 | "chat_turn": 5, 73 | "stop": "\n**Observation:**", 74 | "focus_agents": [], 75 | "focus_message_keys": [], 76 | }, 77 | }) 78 | # update new chain configs 79 | CHAIN_CONFIGS.update({ 80 | "auto_feedback_from_code_executionChain": { 81 | "chain_name": "auto_feedback_from_code_executionChain", 82 | "chain_type": "BaseChain", 83 | "agents": ["auto_feedback_from_code_execution"], 84 | "chat_turn": 1, 85 | "do_checker": False, 86 | "chain_prompt": "" 87 | } 88 | }) 89 | 90 | # update phase configs 91 | PHASE_CONFIGS.update({ 92 | "auto_feedback_from_code_executionPhase": { 93 | "phase_name": "auto_feedback_from_code_executionPhase", 94 | "phase_type": "BasePhase", 95 | "chains": ["auto_feedback_from_code_executionChain"], 96 | "do_summary": False, 97 | "do_search": False, 98 | "do_doc_retrieval": False, 99 | "do_code_retrieval": False, 100 | "do_tool_retrieval": False, 101 | "do_using_tool": False 102 | }, 103 | }) 104 | 105 | 106 | 107 | 108 | role_configs = load_role_configs(AGETN_CONFIGS) 109 | chain_configs = load_chain_configs(CHAIN_CONFIGS) 110 | phase_configs = load_phase_configs(PHASE_CONFIGS) 111 | 112 | agent_module = importlib.import_module("coagent.connector.agents") 113 | 114 | # 115 | phase_name = "auto_feedback_from_code_executionPhase" 116 | phase = BasePhase(phase_name, 117 | task = None, 118 | base_phase_config = PHASE_CONFIGS, 119 | base_chain_config = CHAIN_CONFIGS, 120 | base_role_config = AGETN_CONFIGS, 121 | ) 122 | 123 | # # round-1 124 | # query_content = """Reply TERMINATE if the task has been solved at full satisfaction. 125 | # Otherwise, reply CONTINUE, or the reason why the task is not solved yet.""" 126 | # query = Message( 127 | # role_name="human", role_type="user", 128 | # role_content=query_content, input_query=query_content, origin_query=query_content, 129 | # code_engine_name="client", score_threshold=1.0, top_k=3, cb_search_type="cypher" 130 | # ) 131 | 132 | # output_message1, _ = phase.step(query) 133 | 134 | # round-2 135 | # query_content = """Show me the YTD gain of 10 largest technology companies as of today.""" 136 | # query = Message( 137 | # role_name="human", role_type="user", 138 | # role_content=query_content, input_query=query_content, origin_query=query_content, 139 | # code_engine_name="client", score_threshold=1.0, top_k=3, cb_search_type="cypher" 140 | # ) 141 | 142 | # output_message1, _ = phase.step(query) 143 | 144 | -------------------------------------------------------------------------------- /examples/gptq.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | import os 3 | from os.path import isdir, isfile 4 | from pathlib import Path 5 | import sys 6 | 7 | from transformers import AutoTokenizer 8 | 9 | 10 | @dataclass 11 | class GptqConfig: 12 | ckpt: str = field( 13 | default=None, 14 | metadata={ 15 | "help": "Load quantized model. The path to the local GPTQ checkpoint." 
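# [Illustrative note, not part of the original file] Combined with the fields
# defined just below, a hypothetical 4-bit GPTQ setup would be built roughly as:
#
#   config = GptqConfig(ckpt="/models/llama-13b-gptq", wbits=4, groupsize=128, act_order=True)
#
# `wbits` is the quantization bit-width, `groupsize` is how many weights share one
# set of quantization parameters (-1 = a full row), and `act_order` toggles the
# activation-order heuristic from GPTQ.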
16 | }, 17 | ) 18 | wbits: int = field(default=16, metadata={"help": "#bits to use for quantization"}) 19 | groupsize: int = field( 20 | default=-1, 21 | metadata={"help": "Groupsize to use for quantization; default uses full row."}, 22 | ) 23 | act_order: bool = field( 24 | default=True, 25 | metadata={"help": "Whether to apply the activation order GPTQ heuristic"}, 26 | ) 27 | 28 | 29 | def load_quant_by_autogptq(model): 30 | # qwen-72b-int4 use these code 31 | from modelscope import AutoTokenizer, AutoModelForCausalLM 32 | # Note: The default behavior now has injection attack prevention off. 33 | tokenizer = AutoTokenizer.from_pretrained(model, revision='master', trust_remote_code=True) 34 | model = AutoModelForCausalLM.from_pretrained( 35 | model, device_map="auto", 36 | trust_remote_code=True 37 | ).eval() 38 | return model, tokenizer 39 | # codellama-34b-int4 use these code 40 | # from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig 41 | # tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, trust_remote_code=True) 42 | # model = AutoGPTQForCausalLM.from_quantized(model, inject_fused_attention=False,trust_remote_code=True, 43 | # inject_fused_mlp=False,use_cuda_fp16=True,disable_exllama=False,device_map='auto') 44 | # return model, tokenizer 45 | 46 | def load_gptq_quantized(model_name, gptq_config: GptqConfig): 47 | print("Loading GPTQ quantized model...") 48 | model, tokenizer = load_quant_by_autogptq(model_name) 49 | return model, tokenizer 50 | 51 | 52 | # def load_gptq_quantized(model_name, gptq_config: GptqConfig): 53 | # print("Loading GPTQ quantized model...") 54 | 55 | # try: 56 | # script_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 57 | # module_path = os.path.join(script_path, "repositories/GPTQ-for-LLaMa") 58 | 59 | # sys.path.insert(0, module_path) 60 | # from llama import load_quant 61 | # except ImportError as e: 62 | # print(f"Error: Failed to load GPTQ-for-LLaMa. 
{e}") 63 | # print("See https://github.com/lm-sys/FastChat/blob/main/docs/gptq.md") 64 | # sys.exit(-1) 65 | 66 | # tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False) 67 | # # only `fastest-inference-4bit` branch cares about `act_order` 68 | # if gptq_config.act_order: 69 | # model = load_quant( 70 | # model_name, 71 | # find_gptq_ckpt(gptq_config), 72 | # gptq_config.wbits, 73 | # gptq_config.groupsize, 74 | # act_order=gptq_config.act_order, 75 | # ) 76 | # else: 77 | # # other branches 78 | # model = load_quant( 79 | # model_name, 80 | # find_gptq_ckpt(gptq_config), 81 | # gptq_config.wbits, 82 | # gptq_config.groupsize, 83 | # ) 84 | 85 | # return model, tokenizer 86 | 87 | 88 | def find_gptq_ckpt(gptq_config: GptqConfig): 89 | if Path(gptq_config.ckpt).is_file(): 90 | return gptq_config.ckpt 91 | 92 | # for ext in ["*.pt", "*.safetensors",]: 93 | for ext in ["*.pt", "*.bin",]: 94 | matched_result = sorted(Path(gptq_config.ckpt).glob(ext)) 95 | if len(matched_result) > 0: 96 | return str(matched_result[-1]) 97 | 98 | print("Error: gptq checkpoint not found") 99 | sys.exit(1) 100 | -------------------------------------------------------------------------------- /examples/model_workers/SparkApi.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import datetime 3 | import hashlib 4 | import hmac 5 | from urllib.parse import urlparse 6 | from datetime import datetime 7 | from time import mktime 8 | from urllib.parse import urlencode 9 | from wsgiref.handlers import format_date_time 10 | 11 | 12 | class Ws_Param(object): 13 | # 初始化 14 | def __init__(self, APPID, APIKey, APISecret, Spark_url): 15 | self.APPID = APPID 16 | self.APIKey = APIKey 17 | self.APISecret = APISecret 18 | self.host = urlparse(Spark_url).netloc 19 | self.path = urlparse(Spark_url).path 20 | self.Spark_url = Spark_url 21 | 22 | # 生成url 23 | def create_url(self): 24 | # 生成RFC1123格式的时间戳 25 | now = datetime.now() 26 | date = format_date_time(mktime(now.timetuple())) 27 | 28 | # 拼接字符串 29 | signature_origin = "host: " + self.host + "\n" 30 | signature_origin += "date: " + date + "\n" 31 | signature_origin += "GET " + self.path + " HTTP/1.1" 32 | 33 | # 进行hmac-sha256进行加密 34 | signature_sha = hmac.new(self.APISecret.encode('utf-8'), signature_origin.encode('utf-8'), 35 | digestmod=hashlib.sha256).digest() 36 | 37 | signature_sha_base64 = base64.b64encode(signature_sha).decode(encoding='utf-8') 38 | 39 | authorization_origin = f'api_key="{self.APIKey}", algorithm="hmac-sha256", headers="host date request-line", signature="{signature_sha_base64}"' 40 | 41 | authorization = base64.b64encode(authorization_origin.encode('utf-8')).decode(encoding='utf-8') 42 | 43 | # 将请求的鉴权参数组合为字典 44 | v = { 45 | "authorization": authorization, 46 | "date": date, 47 | "host": self.host 48 | } 49 | # 拼接鉴权参数,生成url 50 | url = self.Spark_url + '?' 
+ urlencode(v) 51 | # 此处打印出建立连接时候的url,参考本demo的时候可取消上方打印的注释,比对相同参数时生成的url与自己代码生成的url是否一致 52 | return url 53 | 54 | 55 | def gen_params(appid, domain, question, temperature, max_token): 56 | """ 57 | 通过appid和用户的提问来生成请参数 58 | """ 59 | data = { 60 | "header": { 61 | "app_id": appid, 62 | "uid": "1234" 63 | }, 64 | "parameter": { 65 | "chat": { 66 | "domain": domain, 67 | "random_threshold": 0.5, 68 | "max_tokens": max_token, 69 | "auditing": "default", 70 | "temperature": temperature, 71 | } 72 | }, 73 | "payload": { 74 | "message": { 75 | "text": question 76 | } 77 | } 78 | } 79 | return data 80 | -------------------------------------------------------------------------------- /examples/model_workers/__init__.py: -------------------------------------------------------------------------------- 1 | ############################# Attention ######################## 2 | 3 | # The Code in model workers all copied from 4 | # https://github.com/chatchat-space/Langchain-Chatchat/blob/master/server/model_workers 5 | 6 | ################################################################# 7 | 8 | from .base import * 9 | from .zhipu import ChatGLMWorker 10 | from .minimax import MiniMaxWorker 11 | from .xinghuo import XingHuoWorker 12 | from .qianfan import QianFanWorker 13 | from .fangzhou import FangZhouWorker 14 | from .qwen import QwenWorker 15 | from .baichuan import BaiChuanWorker 16 | from .azure import AzureWorker 17 | from .tiangong import TianGongWorker 18 | from .openai import ExampleWorker 19 | 20 | 21 | IMPORT_MODEL_WORKERS = [ 22 | ChatGLMWorker, MiniMaxWorker, XingHuoWorker, QianFanWorker, FangZhouWorker, 23 | QwenWorker, BaiChuanWorker, AzureWorker, TianGongWorker, ExampleWorker 24 | ] 25 | 26 | MODEL_WORKER_SETS = [tool.__name__ for tool in IMPORT_MODEL_WORKERS] 27 | 28 | -------------------------------------------------------------------------------- /examples/model_workers/azure.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from fastchat.conversation import Conversation 3 | from .base import * 4 | # from server.utils import get_httpx_client 5 | from fastchat import conversation as conv 6 | import json, os 7 | from typing import List, Dict 8 | from loguru import logger 9 | # from configs import logger, log_verbose 10 | log_verbose = os.environ.get("log_verbose", False) 11 | 12 | 13 | class AzureWorker(ApiModelWorker): 14 | def __init__( 15 | self, 16 | *, 17 | controller_addr: str = None, 18 | worker_addr: str = None, 19 | model_names: List[str] = ["azure-api"], 20 | version: str = "gpt-35-turbo", 21 | **kwargs, 22 | ): 23 | kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr) 24 | kwargs.setdefault("context_len", 8000) #TODO 16K模型需要改成16384 25 | super().__init__(**kwargs) 26 | self.version = version 27 | 28 | def do_chat(self, params: ApiChatParams) -> Dict: 29 | params.load_config(self.model_names[0]) 30 | data = dict( 31 | messages=params.messages, 32 | temperature=params.temperature, 33 | max_tokens=params.max_tokens, 34 | stream=True, 35 | ) 36 | url = ("https://{}.openai.azure.com/openai/deployments/{}/chat/completions?api-version={}" 37 | .format(params.resource_name, params.deployment_name, params.api_version)) 38 | headers = { 39 | 'Content-Type': 'application/json', 40 | 'Accept': 'application/json', 41 | 'api-key': params.api_key, 42 | } 43 | 44 | text = "" 45 | if log_verbose: 46 | logger.info(f'{self.__class__.__name__}:url: {url}') 47 | 
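# [Illustrative note, not part of the original file] With hypothetical values
# resource_name="my-resource", deployment_name="gpt35" and api_version="2023-05-15",
# the URL assembled above expands to:
#   https://my-resource.openai.azure.com/openai/deployments/gpt35/chat/completions?api-version=2023-05-15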
logger.info(f'{self.__class__.__name__}:headers: {headers}') 48 | logger.info(f'{self.__class__.__name__}:data: {data}') 49 | 50 | with get_httpx_client() as client: 51 | with client.stream("POST", url, headers=headers, json=data) as response: 52 | for line in response.iter_lines(): 53 | if not line.strip() or "[DONE]" in line: 54 | continue 55 | if line.startswith("data: "): 56 | line = line[6:] 57 | resp = json.loads(line) 58 | if choices := resp["choices"]: 59 | if chunk := choices[0].get("delta", {}).get("content"): 60 | text += chunk 61 | yield { 62 | "error_code": 0, 63 | "text": text 64 | } 65 | else: 66 | self.logger.error(f"请求 Azure API 时发生错误:{resp}") 67 | 68 | def get_embeddings(self, params): 69 | # TODO: 支持embeddings 70 | print("embedding") 71 | print(params) 72 | 73 | def make_conv_template(self, conv_template: str = None, model_path: str = None) -> Conversation: 74 | # TODO: 确认模板是否需要修改 75 | return conv.Conversation( 76 | name=self.model_names[0], 77 | system_message="You are a helpful, respectful and honest assistant.", 78 | messages=[], 79 | roles=["user", "assistant"], 80 | sep="\n### ", 81 | stop_str="###", 82 | ) 83 | 84 | 85 | if __name__ == "__main__": 86 | import uvicorn 87 | from server.utils import MakeFastAPIOffline 88 | from fastchat.serve.base_model_worker import app 89 | 90 | worker = AzureWorker( 91 | controller_addr="http://127.0.0.1:20001", 92 | worker_addr="http://127.0.0.1:21008", 93 | ) 94 | sys.modules["fastchat.serve.model_worker"].worker = worker 95 | MakeFastAPIOffline(app) 96 | uvicorn.run(app, port=21008) 97 | -------------------------------------------------------------------------------- /examples/model_workers/baichuan.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | import hashlib 4 | 5 | from fastchat.conversation import Conversation 6 | from .base import * 7 | # from server.utils import get_httpx_client 8 | from fastchat import conversation as conv 9 | import sys, os 10 | import json 11 | from typing import List, Literal, Dict 12 | from loguru import logger 13 | # from configs import logger, log_verbose 14 | log_verbose = os.environ.get("log_verbose", False) 15 | 16 | def calculate_md5(input_string): 17 | md5 = hashlib.md5() 18 | md5.update(input_string.encode('utf-8')) 19 | encrypted = md5.hexdigest() 20 | return encrypted 21 | 22 | 23 | class BaiChuanWorker(ApiModelWorker): 24 | def __init__( 25 | self, 26 | *, 27 | controller_addr: str = None, 28 | worker_addr: str = None, 29 | model_names: List[str] = ["baichuan-api"], 30 | version: Literal["Baichuan2-53B"] = "Baichuan2-53B", 31 | **kwargs, 32 | ): 33 | kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr) 34 | kwargs.setdefault("context_len", 32768) 35 | super().__init__(**kwargs) 36 | self.version = version 37 | 38 | def do_chat(self, params: ApiChatParams) -> Dict: 39 | params.load_config(self.model_names[0]) 40 | 41 | url = "https://api.baichuan-ai.com/v1/stream/chat" 42 | data = { 43 | "model": params.version, 44 | "messages": params.messages, 45 | "parameters": {"temperature": params.temperature} 46 | } 47 | 48 | json_data = json.dumps(data) 49 | time_stamp = int(time.time()) 50 | signature = calculate_md5(params.secret_key + json_data + str(time_stamp)) 51 | headers = { 52 | "Content-Type": "application/json", 53 | "Authorization": "Bearer " + params.api_key, 54 | "X-BC-Request-Id": "your requestId", 55 | "X-BC-Timestamp": str(time_stamp), 56 | "X-BC-Signature": 
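# [Illustrative note, not part of the original file] Per calculate_md5 above, the
# signature sent here is MD5(secret_key + request_body + timestamp). For a
# hypothetical secret "sk", body '{"model": "Baichuan2-53B"}' and timestamp
# 1700000000, the header value would be:
#   calculate_md5("sk" + '{"model": "Baichuan2-53B"}' + "1700000000")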
signature, 57 | "X-BC-Sign-Algo": "MD5", 58 | } 59 | 60 | text = "" 61 | if log_verbose: 62 | logger.info(f'{self.__class__.__name__}:json_data: {json_data}') 63 | logger.info(f'{self.__class__.__name__}:url: {url}') 64 | logger.info(f'{self.__class__.__name__}:headers: {headers}') 65 | 66 | with get_httpx_client() as client: 67 | with client.stream("POST", url, headers=headers, json=data) as response: 68 | for line in response.iter_lines(): 69 | if not line.strip(): 70 | continue 71 | resp = json.loads(line) 72 | if resp["code"] == 0: 73 | text += resp["data"]["messages"][-1]["content"] 74 | yield { 75 | "error_code": resp["code"], 76 | "text": text 77 | } 78 | else: 79 | data = { 80 | "error_code": resp["code"], 81 | "text": resp["msg"], 82 | "error": { 83 | "message": resp["msg"], 84 | "type": "invalid_request_error", 85 | "param": None, 86 | "code": None, 87 | } 88 | } 89 | self.logger.error(f"请求百川 API 时发生错误:{data}") 90 | yield data 91 | 92 | def get_embeddings(self, params): 93 | # TODO: 支持embeddings 94 | print("embedding") 95 | print(params) 96 | 97 | def make_conv_template(self, conv_template: str = None, model_path: str = None) -> Conversation: 98 | # TODO: 确认模板是否需要修改 99 | return conv.Conversation( 100 | name=self.model_names[0], 101 | system_message="", 102 | messages=[], 103 | roles=["user", "assistant"], 104 | sep="\n### ", 105 | stop_str="###", 106 | ) 107 | 108 | 109 | if __name__ == "__main__": 110 | import uvicorn 111 | from server.utils import MakeFastAPIOffline 112 | from fastchat.serve.model_worker import app 113 | 114 | worker = BaiChuanWorker( 115 | controller_addr="http://127.0.0.1:20001", 116 | worker_addr="http://127.0.0.1:21007", 117 | ) 118 | sys.modules["fastchat.serve.model_worker"].worker = worker 119 | MakeFastAPIOffline(app) 120 | uvicorn.run(app, port=21007) 121 | # do_request() 122 | -------------------------------------------------------------------------------- /examples/model_workers/fangzhou.py: -------------------------------------------------------------------------------- 1 | from fastchat.conversation import Conversation 2 | from .base import * 3 | from fastchat import conversation as conv 4 | import sys, os 5 | from typing import List, Literal, Dict 6 | from loguru import logger 7 | # from configs import logger, log_verbose 8 | log_verbose = os.environ.get("log_verbose", False) 9 | 10 | 11 | class FangZhouWorker(ApiModelWorker): 12 | """ 13 | 火山方舟 14 | """ 15 | 16 | def __init__( 17 | self, 18 | *, 19 | model_names: List[str] = ["fangzhou-api"], 20 | controller_addr: str = None, 21 | worker_addr: str = None, 22 | version: Literal["chatglm-6b-model"] = "chatglm-6b-model", 23 | **kwargs, 24 | ): 25 | kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr) 26 | kwargs.setdefault("context_len", 16384) # TODO: 不同的模型有不同的大小 27 | super().__init__(**kwargs) 28 | self.version = version 29 | 30 | def do_chat(self, params: ApiChatParams) -> Dict: 31 | from volcengine.maas import MaasService 32 | 33 | params.load_config(self.model_names[0]) 34 | maas = MaasService('maas-api.ml-platform-cn-beijing.volces.com', 'cn-beijing') 35 | maas.set_ak(params.api_key) 36 | maas.set_sk(params.secret_key) 37 | 38 | # document: "https://www.volcengine.com/docs/82379/1099475" 39 | req = { 40 | "model": { 41 | "name": params.version, 42 | }, 43 | "parameters": { 44 | # 这里的参数仅为示例,具体可用的参数请参考具体模型的 API 说明 45 | "max_new_tokens": params.max_tokens, 46 | "temperature": params.temperature, 47 | }, 48 | "messages": params.messages, 49 | } 50 | 51 | 
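# [Illustrative note, not part of the original file] The loop below consumes
# maas.stream_chat(req), which yields incremental responses. The same pattern,
# reduced to a minimal sketch (assuming the volcengine SDK objects used below):
#
#   for resp in maas.stream_chat(req):
#       if not resp.error.code_n and resp.choice.message.content:
#           print(resp.choice.message.content, end="")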
text = "" 52 | if log_verbose: 53 | self.logger.info(f'{self.__class__.__name__}:maas: {maas}') 54 | for resp in maas.stream_chat(req): 55 | if error := resp.error: 56 | if error.code_n > 0: 57 | data = { 58 | "error_code": error.code_n, 59 | "text": error.message, 60 | "error": { 61 | "message": error.message, 62 | "type": "invalid_request_error", 63 | "param": None, 64 | "code": None, 65 | } 66 | } 67 | self.logger.error(f"请求方舟 API 时发生错误:{data}") 68 | yield data 69 | elif chunk := resp.choice.message.content: 70 | text += chunk 71 | yield {"error_code": 0, "text": text} 72 | else: 73 | data = { 74 | "error_code": 500, 75 | "text": f"请求方舟 API 时发生未知的错误: {resp}" 76 | } 77 | self.logger.error(data) 78 | yield data 79 | break 80 | 81 | def get_embeddings(self, params): 82 | # TODO: 支持embeddings 83 | print("embedding") 84 | print(params) 85 | 86 | def make_conv_template(self, conv_template: str = None, model_path: str = None) -> Conversation: 87 | return conv.Conversation( 88 | name=self.model_names[0], 89 | system_message="你是一个聪明、对人类有帮助的人工智能,你可以对人类提出的问题给出有用、详细、礼貌的回答。", 90 | messages=[], 91 | roles=["user", "assistant", "system"], 92 | sep="\n### ", 93 | stop_str="###", 94 | ) 95 | 96 | 97 | if __name__ == "__main__": 98 | import uvicorn 99 | from server.utils import MakeFastAPIOffline 100 | from fastchat.serve.model_worker import app 101 | 102 | worker = FangZhouWorker( 103 | controller_addr="http://127.0.0.1:20001", 104 | worker_addr="http://127.0.0.1:21005", 105 | ) 106 | sys.modules["fastchat.serve.model_worker"].worker = worker 107 | MakeFastAPIOffline(app) 108 | uvicorn.run(app, port=21005) 109 | -------------------------------------------------------------------------------- /examples/model_workers/openai.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | from fastchat.conversation import Conversation 3 | from .base import * 4 | from fastchat import conversation as conv 5 | import json 6 | from typing import List, Dict 7 | from loguru import logger 8 | # from configs import logger, log_verbose 9 | log_verbose = os.environ.get("log_verbose", False) 10 | import openai 11 | 12 | from langchain import PromptTemplate, LLMChain 13 | from langchain.prompts.chat import ChatPromptTemplate 14 | from langchain.chat_models import ChatOpenAI 15 | from langchain.schema import HumanMessage 16 | 17 | 18 | class ExampleWorker(ApiModelWorker): 19 | def __init__( 20 | self, 21 | *, 22 | controller_addr: str = None, 23 | worker_addr: str = None, 24 | model_names: List[str] = ["gpt-3.5-turbo"], 25 | version: str = "gpt-3.5", 26 | **kwargs, 27 | ): 28 | kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr) 29 | kwargs.setdefault("context_len", 16384) #TODO 16K模型需要改成16384 30 | super().__init__(**kwargs) 31 | self.version = version 32 | 33 | def do_chat(self, params: ApiChatParams) -> Dict: 34 | ''' 35 | yield output: {"error_code": 0, "text": ""} 36 | ''' 37 | params.load_config(self.model_names[0]) 38 | openai.api_key = params.api_key 39 | openai.api_base = params.api_base_url 40 | 41 | logger.error(f"{params.api_key}, {params.api_base_url}, {params.messages} {params.max_tokens},") 42 | # just for example 43 | prompt = "\n".join([f"{m['role']}:{m['content']}" for m in params.messages]) 44 | logger.error(f"{prompt}, {params.temperature}, {params.max_tokens}") 45 | try: 46 | model = ChatOpenAI( 47 | streaming=True, 48 | verbose=True, 49 | openai_api_key= params.api_key, 50 | openai_api_base=params.api_base_url, 
51 | model_name=params.version 52 | ) 53 | chat_prompt = ChatPromptTemplate.from_messages([("human", "{input}")]) 54 | chain = LLMChain(prompt=chat_prompt, llm=model) 55 | content = chain({"input": prompt}) 56 | logger.info(content) 57 | except Exception as e: 58 | logger.error(f"{e}") 59 | yield {"error_code": 500, "text": "request error"} 60 | 61 | # return the text by yield for stream 62 | try: 63 | yield {"error_code": 0, "text": content["text"]} 64 | except: 65 | yield {"error_code": 500, "text": "request error"} 66 | 67 | def get_embeddings(self, params): 68 | # TODO: 支持embeddings 69 | print("embedding") 70 | print(params) 71 | 72 | def make_conv_template(self, conv_template: str = None, model_path: str = None) -> Conversation: 73 | # TODO: 确认模板是否需要修改 74 | return conv.Conversation( 75 | name=self.model_names[0], 76 | system_message="You are a helpful, respectful and honest assistant.", 77 | messages=[], 78 | roles=["user", "assistant", "system"], 79 | sep="\n### ", 80 | stop_str="###", 81 | ) 82 | 83 | 84 | if __name__ == "__main__": 85 | import uvicorn 86 | from coagent.utils.server_utils import MakeFastAPIOffline 87 | from fastchat.serve.base_model_worker import app 88 | 89 | worker = ExampleWorker( 90 | controller_addr="http://127.0.0.1:20001", 91 | worker_addr="http://127.0.0.1:21008", 92 | ) 93 | sys.modules["fastchat.serve.model_worker"].worker = worker 94 | uvicorn.run(app, port=21008) 95 | -------------------------------------------------------------------------------- /examples/model_workers/qwen.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | import os 4 | from fastchat.conversation import Conversation 5 | from http import HTTPStatus 6 | from typing import List, Literal, Dict 7 | 8 | from fastchat import conversation as conv 9 | from .base import * 10 | from loguru import logger 11 | # from configs import logger, log_verbose 12 | log_verbose = os.environ.get("log_verbose", False) 13 | 14 | 15 | class QwenWorker(ApiModelWorker): 16 | DEFAULT_EMBED_MODEL = "text-embedding-v1" 17 | 18 | def __init__( 19 | self, 20 | *, 21 | version: Literal["qwen-turbo", "qwen-plus"] = "qwen-turbo", 22 | model_names: List[str] = ["qwen-api"], 23 | controller_addr: str = None, 24 | worker_addr: str = None, 25 | **kwargs, 26 | ): 27 | kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr) 28 | kwargs.setdefault("context_len", 16384) 29 | super().__init__(**kwargs) 30 | self.version = version 31 | 32 | def do_chat(self, params: ApiChatParams) -> Dict: 33 | import dashscope 34 | params.load_config(self.model_names[0]) 35 | if log_verbose: 36 | logger.info(f'{self.__class__.__name__}:params: {params}') 37 | 38 | gen = dashscope.Generation() 39 | responses = gen.call( 40 | model=params.version, 41 | temperature=params.temperature, 42 | api_key=params.api_key, 43 | messages=params.messages, 44 | result_format='message', # set the result is message format. 
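# [Illustrative note, not part of the original file] With stream=True (next line)
# gen.call() yields a sequence of responses. The consumer below emits
# choices[0]["message"]["content"] as the complete text so far, which suggests
# DashScope streams cumulative rather than incremental chunks here — an inference
# from this handler, not from DashScope documentation:
#
#   for resp in responses:
#       print(resp["output"]["choices"][0]["message"]["content"])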
45 | stream=True, 46 | ) 47 | 48 | for resp in responses: 49 | if resp["status_code"] == 200: 50 | if choices := resp["output"]["choices"]: 51 | yield { 52 | "error_code": 0, 53 | "text": choices[0]["message"]["content"], 54 | } 55 | else: 56 | data = { 57 | "error_code": resp["status_code"], 58 | "text": resp["message"], 59 | "error": { 60 | "message": resp["message"], 61 | "type": "invalid_request_error", 62 | "param": None, 63 | "code": None, 64 | } 65 | } 66 | self.logger.error(f"请求千问 API 时发生错误:{data}") 67 | yield data 68 | 69 | def do_embeddings(self, params: ApiEmbeddingsParams) -> Dict: 70 | import dashscope 71 | params.load_config(self.model_names[0]) 72 | if log_verbose: 73 | logger.info(f'{self.__class__.__name__}:params: {params}') 74 | result = [] 75 | i = 0 76 | while i < len(params.texts): 77 | texts = params.texts[i:i+25] 78 | resp = dashscope.TextEmbedding.call( 79 | model=params.embed_model or self.DEFAULT_EMBED_MODEL, 80 | input=texts, # 最大25行 81 | api_key=params.api_key, 82 | ) 83 | if resp["status_code"] != 200: 84 | data = { 85 | "code": resp["status_code"], 86 | "msg": resp.message, 87 | "error": { 88 | "message": resp["message"], 89 | "type": "invalid_request_error", 90 | "param": None, 91 | "code": None, 92 | } 93 | } 94 | self.logger.error(f"请求千问 API 时发生错误:{data}") 95 | return data 96 | else: 97 | embeddings = [x["embedding"] for x in resp["output"]["embeddings"]] 98 | result += embeddings 99 | i += 25 100 | return {"code": 200, "data": result} 101 | 102 | def get_embeddings(self, params): 103 | # TODO: 支持embeddings 104 | print("embedding") 105 | print(params) 106 | 107 | def make_conv_template(self, conv_template: str = None, model_path: str = None) -> Conversation: 108 | # TODO: 确认模板是否需要修改 109 | return conv.Conversation( 110 | name=self.model_names[0], 111 | system_message="你是一个聪明、对人类有帮助的人工智能,你可以对人类提出的问题给出有用、详细、礼貌的回答。", 112 | messages=[], 113 | roles=["user", "assistant", "system"], 114 | sep="\n### ", 115 | stop_str="###", 116 | ) 117 | 118 | 119 | if __name__ == "__main__": 120 | import uvicorn 121 | from server.utils import MakeFastAPIOffline 122 | from fastchat.serve.model_worker import app 123 | 124 | worker = QwenWorker( 125 | controller_addr="http://127.0.0.1:20001", 126 | worker_addr="http://127.0.0.1:20007", 127 | ) 128 | sys.modules["fastchat.serve.model_worker"].worker = worker 129 | MakeFastAPIOffline(app) 130 | uvicorn.run(app, port=20007) 131 | -------------------------------------------------------------------------------- /examples/model_workers/tiangong.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | import hashlib 4 | 5 | from fastchat.conversation import Conversation 6 | from .base import * 7 | from fastchat import conversation as conv 8 | import json 9 | from typing import List, Literal, Dict 10 | import requests 11 | 12 | 13 | 14 | class TianGongWorker(ApiModelWorker): 15 | def __init__( 16 | self, 17 | *, 18 | controller_addr: str = None, 19 | worker_addr: str = None, 20 | model_names: List[str] = ["tiangong-api"], 21 | version: Literal["SkyChat-MegaVerse"] = "SkyChat-MegaVerse", 22 | **kwargs, 23 | ): 24 | kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr) 25 | kwargs.setdefault("context_len", 32768) 26 | super().__init__(**kwargs) 27 | self.version = version 28 | 29 | def do_chat(self, params: ApiChatParams) -> Dict: 30 | params.load_config(self.model_names[0]) 31 | 32 | url = 
'https://sky-api.singularity-ai.com/saas/api/v4/generate' 33 | data = { 34 | "messages": params.messages, 35 | "model": "SkyChat-MegaVerse" 36 | } 37 | timestamp = str(int(time.time())) 38 | sign_content = params.api_key + params.secret_key + timestamp 39 | sign_result = hashlib.md5(sign_content.encode('utf-8')).hexdigest() 40 | headers={ 41 | "app_key": params.api_key, 42 | "timestamp": timestamp, 43 | "sign": sign_result, 44 | "Content-Type": "application/json", 45 | "stream": "true" # or change to "false" 不处理流式返回内容 46 | } 47 | 48 | # 发起请求并获取响应 49 | response = requests.post(url, headers=headers, json=data, stream=True) 50 | 51 | text = "" 52 | # 处理响应流 53 | for line in response.iter_lines(chunk_size=None, decode_unicode=True): 54 | if line: 55 | # 处理接收到的数据 56 | # print(line.decode('utf-8')) 57 | resp = json.loads(line) 58 | if resp["code"] == 200: 59 | text += resp['resp_data']['reply'] 60 | yield { 61 | "error_code": 0, 62 | "text": text 63 | } 64 | else: 65 | data = { 66 | "error_code": resp["code"], 67 | "text": resp["code_msg"] 68 | } 69 | self.logger.error(f"请求天工 API 时出错:{data}") 70 | yield data 71 | 72 | def get_embeddings(self, params): 73 | # TODO: 支持embeddings 74 | print("embedding") 75 | print(params) 76 | 77 | def make_conv_template(self, conv_template: str = None, model_path: str = None) -> Conversation: 78 | # TODO: 确认模板是否需要修改 79 | return conv.Conversation( 80 | name=self.model_names[0], 81 | system_message="", 82 | messages=[], 83 | roles=["user", "system"], 84 | sep="\n### ", 85 | stop_str="###", 86 | ) 87 | 88 | 89 | -------------------------------------------------------------------------------- /examples/model_workers/xinghuo.py: -------------------------------------------------------------------------------- 1 | from fastchat.conversation import Conversation 2 | from .base import * 3 | from fastchat import conversation as conv 4 | import sys 5 | import json 6 | from model_workers import SparkApi 7 | import websockets 8 | from muagent.utils.server_utils import run_async, iter_over_async 9 | from typing import List, Dict 10 | import asyncio 11 | 12 | 13 | 14 | async def request(appid, api_key, api_secret, Spark_url, domain, question, temperature, max_token): 15 | wsParam = SparkApi.Ws_Param(appid, api_key, api_secret, Spark_url) 16 | wsUrl = wsParam.create_url() 17 | data = SparkApi.gen_params(appid, domain, question, temperature, max_token) 18 | print(data) 19 | async with websockets.connect(wsUrl) as ws: 20 | await ws.send(json.dumps(data, ensure_ascii=False)) 21 | finish = False 22 | while not finish: 23 | chunk = await ws.recv() 24 | response = json.loads(chunk) 25 | if response.get("header", {}).get("status") == 2: 26 | finish = True 27 | if text := response.get("payload", {}).get("choices", {}).get("text"): 28 | yield text[0]["content"] 29 | 30 | 31 | class XingHuoWorker(ApiModelWorker): 32 | def __init__( 33 | self, 34 | *, 35 | model_names: List[str] = ["xinghuo-api"], 36 | controller_addr: str = None, 37 | worker_addr: str = None, 38 | version: str = None, 39 | **kwargs, 40 | ): 41 | kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr) 42 | kwargs.setdefault("context_len", 8000) # TODO: V1模型的最大长度为4000,需要自行修改 43 | super().__init__(**kwargs) 44 | self.version = version 45 | 46 | def do_chat(self, params: ApiChatParams) -> Dict: 47 | # TODO: 当前每次对话都要重新连接websocket,确认是否可以保持连接 48 | params.load_config(self.model_names[0]) 49 | 50 | version_mapping = { 51 | "v1.5": {"domain": "general", "url": 
"ws://spark-api.xf-yun.com/v1.1/chat","max_tokens": 4000}, 52 | "v2.0": {"domain": "generalv2", "url": "ws://spark-api.xf-yun.com/v2.1/chat","max_tokens": 8000}, 53 | "v3.0": {"domain": "generalv3", "url": "ws://spark-api.xf-yun.com/v3.1/chat","max_tokens": 8000}, 54 | } 55 | 56 | def get_version_details(version_key): 57 | return version_mapping.get(version_key, {"domain": None, "url": None}) 58 | 59 | details = get_version_details(params.version) 60 | domain = details["domain"] 61 | Spark_url = details["url"] 62 | text = "" 63 | try: 64 | loop = asyncio.get_event_loop() 65 | except: 66 | loop = asyncio.new_event_loop() 67 | params.max_tokens = min(details["max_tokens"], params.max_tokens or 0) 68 | for chunk in iter_over_async( 69 | request(params.APPID, params.api_key, params.APISecret, Spark_url, domain, params.messages, 70 | params.temperature, params.max_tokens), 71 | loop=loop, 72 | ): 73 | if chunk: 74 | text += chunk 75 | yield {"error_code": 0, "text": text} 76 | 77 | def get_embeddings(self, params): 78 | # TODO: 支持embeddings 79 | print("embedding") 80 | print(params) 81 | 82 | def make_conv_template(self, conv_template: str = None, model_path: str = None) -> Conversation: 83 | # TODO: 确认模板是否需要修改 84 | return conv.Conversation( 85 | name=self.model_names[0], 86 | system_message="你是一个聪明的助手,请根据用户的提示来完成任务", 87 | messages=[], 88 | roles=["user", "assistant"], 89 | sep="\n### ", 90 | stop_str="###", 91 | ) 92 | 93 | 94 | if __name__ == "__main__": 95 | import uvicorn 96 | from server.utils import MakeFastAPIOffline 97 | from fastchat.serve.model_worker import app 98 | 99 | worker = XingHuoWorker( 100 | controller_addr="http://127.0.0.1:20001", 101 | worker_addr="http://127.0.0.1:21003", 102 | ) 103 | sys.modules["fastchat.serve.model_worker"].worker = worker 104 | MakeFastAPIOffline(app) 105 | uvicorn.run(app, port=21003) 106 | -------------------------------------------------------------------------------- /examples/model_workers/zhipu.py: -------------------------------------------------------------------------------- 1 | from fastchat.conversation import Conversation 2 | import os 3 | from .base import * 4 | from fastchat import conversation as conv 5 | import sys 6 | from typing import List, Dict, Iterator, Literal 7 | from loguru import logger 8 | # from configs import logger, log_verbose 9 | log_verbose = os.environ.get("log_verbose", False) 10 | 11 | class ChatGLMWorker(ApiModelWorker): 12 | DEFAULT_EMBED_MODEL = "text_embedding" 13 | 14 | def __init__( 15 | self, 16 | *, 17 | model_names: List[str] = ["zhipu-api"], 18 | controller_addr: str = None, 19 | worker_addr: str = None, 20 | version: Literal["chatglm_turbo"] = "chatglm_turbo", 21 | **kwargs, 22 | ): 23 | kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr) 24 | kwargs.setdefault("context_len", 32768) 25 | super().__init__(**kwargs) 26 | self.version = version 27 | 28 | def do_chat(self, params: ApiChatParams) -> Iterator[Dict]: 29 | # TODO: 维护request_id 30 | import zhipuai 31 | 32 | params.load_config(self.model_names[0]) 33 | zhipuai.api_key = params.api_key 34 | 35 | if log_verbose: 36 | logger.info(f'{self.__class__.__name__}:params: {params}') 37 | 38 | response = zhipuai.model_api.sse_invoke( 39 | model=params.version, 40 | prompt=params.messages, 41 | temperature=params.temperature, 42 | top_p=params.top_p, 43 | incremental=False, 44 | ) 45 | for e in response.events(): 46 | if e.event == "add": 47 | yield {"error_code": 0, "text": e.data} 48 | elif e.event in ["error", 
"interrupted"]: 49 | data = { 50 | "error_code": 500, 51 | "text": str(e), 52 | "error": { 53 | "message": str(e), 54 | "type": "invalid_request_error", 55 | "param": None, 56 | "code": None, 57 | } 58 | } 59 | self.logger.error(f"请求智谱 API 时发生错误:{data}") 60 | yield data 61 | 62 | def do_embeddings(self, params: ApiEmbeddingsParams) -> Dict: 63 | import zhipuai 64 | 65 | params.load_config(self.model_names[0]) 66 | zhipuai.api_key = params.api_key 67 | 68 | embeddings = [] 69 | try: 70 | for t in params.texts: 71 | response = zhipuai.model_api.invoke(model=params.embed_model or self.DEFAULT_EMBED_MODEL, prompt=t) 72 | if response["code"] == 200: 73 | embeddings.append(response["data"]["embedding"]) 74 | else: 75 | self.logger.error(f"请求智谱 API 时发生错误:{response}") 76 | return response # dict with code & msg 77 | except Exception as e: 78 | self.logger.error(f"请求智谱 API 时发生错误:{data}") 79 | data = {"code": 500, "msg": f"对文本向量化时出错:{e}"} 80 | return data 81 | 82 | return {"code": 200, "data": embeddings} 83 | 84 | def get_embeddings(self, params): 85 | # TODO: 支持embeddings 86 | print("embedding") 87 | # print(params) 88 | 89 | def make_conv_template(self, conv_template: str = None, model_path: str = None) -> Conversation: 90 | # 这里的是chatglm api的模板,其它API的conv_template需要定制 91 | return conv.Conversation( 92 | name=self.model_names[0], 93 | system_message="你是一个聪明的助手,请根据用户的提示来完成任务", 94 | messages=[], 95 | roles=["Human", "Assistant", "System"], 96 | sep="\n###", 97 | stop_str="###", 98 | ) 99 | 100 | 101 | if __name__ == "__main__": 102 | import uvicorn 103 | from server.utils import MakeFastAPIOffline 104 | from fastchat.serve.model_worker import app 105 | 106 | worker = ChatGLMWorker( 107 | controller_addr="http://127.0.0.1:20001", 108 | worker_addr="http://127.0.0.1:21001", 109 | ) 110 | sys.modules["fastchat.serve.model_worker"].worker = worker 111 | MakeFastAPIOffline(app) 112 | uvicorn.run(app, port=21001) 113 | -------------------------------------------------------------------------------- /examples/sdfile_api.py: -------------------------------------------------------------------------------- 1 | import sys, os, json, traceback, uvicorn, argparse 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | from loguru import logger 9 | 10 | from fastapi import FastAPI 11 | from fastapi.middleware.cors import CORSMiddleware 12 | from fastapi import File, UploadFile 13 | 14 | from muagent.utils.server_utils import BaseResponse, ListResponse, DataResponse 15 | from configs.server_config import OPEN_CROSS_DOMAIN, SDFILE_API_SERVER 16 | from configs.model_config import JUPYTER_WORK_PATH 17 | 18 | 19 | VERSION = "v0.1.0" 20 | 21 | async def sd_upload_file(file: UploadFile = File(...), work_dir: str = JUPYTER_WORK_PATH): 22 | # 保存上传的文件到服务器 23 | try: 24 | content = await file.read() 25 | with open(os.path.join(work_dir, file.filename), "wb") as f: 26 | f.write(content) 27 | return {"data": True} 28 | except: 29 | return {"data": False} 30 | 31 | 32 | async def sd_download_file(filename: str, save_filename: str = "filename_to_download.ext", work_dir: str = JUPYTER_WORK_PATH): 33 | # 从服务器下载文件 34 | logger.debug(f"{os.path.join(work_dir, filename)}") 35 | return {"data": os.path.join(work_dir, filename), "filename": save_filename} 36 | # return {"data": FileResponse(os.path.join(work_dir, filename), filename=save_filename)} 37 | 38 | 39 | async def sd_list_files(work_dir: str = JUPYTER_WORK_PATH): 40 | # 去除目录 41 | return {"data": 
os.listdir(work_dir)} 42 | 43 | 44 | async def sd_delete_file(filename: str, work_dir: str = JUPYTER_WORK_PATH): 45 | # 去除目录 46 | try: 47 | os.remove(os.path.join(work_dir, filename)) 48 | return {"data": True} 49 | except: 50 | return {"data": False} 51 | 52 | 53 | def create_app(open_cross_domain, version=VERSION): 54 | app = FastAPI( 55 | title="DevOps-ChatBot API Server", 56 | version=version 57 | ) 58 | # MakeFastAPIOffline(app) 59 | # Add CORS middleware to allow all origins 60 | # 在config.py中设置OPEN_DOMAIN=True,允许跨域 61 | # set OPEN_DOMAIN=True in config.py to allow cross-domain 62 | if open_cross_domain: 63 | # if OPEN_CROSS_DOMAIN: 64 | app.add_middleware( 65 | CORSMiddleware, 66 | allow_origins=["*"], 67 | allow_credentials=True, 68 | allow_methods=["*"], 69 | allow_headers=["*"], 70 | ) 71 | 72 | app.post("/sdfiles/upload", 73 | tags=["files upload and download"], 74 | response_model=BaseResponse, 75 | summary="上传文件到沙盒" 76 | )(sd_upload_file) 77 | 78 | app.get("/sdfiles/download", 79 | tags=["files upload and download"], 80 | response_model=DataResponse, 81 | summary="从沙盒下载文件" 82 | )(sd_download_file) 83 | 84 | app.get("/sdfiles/list", 85 | tags=["files upload and download"], 86 | response_model=ListResponse, 87 | summary="从沙盒工作目录展示文件" 88 | )(sd_list_files) 89 | 90 | app.get("/sdfiles/delete", 91 | tags=["files upload and download"], 92 | response_model=BaseResponse, 93 | summary="从沙盒工作目录中删除文件" 94 | )(sd_delete_file) 95 | return app 96 | 97 | 98 | 99 | def run_api(host, port, open_cross_domain, **kwargs): 100 | app = create_app(open_cross_domain) 101 | if kwargs.get("ssl_keyfile") and kwargs.get("ssl_certfile"): 102 | uvicorn.run(app, 103 | host=host, 104 | port=port, 105 | ssl_keyfile=kwargs.get("ssl_keyfile"), 106 | ssl_certfile=kwargs.get("ssl_certfile"), 107 | ) 108 | else: 109 | uvicorn.run(app, host=host, port=port) 110 | 111 | 112 | if __name__ == "__main__": 113 | parser = argparse.ArgumentParser(prog='DevOps-ChatBot', 114 | description='About DevOps-ChatBot, local knowledge based LLM with langchain' 115 | ' | 基于本地知识库的 LLM 问答') 116 | parser.add_argument("--host", type=str, default="0.0.0.0") 117 | parser.add_argument("--port", type=int, default="7862") 118 | # parser.add_argument("--port", type=int, default=SDFILE_API_SERVER["port"]) 119 | parser.add_argument("--open_cross_domain", type=bool, default=False) 120 | parser.add_argument("--ssl_keyfile", type=str) 121 | parser.add_argument("--ssl_certfile", type=str) 122 | # 初始化消息 123 | args = parser.parse_args() 124 | args_dict = vars(args) 125 | run_api(host=args.host, 126 | port=args.port, 127 | open_cross_domain=args.open_cross_domain, 128 | ssl_keyfile=args.ssl_keyfile, 129 | ssl_certfile=args.ssl_certfile, 130 | ) -------------------------------------------------------------------------------- /examples/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | cp ../configs/model_config.py.example ../configs/model_config.py 5 | cp ../configs/server_config.py.example ../configs/server_config.py 6 | 7 | streamlit run webui_config.py --server.port 8510 8 | -------------------------------------------------------------------------------- /examples/stop.py: -------------------------------------------------------------------------------- 1 | import docker, sys, os 2 | from loguru import logger 3 | 4 | src_dir = os.path.join( 5 | os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 6 | ) 7 | sys.path.append(src_dir) 8 | 9 | from configs.server_config import ( 10 
| SANDBOX_CONTRAINER_NAME, CONTRAINER_NAME, SANDBOX_SERVER, DOCKER_SERVICE 11 | ) 12 | 13 | from start import check_docker, check_process 14 | 15 | try: 16 | client = docker.from_env() 17 | except: 18 | client = None 19 | 20 | 21 | def stop_main(): 22 | # 23 | check_docker(client, SANDBOX_CONTRAINER_NAME, do_stop=True, ) 24 | check_process(f"port={SANDBOX_SERVER['port']}", do_stop=True) 25 | check_process(f"port=5050", do_stop=True) 26 | 27 | # 28 | check_docker(client, CONTRAINER_NAME, do_stop=True, ) 29 | check_process("api.py", do_stop=True) 30 | check_process("sdfile_api.py", do_stop=True) 31 | check_process("llm_api.py", do_stop=True) 32 | check_process("webui.py", do_stop=True) 33 | 34 | 35 | if __name__ == "__main__": 36 | stop_main() -------------------------------------------------------------------------------- /examples/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from configs.model_config import ONLINE_LLM_MODEL 4 | from configs.server_config import FSCHAT_MODEL_WORKERS 5 | from configs.model_config import llm_model_dict, LLM_DEVICE 6 | 7 | from loguru import logger 8 | 9 | 10 | 11 | def get_model_worker_config( 12 | model_name: str = None, 13 | fastchat_mdoel_workers: dict = FSCHAT_MODEL_WORKERS, 14 | online_llm_model: dict = ONLINE_LLM_MODEL, 15 | llm_model_dict: dict = llm_model_dict, 16 | llm_device: str = LLM_DEVICE 17 | ) -> dict: 18 | ''' 19 | 加载model worker的配置项。 20 | 优先级:FSCHAT_MODEL_WORKERS[model_name] > ONLINE_LLM_MODEL[model_name] > FSCHAT_MODEL_WORKERS["default"] 21 | ''' 22 | import model_workers 23 | 24 | config = fastchat_mdoel_workers.get("default", {}).copy() 25 | config.update(online_llm_model.get(model_name, {}).copy()) 26 | config.update(fastchat_mdoel_workers.get(model_name, {}).copy()) 27 | 28 | if model_name in online_llm_model: 29 | config["online_api"] = True 30 | if provider := config.get("provider"): 31 | try: 32 | config["worker_class"] = getattr(model_workers, provider) 33 | except Exception as e: 34 | msg = f"在线模型 ‘{model_name}’ 的provider没有正确配置" 35 | logger.error(f'{e.__class__.__name__}: {msg}') 36 | # 本地模型 37 | if model_name in llm_model_dict: 38 | path = llm_model_dict[model_name]["local_model_path"] 39 | config["model_path"] = path 40 | if path and os.path.isdir(path): 41 | config["model_path_exists"] = True 42 | config["device"] = llm_device 43 | 44 | # logger.debug(f"config: {config}") 45 | return config -------------------------------------------------------------------------------- /examples/webui.py: -------------------------------------------------------------------------------- 1 | # 运行方式: 2 | # 1. 安装必要的包:pip install streamlit-option-menu streamlit-chatbox>=1.1.6 3 | # 2. 运行本机fastchat服务:python server\llm_api.py 或者 运行对应的sh文件 4 | # 3. 运行API服务器:python server/api.py。如果使用api = ApiRequest(no_remote_api=True),该步可以跳过。 5 | # 4. 
运行WEB UI:streamlit run webui.py --server.port 7860 6 | from loguru import logger 7 | import os 8 | import sys 9 | import streamlit as st 10 | from streamlit_option_menu import option_menu 11 | 12 | import multiprocessing 13 | 14 | src_dir = os.path.join( 15 | os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 16 | ) 17 | sys.path.append(src_dir) 18 | 19 | from webui import * 20 | from configs.model_config import VERSION, LLM_MODEL 21 | from configs.server_config import NO_REMOTE_API 22 | from configs.model_config import CB_ROOT_PATH 23 | 24 | from configs.model_config import embedding_model_dict, kbs_config, EMBEDDING_MODEL, DEFAULT_VS_TYPE, WEB_CRAWL_PATH 25 | 26 | 27 | api = ApiRequest(base_url="http://127.0.0.1:7861", no_remote_api=NO_REMOTE_API, cb_root_path=CB_ROOT_PATH) 28 | 29 | 30 | if __name__ == "__main__": 31 | st.set_page_config( 32 | "CodeFuse-ChatBot WebUI", 33 | os.path.join("../sources/imgs", "devops-chatbot.png"), 34 | initial_sidebar_state="expanded", 35 | menu_items={ 36 | 'Get Help': 'https://github.com/codefuse-ai/codefuse-chatbot', 37 | 'Report a bug': "https://github.com/codefuse-ai/codefuse-chatbot/issues", 38 | 'About': f"""欢迎使用 CodeFuse-ChatBot WebUI {VERSION}!""" 39 | } 40 | ) 41 | 42 | if not chat_box.chat_inited: 43 | st.toast( 44 | f"欢迎使用 [`CodeFuse-ChatBot`](https://github.com/codefuse-ai/codefuse-chatbot) ! \n\n" 45 | f"当前使用模型`{LLM_MODEL}`, 您可以开始提问了." 46 | ) 47 | 48 | pages = { 49 | "对话": { 50 | "icon": "chat", 51 | "func": dialogue_page, 52 | }, 53 | "知识库管理": { 54 | "icon": "hdd-stack", 55 | "func": knowledge_page, 56 | }, 57 | "代码知识库管理": { 58 | "icon": "hdd-stack", 59 | "func": code_page, 60 | }, 61 | # "Prompt管理": { 62 | # "icon": "hdd-stack", 63 | # "func": prompt_page, 64 | # }, 65 | } 66 | 67 | with st.sidebar: 68 | st.image( 69 | os.path.join( 70 | "../sources/imgs", 71 | "devops-chatbot.png" 72 | ), 73 | use_column_width=True 74 | ) 75 | st.caption( 76 | f"""

<p align="right">CodeFuse-ChatBot 当前版本:{VERSION}</p>

""", 77 | unsafe_allow_html=True, 78 | ) 79 | options = list(pages) 80 | icons = [x["icon"] for x in pages.values()] 81 | 82 | default_index = 0 83 | selected_page = option_menu( 84 | "", 85 | options=options, 86 | icons=icons, 87 | # menu_icon="chat-quote", 88 | default_index=default_index, 89 | ) 90 | 91 | if selected_page in pages: 92 | pages[selected_page]["func"](api) 93 | # pages["对话"]["func"](api, ) 94 | # pages["知识库管理"]["func"](api, embedding_model_dict, kbs_config, EMBEDDING_MODEL, DEFAULT_VS_TYPE, WEB_CRAWL_PATH) 95 | # pages["代码知识库管理"]["func"](api, ) 96 | -------------------------------------------------------------------------------- /examples/webui/__init__.py: -------------------------------------------------------------------------------- 1 | from .dialogue import dialogue_page, chat_box 2 | from .document import knowledge_page 3 | from .code import code_page 4 | from .prompt import prompt_page 5 | from .utils import ApiRequest 6 | 7 | __all__ = [ 8 | "dialogue_page", "chat_box", "prompt_page", "knowledge_page", 9 | "ApiRequest", "code_page" 10 | ] -------------------------------------------------------------------------------- /examples/webui/prompt.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import os 3 | import time 4 | from datetime import datetime 5 | import traceback 6 | from typing import Literal, Dict, Tuple 7 | from st_aggrid import AgGrid, JsCode 8 | from st_aggrid.grid_options_builder import GridOptionsBuilder 9 | import pandas as pd 10 | 11 | from .utils import * 12 | from muagent.utils.path_utils import * 13 | from muagent.service.service_factory import get_kb_details, get_kb_doc_details 14 | from muagent.orm import table_init 15 | 16 | 17 | 18 | def prompt_page(api: ApiRequest): 19 | # 判断表是否存在并进行初始化 20 | table_init() 21 | 22 | now = datetime.now() 23 | with st.sidebar: 24 | 25 | cols = st.columns(2) 26 | export_btn = cols[0] 27 | if cols[1].button( 28 | "清空prompt", 29 | use_container_width=True, 30 | ): 31 | st.experimental_rerun() 32 | 33 | export_btn.download_button( 34 | "导出记录", 35 | "测试prompt", 36 | file_name=f"{now:%Y-%m-%d %H.%M}_对话记录.md", 37 | mime="text/markdown", 38 | use_container_width=True, 39 | ) 40 | -------------------------------------------------------------------------------- /examples/webui/yamls/webui_en.yaml: -------------------------------------------------------------------------------- 1 | # This is an example of webui 2 | dialogue: 3 | mode_instruction: 请选择对话模式 4 | mode: 5 | - LLM Conversation 6 | - Knowledge Base Q&A 7 | - Code Knowledge Base Q&A 8 | - Search Engine Q&A 9 | - Agents Q&A 10 | history_length: History of Dialogue Turns 11 | text_mode_swtich: Switched to mode 12 | text_knowledgeBase_swtich: Current Knowledge Base" 13 | text_loaded_kbase: Loaded Knowledge Base 14 | text_loaded_cbase: Loaded Code Knowledge Base 15 | # Knowledge Base Q&A 16 | kbase_expander_name: 知识库配置 17 | kbase_selectbox_name: 请选择知识库: 18 | kbase_ninput_topk_name: 匹配知识条数: 19 | kbase_ninput_score_threshold_name: 知识匹配分数阈值: 20 | # Code Knowledge Base Q&A 21 | cbase_expander_name: 代码知识库配置 22 | cbase_selectbox_name: 请选择代码知识库: 23 | cbase_ninput_topk_name: 匹配代码条数: 24 | cbase_selectbox_type_name: 请选择查询模式: 25 | cbase_search_type_v1: 26 | - 基于 cypher 27 | - 基于标签 28 | - 基于描述 29 | cbase_search_type_v2: 30 | - 基于 cypher 31 | - 基于标签 32 | 33 | # Search Engine Q&A 34 | expander_search_name: 搜索引擎配置 35 | selectbox_search_name: 请选择搜索引擎 36 | ninput_search_topk_name: 匹配搜索结果条数: 37 | # Agents Q&A 38 | 
phase_expander_name: Phase Management 39 | phase_selectbox_name: Select the execution chain to use 40 | phase_toggle_detailed_name: Use detailed information for agent interaction 41 | phase_toggle_doToolUsing: Enable tool usage 42 | phase_multiselect_tools: Select the tools to use 43 | phase_toggle_doSearch: Enable search augmentation 44 | phase_toggle_doDocRetrieval: Enable knowledge base retrieval augmentation 45 | phase_toggle_doCodeRetrieval: Enable code retrieval augmentation 46 | 47 | sandbox: 48 | expander_name: Sandbox File Management 49 | file_upload_name: Upload sandbox file 50 | selectbox_name: Select a file to process 51 | button_upload_name: Upload 52 | button_download_name: Download 53 | button_delete_name: Delete 54 | toggle_doCodeInterpreter: Enable code interpreter 55 | toggle_doAutoCodeExec: Execute code automatically 56 | 57 | expander_code_name: Code Editor and Executor 58 | textArea_code_name: Code snippet 59 | button_modify_code_name: Modify dialogue 60 | text_modify_code: Dialogue modified successfully 61 | button_exec_code_name: Execute code 62 | text_execing_code: Executing code 63 | text_error_exec_code: code cannot be empty 64 | 65 | 66 | chat: 67 | chat_placeholder: Enter your message; use Ctrl+Enter for a line break 68 | chatbox_saying: Thinking... 69 | chatbox_doc_querying: Querying the knowledge base 70 | chatbox_code_querying: Querying the code knowledge base 71 | chatbox_searching: Searching 72 | chatbox_search_result: Web search results 73 | chatbox_doc_result: Knowledge base matches 74 | chatbox_code_result: Matched code base nodes 75 | 76 | export: 77 | button_clear_conversation_name: Clear conversation 78 | download_button_export_name: Export records -------------------------------------------------------------------------------- /examples/webui/yamls/webui_zh.yaml: -------------------------------------------------------------------------------- 1 | # This is an example of webui 2 | dialogue: 3 | mode_instruction: 请选择对话模式 4 | mode: 5 | - LLM 对话 6 | - 知识库问答 7 | - 代码知识库问答 8 | - 搜索引擎问答 9 | - Agent问答 10 | history_length: 历史对话轮数 11 | text_mode_swtich: 已切换到模式 12 | text_knowledgeBase_swtich: 当前知识库 13 | text_loaded_kbase: 已加载知识库 14 | text_loaded_cbase: 已加载代码知识库 15 | # Knowledge Base Q&A 16 | kbase_expander_name: 知识库配置 17 | kbase_selectbox_name: 请选择知识库: 18 | kbase_ninput_topk_name: 匹配知识条数: 19 | kbase_ninput_score_threshold_name: 知识匹配分数阈值: 20 | # Code Knowledge Base Q&A 21 | cbase_expander_name: 代码知识库配置 22 | cbase_selectbox_name: 请选择代码知识库: 23 | cbase_ninput_topk_name: 匹配代码条数: 24 | cbase_selectbox_type_name: 请选择查询模式: 25 | cbase_search_type_v1: 26 | - 基于 cypher 27 | - 基于标签 28 | - 基于描述 29 | cbase_search_type_v2: 30 | - 基于 cypher 31 | - 基于标签 32 | 33 | # Search Engine Q&A 34 | expander_search_name: 搜索引擎配置 35 | selectbox_search_name: 请选择搜索引擎 36 | ninput_search_topk_name: 匹配搜索结果条数: 37 | # Agents Q&A 38 | phase_expander_name: Phase管理 39 | phase_selectbox_name: 请选择待使用的执行链路 40 | phase_toggle_detailed_name: 是否使用明细信息进行agent交互 41 | phase_toggle_doToolUsing: 开启工具使用 42 | phase_multiselect_tools: 请选择待使用的工具 43 | phase_toggle_doSearch: 开启搜索增强 44 | phase_toggle_doDocRetrieval: 开启知识库检索增强 45 | phase_toggle_doCodeRetrieval: 开启代码检索增强 46 | 47 | sandbox: 48 | expander_name: 沙盒文件管理 49 | file_upload_name: 上传沙盒文件 50 | selectbox_name: 选择要处理文件 51 | button_upload_name: 点击上传 52 | button_download_name: 点击下载 53 | button_delete_name: 点击删除 54 | toggle_doCodeInterpreter: 开启代码解释器 55 | toggle_doAutoCodeExec: 自动执行代码 56 | 57 | expander_code_name: 代码编辑执行器 58 | textArea_code_name: 代码片段 59 | button_modify_code_name: 修改对话 60 | text_modify_code: 修改对话成功 61 | button_exec_code_name: 执行代码 62 | text_execing_code: 正在执行代码 63 | text_error_exec_code: code 不能为空 64 | 65 | 66 | chat: 67 | chat_placeholder: 请输入对话内容,换行请使用Ctrl+Enter 68 | chatbox_saying: 正在思考...
69 | chatbox_doc_querying: 正在查询知识库 70 | chatbox_code_querying: 正在查询代码知识库 71 | chatbox_searching: 正在执行搜索 72 | chatbox_search_result: 网络搜索结果 73 | chatbox_doc_result: 知识库匹配结果 74 | chatbox_code_result: 代码库匹配节点 75 | 76 | export: 77 | button_clear_conversation_name: 清空对话 78 | download_button_export_name: 导出记录 -------------------------------------------------------------------------------- /nltk_data/corpora/cmudict/README: -------------------------------------------------------------------------------- 1 | The Carnegie Mellon Pronouncing Dictionary [cmudict.0.7a] 2 | 3 | ftp://ftp.cs.cmu.edu/project/speech/dict/ 4 | https://cmusphinx.svn.sourceforge.net/svnroot/cmusphinx/trunk/cmudict/cmudict.0.7a 5 | 6 | Copyright (C) 1993-2008 Carnegie Mellon University. All rights reserved. 7 | 8 | File Format: Each line consists of an uppercased word, 9 | a counter (for alternative pronunciations), and a transcription. 10 | Vowels are marked for stress (1=primary, 2=secondary, 0=no stress). 11 | E.g.: NATURAL 1 N AE1 CH ER0 AH0 L 12 | 13 | The dictionary contains 127069 entries. Of these, 119400 words are assigned 14 | a unique pronunciation, 6830 words have two pronunciations, and 839 words have 15 | three or more pronunciations. Many of these are fast-speech variants. 16 | 17 | Phonemes: There are 39 phonemes, as shown below: 18 | 19 | Phoneme Example Translation Phoneme Example Translation 20 | ------- ------- ----------- ------- ------- ----------- 21 | AA odd AA D AE at AE T 22 | AH hut HH AH T AO ought AO T 23 | AW cow K AW AY hide HH AY D 24 | B be B IY CH cheese CH IY Z 25 | D dee D IY DH thee DH IY 26 | EH Ed EH D ER hurt HH ER T 27 | EY ate EY T F fee F IY 28 | G green G R IY N HH he HH IY 29 | IH it IH T IY eat IY T 30 | JH gee JH IY K key K IY 31 | L lee L IY M me M IY 32 | N knee N IY NG ping P IH NG 33 | OW oat OW T OY toy T OY 34 | P pee P IY R read R IY D 35 | S sea S IY SH she SH IY 36 | T tea T IY TH theta TH EY T AH 37 | UH hood HH UH D UW two T UW 38 | V vee V IY W we W IY 39 | Y yield Y IY L D Z zee Z IY 40 | ZH seizure S IY ZH ER 41 | 42 | (For NLTK, entries have been sorted so that, e.g. FIRE 1 and FIRE 2 43 | are contiguous, and not separated by FIRE'S 1.) 44 | 45 | Redistribution and use in source and binary forms, with or without 46 | modification, are permitted provided that the following conditions 47 | are met: 48 | 49 | 1. Redistributions of source code must retain the above copyright 50 | notice, this list of conditions and the following disclaimer. 51 | The contents of this file are deemed to be source code. 52 | 53 | 2. Redistributions in binary form must reproduce the above copyright 54 | notice, this list of conditions and the following disclaimer in 55 | the documentation and/or other materials provided with the 56 | distribution. 57 | 58 | This work was supported in part by funding from the Defense Advanced 59 | Research Projects Agency, the Office of Naval Research and the National 60 | Science Foundation of the United States of America, and by member 61 | companies of the Carnegie Mellon Sphinx Speech Consortium. We acknowledge 62 | the contributions of many volunteers to the expansion and improvement of 63 | this dictionary. 64 | 65 | THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 66 | ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 67 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 68 | PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 69 | NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 70 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 71 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 72 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 73 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 74 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 75 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 76 | 77 | -------------------------------------------------------------------------------- /nltk_data/taggers/averaged_perceptron_tagger/averaged_perceptron_tagger.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/taggers/averaged_perceptron_tagger/averaged_perceptron_tagger.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/czech.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/czech.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/danish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/danish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/dutch.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/dutch.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/english.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/english.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/estonian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/estonian.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/finnish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/finnish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/french.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/french.pickle 
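As an aside on the cmudict entry format documented in the README above (an uppercased word, a counter for alternative pronunciations, and space-separated phonemes with stress digits on the vowels), a minimal parser could look like the following sketch; the sample entry reuses the README's own example, and the function name is invented here:

```
def parse_cmudict_line(line):
    # Format per the README: WORD COUNTER PHONEME...,
    # e.g. "NATURAL 1 N AE1 CH ER0 AH0 L".
    word, counter, *phones = line.split()
    return word, int(counter), phones

word, variant, phones = parse_cmudict_line("NATURAL 1 N AE1 CH ER0 AH0 L")
# Stress digits ride on the vowels: AE1 = primary, ER0/AH0 = unstressed.
stresses = [p[-1] for p in phones if p[-1].isdigit()]
assert word == "NATURAL" and stresses == ["1", "0", "0"]
```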
-------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/german.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/german.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/greek.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/greek.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/italian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/italian.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/malayalam.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/malayalam.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/norwegian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/norwegian.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/polish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/polish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/portuguese.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/portuguese.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/russian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/russian.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/slovene.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/slovene.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/spanish.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/spanish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/swedish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/swedish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/turkish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/turkish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/czech.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/czech.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/danish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/danish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/dutch.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/dutch.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/estonian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/estonian.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/finnish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/finnish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/french.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/french.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/german.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/german.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/italian.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/italian.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/malayalam.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/malayalam.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/norwegian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/norwegian.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/polish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/polish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/portuguese.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/portuguese.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/russian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/russian.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/slovene.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/slovene.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/spanish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/spanish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/swedish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/swedish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/turkish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/turkish.pickle -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch<=2.0.1 2 | fschat==0.2.33 3 | 
nltk~=3.8.1 4 | uvicorn~=0.23.1 5 | starlette~=0.27.0 6 | # pydantic<=1.10.14 7 | unstructured[all-docs] 8 | pypdf 9 | duckduckgo-search 10 | pysocks 11 | accelerate 12 | websockets 13 | fake_useragent 14 | selenium 15 | jsonref 16 | 17 | # uncomment libs if you want to use corresponding vector store 18 | # pymilvus==2.1.3 # requires milvus==2.1.3 19 | # psycopg2 20 | # pgvector 21 | 22 | streamlit 23 | streamlit_option_menu 24 | streamlit-chatbox 25 | streamlit-aggrid 26 | # streamlit-antd-components>=0.1.11 27 | httpx 28 | tenacity<8.4.0 29 | 30 | codefuse-muagent 31 | # qwen model 32 | # protobuf==3.20.* 33 | transformers_stream_generator 34 | einops 35 | optimum 36 | # auto-gptq 37 | # modelscope 38 | 39 | # vllm model 40 | # vllm; sys_platform == "linux" 41 | 42 | # chatglm 43 | sentencepiece -------------------------------------------------------------------------------- /sources/docs/python_langchain_com_docs_get_started_introduction_text.jsonl: -------------------------------------------------------------------------------- 1 | {"url": "https://python.langchain.com/docs/get_started/introduction", "host_url": "https://python.langchain.com", "title": "Introduction | 🦜️🔗 Langchain", "all_text": "\n\nIntroduction | 🦜️🔗 Langchain\n\nSkip to main content🦜️🔗 LangChainDocsUse casesIntegrationsAPICommunityChat our docsLangSmithJS/TS DocsSearchCTRLKGet startedIntroductionInstallationQuickstartLangChain Expression LanguageInterfaceHow toCookbookLangChain Expression Language (LCEL)ModulesModel I/​ORetrievalChainsMemoryAgentsCallbacksModulesGuidesMoreGet startedIntroductionOn this pageIntroductionLangChain is a framework for developing applications powered by language models. It enables applications that:Are context-aware: connect a language model to sources of context (prompt instructions, few shot examples, content to ground its response in, etc.)Reason: rely on a language model to reason (about how to answer based on provided context, what actions to take, etc.)The main value props of LangChain are:Components: abstractions for working with language models, along with a collection of implementations for each abstraction. Components are modular and easy-to-use, whether you are using the rest of the LangChain framework or notOff-the-shelf chains: a structured assembly of components for accomplishing specific higher-level tasksOff-the-shelf chains make it easy to get started. For complex applications, components make it easy to customize existing chains and build new ones.Get started​Here’s how to install LangChain, set up your environment, and start building.We recommend following our Quickstart guide to familiarize yourself with the framework by building your first LangChain application.Note: These docs are for the LangChain Python package. 
For documentation on LangChain.js, the JS/TS version, head here.Modules​LangChain provides standard, extendable interfaces and external integrations for the following modules, listed from least to most complex:Model I/O​Interface with language modelsRetrieval​Interface with application-specific dataChains​Construct sequences of callsAgents​Let chains choose which tools to use given high-level directivesMemory​Persist application state between runs of a chainCallbacks​Log and stream intermediate steps of any chainExamples, ecosystem, and resources​Use cases​Walkthroughs and best-practices for common end-to-end use cases, like:Document question answeringChatbotsAnalyzing structured dataand much more...Guides​Learn best practices for developing with LangChain.Ecosystem​LangChain is part of a rich ecosystem of tools that integrate with our framework and build on top of it. Check out our growing list of integrations and dependent repos.Additional resources​Our community is full of prolific developers, creative builders, and fantastic teachers. Check out YouTube tutorials for great tutorials from folks in the community, and Gallery for a list of awesome LangChain projects, compiled by the folks at KyroLabs.Community​Head to the Community navigator to find places to ask questions, share feedback, meet other developers, and dream about the future of LLM’s.API reference​Head to the reference section for full documentation of all classes and methods in the LangChain Python package.PreviousGet startedNextInstallationGet startedModulesExamples, ecosystem, and resourcesUse casesGuidesEcosystemAdditional resourcesCommunityAPI referenceCommunityDiscordTwitterGitHubPythonJS/TSMoreHomepageBlogCopyright © 2023 LangChain, Inc.\n\n"} 2 | -------------------------------------------------------------------------------- /sources/docs_imgs/BaseAgent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/BaseAgent.png -------------------------------------------------------------------------------- /sources/docs_imgs/agent-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/agent-flow.png -------------------------------------------------------------------------------- /sources/docs_imgs/devops-chatbot-module-v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/devops-chatbot-module-v2.png -------------------------------------------------------------------------------- /sources/docs_imgs/devops-chatbot-module.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/devops-chatbot-module.png -------------------------------------------------------------------------------- /sources/docs_imgs/devopsgpt_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/devopsgpt_example.png 
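As a brief aside on the sources/docs records shown above: each *.jsonl file stores one JSON object per line with url, host_url, title, and all_text fields. A minimal reader might look like this sketch (iter_crawled_docs is not a function from this repo):

```
import json

def iter_crawled_docs(path):
    # One JSON object per line; fields observed in the sample record above:
    # url, host_url, title, all_text.
    with open(path, encoding="utf-8") as f:
        for line in f:
            if line.strip():
                yield json.loads(line)

# Example, using a path from the repo layout:
# for doc in iter_crawled_docs("sources/docs/python_langchain_com_docs_get_started_introduction_text.jsonl"):
#     print(doc["title"], len(doc["all_text"]))
```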
-------------------------------------------------------------------------------- /sources/docs_imgs/devopsgpt_example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/devopsgpt_example2.png -------------------------------------------------------------------------------- /sources/docs_imgs/luban.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/luban.png -------------------------------------------------------------------------------- /sources/docs_imgs/objective.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/objective.png -------------------------------------------------------------------------------- /sources/docs_imgs/objective_v4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/objective_v4.png -------------------------------------------------------------------------------- /sources/docs_imgs/roadmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/roadmap.png -------------------------------------------------------------------------------- /sources/docs_imgs/roadmap2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/roadmap2.png -------------------------------------------------------------------------------- /sources/docs_imgs/webui_config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/webui_config.png -------------------------------------------------------------------------------- /sources/docs_imgs/wechat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/wechat.png -------------------------------------------------------------------------------- /sources/imgs/devops-chatbot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/imgs/devops-chatbot.png -------------------------------------------------------------------------------- /sources/imgs/devops-chatbot2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/imgs/devops-chatbot2.png -------------------------------------------------------------------------------- /sources/imgs/docker_logs.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/imgs/docker_logs.png -------------------------------------------------------------------------------- /sources/imgs/fastapi_docs_020_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/imgs/fastapi_docs_020_0.png -------------------------------------------------------------------------------- /sources/readme_docs/coagent/agent-flow-en.md: -------------------------------------------------------------------------------- 1 | 2 | ## Attention 3 | Attention: The overall content is not yet complete, and further refinements to the flow and other Agent diagrams will be made in the future. 4 | 5 | ## Introduction to Core Connectors 6 | To facilitate everyone's understanding of the entire CoAgent link, we use a Flow format to explain in detail how to build it through configuration. 7 | 8 |
9 | [figure: CoAgent flow diagram] 10 |
11 | 12 | 13 |
Below, we first introduce the related core components.
14 | 15 | ### Agent 16 | At the Agent design level, we provide four basic Agent types; by giving them basic role settings, they can cover the interaction patterns of a variety of common scenarios. 17 | 1. BaseAgent: Provides basic question answering, tool usage, and code execution. It implements Input => Output according to the Prompt format. 18 | 19 |
20 | [figure: BaseAgent diagram] 21 |
22 | 23 | 2. ExecutorAgent: Executes the tasks in a task list in order, following the plan arranged by the User or the previous Agent, and completes the related tasks. 24 | 3. ReactAgent: Provides standard React functionality, carrying out the current task based on the question at hand. 25 | 4. SelectorAgent: Provides the functionality of choosing an Agent. 26 | 27 | It selects the appropriate Agent to respond based on the question from the User or the previous Agent. After output, the message is pushed into the memory pool, which is subsequently managed by the Memory Manager. 28 | 29 | ### Chain 30 | Basic Chain: BaseChain, which connects the interaction of agents and manages the related messages and memory. 31 | 32 | ### Phase 33 | Basic Phase: BasePhase, which connects the interaction of chains and manages the related messages and memory. 34 | 35 | ### Prompt Manager 36 | Creation of prompts for each agent in a Multi-Agent link: 37 | 38 | - By simply setting prompt_input_keys and prompt_output_keys, one can reuse the preset Prompt Context creation logic, thus achieving rapid configuration of the agent prompt. 39 | - The prompt manager module can also be redesigned with new key-context designs to implement a personalized Agent Prompt. 40 | 41 | ### Memory Manager 42 | Mainly used for the management of chat history; not yet complete: 43 | 44 | - Manages the reading and writing of chat history in the database, including user input, llm output, doc retrieval, code retrieval, search retrieval. 45 | - Summarizes key information from the chat history to form a summary context, which serves as prompt context. 46 | - Provides a retrieval function to fetch information related to the question from the chat history or the summary context, aiding question answering. 47 | -------------------------------------------------------------------------------- /sources/readme_docs/coagent/agent-flow.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## 注意 4 | 注意:整体内容未完善,后续还会完善flow和其它Agent的图例 5 | 6 | ## 核心Connector介绍 7 | 为了便于大家理解整个 CoAgent 的链路,我们采取 Flow 的形式来详细介绍如何通过配置构建 8 | 9 |
10 | [图示:CoAgent 流程图] 11 |
12 | 13 | 14 |
下面,我们先介绍相关的核心组件
15 | 16 | ### Agent 17 | 在Agent设计层面,我们提供了四种基本的Agent类型,对这些Agent进行Role的基础设定,可满足多种通用场景的交互和使用 18 | 1. BaseAgent:提供基础问答、工具使用、代码执行的功能,根据Prompt格式实现 输入 => 输出 19 | 20 |
21 | [图示:BaseAgent 图例] 22 |
23 | 24 | 2. ExecutorAgent:对任务清单进行顺序执行,根据 User 或 上一个Agent编排的计划,完成相关任务 25 | 3. ReactAgent:提供标准React的功能,根据问题实现当前任务 26 | 4. SelectorAgent:提供选择Agent的功能,根据User 或 上一个 Agent的问题选择合适的Agent来进行回答. 27 | 28 | 输出后将 message push 到 memory pool 之中,后续通过Memory Manager进行管理 29 | 30 | ### Chain 31 | 基础链路:BaseChain,串联agent的交互,完成相关message和memory的管理 32 | 33 | ### Phase 34 | 基础场景:BasePhase,串联chain的交互,完成相关message和memory的管理 35 | 36 | ### Prompt Manager 37 | Mutli-Agent链路中每一个agent的prompt创建 38 | - 通过对promtp_input_keys和promtp_output_keys对的简单设定,可以沿用预设 Prompt Context 创建逻辑,从而实现agent prompt快速配置 39 | - 也可以对prompt manager模块进行新的 key-context 设计,实现个性化的 Agent Prompt 40 | 41 | ### Memory Manager 42 | 主要用于 chat history 的管理,暂未完成 43 | - 将chat history在数据库进行读写管理,包括user input、 llm output、doc retrieval、code retrieval、search retrieval 44 | - 对 chat history 进行关键信息总结 summary context,作为 prompt context 45 | - 提供检索功能,检索 chat history 或者 summary context 中与问题相关信息,辅助问答 46 | -------------------------------------------------------------------------------- /sources/readme_docs/coagent/coagent-en.md: -------------------------------------------------------------------------------- 1 | 2 | ## 简介 3 | To enhance the performance of large language models (LLMs) in terms of inference accuracy, the industry has seen various innovative approaches to utilizing LLMs. From the earliest Chain of Thought (CoT), Text of Thought (ToT), to Graph of Thought (GoT), these methods have continually expanded the capability boundaries of LLMs. In dealing with complex problems, we can use the ReAct process to select, invoke, and execute tool feedback, achieving multi-round tool usage and multi-step execution. 4 | 5 | However, for more complex scenarios, such as the development of intricate code, single-function LLM Agents are clearly insufficient. Thus, the community has begun to develop combinations of multiple Agents, such as projects focused on metaGPT, GPT-Engineer, chatDev in the development domain, and AutoGen projects focused on automating the construction of Agents and Agent dialogue. 6 | 7 | After in-depth analysis of these frameworks, it has been found that most Agent frameworks are highly coupled, with poor usability and extensibility. They achieve specific scenarios in preset environments, but expanding these scenarios is fraught with difficulty. 8 | 9 | Therefore, we aim to build an extensible, user-friendly Multi-Agent framework to support ChatBots in retrieving knowledge base information while assisting with various common tasks such as daily office work, data analysis, and development operations. 10 | 11 | This Multi-Agent framework project incorporates excellent design elements from multiple frameworks, such as the message pool from metaGPT and the agent selector from autogen. 12 | 13 |
14 | [figure: Multi-Agent framework overview] 15 |
16 | 17 | The following modules introduce the necessary components of the Multi-Agent framework from five aspects: 18 | 19 | - **Agent Communication:** In the Multi-Agent framework, ensuring effective information exchange among Agents is crucial for managing context and improving Q&A efficiency. 20 | - Follow a straightforward and intuitive chain-based dialogue principle, arranging Agents in a linear fashion to form an execution chain. 21 | - Drawing from the Message Pool framework in metaGPT, Agents are allowed to push to and subscribe to the Message Pool, making the chain more flexible. This benefits fine-grained Prompt engineering, but it makes the relationships in complex chains harder to analyze (a toy sketch of the push/subscribe idea follows below). 22 | 23 | - **Standard Operation Process (SOP)**: Standardizing the parsing and handling of the LLM's generated results. 24 | - Define the input and output scope of an Agent, assembling and parsing the relevant Actions and Statuses to ensure the stability of the framework. 25 | - Encapsulate a variety of fundamental Action execution modules, such as Tool Using, Planning, Coding, Direct Answering, final answer, etc., to meet the basic work requirements of an Agent. 26 | 27 | - **Plan and Executor**: Enhance the LLM's tool usage, Agent scheduling, and code generation. Several basic chains have been set up, for example: 28 | - a. Single-round Q&A, which can also be expanded to forms like CoT, ToT, GoT, etc. 29 | - b. ReAct, a basic response decision-making process in which the model sets an SOP status to terminate the loop. 30 | - c. Task Planning - Executor, which ends once the planned tasks are completed. 31 | - **Long-short term memory Management**: The key difference between Multi-Agent and single-Agent setups is that Multi-Agent needs to handle a large amount of communication information, much like human teamwork. Add an Agent specifically responsible for content summarization (similar to a meeting assistant) to summarize long-term memories and pass more effective information to the next Agent, rather than passing all content along. 32 | - **Human-agent interaction**: Complex scenarios require human intervention in the Agent interaction process to provide feedback. Through the aforementioned Long-short term memory Management and Agent Communication processes, the LLM can accurately understand human intentions and thereby complete tasks more effectively. 33 | 34 | In summary, these five elements together construct a Multi-Agent framework, ensuring closer and more efficient cooperation between Agents while adapting to more complex task requirements and a wider variety of interaction scenarios. Multiple Agent chains can be combined to implement a complete, complex project launch scenario (Dev Phase), such as Demand Chain (CEO), Product Argument Chain (CPO, CFO, CTO), Engineer Group Chain (Selector, Developer1~N), QA Engineer Chain (Developer, Tester), Deploy Chain (Developer, Deployer).
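As a rough illustration of the push/subscribe communication described in the Agent Communication bullet above, a minimal message pool might look like the following. This is a toy sketch, not the framework's actual API; the class and method names (MessagePool, push, subscribe) are invented for illustration.

```
from collections import defaultdict

class MessagePool:
    """Toy push/subscribe pool in the spirit of the metaGPT-style
    Message Pool described above; the real framework's API differs."""
    def __init__(self):
        self.messages = []                    # full history, in push order
        self.subscribers = defaultdict(list)  # sender role -> callbacks

    def subscribe(self, sender, callback):
        # An agent registers interest in messages from a given sender.
        self.subscribers[sender].append(callback)

    def push(self, sender, content):
        msg = {"sender": sender, "content": content}
        self.messages.append(msg)
        for callback in self.subscribers[sender]:
            callback(msg)

pool = MessagePool()
pool.subscribe("planner", lambda m: print("executor saw:", m["content"]))
pool.push("planner", "step 1: load employee_data.csv")
```

Decoupling senders from receivers this way is what lets a chain be rearranged without rewriting each Agent, at the cost of making the overall message flow harder to trace.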
35 | 36 | ## 模块分类 37 | - [connector](/sources/readme_docs/coagent/connector/connector_agent.md) 38 | - document_loaders 39 | - embeddings 40 | - llm_models 41 | - orm 42 | - sandbox 43 | - service 44 | - text_splitter 45 | - tools 46 | - utils 47 | 48 | -------------------------------------------------------------------------------- /sources/readme_docs/coagent/coagent.md: -------------------------------------------------------------------------------- 1 | 2 | ## 📜 目录 3 | - [简介](#简介) 4 | - [模块分类](#模块分类) 5 | 6 | 7 | ## 简介 8 | 9 | 为了提高大型模型在推理准确性方面的表现,业界出现了多种创新的大型语言模型(LLM)玩法。从最早的CoT、ToT到GoT,这些方法不断拓展了LLM的能力边界。在处理复杂问题时,我们可以通过ReAct过程来选择、调用和执行工具反馈,同时实现多轮工具使用和多步骤执行。 10 | 11 | 但对于更复杂的场景,例如复杂代码的开发,单一功能的LLM Agent显然难以胜任。因此,社区开始发展出多Agent的组合玩法,比如专注于metaGPT、GPT-Engineer、chatDev等开发领域的项目,以及专注于自动化构建Agent和Agent对话的AutoGen项目。 12 | 13 | 经过对这些框架的深入分析,发现大多数的Agent框架整体耦合度较高,其易用性和可扩展性较差。在预设场景中实现特定场景,但想要进行场景扩展却困难重重。 14 | 15 | 因此,我们希望构建一个可扩展、易于使用的Multi-Agent框架,以支持ChatBot在获取知识库信息的同时,能够辅助完成日常办公、数据分析、开发运维等各种通用任务。 16 | 17 | 本项目的Mutli-Agent框架汲取兼容了多个框架的优秀设计,比如metaGPT中的消息池(message pool)、autogen中的代理选择器(agent selector)等。 18 | 19 |
20 | [图示:Multi-Agent 框架图] 21 |
22 | 23 | 以下模块将从5个方面介绍Multi Agent框架所需要素: 24 | - Agent Communication在Multi Agent框架中,确保Agent可以有效地进行信息交流对于管理上下文以及提高问答效率至关重要。 25 | a. 遵循简洁直观易于理解的链式对话原则,将Agent以线性方式排列串连成一个执行链路。 26 | b. 借鉴metaGPT中的Message Pool框架,允许Agent对Message Pool进行推送和订阅,使链路更加灵活。有利于精细化Prompt工程的场景,但难以把握复杂链路的关系分析。 27 | - Standard Operation Process(SOP):对LLM的生成结果进行标准化解析和处理。 28 | a. 定义Agent的 Input 和 Output 范围,能够组装和解析相关Action和Status,保证框架运行的稳定性 29 | b. 封装多种基础Action执行模块,如Tool Using、Planning、Coding、Direct Answering、final answer等SOP标识,以满足Agent的基本工作需求。 30 | - Plan and Executor:增加LLM的Tool使用、Agent调度、代码的生成。设置了几种基本链路,例如: 31 | a. 单轮问答,也可以扩展到CoT、ToT、GoT等形式。 32 | b. ReAct,基础的响应决策过程,模型设置SOP 状态以终止循环 33 | c. TaskPlaning - Executor,任务完成即可结束 34 | - Long-short term memory Management:Multi-Agent与单Agent的关键区别在于,Multi-Agent需要处理大量的交流信息,类似人类团队协作的过程。增加一个专门负责内容总结(类似于会议助理)的Agent,对长期记忆进行总结并提更有效信息传递给下一位Agent,而非传递所有内容给下一位Agent。 35 | - Human-agent interaction:面对复杂场景时,需要人类介入Agent交互过程并提供反馈。通过上述 Long-short term memory Management 和 Agent Communication 过程,使LLM能准确理解人类的意图,从而更有效地完成任务。 36 | 37 | 总的来说,这五个要素共同构建了一个Multi Agent框架,确保Agent之间的协作更加紧密和高效,同时也能够适应更复杂的任务需求和更多样的交互场景。通过组合多个Agent链路来实现一个完整且复杂的项目上线场景(Dev Phase),如Demand Chain(CEO)、Product Arguement Chain(CPO、CFO、CTO)、Engineer Group Chain(Selector、Developer1~N)、QA Engineer Chain(Developer、Tester)、Deploy Chain(Developer、Deploer)。 38 | 39 | 40 | ## 模块分类 41 | - [connector](/sources/readme_docs/coagent/connector/connector_agent.md) 42 | - document_loaders 43 | - embeddings 44 | - llm_models 45 | - orm 46 | - sandbox 47 | - service 48 | - text_splitter 49 | - tools 50 | - utils 51 | -------------------------------------------------------------------------------- /sources/readme_docs/coagent/connector/connector_agent.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Connector Agent 3 | slug: Connector Agent ZH 4 | url: "coagent/connector-agent-zh" 5 | aliases: 6 | - "/coagent/connector-agent-zh" 7 | --- 8 | 9 | 10 | ## 快速构建一个Agent 11 | - 首先增加openai配置,也可以是其它类似于openai接口的模型(通过fastchat启动) 12 | ``` 13 | from coagent.base_configs.env_config import JUPYTER_WORK_PATH, KB_ROOT_PATH 14 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 15 | from coagent.connector.configs import AGETN_CONFIGS 16 | from coagent.connector.agents import BaseAgent 17 | from coagent.connector.schema import Message, load_role_configs 18 | 19 | 20 | os.environ["API_BASE_URL"] = OPENAI_API_BASE 21 | os.environ["OPENAI_API_KEY"] = "sk-xx" 22 | openai.api_key = "sk-xxx" 23 | # os.environ["OPENAI_PROXY"] = "socks5h://127.0.0.1:13659" 24 | os.environ["DUCKDUCKGO_PROXY"] = os.environ.get("DUCKDUCKGO_PROXY") or "socks5://127.0.0.1:13659" 25 | ``` 26 | 27 | 28 | - 配置相关 LLM 和 Embedding Model 29 | ``` 30 | # LLM 和 Embedding Model 配置 31 | llm_config = LLMConfig( 32 | model_name="gpt-3.5-turbo", model_device="cpu",api_key=os.environ["OPENAI_API_KEY"], 33 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 34 | ) 35 | embed_config = EmbedConfig( 36 | embed_engine="model", embed_model="text2vec-base-chinese", 37 | embed_model_path="D://project/gitlab/llm/external/ant_code/Codefuse-chatbot/embedding_models/text2vec-base-chinese" 38 | ) 39 | ``` 40 | 41 | - 这里从已有的agent配置选一个role来做示例 42 | ``` 43 | # 从已有的配置中选择一个config,具体参数细节见下面 44 | role_configs = load_role_configs(AGETN_CONFIGS) 45 | agent_config = role_configs["general_planner"] 46 | # 生成agent实例 47 | base_agent = BaseAgent( 48 | role=agent_config.role, 49 | prompt_config = agent_config.prompt_config, 50 | 
prompt_manager_type=agent_config.prompt_manager_type, 51 | chat_turn=agent_config.chat_turn, 52 | focus_agents=[], 53 | focus_message_keys=[], 54 | llm_config=llm_config, 55 | embed_config=embed_config, 56 | jupyter_work_path=JUPYTER_WORK_PATH, 57 | kb_root_path=KB_ROOT_PATH, 58 | ) 59 | # round-1 60 | query_content = "确认本地是否存在employee_data.csv,并查看它有哪些列和数据类型;然后画柱状图" 61 | query = Message( 62 | role_name="human", role_type="user", 63 | role_content=query_content, input_query=query_content, origin_query=query_content, 64 | ) 65 | 66 | output_message = base_agent.step(query) 67 | print(output_message.to_str_content(content_key="parsed_output_list")) 68 | ``` 69 | 70 | ## Agent 参数配置 71 | ``` 72 | # 配置结构在这个目录 73 | from coagent.connector.schema import Role, PromptField 74 | ``` 75 | 76 | 77 | ### Agent Config 78 | |Config Key Name| Type| Description| 79 | | ------------------ | ---------- | ---------- | 80 | |role| Role |角色描述| 81 | |prompt_config |List[PromptField] |Enum:PromptManager 也可以继承以上几种Agent然后去构造相关的Agent| 82 | |prompt_manager_type |String |Enum:PromptManager 也可以继承以上几种Agent然后去构造自定义的Enum:PromptManager| 83 | |focus_agents |List[String] |metagpt的逻辑,关注哪些agent生成的message,可选值范围为:role_name 84 | |focus_message_keys |List[String]| 额外增加的逻辑,关注message里面具体的 key 信息可选值范围为:agent 的 output_keys| 85 | |chat_turn |int |只针对ReactAgent有效| 86 | |llm_config |LLMConfig |大语言模型配置| 87 | |embed_config |EmbedConfig |向量模型配置| 88 | |sandbox_server |Dict |沙盒环境即notebook启动配置| 89 | |jupyter_work_path |str |沙盒环境的工作目录| 90 | |kb_root_path |str |memory的存储路径| 91 | |log_verbose |str |agent prompt&predict的日志打印级别| 92 | 93 | ### Role 94 | 95 | | Config Key Name | Type | Description | 96 | |------------------|------|--------------------| 97 | | role_type | str | 角色类型, Enum: system、user、assistant、function、observation、summary | 98 | | role_name | str | 角色名称 | 99 | | role_desc | str | 角色描述 | 100 | | agent_type | str | 代理类型 | 101 | | role_prompt | str | 角色提示 | 102 | | template_prompt | str | 模板提示 | 103 | 104 | 105 | ### PromptField 106 | 107 | | Config Key Name | Type | Description | 108 | |-----------------|------|-------------| 109 | | field_name | str | | 110 | | function_name | str | | 111 | | title | str | | 112 | | description | str | | 113 | | is_context | bool | | 114 | | omit_if_empty | bool | | -------------------------------------------------------------------------------- /sources/readme_docs/coagent/connector/connector_chain.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Connector Chain 3 | slug: Connector Chain ZH 4 | url: "coagent/connector-chain-zh" 5 | aliases: 6 | - "/coagent/connector-chain-zh" 7 | --- 8 | 9 | ## 快速构建一个 agent chain 10 | - 首先增加openai配置,也可以是其它类似于openai接口的模型(通过fastchat启动) 11 | ``` 12 | # 设置openai的api-key 13 | import os, sys 14 | import openai 15 | import importlib 16 | 17 | os.environ["API_BASE_URL"] = OPENAI_API_BASE 18 | os.environ["OPENAI_API_KEY"] = "sk-xxxx" 19 | openai.api_key = "sk-xxxx" 20 | # os.environ["OPENAI_PROXY"] = "socks5h://127.0.0.1:13659" 21 | os.environ["DUCKDUCKGO_PROXY"] = os.environ.get("DUCKDUCKGO_PROXY") or "socks5://127.0.0.1:13659" 22 | ``` 23 | 24 | - 配置相关 LLM 和 Embedding Model 25 | ``` 26 | # LLM 和 Embedding Model 配置 27 | llm_config = LLMConfig( 28 | model_name="gpt-3.5-turbo", model_device="cpu",api_key=os.environ["OPENAI_API_KEY"], 29 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 30 | ) 31 | embed_config = EmbedConfig( 32 | embed_engine="model", embed_model="text2vec-base-chinese", 33 | 
embed_model_path="D://project/gitlab/llm/external/ant_code/Codefuse-chatbot/embedding_models/text2vec-base-chinese" 34 | ) 35 | ``` 36 | 37 | 38 | - 这里从已有的agent配置选多个role组合成 agent chain 39 | ``` 40 | from coagent.base_configs.env_config import JUPYTER_WORK_PATH, KB_ROOT_PATH 41 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 42 | from coagent.connector.configs import AGETN_CONFIGS 43 | from coagent.connector.chains import BaseChain 44 | from coagent.connector.schema import Message, load_role_configs 45 | 46 | # 构建 agent chain 链路 47 | role_configs = load_role_configs(AGETN_CONFIGS) 48 | agent_config = role_configs["general_planner"] 49 | role1 = role_configs["general_planner"] 50 | role2 = role_configs["executor"] 51 | agent_module = importlib.import_module("examples.connector.agents") 52 | agents = [ 53 | getattr(agent_module, role1.role.agent_type)( 54 | role=role1.role, 55 | prompt_config = role1.prompt_config, 56 | prompt_manager_type=role1.prompt_manager_type, 57 | chat_turn=role1.chat_turn, 58 | focus_agents=role1.focus_agents, 59 | focus_message_keys=role1.focus_message_keys, 60 | llm_config=llm_config, 61 | embed_config=embed_config, 62 | jupyter_work_path=JUPYTER_WORK_PATH, 63 | kb_root_path=KB_ROOT_PATH, 64 | ), 65 | getattr(agent_module, role2.role.agent_type)( 66 | role=role2.role, 67 | prompt_config = role2.prompt_config, 68 | prompt_manager_type=role2.prompt_manager_type, 69 | chat_turn=role2.chat_turn, 70 | focus_agents=role2.focus_agents, 71 | focus_message_keys=role2.focus_message_keys, 72 | llm_config=llm_config, 73 | embed_config=embed_config, 74 | jupyter_work_path=JUPYTER_WORK_PATH, 75 | kb_root_path=KB_ROOT_PATH, 76 | ), 77 | ] 78 | 79 | chain = BaseChain( 80 | agents, 81 | chat_turn=1, 82 | jupyter_work_path=JUPYTER_WORK_PATH, 83 | kb_root_path=KB_ROOT_PATH, 84 | llm_config=llm_config, 85 | embed_config=embed_config, 86 | ) 87 | ``` 88 | 89 | 90 | - 开始执行 91 | ``` 92 | # round-1 93 | query_content = "确认本地是否存在employee_data.csv,并查看它有哪些列和数据类型;然后画柱状图" 94 | query = Message( 95 | role_name="human", role_type="user", 96 | role_content=query_content, input_query=query_content, origin_query=query_content, 97 | ) 98 | 99 | output_message, output_memory = chain.step(query) 100 | print(output_memory.to_str_messages(content_key="parsed_output_list")) 101 | 102 | ``` 103 | 104 | 105 | ## Chain 参数配置 106 | |Config Key Name| Type |Description| 107 | | ------------------ | ---------- | ---------- | 108 | |agents| List[BaseAgent] | 109 | |llm_config |LLMConfig |大语言模型配置| 110 | |embed_config |EmbedConfig |向量模型配置| 111 | |sandbox_server |Dict |沙盒环境即notebook启动配置| 112 | |jupyter_work_path |str |沙盒环境的工作目录| 113 | |kb_root_path |str |memory的存储路径| 114 | |log_verbose |str |agent prompt&predict的日志打印级别| 115 | -------------------------------------------------------------------------------- /sources/readme_docs/coagent/connector/connector_memory.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Connector Memory 3 | slug: Connector Memory ZH 4 | url: "coagent/connector-memory-zh" 5 | aliases: 6 | - "/coagent/connector-memory-zh" 7 | --- 8 | 9 | 10 | ## Memory Manager 11 | 主要用于 chat history 的管理,暂未完成 12 | - 将chat history在数据库进行读写管理,包括user input、 llm output、doc retrieval、code retrieval、search retrieval 13 | - 对 chat history 进行关键信息总结 summary context,作为 prompt context 14 | - 提供检索功能,检索 chat history 或者 summary context 中与问题相关信息,辅助问答 15 | 16 | 17 | 18 | ## 使用示例 19 | 20 | ### 创建 memory manager 实例 21 | ``` 22 | import os 23 | import openai 24 | 
25 | from coagent.base_configs.env_config import KB_ROOT_PATH 26 | from coagent.connector.memory_manager import BaseMemoryManager, LocalMemoryManager 27 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 28 | from coagent.connector.schema import Message 29 | 30 | os.environ["API_BASE_URL"] = OPENAI_API_BASE 31 | os.environ["OPENAI_API_KEY"] = "sk-xx" 32 | openai.api_key = "sk-xxx" 33 | # os.environ["OPENAI_PROXY"] = "socks5h://127.0.0.1:13659" 34 | os.environ["DUCKDUCKGO_PROXY"] = os.environ.get("DUCKDUCKGO_PROXY") or "socks5://127.0.0.1:13659" 35 | 36 | # LLM 和 Embedding Model 配置 37 | llm_config = LLMConfig( 38 | model_name="gpt-3.5-turbo", model_device="cpu",api_key=os.environ["OPENAI_API_KEY"], 39 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 40 | ) 41 | embed_config = EmbedConfig( 42 | embed_engine="model", embed_model="text2vec-base-chinese", 43 | embed_model_path="D://project/gitlab/llm/external/ant_code/Codefuse-chatbot/embedding_models/text2vec-base-chinese" 44 | ) 45 | 46 | # 47 | phase_name = "test" 48 | memory_manager = LocalMemoryManager( 49 | unique_name=phase_name, 50 | do_init=True, 51 | kb_root_path = KB_ROOT_PATH, 52 | embed_config=embed_config, 53 | llm_config=llm_config 54 | ) 55 | ``` 56 | 57 | ### 支持Message管理 58 | 59 | ``` 60 | message1 = Message( 61 | role_name="test1", role_type="user", input_query="hello", origin_query="hello", 62 | parsed_output_list=[{"input": "hello"}] 63 | ) 64 | 65 | text = "hi! how can I help you?" 66 | message2 = Message( 67 | role_name="test2", role_type="assistant", input_query=text, origin_query=text, 68 | role_content=text, step_content=text, parsed_output_list=[{"answer": text}] 69 | ) 70 | 71 | text = "they say hello and hi to each other" 72 | message3 = Message( 73 | role_name="test3", role_type="summary", 74 | role_content=text, step_content=text, 75 | parsed_output_list=[{"summary": text}] 76 | ) 77 | 78 | ``` 79 | 80 | ### 支持 memory 检索 81 | ``` 82 | # embedding retrieval test 83 | text = "say hi, i want some help" 84 | print(memory_manager.router_retrieval(text=text, datetime="2024-01-08 20:22:00", n=4, top_k=5, retrieval_type= "datetime")) 85 | print(memory_manager.router_retrieval(text=text, datetime="2024-01-08 20:22:00", n=4, top_k=5, retrieval_type= "embedding")) 86 | print(memory_manager.router_retrieval(text=text, datetime="2024-01-08 20:22:00", n=4, top_k=5, retrieval_type= "text")) 87 | 88 | ``` 89 | ### 支持 memory 总结 90 | ``` 91 | # recursive_summary test 92 | print(memory_manager.recursive_summary(local_memory_manager.recall_memory.messages, split_n=1)) 93 | ``` -------------------------------------------------------------------------------- /sources/readme_docs/coagent/connector/connector_phase.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Connector Phase 3 | slug: Connector Phase ZH 4 | url: "coagent/connector-phase-zh" 5 | aliases: 6 | - "/coagent/connector-phase-zh" 7 | --- 8 | 9 | 10 | 11 | ## 快速构建一个 agent phase 12 | - 首先增加openai配置,也可以是其它类似于openai接口的模型(通过fastchat启动) 13 | ``` 14 | from coagent.base_configs.env_config import JUPYTER_WORK_PATH, KB_ROOT_PATH 15 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 16 | from coagent.connector.configs import AGETN_CONFIGS 17 | from coagent.connector.phase import BasePhase 18 | from coagent.connector.schema import Message, load_role_configs 19 | 20 | 21 | os.environ["API_BASE_URL"] = OPENAI_API_BASE 22 | os.environ["OPENAI_API_KEY"] = "sk-xx" 23 | openai.api_key = "sk-xxx" 24 
| # os.environ["OPENAI_PROXY"] = "socks5h://127.0.0.1:13659" 25 | os.environ["DUCKDUCKGO_PROXY"] = os.environ.get("DUCKDUCKGO_PROXY") or "socks5://127.0.0.1:13659" 26 | ``` 27 | 28 | 29 | - 配置相关 LLM 和 Embedding Model 30 | ``` 31 | # LLM 和 Embedding Model 配置 32 | llm_config = LLMConfig( 33 | model_name="gpt-3.5-turbo", model_device="cpu", api_key=os.environ["OPENAI_API_KEY"], 34 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 35 | ) 36 | embed_config = EmbedConfig( 37 | embed_engine="model", embed_model="text2vec-base-chinese", 38 | embed_model_path="D://project/gitlab/llm/external/ant_code/Codefuse-chatbot/embedding_models/text2vec-base-chinese" 39 | ) 40 | ``` 41 | 42 | 43 | - 这里从已有的 phase 配置中选一个 phase 来做示例 44 | ``` 45 | # log-level:打印 prompt 和 llm predict 的日志 46 | os.environ["log_verbose"] = "2" 47 | 48 | phase_name = "searchChatPhase" 49 | phase = BasePhase( 50 | phase_name, embed_config=embed_config, llm_config=llm_config, 51 | ) 52 | 53 | # round-1 54 | query_content1 = "美国当前总统是谁?" 55 | query = Message( 56 | role_name="human", role_type="user", 57 | role_content=query_content1, input_query=query_content1, origin_query=query_content1, 58 | search_engine_name="duckduckgo", score_threshold=1.0, top_k=3 59 | ) 60 | 61 | output_message, output_memory = phase.step(query) 62 | 63 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) 64 | 65 | # round-2 66 | query_content2 = "美国上一任总统是谁,两个人有什么关系没?" 67 | query = Message( 68 | role_name="human", role_type="user", 69 | role_content=query_content2, input_query=query_content2, origin_query=query_content2, 70 | search_engine_name="duckduckgo", score_threshold=1.0, top_k=3 71 | ) 72 | output_message, output_memory = phase.step(query) 73 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) 74 | ``` 75 | 76 | 77 | 78 | ## Phase 参数配置 79 | |Config Key Name |Type |Description| 80 | | ------------------ | ---------- | ---------- | 81 | |phase_name| String| 场景名称| 82 | |phase_config|CompletePhaseConfig| 默认为 None,可直接指定完整的 phase config(暂未实现)| 83 | |llm_config |LLMConfig |大语言模型配置| 84 | |embed_config |EmbedConfig |向量模型配置| 85 | |sandbox_server |Dict |沙盒环境(即 notebook)的启动配置| 86 | |jupyter_work_path |str |沙盒环境的工作目录| 87 | |kb_root_path |str |memory 的存储路径| 88 | |log_verbose |str |agent prompt&predict 的日志打印级别| 89 | | base_phase_config | Union[dict, str] | 默认配置:PHASE_CONFIGS,可通过对该变量新增配置来实现自定义 | 90 | | base_chain_config | Union[dict, str] | 默认配置:CHAIN_CONFIGS,可通过对该变量新增配置来实现自定义 | 91 | | base_role_config | Union[dict, str] | 默认配置:AGETN_CONFIGS,可通过对该变量新增配置来实现自定义 | 92 | -------------------------------------------------------------------------------- /sources/readme_docs/contribution/contribute_guide.md: -------------------------------------------------------------------------------- 1 | 非常感谢您对 Codefuse 项目感兴趣,我们非常欢迎您对 Codefuse 项目的各种建议、意见(包括批评)、评论和贡献。 2 | 3 | 您对 Codefuse 的各种建议、意见、评论可以直接通过 GitHub 的 Issues 提出。 4 | 5 | 参与 Codefuse 项目并为其作出贡献的方法有很多:代码实现、测试编写、流程工具改进、文档完善等等。任何贡献我们都会非常欢迎,并将您加入贡献者列表。 6 | 7 | 进一步,有了足够的贡献后,您还可以有机会成为 Codefuse 的 Committer。 8 | 9 | 任何问题,您都可以联系我们得到及时解答,联系方式包括微信、Gitter(即时聊天工具)、邮件等等。 10 | 11 | 12 | ## 初次接触 13 | 初次来到 Codefuse 社区,您可以: 14 | 15 | - 关注 Codefuse Github 代码库 16 | - 加入 Codefuse 相关的微信群,随时提问; 17 | 通过以上方式及时了解 Codefuse 项目的开发动态并为您关注的话题发表意见。 18 | 19 | 20 | ## 贡献方式 21 | 这份贡献指南并不仅仅关于编写代码。我们重视并感激在各个领域的帮助。以下是一些您可以贡献的方式: 22 | - 文档 23 | - Issue 24 | - PR 25 | 26 | ### 改进文档 27 | 文档是您了解 Codefuse 的最主要的方式,也是我们最需要帮助的地方!
28 | 29 | 浏览文档,可以加深您对 Codefuse 的了解,也可以帮助您理解 Codefuse 的功能和技术细节,如果您发现文档有问题,请及时联系我们; 30 | 31 | 如果您对改进文档的质量感兴趣,不论是修订一个页面的地址、更正一个链接,还是写一篇更优秀的入门文档,我们都非常欢迎! 32 | 33 | 我们的文档大多数是使用 markdown 格式编写的,您可以直接通过在 GitHub 中的 docs/ 中修改并提交文档变更。如果提交代码变更,可以参阅 Pull Request。 34 | 35 | ### 如果发现了一个 Bug 或问题 36 | 如果发现了一个 Bug 或问题,您可以直接通过 GitHub 的 Issues 提一个新的 Issue,我们会有人定期处理。详情见[Issue Template](#issue-template) 37 | 38 | 您也可以通过阅读分析代码自己修复(当然在这之前最好能和我们交流下,或许已经有人在修复同样的问题了),然后提交一个 Pull Request。 39 | 40 | ### 修改代码和提交PR(Pull Request) 41 | 您可以下载代码,编译安装,部署运行试一试(可以参考编译文档),看看是否与您预想的一样工作。如果有问题,您可以直接联系我们,提 Issue 或者通过阅读和分析源代码自己修复。详情见[Contribution](#contribution) 42 | 43 | 无论是修复 Bug 还是增加 Feature,我们都非常欢迎。如果您希望给 Codefuse 提交代码,您需要从 GitHub 上 fork 代码库至您的项目空间下,为您提交的代码创建一个新的分支,添加源项目为 upstream,并提交 PR。提交 PR 的方式可以参考文档 Pull Request。 44 | 45 | 46 | 47 | 48 | ## Issue Type 49 | Issue 分为三种类型: 50 | - Bug: 代码或者执行示例存在 bug 或缺少依赖导致无法正确执行 51 | - Documentation:文档表述存在争议、文档内容与代码不一致等 52 | - Feature:在当前代码基础上继续演进的新功能 53 | 54 | ## Issue Template 55 | ### Issue: Bug Template 56 | 57 | **提交Issue前的确认清单** 58 |
要先确认是否查看 document、issue、discussion(github 功能) 等公开的文档信息 59 | - 我搜索了Codefuse相关的所有文档。 60 | - 我使用GitHub搜索寻找了一个类似的问题,但没有找到。 61 | - 我为这个问题添加了一个非常描述性的标题。 62 | 63 | **系统信息** 64 |
确认系统,如 mac-xx、windows-xx、linux-xx 64 | 65 | **代码版本** 66 |
确认代码版本或者分支,master、release等 68 | 69 | **问题描述** 70 |
描述您碰到的问题,想要实现的事情、或代码执行Bug 71 | 72 | **代码示例** 73 |
附上你的执行代码和相关配置,以便能够快速介入进行复现 74 | 75 | **报错信息、日志** 76 |
执行上述代码示例后的报错日志和相关信息 77 | 78 | **相关依赖的模块** 79 |
以chatbot项目为例 80 | - connector 81 | - codechat 82 | - sandbox 83 | - ... 84 | 85 | 86 | ### Issue: Documentation Template 87 | **Issue with current documentation:** 88 |
请帮忙指出当前文档中的问题、错别字或者令人困惑的地方 89 | 90 | **Idea or request for content** 91 |
您觉得合理的文档表述方式应该是什么样的 92 | 93 | 94 | ### Issue: Feature Template 95 | **提交Issue前的确认清单** 96 |
要先确认是否查看 document、issue、discussion(github 功能) 等公开的文档信息 97 | - 我搜索了Codefuse相关的所有文档。 98 | - 我使用GitHub Issue搜索寻找了一个类似的问题,但没有找到。 99 | - 我为这个问题添加了一个非常描述性的标题。 100 | 101 | **功能描述** 102 |
描述这个功能作何用途 103 | 104 | **相关示例** 105 |
提供参考的文档、仓库等信息,如相关的 GitHub 仓库、论文或其他资源链接。 106 | 107 | **动机** 108 |
描述下这个feature的动机,为什么需要这个功能,提供足够的上下文信息帮助理解这个feature的诉求 109 | 110 | **Contribution** 111 |
你如何参与到这个feature的构建(如果参与的话) 112 | 113 | 114 | 115 | ## Contribution 116 | 117 | ### Pre-Checklist 118 | - 要先确认是否查看 document、issue、discussion(github 功能) 等公开的文档信息 119 | - 找到你想处理的GitHub问题。如果不存在,创建一个问题或草案PR,并请求维护者进行检查。 120 | - 检查相关的、相似的或重复的拉取请求。 121 | - 创建一个草案拉取请求。 122 | - 完成PR模板中的描述。 123 | - 链接任何被你的PR解决的GitHub问题。 124 | 125 | ### Description 126 | PR的描述信息,用简洁的语言表达PR完成的事情,具体规范见[Commit 格式规范](#commit-格式规范) 127 | 128 | ### Related Issue 129 | `#xx`(如有) 130 | 131 | ### Test Code with Result 132 | 如有必要,请提供相关的测试代码。 133 | 134 | 135 | ## Commit 格式规范 136 | Commit 分为“标题”和“内容”。原则上标题全部小写。内容首字母大写。 137 | 138 | 139 | ### 标题 140 | commit message的标题:`[<type>](<scope>) <subject> (#pr)` 141 | 142 | 143 | ### type 可选值 144 | 145 | 本次提交的类型,限定在以下类型(全小写) 146 | - fix:bug修复 147 | - feature:新增功能 148 | - feature-wip:开发中的功能,比如某功能的部分代码。 149 | - improvement:原有功能的优化和改进 150 | - style:代码风格调整 151 | - typo:代码或文档勘误 152 | - refactor:代码重构(不涉及功能变动) 153 | - performance/optimize:性能优化 154 | - test:单元测试的添加或修复 155 | - deps:第三方依赖库的修改 156 | - community:社区相关的修改,如修改 Github Issue 模板等。 157 | 158 | 几点说明: 159 | 160 | 如在一次提交中出现多种类型,需增加多个类型。 161 | 如代码重构带来了性能提升,可以同时添加 [refactor][optimize] 162 | 不得出现如上所列类型之外的其他类型。如有必要,需要将新增类型添加到这个文档中。 163 | 164 | ### scope 可选值 165 | 本次提交涉及的模块范围。因为功能模块繁多,在此仅罗列部分,后续根据需求不断完善。 166 |
以 chatbot 的框架为例: 167 | - connector 168 | - codechat 169 | - sandbox 170 | - ... 171 | 172 | 几点说明: 173 | 174 | 尽量使用列表中已存在的选项。如需添加,请及时更新本文档。 175 | 176 | ### subject 内容 177 | 标题需尽量清晰表明本次提交的主要内容。 178 | 179 | 180 | ## 示例 181 | coming soon(一个符合上述规范的示意标题,如:`[fix](sandbox) fix file upload error (#pr)`) 182 | 183 | 184 | ## Reference 185 | [doris-commit-format](https://doris.apache.org/zh-CN/community/how-to-contribute/commit-format-specification) -------------------------------------------------------------------------------- /sources/readme_docs/fastchat.md: -------------------------------------------------------------------------------- 1 | # 本地私有化/大模型接口接入 2 | 3 | 依托于开源的 LLM 与 Embedding 模型,本项目可实现基于开源模型的离线私有部署。此外,本项目也支持 OpenAI API 的调用。 4 | 5 | ## 本地私有化模型接入 6 | 7 |
模型地址配置示例,model_config.py配置修改 8 | 9 | ```bash 10 | # 建议:走 huggingface 接入,尽量使用 chat 模型;不要使用 base 模型,否则无法获取正确输出 11 | # 注意:当llm_model_dict和VLLM_MODEL_DICT同时存在时,优先启动VLLM_MODEL_DICT中的模型配置 12 | 13 | # llm_model_dict 配置接入示例如下 14 | 15 | # 1、若把模型放到 ~/codefuse-chatbot/llm_models 路径下 16 | # 若模型地址如下 17 | model_dir: ~/codefuse-chatbot/llm_models/THUDM/chatglm-6b 18 | 19 | # 参考配置如下 20 | llm_model_dict = { 21 | "chatglm-6b": { 22 | "local_model_path": "THUDM/chatglm-6b", 23 | "api_base_url": "http://localhost:8888/v1", # 修改为 fastchat 服务实际的 api_base_url 24 | "api_key": "EMPTY" 25 | } 26 | } 27 | 28 | VLLM_MODEL_DICT = { 29 | 'chatglm-6b': "THUDM/chatglm-6b", 30 | } 31 | 32 | # or 若模型地址如下 33 | model_dir: ~/codefuse-chatbot/llm_models/chatglm-6b 34 | llm_model_dict = { 35 | "chatglm-6b": { 36 | "local_model_path": "chatglm-6b", 37 | "api_base_url": "http://localhost:8888/v1", # 修改为 fastchat 服务实际的 api_base_url 38 | "api_key": "EMPTY" 39 | } 40 | } 41 | 42 | VLLM_MODEL_DICT = { 43 | 'chatglm-6b': "chatglm-6b", 44 | } 45 | 46 | # 2、若不想移动相关模型到 ~/codefuse-chatbot/llm_models 47 | # 需同时删除 `模型路径重置` 以下的相关代码,具体见model_config.py 48 | # 若模型地址如下 49 | model_dir: ~/THUDM/chatglm-6b 50 | # 参考配置如下 51 | llm_model_dict = { 52 | "chatglm-6b": { 53 | "local_model_path": "your personal dir/THUDM/chatglm-6b", 54 | "api_base_url": "http://localhost:8888/v1", # 修改为 fastchat 服务实际的 api_base_url 55 | "api_key": "EMPTY" 56 | } 57 | } 58 | 59 | VLLM_MODEL_DICT = { 60 | 'chatglm-6b': "your personal dir/THUDM/chatglm-6b", 61 | } 62 | ``` 63 | 64 | ```bash 65 | # 3、指定启动的模型服务,两者保持一致 66 | LLM_MODEL = "chatglm-6b" 67 | LLM_MODELs = ["chatglm-6b"] 68 | ``` 69 | 70 | ```bash 71 | # server_config.py 配置修改;若 LLM_MODELs 无多个模型配置,则不需要额外设置 72 | # 修改server_config.py#FSCHAT_MODEL_WORKERS的配置 73 | "model_name": {'host': DEFAULT_BIND_HOST, 'port': 20057} 74 | ``` 75 | 76 | 77 | 78 |
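上述 llm_model_dict、VLLM_MODEL_DICT 与 LLM_MODEL / LLM_MODELs 三处配置必须相互一致,否则服务启动后会找不到模型。下面给出一个配置自检的小脚本(仅为示意,非项目自带;假设 configs/model_config.py 已按上文完成配置,变量名以实际文件为准):

```python
# 配置一致性自检(示意脚本,非项目自带)
from configs.model_config import LLM_MODEL, LLM_MODELs, llm_model_dict, VLLM_MODEL_DICT

# LLM_MODEL 必须在 llm_model_dict 中有对应配置
assert LLM_MODEL in llm_model_dict, f"{LLM_MODEL} 未在 llm_model_dict 中配置"
# LLM_MODEL 与 LLM_MODELs 需保持一致
assert LLM_MODEL in LLM_MODELs, "LLM_MODEL 与 LLM_MODELs 不一致"
# 注意:若该模型同时出现在 VLLM_MODEL_DICT 中,会优先以 vllm 方式启动
if LLM_MODEL in VLLM_MODEL_DICT:
    print(f"{LLM_MODEL} 将优先通过 vllm 启动,模型路径:{VLLM_MODEL_DICT[LLM_MODEL]}")
```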
量化模型接入 79 | 80 | ```bash 81 | # 若需要支撑codellama-34b-int4模型,需要给fastchat打一个补丁 82 | cp examples/gptq.py ~/site-packages/fastchat/modules/gptq.py 83 | 84 | # 若需要支撑qwen-72b-int4模型,需要给fastchat打一个补丁 85 | cp examples/gptq.py ~/site-packages/fastchat/modules/gptq.py 86 | # 量化需修改llm_api.py的配置 87 | # examples/llm_api.py#559 取消注释 kwargs["gptq_wbits"] = 4 88 | ``` 89 | 90 | ## 公开大模型接口接入 91 | 92 | ```bash 93 | # model_config.py配置修改 94 | # ONLINE_LLM_MODEL 95 | # 其它接口开发来自于langchain-chatchat项目,缺少相关账号未经测试 96 | 97 | # 指定启动的模型服务,两者保持一致 98 | LLM_MODEL = "gpt-3.5-turbo" 99 | LLM_MODELs = ["gpt-3.5-turbo"] 100 | ``` 101 | 102 | 外部大模型接口接入示例 103 | 104 | ```bash 105 | # 1、实现新的模型接入类 106 | # 参考 ~/examples/model_workers/openai.py#ExampleWorker 107 | # 实现do_chat函数即可使用LLM的能力 108 | 109 | class XXWorker(ApiModelWorker): 110 | def __init__( 111 | self, 112 | *, 113 | controller_addr: str = None, 114 | worker_addr: str = None, 115 | model_names: List[str] = ["gpt-3.5-turbo"], 116 | version: str = "gpt-3.5", 117 | **kwargs, 118 | ): 119 | kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr) 120 | kwargs.setdefault("context_len", 16384) #TODO 16K模型需要改成16384 121 | super().__init__(**kwargs) 122 | self.version = version 123 | 124 | def do_chat(self, params: ApiChatParams) -> Dict: 125 | ''' 126 | 执行Chat的方法,默认使用模块里面的chat函数。 127 | :params.messages : [ 128 | {"role": "user", "content": "hello"}, 129 | {"role": "assistant", "content": "hello"} 130 | ] 131 | :params.xx: 详情见 ApiChatParams 132 | 要求返回形式:{"error_code": int, "text": str} 133 | ''' 134 | return {"error_code": 500, "text": f"{self.model_names[0]}未实现chat功能"} 135 | 136 | 137 | # 最后在 ~/examples/model_workers/__init__.py 中完成注册 138 | # from .xx import XXWorker 139 | 140 | # 2、通过已有模型接入类完成接入 141 | # 或者直接使用已有的相关大模型类进行使用(缺少相关账号测试,欢迎大家测试后提PR) 142 | ``` 143 | 144 | 145 | ```bash 146 | # model_config.py#ONLINE_LLM_MODEL 配置修改 147 | # 填写专属模型的 version、api_base_url、api_key、provider(与上述类名一致) 148 | ONLINE_LLM_MODEL = { 149 | # 线上模型。请在server_config中为每个在线API设置不同的端口 150 | 151 | "openai-api": { 152 | "model_name": "gpt-3.5-turbo", 153 | "api_base_url": "https://api.openai.com/v1", 154 | "api_key": "", 155 | "openai_proxy": "", 156 | }, 157 | "example": { 158 | "version": "gpt-3.5", # 采用openai接口做示例 159 | "api_base_url": "https://api.openai.com/v1", 160 | "api_key": "", 161 | "provider": "ExampleWorker", 162 | }, 163 | } 164 | ``` 165 | 166 | ## 启动大模型服务 167 | ```bash 168 | # start llm-service(可选) 单独启动大模型服务 169 | python examples/llm_api.py 170 | ``` 171 | 172 | ```bash 173 | # 启动测试 174 | import openai 175 | # openai.api_key = "EMPTY" # Not support yet 176 | openai.api_base = "http://127.0.0.1:8888/v1" 177 | 178 | # 选择你启动的模型 179 | model = "example" 180 | 181 | # create a chat completion 182 | completion = openai.ChatCompletion.create( 183 | model=model, 184 | messages=[{"role": "user", "content": "Hello! What is your name? 
"}], 185 | max_tokens=100, 186 | ) 187 | # print the completion 188 | print(completion.choices[0].message.content) 189 | 190 | # 正确输出后则确认LLM可正常接入 191 | ``` 192 | 193 | 194 | 195 | or 196 | 197 | ```bash 198 | # model_config.py#USE_FASTCHAT 判断是否进行fastchat接入本地模型 199 | USE_FASTCHAT = "gpt" not in LLM_MODEL 200 | python start.py #221 自动执行 python llm_api.py 201 | ``` -------------------------------------------------------------------------------- /sources/readme_docs/roadmap-en.md: -------------------------------------------------------------------------------- 1 | 2 | Roadmap Overview 3 | 4 | - [x] Sandbox Environment ✅ 5 | - [x] Isolated sandbox environment for code execution ✅ 6 | - [x] File upload and download ✅ 7 | - [ ] Support for Java execution environment ⬜ 8 | - [x] Vector Database & Retrieval ✅ 9 | - [x] Task retrieval ✅ 10 | - [x] Tool retrieval ✅ 11 | - [x] Prompt Management ✅ 12 | - [x] Memory Management ✅ 13 | - [x] Multi Agent Framework ✅ 14 | - [ ] PRD (Product Requirement Document), system analysis, interface design ⬜ 15 | - [ ] Generate code based on requirement documents, system analysis, and interface design ⬜ 16 | - [ ] Automated testing, automated debugger ⬜ 17 | - [ ] Operations process integration (ToolLearning) ⬜ 18 | - [ ] Fully automated end-to-end process ⬜ 19 | - [x] Integration with LLM based on fastchat ✅ 20 | - [x] Integration with Text Embedding based on sentencebert ✅ 21 | - [x] Improved vector loading speed ✅ 22 | - [x] Connector ✅ 23 | - [x] React Mode based on langchain ✅ 24 | - [x] Tool retrieval completed with langchain ✅ 25 | - [ ] General Capability for Web Crawl ⬜ 26 | - [x] Technical documentation: Zhihu, CSDN, Alibaba Cloud Developer Forum, Tencent Cloud Developer Forum, etc. ✅ 27 | - [ ] Issue document ⬜ 28 | - [ ] SDK Library Document ⬜ 29 | 30 | v0.0 31 | - [x] Sandbox Environment ✅ 32 | - [x] Isolated sandbox environment for code execution ✅ 33 | - [x] Integration with LLM based on fastchat ✅ 34 | - [x] Integration with Text Embedding based on sentencebert ✅ 35 | - [x] General Capability for Web Crawl: Technical documentation: Zhihu, CSDN, Alibaba Cloud Developer Forum, Tencent Cloud Developer Forum, etc. ✅ 36 | 37 | Done 38 |
39 | 40 | v0.1 41 | - [x] Sandbox Environment: File upload and download ✅ 42 | - [x] Vector Database & Retrieval ✅ 43 | - [x] Task retrieval ✅ 44 | - [x] Tool retrieval ✅ 45 | - [x] Connector ✅ 46 | - [x] React Mode based on langchain ✅ 47 | - [x] Integration with Text Embedding based on sentencebert: Improved vector loading speed ✅ 48 | 49 | Done 50 |
51 | 52 | v0.2 53 | - [x] Prompt Management ✅ 54 | - [x] Memory Management ✅ 55 | - [x] Vector Database & Retrieval ✅ 56 | 57 | Done 58 |
59 | 60 | v0.3 61 | - [x] Sandbox Environment ✅ 62 | - [ ] Support for Java execution environment ⬜ 63 | - [x] Multi Agent ✅ 64 | - [ ] PRD (Product Requirement Document), system analysis, interface design ⬜ 65 | - [ ] Generate code based on requirement documents, system analysis, and interface design ⬜ 66 | - [ ] Automated testing, automated debugger ⬜ 67 | - [ ] Operations process integration (ToolLearning) ⬜ 68 | - [ ] Fully automated end-to-end process ⬜ 69 | - [x] General Capability for Web Crawl ✅ 70 | - [ ] Issue document ⬜ 71 | - [ ] SDK Library Document ⬜ 72 | 73 | DDL: 2024.12.31 74 |
-------------------------------------------------------------------------------- /sources/readme_docs/roadmap.md: -------------------------------------------------------------------------------- 1 | 2 | ## RoadMap 3 | 4 |
5 | 图片 6 |
7 |
8 | 9 | 10 | 完整路线 11 | - [x] Sandbox 环境 ✅ 12 | - [x] 环境隔离的sandbox环境与代码执行 ✅ 13 | - [x] 上传、下载文件 ✅ 14 | - [ ] 支持java执行环境 ⬜ 15 | - [x] Vector Database & Retrieval ✅ 16 | - [x] task retrieval ✅ 17 | - [x] tool retrieval ✅ 18 | - [x] Prompt Management ✅ 19 | - [x] Memory Management ✅ 20 | - [x] Multi Agent ✅ 21 | - [ ] PRD需求文档、系分、接口设计 ⬜ 22 | - [ ] 根据需求文档、系分、接口设计生成代码 ⬜ 23 | - [ ] 自动测试、自动debugger ⬜ 24 | - [ ] 运维流程接入(ToolLearning)⬜ 25 | - [ ] 全流程自动 ⬜ 26 | - [x] 基于fastchat接入LLM ✅ 27 | - [x] 基于sentencebert接入Text Embedding ✅ 28 | - [x] 向量加载速度提升 ✅ 29 | - [x] Connector ✅ 30 | - [x] 基于langchain的react模式 ✅ 31 | - [x] 基于langchain完成tool检索 ✅ 32 | - [x] Web Crawl 通用能力 ✅ 33 | - [x] 技术文档: 知乎、csdn、阿里云开发者论坛、腾讯云开发者论坛等 ✅ 34 | - [ ] issue document ⬜ 35 | - [ ] SDK Library Document ⬜ 36 | 37 | 38 |

39 | 40 | - v0.0 41 | - [x] Sandbox 环境 ✅ 42 | - [x] 环境隔离的sandbox环境与代码执行 ✅ 43 | - [x] 基于fastchat接入LLM ✅ 44 | - [x] 基于sentencebert接入Text Embedding ✅ 45 | - [x] Web Crawl 通用能力:技术文档: 知乎、csdn、阿里云开发者论坛、腾讯云开发者论坛等 ✅ 46 |
47 | - v0.1 48 | - [x] Sandbox 环境: 上传、下载文件 ✅ 49 | - [x] Vector Database & Retrieval ✅ 50 | - [x] task retrieval ✅ 51 | - [x] tool retrieval ✅ 52 | - [x] Connector ✅ 53 | - [x] 基于langchain的react模式 ✅ 54 | - [x] 基于sentencebert接入Text Embedding: 向量加载速度提升 ✅ 55 | 56 | Done 57 |
58 | 59 | - v0.2 60 | - [x] Prompt Management ✅ 61 | - [x] Memory Management ✅ 62 | - [x] Vector Database & Retrieval ✅ 63 | 64 | Done 65 |
66 | 67 | - v0.3 68 | - [x] Sandbox 环境 ✅ 69 | - [ ] 支持java执行环境 ⬜ 70 | - [x] Multi Agent Framework ✅ 71 | - [ ] PRD需求文档、系分、接口设计 ⬜ 72 | - [ ] 根据需求文档、系分、接口设计生成代码 ⬜ 73 | - [ ] 自动测试、自动debugger ⬜ 74 | - [ ] 运维流程接入(ToolLearning) ⬜ 75 | - [ ] 全流程自动 ⬜ 76 | - [x] Web Crawl 通用能力 ✅ 77 | - [ ] issue document ⬜ 78 | - [ ] SDK Library Document ⬜ 79 | 80 | DDL: 2024.12.31 81 |
82 | -------------------------------------------------------------------------------- /sources/readme_docs/start-en.md: -------------------------------------------------------------------------------- 1 | 2 | If you need to deploy a privatized model, please install the NVIDIA driver yourself. 3 | 4 | ### Preparation of Python environment 5 | - It is recommended to use conda to manage the python environment (optional) 6 | ```bash 7 | # Prepare conda environment 8 | conda create --name Codefusegpt python=3.9 9 | conda activate Codefusegpt 10 | ``` 11 | 12 | - Install related dependencies 13 | ```bash 14 | cd Codefuse-ChatBot 15 | pip install -r requirements.txt 16 | ``` 17 | 18 | ### Sandbox Environment Preparation 19 | - Windows Docker installation: 20 | [Docker Desktop for Windows](https://docs.docker.com/desktop/install/windows-install/) supports 64-bit versions of Windows 10 Pro with Hyper-V enabled (Hyper-V is not required for versions v1903 and above), or 64-bit versions of Windows 10 Home v1903 and above. 21 | - [【全面详细】Windows10 Docker安装详细教程](https://zhuanlan.zhihu.com/p/441965046) 22 | - [Docker 从入门到实践](https://yeasy.gitbook.io/docker_practice/install/windows) 23 | - [Handling 'Docker Desktop requires the Server service to be enabled'](https://blog.csdn.net/sunhy_csdn/article/details/106526991) 24 | - [安装wsl或者等报错提示](https://learn.microsoft.com/zh-cn/windows/wsl/install) 25 | 26 | - Linux Docker installation: 27 | Linux installation is relatively simple, please search Baidu/Google for installation guides. 28 | 29 | - Mac Docker installation 30 | - [Docker 从入门到实践](https://yeasy.gitbook.io/docker_practice/install/mac) 31 | 32 | ```bash 33 | # Build the image for the sandbox environment, see above for notebook version issues 34 | bash docker_build.sh 35 | ``` 36 | 37 | ### Model Download (Optional) 38 | 39 | If you need to use open-source LLM and Embedding models, you can download them from HuggingFace. 
40 | Here we take THUDM/chatglm2-6b and text2vec-base-chinese as examples: 41 | 42 | ``` 43 | # install git-lfs 44 | git lfs install 45 | 46 | # install LLM-model 47 | git lfs clone https://huggingface.co/THUDM/chatglm2-6b 48 | cp ~/THUDM/chatglm2-6b ~/codefuse-chatbot/llm_models/ 49 | 50 | # install Embedding-model 51 | git lfs clone https://huggingface.co/shibing624/text2vec-base-chinese 52 | cp ~/shibing624/text2vec-base-chinese ~/codefuse-chatbot/embedding_models/ 53 | ``` 54 | 55 | 56 | 57 | ### Basic Configuration 58 | 59 | ```bash 60 | # Modify the basic configuration for service startup 61 | cd configs 62 | cp model_config.py.example model_config.py 63 | cp server_config.py.example server_config.py 64 | 65 | # model_config#11~12 If you need to use the OpenAI interface, the OpenAI interface key 66 | os.environ["OPENAI_API_KEY"] = "sk-xxx" 67 | # Replace with the api_base_url you need 68 | os.environ["API_BASE_URL"] = "https://api.openai.com/v1" 69 | 70 | # vi model_config#LLM_MODEL The language model you need to choose 71 | LLM_MODEL = "gpt-3.5-turbo" 72 | LLM_MODELs = ["gpt-3.5-turbo"] 73 | 74 | # vi model_config#EMBEDDING_MODEL The private vector model you need to choose 75 | EMBEDDING_ENGINE = 'model' 76 | EMBEDDING_MODEL = "text2vec-base" 77 | 78 | # Example of vector model access, modify model_config#embedding_model_dict 79 | # If the model directory is: 80 | model_dir: ~/codefuse-chatbot/embedding_models/shibing624/text2vec-base-chinese 81 | # Configure as follows 82 | "text2vec-base": "shibing624/text2vec-base-chinese" 83 | 84 | 85 | # vi server_config#8~14, It's recommended to use a container to start the service to prevent environment conflicts when installing other dependencies using the codeInterpreter feature 86 | DOCKER_SERVICE = True 87 | # Whether to use a container sandbox 88 | SANDBOX_DO_REMOTE = True 89 | ``` 90 | 91 | 92 | 93 | ### Starting the Service 94 | 95 | By default, only the webui-related services are started, and fastchat is not started (optional). 96 | 97 | ```bash 98 | # If you need to support the codellama-34b-int4 model, you need to patch fastchat 99 | # cp examples/gptq.py ~/site-packages/fastchat/modules/gptq.py 100 | # Modify examples/llm_api.py#258 to kwargs={"gptq_wbits": 4}, 101 | 102 | # start llm-service (optional) 103 | python examples/llm_api.py 104 | ``` 105 | For more LLM integration methods, see[more details...](sources/readme_docs/fastchat-en.md) 106 |
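If you started the optional llm service above, you can give the OpenAI-compatible endpoint a quick smoke test before moving on. The snippet below is an illustrative sketch, not part of the repo; it assumes the default port 8888 used in this project's examples and the pre-1.0 `openai` client style used elsewhere in these docs:

```python
# Illustrative smoke test for a locally started llm service (not part of the repo)
import openai

# openai.api_key = "EMPTY"  # not supported yet, see the fastchat guide
openai.api_base = "http://127.0.0.1:8888/v1"  # default port in this project's examples

completion = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",  # replace with the model name you set in LLM_MODEL
    messages=[{"role": "user", "content": "ping"}],
    max_tokens=16,
)
print(completion.choices[0].message.content)  # any reply means the service is reachable
```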
107 | 108 | ```bash 109 | # After completing the server_config.py configuration, you can start with one click 110 | cd examples 111 | python start.py 112 | ``` -------------------------------------------------------------------------------- /sources/readme_docs/start.md: -------------------------------------------------------------------------------- 1 | 2 | 如需使用私有化模型部署,请自行安装 nvidia 驱动程序。 3 | 4 | ### python 环境准备 5 | 6 | - 推荐采用 conda 对 python 环境进行管理(可选) 7 | ```bash 8 | # 准备 conda 环境 9 | conda create --name devopsgpt python=3.9 10 | conda activate devopsgpt 11 | ``` 12 | 13 | - 安装相关依赖 14 | ```bash 15 | cd codefuse-chatbot 16 | # python=3.9,notebook用最新即可,python=3.8用notebook=6.5.6 17 | pip install -r requirements.txt 18 | ``` 19 | 20 | ### 沙盒环境准备 21 | - windows Docker 安装: 22 | [Docker Desktop for Windows](https://docs.docker.com/desktop/install/windows-install/) 支持 64 位版本的 Windows 10 Pro,且必须开启 Hyper-V(若版本为 v1903 及以上则无需开启 Hyper-V),或者 64 位版本的 Windows 10 Home v1903 及以上版本。 23 | 24 | - [【全面详细】Windows10 Docker安装详细教程](https://zhuanlan.zhihu.com/p/441965046) 25 | - [Docker 从入门到实践](https://yeasy.gitbook.io/docker_practice/install/windows) 26 | - [Docker Desktop requires the Server service to be enabled 处理](https://blog.csdn.net/sunhy_csdn/article/details/106526991) 27 | - [安装wsl或者等报错提示](https://learn.microsoft.com/zh-cn/windows/wsl/install) 28 | 29 | - Linux Docker 安装: 30 | Linux 安装相对比较简单,请自行 baidu/google 相关安装 31 | 32 | - Mac Docker 安装 33 | - [Docker 从入门到实践](https://yeasy.gitbook.io/docker_practice/install/mac) 34 | 35 | ```bash 36 | # 构建沙盒环境的镜像,notebook版本问题见上述 37 | bash docker_build.sh 38 | ``` 39 | 40 | ### 模型下载(可选) 41 | 42 | 如需使用开源 LLM 与 Embedding 模型可以从 HuggingFace 下载。 43 | 此处以 THUDM/chatglm2-6bm 和 text2vec-base-chinese 为例: 44 | 45 | ``` 46 | # install git-lfs 47 | git lfs install 48 | 49 | # install LLM-model 50 | git lfs clone https://huggingface.co/THUDM/chatglm2-6b 51 | cp ~/THUDM/chatglm2-6b ~/codefuse-chatbot/llm_models/ 52 | 53 | # install Embedding-model 54 | git lfs clone https://huggingface.co/shibing624/text2vec-base-chinese 55 | cp ~/shibing624/text2vec-base-chinese ~/codefuse-chatbot/embedding_models/ 56 | ``` 57 | 58 | 59 | ### 基础配置 60 | 61 | ```bash 62 | # 修改服务启动的基础配置 63 | cd configs 64 | cp model_config.py.example model_config.py 65 | cp server_config.py.example server_config.py 66 | 67 | # model_config#11~12 若需要使用openai接口,openai接口key 68 | os.environ["OPENAI_API_KEY"] = "sk-xxx" 69 | # 可自行替换自己需要的api_base_url 70 | os.environ["API_BASE_URL"] = "https://api.openai.com/v1" 71 | 72 | # vi model_config#LLM_MODEL 你需要选择的语言模型 73 | LLM_MODEL = "gpt-3.5-turbo" 74 | LLM_MODELs = ["gpt-3.5-turbo"] 75 | 76 | # vi model_config#EMBEDDING_MODEL 你需要选择的私有化向量模型 77 | EMBEDDING_ENGINE = 'model' 78 | EMBEDDING_MODEL = "text2vec-base" 79 | 80 | # 向量模型接入示例,修改 model_config#embedding_model_dict 81 | # 若模型地址为: 82 | model_dir: ~/codefuse-chatbot/embedding_models/shibing624/text2vec-base-chinese 83 | # 配置如下 84 | "text2vec-base": "shibing624/text2vec-base-chinese" 85 | 86 | # vi server_config#8~14, 推荐采用容器启动服务 87 | DOCKER_SERVICE = True 88 | # 是否采用容器沙箱 89 | SANDBOX_DO_REMOTE = True 90 | ``` 91 | 92 | ### 启动服务 93 | 94 | 默认只启动webui相关服务,未启动fastchat(可选)。 95 | ```bash 96 | # 若需要支撑codellama-34b-int4模型,需要给fastchat打一个补丁 97 | # cp examples/gptq.py ~/site-packages/fastchat/modules/gptq.py 98 | # examples/llm_api.py#258 修改为 kwargs={"gptq_wbits": 4}, 99 | 100 | # start llm-service(可选) 101 | python examples/llm_api.py 102 | ``` 103 | 更多LLM接入方法见[详情...](sources/readme_docs/fastchat.md) 104 |
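向量模型配置是否生效,也可以用下面的小脚本快速确认(仅为示意,非项目自带;假设模型已按上文放置,变量名以实际 model_config.py 为准):

```python
# 校验 embedding 模型路径(示意脚本,非项目自带)
import os

from configs.model_config import EMBEDDING_MODEL, embedding_model_dict

path = embedding_model_dict[EMBEDDING_MODEL]
# 若 model_config.py 中做了“模型路径重置”,此处取到的可能已是绝对路径
if not os.path.isabs(path):
    path = os.path.join(os.path.expanduser("~/codefuse-chatbot/embedding_models"), path)
print(path, "存在" if os.path.exists(path) else "不存在,请检查 model_config.py 配置")
```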
105 | 106 | ```bash 107 | # 完成server_config.py配置后,可一键启动 108 | cd examples 109 | python start.py 110 | ``` -------------------------------------------------------------------------------- /tests/file_test.py: -------------------------------------------------------------------------------- 1 | import requests, os, sys 2 | # src_dir = os.path.join( 3 | # os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 4 | # ) 5 | # sys.path.append(src_dir) 6 | 7 | # from dev_opsgpt.utils.common_utils import st_load_file 8 | # from dev_opsgpt.sandbox.pycodebox import PyCodeBox 9 | # from examples.file_fastapi import upload_file, download_file 10 | # from pathlib import Path 11 | # import httpx 12 | # from loguru import logger 13 | # from io import BytesIO 14 | 15 | 16 | # def _parse_url(url: str, base_url: str) -> str: 17 | # if (not url.startswith("http") 18 | # and base_url 19 | # ): 20 | # part1 = base_url.strip(" /") 21 | # part2 = url.strip(" /") 22 | # return f"{part1}/{part2}" 23 | # else: 24 | # return url 25 | 26 | # base_url: str = "http://127.0.0.1:7861" 27 | # timeout: float = 60.0, 28 | # url = "/files/upload" 29 | # url = _parse_url(url, base_url) 30 | # logger.debug(url) 31 | # kwargs = {} 32 | # kwargs.setdefault("timeout", timeout) 33 | 34 | # import asyncio 35 | # file = "./torch_test.py" 36 | # upload_filename = st_load_file(file, filename="torch_test.py") 37 | # asyncio.run(upload_file(upload_filename)) 38 | 39 | import requests 40 | url = "http://127.0.0.1:7862/sdfiles/download?filename=torch_test.py&save_filename=torch_test.py" 41 | r = requests.get(url) 42 | print(type(r.text)) -------------------------------------------------------------------------------- /tests/torch_test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | print(torch.__version__) 3 | print(torch.cuda.is_available()) -------------------------------------------------------------------------------- /web_crawler/main_test.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from utils.WebCrawler import WebCrawler 3 | 4 | logging.basicConfig(level=logging.INFO) 5 | 6 | if __name__ == '__main__': 7 | # 保存地址,分别保存html源文件、处理后text文件 8 | html_dir = "data/html/tmp_csdn_122513786_html.jsonl" 9 | text_dir = "data/text/tmp_csdn_122513786_text.jsonl" 10 | # 下载网页数据 11 | # https://www.langchain.asia/ 12 | # https://blog.csdn.net/weixin_43791511/article/details/122513786 13 | # https://zhuanlan.zhihu.com/p/645400277 14 | # https://www.aliyun.com/?utm_content=se_1014243503 15 | # 'https://cloud.tencent.com/developer/article/1004500?from=15425' 16 | base_url = 'https://www.langchain.asia/' 17 | # 爬取方式: 18 | ## requests和selenium两种方式;requests为简单请求静态网址html内容,js动态数据无法获取; 19 | ## selenium为模拟人行为请求,可获取全部html数据,但请求时间较长10-20s单网页,尽量设置5s以上的time_sleep。 20 | reptile_lib = "requests" 21 | method = "get" # 目前只支持get请求 22 | time_sleep = 4 # 每两次请求间隔时间s 23 | wc = WebCrawler() 24 | # 爬取base_url单网址 25 | wc.webcrawler_single(html_dir=html_dir, 26 | text_dir=text_dir, 27 | base_url=base_url, 28 | reptile_lib=reptile_lib, 29 | method=method, 30 | time_sleep=time_sleep 31 | ) 32 | 33 | # # 爬取base_url页面所有网址,限制target_url_prefix为前缀,默认target_url_prefix=base_url 34 | # wc.webcrawler_1_degree(html_dir=html_dir, 35 | # text_dir=text_dir, 36 | # base_url=base_url, 37 | # reptile_lib=reptile_lib, 38 | # method=method, 39 | # time_sleep=time_sleep 40 | # ) 41 | -------------------------------------------------------------------------------- 
/web_crawler/utils/DocTokenizer.py: -------------------------------------------------------------------------------- 1 | 2 | import re 3 | 4 | # 非打印字符 5 | NON_PRINTING_CHARS_RE = re.compile( 6 | f"[{''.join(map(chr, list(range(0, 32)) + list(range(127, 160))))}]" 7 | ) 8 | 9 | class DocTokenizer(): 10 | ''' 11 | 文档text处理器。 12 | ''' 13 | 14 | def __init__(self): 15 | pass 16 | 17 | def doc_process(self, text): 18 | ''' 19 | 去除多余换行、去掉每行非打印字符和开头结尾空格 20 | ''' 21 | # 去除多余换行 22 | text = self.remove_excess_lines(text) 23 | # 将文本拆分成行 24 | lines = text.split("\n") 25 | # 去掉每一行的开头和结尾的空格 26 | lines = [self.remove_non_printing_char_line( 27 | line.strip()) for line in lines] 28 | # 将行重新组合成文本 29 | text_new = "\n".join(lines) 30 | return text_new 31 | 32 | def remove_excess_lines(self, text): 33 | ''' 34 | 将2个以上的换行符替换为2个,html解析text时会产生大量换行\n 35 | ''' 36 | pattern = r'\n\n+' 37 | return re.sub(pattern, '\n\n', text) 38 | 39 | def remove_non_printing_char_line(self, text): 40 | ''' 41 | 去除每一行的非打印字符 42 | ''' 43 | return NON_PRINTING_CHARS_RE.sub("", text) 44 | -------------------------------------------------------------------------------- /web_crawler/utils/Html2Text.py: -------------------------------------------------------------------------------- 1 | import time 2 | from bs4 import BeautifulSoup 3 | import logging 4 | import json 5 | import os 6 | from tqdm import tqdm 7 | import re 8 | from .DocTokenizer import DocTokenizer 9 | 10 | logging.basicConfig(level=logging.INFO) 11 | 12 | 13 | class Html2Text(): 14 | '''从html中提取text文本内容。 15 | ''' 16 | 17 | def __init__(self): 18 | pass 19 | 20 | def html2text(self, 21 | target_content_tag={}, 22 | target_tag_list=[], 23 | html_dir=None, 24 | text_dir=None, 25 | mode="w", 26 | is_get_all_text=False 27 | ): 28 | ''' 29 | 从html中提取text文本内容,需要指定提取html中的tag标签。输入为地址,html文件保存在jsonl文件中,输出也需要指定地址。 30 | :param target_content_tag: html中正文content所在tag,字典格式限制长度为1,key为选中便签类型name/class/id,vaule为标签取值如div/title/article等 31 | :param target_tag_list: 指定提取html对应的tag文本,列表,每个元素都与target_content_tag格式相同 32 | :param is_get_all_text: True则将html页面所有text内容保存到all_text字典中;False不保存all_text 33 | :param html_dir: html数据地址,注意需要时jsonl格式,一行为一个json字典,有text/url/host_url三个字段 34 | :param text_dir: 将提取的text内容保存的地址,同样是jsonl格式。 35 | :return: None 36 | ''' 37 | assert isinstance(target_content_tag,dict), "target_content_tag请输入字典格式!" 38 | assert len(target_content_tag.keys()) <= 1,"target_content_tag属性字典只能指定唯一元素!" 39 | for _ in target_tag_list: 40 | assert isinstance(_, dict), "target_tag_list列表元素需要字典格式!" 41 | assert len(_.keys()) <= 1, "target_tag_list列表中的属性字典只能指定唯一元素!" 
42 | # 创建保存目录 43 | os.makedirs(os.path.dirname(text_dir), exist_ok=True) 44 | # 读取文件 45 | logging.info("读取文件中……") 46 | html_dict_list = self.read_html_jsonl(html_dir) 47 | url_nums = len(html_dict_list) 48 | logging.info("共{url_nums}个html网址".format(url_nums=url_nums)) 49 | # 循环处理每行html数据:html提取content正文、指定tag内容 50 | text_dict_list = [] 51 | for html_dict in tqdm(html_dict_list, mininterval=1): 52 | # 是否获取全部text内容 53 | text_dict = self.get_text_dict( 54 | html_dict=html_dict, 55 | target_content_tag=target_content_tag, 56 | target_tag_list=target_tag_list, 57 | is_get_all_text=is_get_all_text 58 | ) 59 | text_dict_list.append(text_dict) 60 | logging.info("保存html提取的text内容……") 61 | self.save_text_jsonl(json_list=text_dict_list, 62 | file_path=text_dir, 63 | mode=mode) 64 | logging.info("保存成功!地址:%s" % text_dir) 65 | 66 | def get_text_dict(self, 67 | html_dict={}, 68 | target_content_tag={}, 69 | target_tag_list=[], 70 | is_get_all_text=True 71 | ): 72 | '''{"name":"div"} 73 | 提取html网页字符中的纯文本内容,采用BeautifulSoup.get_text()获取全部text文本,target_tag_list指定要提取文本的标签。 74 | :param html_dict: 网页返回的全部文本内容response.text和url 75 | :param target_content_tag: html中正文content所在tag,字典格式限制长度为1,key为选中便签类型name/class/id,vaule为标签取值如div/title/article等 76 | :param target_tag_list: 指定提取html对应的tag文本,列表,每个元素都与target_content_tag格式相同 77 | :return: text_content:{} 提取的text文本内容 78 | ''' 79 | # 格式定义 80 | assert isinstance(target_content_tag,dict), "target_content_tag请输入字典格式!" 81 | assert len(target_content_tag.keys()) <= 1,"target_content_tag属性字典只能指定唯一元素!" 82 | for _ in target_tag_list: 83 | assert isinstance(_, dict), "target_tag_list列表元素需要字典格式!" 84 | assert len(_.keys()) <= 1, "target_tag_list列表中的属性字典只能指定唯一元素!" 85 | # 提取html的内容 86 | html_content = html_dict['text'] 87 | url = html_dict['url'] 88 | host_url = html_dict['host_url'] 89 | # 创建BeautifulSoup对象 90 | soup = BeautifulSoup(html_content, 'html.parser') 91 | # 处理pre引用代码块,添```引用 92 | pre_tags = soup.find_all('code') 93 | for pre_tag in pre_tags: 94 | pre_tag.string = '\n```code\n' + pre_tag.get_text() + '\n```\n' 95 | # 提取HTML中的文本内容 96 | doc_tokenizer = DocTokenizer() 97 | text_dict = {} 98 | text_dict['url'] = url 99 | text_dict['host_url'] = host_url 100 | # 提取网页的title,不存在则置空 101 | try: 102 | text_dict['title'] = soup.title.text 103 | except: 104 | text_dict['title'] = None 105 | # 是否提取全部text,不区分标签 106 | if is_get_all_text: 107 | all_text = soup.get_text(separator="", strip=False) 108 | text_dict['all_text'] = doc_tokenizer.doc_process(all_text) 109 | # 提取正文tag,可以按照标签的class提取,或按照tag名提取 110 | if target_content_tag: 111 | text_dict["content"] = self.soup_find_all_text(soup=soup,doc_tokenizer=doc_tokenizer,attrs=target_content_tag) 112 | # 提取html中tag内容,每个tag独立作为字段保存 113 | for target_tag in target_tag_list: 114 | if target_tag: 115 | # 提取目标tag名 116 | tag_ = list(target_tag.values())[0] 117 | # 提取目标tag内容 118 | text_dict[tag_] = self.soup_find_all_text(soup,doc_tokenizer,attrs=target_tag) 119 | return text_dict 120 | 121 | def soup_find_all_text(self,soup,doc_tokenizer,attrs): 122 | assert isinstance(attrs,dict), "attrs请输入字典格式!" 123 | assert len(attrs.keys()) == 1,"attrs属性字典只能指定唯一元素!" 
124 | if list(attrs.keys())[0]=="name": 125 | _tags = soup.find_all(name=attrs["name"]) 126 | else: 127 | _tags = soup.find_all(attrs=attrs) 128 | tags_text = "" 129 | for _tag in _tags: 130 | tag_text = _tag.get_text(separator="", strip=False) 131 | tag_text = doc_tokenizer.doc_process(tag_text) 132 | tags_text += tag_text.strip() + "\n\n" 133 | return tags_text 134 | 135 | def read_html_jsonl(self, file_name=None): 136 | ''' 137 | 读取html的josnl文件 138 | ''' 139 | html_dict_list = [] 140 | with open(file_name, "r", encoding="utf-8") as f: 141 | for k, line in enumerate(f): 142 | line = json.loads(line) 143 | html_dict_list.append(line) 144 | return html_dict_list 145 | 146 | def save_text_jsonl(self, json_list=[], file_path=None, mode="w"): 147 | ''' 148 | 将json_list保存成jsonl格式文件 149 | ''' 150 | with open(file_path, mode, encoding="utf-8") as f: 151 | for line in json_list: 152 | f.write(json.dumps(line, ensure_ascii=False) + "\n") 153 | -------------------------------------------------------------------------------- /web_crawler/utils/WebCrawler.py: -------------------------------------------------------------------------------- 1 | from .WebHtmlExtractor import WebHtmlExtractor 2 | import logging 3 | from .Html2Text import Html2Text 4 | 5 | 6 | class WebCrawler(): 7 | '''爬取url内容,分为requests和selenium两种方式;selenium需提前下载chrome浏览器与chromedriver,并配置路径。 8 | 安装selenium模拟访问网站,需安装并调试chromedriver,版本与电脑chrome需一致,且正确配置路径。mac电脑路径:打开finder,再按command+shift+G进入/usr/local/bin;windows可配置路径。 9 | ''' 10 | 11 | def __init__(self): 12 | pass 13 | 14 | def webcrawler_single(self, 15 | html_dir=None, 16 | text_dir=None, 17 | base_url=None, 18 | reptile_lib="requests", 19 | method="get", 20 | mode="w", 21 | time_sleep=4, 22 | time_out=10, 23 | target_content_tag={}, 24 | target_tag_list=[] 25 | ): 26 | ''' 27 | 爬取base_url页网址,分别保存html与解析处理的text 28 | :param html_dir: 保存html地址,jsonl文件 29 | :param text_dir: 将提取的text内容保存的地址,同样是jsonl格式。 30 | :param base_url: 目标网址 31 | :param reptile_lib: requests和selenium两种方式;requests为简单请求静态网址html内容,js动态数据无法获取;selenium为模拟人行为请求,可获取全部html数据,但请求时间较长,尽量设置5s以上的time_sleep,selenium需提前下载chrome浏览器与chromedriver,并配置路径。。 32 | :param method: requests请求有get/post两种,selenium只支持get 33 | :param time_sleep: 等待时间s 34 | :param time_out: 超时时长s 35 | :param target_content_tag: html中正文content所在tag,字典格式限制长度为1,key为选中便签类型name/class/id,vaule为标签取值如div/title/article等 36 | :param target_tag_list: 指定提取html对应的tag文本,列表,每个元素都与target_content_tag格式相同 37 | :return: None 38 | ''' 39 | assert method=="get","只支持get请求!" 
40 | # 发送请求获取base_url结果:包含相关页面全部网址 41 | whe = WebHtmlExtractor(time_sleep=time_sleep, time_out=time_out) 42 | whe.save_url_html(base_url=base_url, reptile_lib=reptile_lib, method=method, html_dir=html_dir, mode=mode) 43 | # 读取文件 44 | h2t = Html2Text() 45 | # 读取并处理,只按照指定tag获取text,不获取全部text内容 46 | h2t.html2text(target_content_tag=target_content_tag, 47 | target_tag_list=target_tag_list, 48 | html_dir=html_dir, 49 | text_dir=text_dir, 50 | mode="w", 51 | is_get_all_text=True) 52 | 53 | def webcrawler_batch(self, 54 | html_dir=None, 55 | text_dir=None, 56 | target_url_list=[], 57 | reptile_lib="requests", 58 | method="get", 59 | mode="w", 60 | time_sleep=4, 61 | time_out=10, 62 | target_content_tag={}, 63 | target_tag_list=[] 64 | ): 65 | ''' 66 | 爬取base_url页网址,分别保存html与解析处理的text 67 | :param html_dir: 保存html地址,jsonl文件 68 | :param text_dir: 将提取的text内容保存的地址,同样是jsonl格式。 69 | :param base_url: 目标网址 70 | :param reptile_lib: requests和selenium两种方式;requests为简单请求静态网址html内容,js动态数据无法获取;selenium为模拟人行为请求,可获取全部html数据,但请求时间较长,尽量设置5s以上的time_sleep,selenium需提前下载chrome浏览器与chromedriver,并配置路径。。 71 | :param method: requests请求有get/post两种,selenium只支持get 72 | :param time_sleep: 等待时间s 73 | :param time_out: 超时时长s 74 | :param target_content_tag: html中正文content所在tag,字典格式限制长度为1,key为选中便签类型name/class/id,vaule为标签取值如div/title/article等 75 | :param target_tag_list: 指定提取html对应的tag文本,列表,每个元素都与target_content_tag格式相同 76 | :return: None 77 | ''' 78 | assert method=="get","只支持get请求!" 79 | # 发送请求获取base_url结果:包含相关页面全部网址 80 | whe = WebHtmlExtractor(time_sleep=time_sleep, time_out=time_out) 81 | # 循环调用 82 | try: 83 | for k,url in enumerate(target_url_list): 84 | mode_batch = mode if k==0 else "a" 85 | whe.save_url_html(base_url=url, reptile_lib=reptile_lib, method=method, html_dir=html_dir, mode=mode_batch) 86 | except: 87 | logging.warning("爬取停止!") 88 | # html中提取text信息,并对doc做基础处理 89 | h2t = Html2Text() 90 | h2t.html2text(target_content_tag=target_content_tag, 91 | target_tag_list=target_tag_list, 92 | html_dir=html_dir, 93 | text_dir=text_dir, 94 | mode="w", 95 | is_get_all_text=True) 96 | 97 | def webcrawler_1_degree(self, 98 | html_dir=None, 99 | text_dir=None, 100 | base_url=None, 101 | reptile_lib="requests", 102 | method="get", 103 | mode="w", 104 | time_sleep=4, 105 | time_out=10, 106 | target_content_tag={}, 107 | target_tag_list=[], 108 | target_url_prefix=None 109 | ): 110 | ''' 111 | 爬取base_url页面所有网址,限制target_url_prefix为前缀,默认target_url_prefix=base_url,分别保存html与解析处理的text。 112 | :param html_dir: 保存html地址,jsonl文件 113 | :param text_dir: 将提取的text内容保存的地址,同样是jsonl格式。 114 | :param base_url: 目标站点 115 | :param target_url_prefix: 基于base_url网址,1度跳转链接 且 以target_url_prefix开头。默认为target_url_prefix=base_url(请求返回的当前网址url,中文会自动转为编码)。 116 | :param reptile_lib: requests和selenium两种方式;requests为简单请求静态网址html内容,js动态数据无法获取;selenium为模拟人行为请求,可获取全部html数据,但请求时间较长,尽量设置5s以上的time_sleep。 117 | :param method: requests请求有get/post两种,selenium只支持get 118 | :param time_sleep: 等待时间s 119 | :param time_out: 超时时长s 120 | :param target_content_tag: html中正文content所在tag,字典格式限制长度为1,key为选中便签类型name/class/id,vaule为标签取值如div/title/article等 121 | :param target_tag_list: 指定提取html对应的tag文本,列表,每个元素都与target_content_tag格式相同 122 | :return: None 123 | ''' 124 | assert method == "get", "只支持get请求!" 
125 | # 发送请求获取base_url结果:包含相关页面全部网址 126 | whe = WebHtmlExtractor(time_sleep=time_sleep, time_out=time_out) 127 | try: 128 | whe.save_1_jump_url_in_base(base_url=base_url, target_url_prefix=target_url_prefix, reptile_lib=reptile_lib, 129 | method=method, html_dir=html_dir, mode=mode) 130 | except: 131 | logging.warning("爬取停止!") 132 | # 读取文件 133 | h2t = Html2Text() 134 | # 读取并处理,只按照指定tag获取text,不获取全部text内容 135 | h2t.html2text(target_content_tag=target_content_tag, 136 | target_tag_list=target_tag_list, 137 | html_dir=html_dir, 138 | text_dir=text_dir, 139 | mode="w", 140 | is_get_all_text=True) 141 | --------------------------------------------------------------------------------
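补充:main_test.py 中的示例没有用到 target_content_tag / target_tag_list 两个参数,下面给出一个示意用法(非仓库自带示例;其中 class 取值 "article" 仅为假设,请按目标网页的实际结构填写;需在 web_crawler 目录下运行):

```python
# target_content_tag / target_tag_list 用法示意(非仓库自带示例)
from utils.WebCrawler import WebCrawler

wc = WebCrawler()
wc.webcrawler_single(
    html_dir="data/html/tmp_demo_html.jsonl",
    text_dir="data/text/tmp_demo_text.jsonl",
    base_url="https://www.langchain.asia/",
    reptile_lib="requests",
    method="get",
    time_sleep=4,
    # 正文所在标签:字典只允许一个键,键为 name/class/id 之一
    target_content_tag={"class": "article"},
    # 额外单独抽取的标签,每个元素格式与 target_content_tag 相同
    target_tag_list=[{"name": "title"}],
)
```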