├── .github ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ ├── bug.yml │ ├── documentation.yml │ └── features.yml └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── Dockerfile ├── LEGAL.md ├── LICENSE.md ├── README.md ├── README_en.md ├── configs ├── default_config.py ├── model_config.py.example ├── server_config.py.example └── utils.py ├── docker_build.sh ├── env_start.sh ├── examples ├── agent_examples │ ├── baseGroupPhase_example.py │ ├── baseTaskPhase_example.py │ ├── codeChatPhaseLocal_example.py │ ├── codeChatPhase_example.py │ ├── codeGenDoc_example.py │ ├── codeGenTestCases_example.py │ ├── codeReactPhase_example.py │ ├── codeRetrieval_example.py │ ├── codeToolReactPhase_example.py │ ├── docChatPhase_example.py │ ├── metagpt_phase_example.py │ ├── searchChatPhase_example.py │ └── toolReactPhase_example.py ├── api.py ├── auto_examples │ ├── agentchat_RetrievalChat.py │ ├── agentchat_function_call.py │ ├── agentchat_teachability.py │ ├── agentchat_teaching.py │ ├── agentchat_web_info.py │ └── auto_feedback_from_code_execution.py ├── gptq.py ├── llm_api.py ├── model_workers │ ├── SparkApi.py │ ├── __init__.py │ ├── azure.py │ ├── baichuan.py │ ├── base.py │ ├── fangzhou.py │ ├── minimax.py │ ├── openai.py │ ├── qianfan.py │ ├── qwen.py │ ├── tiangong.py │ ├── xinghuo.py │ └── zhipu.py ├── sdfile_api.py ├── start.py ├── start.sh ├── stop.py ├── utils.py ├── webui.py ├── webui │ ├── __init__.py │ ├── code.py │ ├── dialogue.py │ ├── document.py │ ├── prompt.py │ ├── utils.py │ └── yamls │ │ ├── webui_en.yaml │ │ └── webui_zh.yaml └── webui_config.py ├── nltk_data ├── corpora │ └── cmudict │ │ ├── README │ │ └── cmudict ├── taggers │ └── averaged_perceptron_tagger │ │ └── averaged_perceptron_tagger.pickle └── tokenizers │ └── punkt │ ├── PY3 │ ├── README │ ├── czech.pickle │ ├── danish.pickle │ ├── dutch.pickle │ ├── english.pickle │ ├── estonian.pickle │ ├── finnish.pickle │ ├── french.pickle │ ├── german.pickle │ ├── greek.pickle │ ├── italian.pickle │ ├── malayalam.pickle │ ├── norwegian.pickle │ ├── polish.pickle │ ├── portuguese.pickle │ ├── russian.pickle │ ├── slovene.pickle │ ├── spanish.pickle │ ├── swedish.pickle │ └── turkish.pickle │ ├── README │ ├── czech.pickle │ ├── danish.pickle │ ├── dutch.pickle │ ├── english.pickle │ ├── estonian.pickle │ ├── finnish.pickle │ ├── french.pickle │ ├── german.pickle │ ├── greek.pickle │ ├── italian.pickle │ ├── malayalam.pickle │ ├── norwegian.pickle │ ├── polish.pickle │ ├── portuguese.pickle │ ├── russian.pickle │ ├── slovene.pickle │ ├── spanish.pickle │ ├── swedish.pickle │ └── turkish.pickle ├── requirements.txt ├── sources ├── docs │ ├── langchain_asia.jsonl │ ├── langchain_text_10.jsonl │ └── python_langchain_com_docs_get_started_introduction_text.jsonl ├── docs_imgs │ ├── BaseAgent.png │ ├── agent-flow.png │ ├── devops-chatbot-module-v2.png │ ├── devops-chatbot-module.png │ ├── devopsgpt_example.png │ ├── devopsgpt_example2.png │ ├── luban.png │ ├── objective.png │ ├── objective_v4.png │ ├── roadmap.png │ ├── roadmap2.png │ ├── webui_config.png │ └── wechat.png ├── imgs │ ├── devops-chatbot.png │ ├── devops-chatbot2.png │ ├── docker_logs.png │ └── fastapi_docs_020_0.png ├── readme_docs │ ├── coagent │ │ ├── agent-flow-en.md │ │ ├── agent-flow.md │ │ ├── coagent-en.md │ │ ├── coagent.md │ │ ├── connector │ │ │ ├── connector_agent.md │ │ │ ├── connector_chain.md │ │ │ ├── connector_memory.md │ │ │ ├── connector_phase.md │ │ │ ├── connector_prompt.md │ │ │ └── customed_examples.md │ │ ├── quick-start-en.md │ │ └── quick-start.md │ ├── 
contribution │ │ ├── contribute_guide.md │ │ └── contribute_guide_en.md │ ├── fastchat-en.md │ ├── fastchat.md │ ├── roadmap-en.md │ ├── roadmap.md │ ├── start-en.md │ └── start.md └── tool_datas │ └── stock.json ├── tests ├── file_test.py └── torch_test.py └── web_crawler ├── data ├── html │ └── test_langchain_html.jsonl └── text │ └── test_langchain_text.jsonl ├── main_test.py └── utils ├── DocTokenizer.py ├── Html2Text.py ├── WebCrawler.py └── WebHtmlExtractor.py /.github/ISSUE_TEMPLATE/bug.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F41B Bug Report" 2 | description: Report a bug in Codefuse. To report a security issue, please instead use the security option below. 3 | labels: ["01 Bug Report"] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: > 8 | Thank you for taking the time to file a bug report. 9 | 10 | Use this to report bugs in Codefuse. 11 | 12 | If you're not certain that your issue is due to a bug in Codefuse, please use [GitHub Discussions](https://github.com/codefuse-ai/codefuse-chatbot/discussions) 13 | to ask for help with your issue. 14 | 15 | We warmly welcome any suggestions, opinions (including criticisms), comments, and contributions to the Codefuse project. 16 | 17 | Relevant links to check before filing a bug report to see if your issue has already been reported, fixed, or 18 | if there's another way to solve your problem: 19 | 20 | [API Reference](https://codefuse-ai.github.io/), 21 | [GitHub search](https://github.com/codefuse-ai/codefuse-chatbot), 22 | [Chatbot Github Discussions](https://github.com/codefuse-ai/codefuse-chatbot/discussions), 23 | [Chatbot Github Issues](https://github.com/codefuse-ai/codefuse-chatbot/issues) 24 | 25 | - type: checkboxes 26 | id: checks 27 | attributes: 28 | label: Checked other resources 29 | description: Please confirm and check all the following options. 30 | options: 31 | - label: I searched the Codefuse documentation with the integrated search. 32 | required: true 33 | - label: I used the GitHub search to find a similar question and didn't find it. 34 | required: true 35 | - label: I am sure that this is a bug in Codefuse-Repos rather than my code. 36 | required: true 37 | - label: I added a very descriptive title to this issue. 38 | required: true 39 | 40 | - type: dropdown 41 | id: system-info 42 | attributes: 43 | label: System Info 44 | description: > 45 | Please select the operating system you were using to run codefuse-ai/repos when this problem occurred. 46 | options: 47 | - Windows 48 | - Linux 49 | - MacOS 50 | - Docker 51 | - Devcontainer / Codespace 52 | - Windows Subsystem for Linux (WSL) 53 | - Other 54 | validations: 55 | required: true 56 | nested_fields: 57 | - type: text 58 | attributes: 59 | label: Specify the system 60 | description: Please specify the system you are working on. 61 | 62 | - type: dropdown 63 | attributes: 64 | label: Code Version 65 | description: | 66 | Please select which version of Codefuse-Repos you were using when this issue occurred. 67 | **If you weren't using the latest version, please try reproducing the issue with it first**. 68 | If installed with git you can run `git branch` to see which version of codefuse-ai you are running. 69 | options: 70 | - Latest Release 71 | - Stable (branch) 72 | - Master (branch) 73 | validations: 74 | required: true 75 | 76 | - type: textarea 77 | id: description 78 | attributes: 79 | label: Description 80 | description: | 81 | What is the problem, question, or error?
82 | 83 | Write a short description telling what you are doing, what you expect to happen, and what is currently happening. 84 | placeholder: | 85 | * I'm trying to use the `coagent` library to do X. 86 | * I expect to see Y. 87 | * Instead, it does Z. 88 | validations: 89 | required: true 90 | 91 | - type: textarea 92 | id: reproduction 93 | validations: 94 | required: true 95 | attributes: 96 | label: Example Code 97 | description: | 98 | Please add a self-contained, [minimal, reproducible, example](https://stackoverflow.com/help/minimal-reproducible-example) with your use case. 99 | 100 | If a maintainer can copy it, run it, and see it right away, there's a much higher chance that you'll be able to get help. 101 | 102 | **Important!** 103 | 104 | * Use code tags (e.g., ```python ... ```) to correctly [format your code](https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting). 105 | * INCLUDE the language label (e.g. `python`) after the first three backticks to enable syntax highlighting. (e.g., ```python rather than ```). 106 | * Reduce your code to the minimum required to reproduce the issue if possible. This makes it much easier for others to help you. 107 | * Avoid screenshots when possible, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code. 108 | 109 | placeholder: | 110 | The following code: 111 | 112 | ```python 113 | from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS 114 | from coagent.connector.phase import BasePhase 115 | from coagent.connector.schema import Message 116 | 117 | phase_name = "baseGroupPhase" 118 | phase = BasePhase( 119 | phase_name, embed_config=embed_config, llm_config=llm_config, 120 | ) 121 | 122 | query_content = "确认本地是否存在employee_data.csv,并查看它有哪些列和数据类型;然后画柱状图" 123 | query = Message( 124 | role_name="human", role_type="user", tools=[], 125 | role_content=query_content, input_query=query_content, origin_query=query_content, 126 | ) 127 | 128 | output_message, output_memory = phase.step(query) 129 | ``` 130 | 131 | - type: textarea 132 | id: error 133 | validations: 134 | required: false 135 | attributes: 136 | label: Error Message and Stack Trace (if applicable) 137 | description: | 138 | If you are reporting an error, please include the full error message and stack trace. 139 | placeholder: | 140 | Exception + full stack trace 141 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation.yml: -------------------------------------------------------------------------------- 1 | name: Documentation 2 | description: Report an issue related to the Codefuse documentation. 3 | title: "DOC: " 4 | labels: [02 - Documentation] 5 | 6 | body: 7 | - type: textarea 8 | attributes: 9 | label: "Issue with current documentation:" 10 | description: > 11 | Please make sure to leave a reference to the document/code you're 12 | referring to. 13 | 14 | - type: textarea 15 | attributes: 16 | label: "Idea or request for content:" 17 | description: > 18 | Please describe as clearly as possible what topics you think are missing 19 | from the current documentation. -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/features.yml: -------------------------------------------------------------------------------- 1 | name: Feature request 🚀 2 | description: Suggest a new idea for Codefuse! 
3 | labels: ['03 New Features'] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | First, check out our [wiki page on Contributing](https://github.com/Significant-Gravitas/Nexus/wiki/Contributing) 9 | Please provide a searchable summary of the issue in the title above ⬆️. 10 | 11 | - type: checkboxes 12 | id: checks 13 | attributes: 14 | label: Checked other resources 15 | description: Please confirm and check all the following options. 16 | options: 17 | - label: I searched the Codefuse documentation with the integrated search. 18 | required: true 19 | - label: I used the GitHub search to find a similar question and didn't find it. 20 | required: true 21 | 22 | - type: textarea 23 | attributes: 24 | label: Summary 💡 25 | description: Describe how it should work. 26 | 27 | - type: textarea 28 | attributes: 29 | label: Examples 🌈 30 | description: Provide a link to other implementations, or screenshots of the expected behavior. 31 | 32 | - type: textarea 33 | attributes: 34 | label: Motivation 🔦 35 | description: What are you trying to accomplish? How has the lack of this feature affected you? Providing context helps us come up with a solution that is more useful in the real world. -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__ 2 | knowledge_base 3 | logs 4 | llm_models 5 | embedding_models 6 | jupyter_work 7 | model_config.py 8 | server_config.py 9 | internal_start.py 10 | code_base 11 | .DS_Store 12 | .idea 13 | data 14 | .pyc 15 | tests 16 | *egg-info 17 | build 18 | dist 19 | package.sh 20 | local_config.json 21 | muagent* -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.18-bookworm 2 | 3 | WORKDIR /home/user 4 | 5 | COPY ./requirements.txt /home/user/docker_requirements.txt 6 | 7 | 8 | # RUN apt-get update 9 | # RUN apt-get install -y iputils-ping telnetd net-tools vim tcpdump 10 | # RUN echo telnet stream tcp nowait telnetd /usr/sbin/tcpd /usr/sbin/in.telnetd /etc/inetd.conf 11 | # RUN service inetutils-inetd start 12 | # service inetutils-inetd status 13 | 14 | RUN wget https://oss-cdn.nebula-graph.com.cn/package/3.6.0/nebula-graph-3.6.0.ubuntu1804.amd64.deb 15 | RUN dpkg -i nebula-graph-3.6.0.ubuntu1804.amd64.deb 16 | 17 | RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple 18 | RUN pip install -r /home/user/docker_requirements.txt 19 | 20 | CMD ["bash"] 21 | -------------------------------------------------------------------------------- /LEGAL.md: -------------------------------------------------------------------------------- 1 | Legal Disclaimer 2 | 3 | Within this source code, the comments in Chinese shall be the original, governing version. Any comments in other languages are for reference only. In the event of any conflict between the Chinese language version comments and other language version comments, the Chinese language version shall prevail.
4 | 5 | 法律免责声明 6 | 7 | 关于代码注释部分,中文注释为官方版本,其它语言注释仅做参考。中文注释可能与其它语言注释存在不一致,当中文注释与其它语言注释存在不一致时,请以中文注释为准。 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 中文  |  English  3 |

4 | 5 | #

CodeFuse-ChatBot: Development by Private Knowledge Augmentation

6 | 7 |

8 | ZH doc 9 | EN doc 10 | License 11 | 12 | Open Issues 13 | 14 |

15 |

16 | 17 | CodeFuse-ChatBot is an open-source AI assistant developed by the Ant Group CodeFuse team, dedicated to simplifying and optimizing every stage of the software development lifecycle. The project combines a Multi-Agent cooperative scheduling mechanism with a rich set of tool libraries, code bases, knowledge bases, and a sandbox environment, enabling LLMs to effectively execute and handle complex tasks in the DevOps domain. 18 | 19 | 20 | ## 🔔 Updates 21 | - [2024.01.29] Released the configurable multi-agent framework codefuse-muAgent; for details see the [usage guide](https://codefuse-ai.github.io/zh-CN/docs/api-docs/MuAgent/overview/multi-agent) 22 | - [2023.12.26] Opened up support for private open-source LLMs and LLM APIs via FastChat 23 | - [2023.12.14] Featured in a QbitAI (量子位) WeChat article: [article link](https://mp.weixin.qq.com/s/MuPfayYTk9ZW6lcqgMpqKA) 24 | - [2023.12.01] Released Multi-Agent and codebase retrieval 25 | - [2023.11.15] Added a Q&A enhancement mode based on local codebases 26 | - [2023.09.15] Released the local/isolated-environment sandbox, plus crawler-based knowledge retrieval for specified URLs 27 | 28 | ## 📜 Table of Contents 29 | - [🤝 Introduction](#-introduction) 30 | - [🎥 Demo Videos](#-demo-videos) 31 | - [🧭 Technical Roadmap](#-technical-roadmap) 32 | - [🌐 Model Support](#-model-support) 33 | - [🚀 Quick Start](#-quick-start) 34 | - [🤗 Acknowledgements](#-acknowledgements) 35 | - [🗂 Miscellaneous](#-miscellaneous) 36 | - [📱 Contact Us](#-contact-us) 37 | - [✨ Star History](#-star-history) 38 | 39 | ## 🤝 Introduction 40 | 41 | 💡 This project aims to build an AI assistant for the whole software development lifecycle, covering design, coding, testing, deployment, and operations, through Retrieval Augmented Generation (RAG), Tool Learning, and sandbox environments. It is meant to gradually move people from the traditional dev-ops routine of looking up scattered references and operating separate, isolated platforms toward an LLM-driven, conversational mode of development and operations, changing day-to-day development and operations habits. 42 | 43 | The project's core differentiating technologies and features: 44 | - **🧠 Intelligent Scheduling Core:** A scheduling core with a complete, well-structured pipeline that supports one-click multi-mode configuration, simplifying workflows. [Usage guide](https://codefuse-ai.github.io/zh-CN/docs/api-docs/MuAgent/overview/multi-agent) 45 | - **💻 Whole-Codebase Analysis:** Repository-level code understanding, plus project-file-level code writing and generation, improving development efficiency. 46 | - **📄 Enhanced Document Analysis:** Combines document knowledge bases with knowledge graphs, giving document analysis deeper support through retrieval and reasoning enhancement. 47 | - **🔧 Vertical-Domain Knowledge:** A knowledge base tailored to the DevOps domain, with convenient one-click self-service construction of vertical knowledge bases. 48 | - **🤖 Vertical-Model Compatibility:** Small models tailored to the DevOps domain, kept compatible with DevOps-related platforms to promote integration of the technology ecosystem. 49 | 50 | 🌍 Relying on open-source LLM and Embedding models, this project supports offline private deployment based on open-source models. Calling the OpenAI API is also supported. [Integration demo](https://codefuse-ai.github.io/zh-CN/docs/developer-docs/CodeFuse-ChatBot/master/fastchat) 51 | 52 | 👥 The core development team has long focused on research in the AIOps + NLP field. We launched the Codefuse-ai project in the hope that the community will broadly contribute high-quality development and operations documents and jointly improve this solution, working toward the goal of "making development effortless for everyone." 53 | 54 |
55 | (image) 56 |
57 | 58 | 59 | ## 🎥 Demo Videos 60 | 61 | To help you get a more intuitive view of the features and usage of Codefuse-ChatBot, we have recorded a series of demo videos. Watching them is a quick way to learn the project's main capabilities and workflows. 62 | 63 | 64 | - Knowledge base import and Q&A: [demo video](https://www.youtube.com/watch?v=UGJdTGaVnNY&t=2s&ab_channel=HaotianZhu) 65 | - Local codebase import and Q&A: [demo video](https://www.youtube.com/watch?v=ex5sbwGs3Kg) 66 | 67 | 68 | ## 🧭 Technical Roadmap 69 |
70 | (image) 71 |
72 | 73 | - 🧠 **Multi-Agent Schedule Core:** A multi-agent scheduling core; interactive agents can be built with simple configuration. 74 | - 🕷️ **Multi Source Web Crawl:** A multi-source web crawler that fetches specified URLs to collect the information you need. 75 | - 🗂️ **Data Processor:** A data processor that makes document loading, data cleaning, and text splitting easy, integrating data from different sources. 76 | - 🔤 **Text Embedding & Index:** Text embedding and indexing; users can easily upload files for document retrieval, optimizing the document analysis process. 77 | - 🗄️ **Vector Database & Graph Database:** Vector and graph databases, providing a flexible and powerful data management solution. 78 | - 📝 **Prompt Control & Management:** Prompt control and management, precisely defining the context for agents. 79 | - 🚧 **SandBox:** A sandbox environment that safely executes code builds and actions. 80 | - 💬 **LLM:** The agent brain, supporting a variety of open-source models and LLM APIs. 81 | - 🛠️ **API Management:** An API management tool that enables rapid integration of open-source components and operations platforms. 82 | 83 | For implementation details, see: [technical roadmap details](https://codefuse-ai.github.io/zh-CN/docs/developer-docs/CodeFuse-ChatBot/master/roadmap) 84 | For project planning, see: [Projects](https://github.com/orgs/codefuse-ai/projects/1) 85 | 86 | 87 | ## 🌐 Model Support 88 | 89 | If you need a specific model integrated, please let us know your requirements by filing an issue. 90 | 91 | | model_name | model_size | gpu_memory | quantize | HFhub | ModelScope | 92 | | ------------------ | ---------- | ---------- | -------- | ----- | ---------- | 93 | | chatgpt | - | - | - | - | - | 94 | | codellama-34b-int4 | 34b | 20g | int4 | coming soon| [link](https://modelscope.cn/models/codefuse-ai/CodeFuse-CodeLlama-34B-4bits/summary) | 95 | 96 | 97 | 98 | ## 🚀 Quick Start 99 | ### muagent-py 100 | For the full documentation, see: [CodeFuse-muAgent](https://codefuse-ai.github.io/zh-CN/docs/api-docs/MuAgent/overview/multi-agent) 101 | ``` 102 | pip install codefuse-muagent 103 | ``` 104 | 105 | ### Using the ChatBot 106 | Please install the NVIDIA driver yourself. This project has been tested with Python 3.9.18 and CUDA 11.7, on Windows and on x86-architecture macOS. 107 | 108 | For Docker installation, private LLM integration, and related startup issues, see: [quick start details](https://codefuse-ai.github.io/zh-CN/docs/developer-docs/CodeFuse-ChatBot/master/quickstart) 109 | 110 | **On Apple Silicon (Apple M-series chips), you may first need to run `brew install qpdf`.** 111 | 112 | 1. Prepare the Python environment 113 | 114 | - Using conda to manage the Python environment is recommended (optional) 115 | ```bash 116 | # prepare the conda environment 117 | conda create --name devopsgpt python=3.9 118 | conda activate devopsgpt 119 | ``` 120 | 121 | - Install the dependencies 122 | ```bash 123 | cd codefuse-chatbot 124 | # with python=3.9 the latest notebook is fine; with python=3.8 use notebook=6.5.6 125 | pip install -r requirements.txt 126 | ``` 127 | 128 | 2. Start the services 129 | ```bash 130 | # after filling in server_config.py, everything can be started with one command 131 | cd examples 132 | bash start.sh 133 | # then do the relevant configuration in the web page and click `启动对话服务` (start the chat service) 134 | ```
136 | 图片 137 |
137 | 138 | 139 | 140 | Alternatively, start things via `start.py`, [the legacy startup method](https://codefuse-ai.github.io/zh-CN/docs/developer-docs/CodeFuse-ChatBot/master/start-detail) 141 | For more ways to integrate LLMs, see [more details...](https://codefuse-ai.github.io/zh-CN/docs/developer-docs/CodeFuse-ChatBot/master/fastchat) 142 |
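For a programmatic quick start, the scripts under `examples/agent_examples` in this repository all follow the same pattern: build an `LLMConfig` and an `EmbedConfig`, wrap them in a `BasePhase`, and drive the phase with a `Message`. Below is a minimal sketch of a single Q&A round; it assumes `OPENAI_API_KEY` and `API_BASE_URL` are set in the environment and that a local `text2vec-base-chinese` model is available under `embedding_models/`.

```python
import os

from coagent.llm_models.llm_config import EmbedConfig, LLMConfig
from coagent.connector.phase import BasePhase
from coagent.connector.schema import Message

# LLM and embedding configuration, mirroring examples/agent_examples
llm_config = LLMConfig(
    model_name="gpt-3.5-turbo", api_key=os.environ["OPENAI_API_KEY"],
    api_base_url=os.environ["API_BASE_URL"], temperature=0.3,
)
embed_config = EmbedConfig(
    embed_engine="model", embed_model="text2vec-base-chinese",
    embed_model_path="embedding_models/text2vec-base-chinese",
)

# "baseTaskPhase" plans a task and then executes it step by step
phase = BasePhase("baseTaskPhase", embed_config=embed_config, llm_config=llm_config)

query_content = "确认本地是否存在employee_data.csv,并查看它有哪些列和数据类型"
query = Message(
    role_name="human", role_type="user",
    role_content=query_content, input_query=query_content, origin_query=query_content,
)

# step() runs the phase end-to-end and returns the final message plus the full memory
output_message, output_memory = phase.step(query)
print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list"))
```

The full example scripts additionally pass `sandbox_server`, `jupyter_work_path`, and `kb_root_path` to `BasePhase`; see the scripts under `examples/agent_examples` later in this listing for phase-specific options.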
143 | 144 | 145 | ## Contribution Guide 146 | Thank you for your interest in the Codefuse project. We warmly welcome any suggestions, opinions (including criticisms), comments, and contributions to it. 147 | 148 | You can raise your suggestions, opinions, and comments directly through GitHub Issues. 149 | 150 | There are many ways to participate in and contribute to the Codefuse project: code implementation, test writing, process tooling improvements, documentation refinement, and more. We welcome any contribution and will add you to our list of contributors. See the [Contribution Guide...](https://codefuse-ai.github.io/zh-CN/contribution/contribution) 151 | 152 | ## 🤗 Acknowledgements 153 | 154 | This project is based on [langchain-chatchat](https://github.com/chatchat-space/Langchain-Chatchat) and [codebox-api](https://github.com/shroominic/codebox-api); our deepest thanks for their open-source contributions! 155 | 156 | ## 🗂 Miscellaneous 157 | 158 | ### 📱 Contact Us 159 |
160 | (image) 161 |
162 | 163 | ### ✨ Star History 164 | [![Star History Chart](https://api.star-history.com/svg?repos=codefuse-ai/codefuse-chatbot&type=Date)](https://star-history.com/#codefuse-ai/codefuse-chatbot&Date) 165 | -------------------------------------------------------------------------------- /configs/default_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | 4 | 5 | # 6 | system_name = platform.system() 7 | 8 | 9 | # Log storage path 10 | LOG_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "logs") 11 | # Default storage path for source data 12 | SOURCE_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "sources") 13 | # Default storage path for the knowledge base 14 | KB_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "knowledge_base") 15 | # Default storage path for the code base 16 | CB_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "code_base") 17 | # Storage path for nltk models 18 | NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "nltk_data") 19 | # Code storage path 20 | JUPYTER_WORK_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "jupyter_work") 21 | # WEB_CRAWL storage path 22 | WEB_CRAWL_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "knowledge_base") 23 | # NEBULA_DATA storage path 24 | NEBULA_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data/nebula_data") 25 | # Storage path for language models 26 | LOCAL_LLM_MODEL_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "llm_models") 27 | # Storage path for embedding models 28 | LOCAL_EM_MODEL_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "embedding_models") 29 | # CHROMA storage path 30 | CHROMA_PERSISTENT_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "data/chroma_data") 31 | 32 | for _path in [LOG_PATH, SOURCE_PATH, KB_ROOT_PATH, CB_ROOT_PATH, NLTK_DATA_PATH, JUPYTER_WORK_PATH, WEB_CRAWL_PATH, NEBULA_PATH, CHROMA_PERSISTENT_PATH, LOCAL_LLM_MODEL_DIR, LOCAL_EM_MODEL_DIR]: 33 | if not os.path.exists(_path): 34 | os.makedirs(_path, exist_ok=True) 35 | 36 | path_envt_dict = { 37 | "LOG_PATH": LOG_PATH, "SOURCE_PATH": SOURCE_PATH, "KB_ROOT_PATH": KB_ROOT_PATH, 38 | "NLTK_DATA_PATH":NLTK_DATA_PATH, "JUPYTER_WORK_PATH": JUPYTER_WORK_PATH, 39 | "WEB_CRAWL_PATH": WEB_CRAWL_PATH, "NEBULA_PATH": NEBULA_PATH, 40 | "CHROMA_PERSISTENT_PATH": CHROMA_PERSISTENT_PATH 41 | } 42 | for path_name, _path in path_envt_dict.items(): 43 | os.environ[path_name] = _path 44 | 45 | 46 | # Default database storage path. 47 | # If you use sqlite, you can modify DB_ROOT_PATH directly; for other databases, modify SQLALCHEMY_DATABASE_URI directly. 48 | DB_ROOT_PATH = os.path.join(KB_ROOT_PATH, "info.db") 49 | SQLALCHEMY_DATABASE_URI = f"sqlite:///{DB_ROOT_PATH}" 50 | 51 | # Available vector store types and their configurations 52 | kbs_config = { 53 | "faiss": { 54 | }, 55 | # "milvus": { 56 | # "host": "127.0.0.1", 57 | # "port": "19530", 58 | # "user": "", 59 | # "password": "", 60 | # "secure": False, 61 | # }, 62 | # "pg": { 63 | # "connection_uri": "postgresql://postgres:postgres@127.0.0.1:5432/langchain_chatchat", 64 | # } 65 | } 66 | 67 | # Default vector store type. Options: faiss, milvus, pg.
68 | DEFAULT_VS_TYPE = "faiss" 69 | 70 | # Number of cached vector stores 71 | CACHED_VS_NUM = 1 72 | 73 | # Length of a single text chunk in the knowledge base 74 | CHUNK_SIZE = 500 75 | 76 | # Overlap length between adjacent text chunks in the knowledge base 77 | OVERLAP_SIZE = 50 78 | 79 | # Number of matched vectors in knowledge base retrieval 80 | VECTOR_SEARCH_TOP_K = 5 81 | 82 | # Relevance threshold for knowledge base retrieval, in the range 0-1; the smaller the SCORE, the higher the relevance, and 1 is equivalent to no filtering. Around 0.5 is recommended. 83 | # On Mac, normalized_L2 may be unavailable, so SCORE_THRESHOLD is adjusted to the 0~1100 range 84 | FAISS_NORMALIZE_L2 = True if system_name in ["Linux", "Windows"] else False 85 | SCORE_THRESHOLD = 1 if system_name in ["Linux", "Windows"] else 1100 86 | 87 | # Number of matched search engine results 88 | SEARCH_ENGINE_TOP_K = 5 89 | 90 | # Number of matched code engine results 91 | CODE_SEARCH_TOP_K = 1 92 | 93 | 94 | # Whether the API enables cross-origin requests; defaults to False, set to True if you need it 95 | # is open cross domain 96 | OPEN_CROSS_DOMAIN = False 97 | 98 | # Required variables for Bing search 99 | # Bing search requires a Bing Subscription Key; apply for a bing search trial in the Azure portal 100 | # For how to apply, see 101 | # https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/create-bing-search-service-resource 102 | # For creating a bing api search instance with python, see: 103 | # https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/quickstarts/rest/python 104 | BING_SEARCH_URL = "https://api.bing.microsoft.com/v7.0/search" 105 | # Note: this is not the api key of bing Webmaster Tools, 106 | 107 | # Also, if you are on a server and get "Failed to establish a new connection: [Errno 110] Connection timed out", 108 | # it is because the server has a firewall; ask the administrator to whitelist the address. On a company server, don't get your hopes up. 109 | BING_SUBSCRIPTION_KEY = "" 110 | 111 | # Whether to enable Chinese title enhancement, plus the related configuration 112 | # Adds title detection to decide which texts are titles and marks them in the metadata; 113 | # then merges each text with its parent-level title, enriching the text information. 114 | ZH_TITLE_ENHANCE = False 115 | 116 | log_verbose = False -------------------------------------------------------------------------------- /configs/server_config.py.example: -------------------------------------------------------------------------------- 1 | from .model_config import LLM_MODEL, LLM_DEVICE 2 | import os, json 3 | 4 | try: 5 | cur_dir = os.path.join(os.path.dirname(os.path.abspath(__file__))) 6 | with open(os.path.join(cur_dir, "local_config.json"), "r") as f: 7 | update_config = json.load(f) 8 | except: 9 | update_config = {} 10 | 11 | # Whether the API enables cross-origin requests; defaults to False, set to True if you need it 12 | # is open cross domain 13 | OPEN_CROSS_DOMAIN = False 14 | # Whether to start the services in Docker containers 15 | try: 16 | DOCKER_SERVICE = json.loads(os.environ["DOCKER_SERVICE"]) or update_config.get("DOCKER_SERVICE") or False 17 | except: 18 | DOCKER_SERVICE = True 19 | # Whether to use the container sandbox 20 | try: 21 | SANDBOX_DO_REMOTE = json.loads(os.environ["SANDBOX_DO_REMOTE"]) or update_config.get("SANDBOX_DO_REMOTE") or False 22 | except: 23 | SANDBOX_DO_REMOTE = True 24 | # Whether to serve through the api service 25 | NO_REMOTE_API = True 26 | # Default bind host for each server 27 | DEFAULT_BIND_HOST = "127.0.0.1" 28 | os.environ["DEFAULT_BIND_HOST"] = DEFAULT_BIND_HOST 29 | 30 | # 31 | CONTRAINER_NAME = "devopsgpt_webui" 32 | IMAGE_NAME = "devopsgpt:latest" 33 | 34 | # webui.py server 35 | WEBUI_SERVER = { 36 | "host": DEFAULT_BIND_HOST, 37 | "port": 8501, 38 | "docker_port": 8501 39 | } 40 | 41 | # api.py server 42 | API_SERVER = { 43 | "host": DEFAULT_BIND_HOST, 44 | "port": 7861, 45 | "docker_port": 7861 46 | } 47 | 48 | # sdfile_api.py server 49 | SDFILE_API_SERVER = { 50 | "host": DEFAULT_BIND_HOST, 51 | "port": 7862, 52 | "docker_port": 7862 53 | } 54 | 55 | # fastchat openai_api server 56 | FSCHAT_OPENAI_API = { 57 | "host": DEFAULT_BIND_HOST, 58 | "port": 8888, # the api_base_url of the model config in model_config.llm_model_dict must match this. 59 | "docker_port": 8888, # the api_base_url of the model config in model_config.llm_model_dict must match this. 60 | } 61 | 62 | # nebula conf 63 | NEBULA_HOST = DEFAULT_BIND_HOST 64 | NEBULA_PORT = 9669 65 | NEBULA_STORAGED_PORT = 9779
66 | NEBULA_USER = 'root' 67 | NEBULA_PASSWORD = '' 68 | NEBULA_GRAPH_SERVER = { 69 | "host": DEFAULT_BIND_HOST, 70 | "port": NEBULA_PORT, 71 | "docker_port": NEBULA_PORT 72 | } 73 | 74 | # sandbox api server 75 | SANDBOX_CONTRAINER_NAME = "devopsgpt_sandbox" 76 | SANDBOX_IMAGE_NAME = "devopsgpt:latest" 77 | SANDBOX_HOST = os.environ.get("SANDBOX_HOST") or update_config.get("SANDBOX_HOST") or DEFAULT_BIND_HOST # "172.25.0.3" 78 | SANDBOX_SERVER = { 79 | "host": f"http://{SANDBOX_HOST}", 80 | "port": 5050, 81 | "docker_port": 5050, 82 | "url": f"http://{SANDBOX_HOST}:5050", 83 | "do_remote": SANDBOX_DO_REMOTE, 84 | } 85 | 86 | # fastchat model_worker server 87 | # These models must be correctly configured in model_config.llm_model_dict. 88 | # When launching startup.py, a model can be specified via `--model-worker --model-name xxxx`; if unspecified, LLM_MODEL is used 89 | # Chat models are recommended; do not use base models, as correct output cannot be obtained from them 90 | FSCHAT_MODEL_WORKERS = json.loads(os.environ.get("FSCHAT_MODEL_WORKERS")) if os.environ.get("FSCHAT_MODEL_WORKERS") else {} 91 | FSCHAT_MODEL_WORKERS = FSCHAT_MODEL_WORKERS or update_config.get("FSCHAT_MODEL_WORKERS") 92 | FSCHAT_MODEL_WORKERS = FSCHAT_MODEL_WORKERS or { 93 | "default": { 94 | "host": DEFAULT_BIND_HOST, 95 | "port": 20002, 96 | "device": LLM_DEVICE, 97 | # todo: parameters that must be configured for multi-GPU loading 98 | "gpus": None, 99 | "numgpus": 1, 100 | # The following are less commonly used parameters; configure them as needed 101 | # "max_gpu_memory": "20GiB", 102 | # "load_8bit": False, 103 | # "cpu_offloading": None, 104 | # "gptq_ckpt": None, 105 | # "gptq_wbits": 16, 106 | # "gptq_groupsize": -1, 107 | # "gptq_act_order": False, 108 | # "awq_ckpt": None, 109 | # "awq_wbits": 16, 110 | # "awq_groupsize": -1, 111 | # "model_names": [LLM_MODEL], 112 | # "conv_template": None, 113 | # "limit_worker_concurrency": 5, 114 | # "stream_interval": 2, 115 | # "no_register": False, 116 | }, 117 | 'codellama_34b': {'host': DEFAULT_BIND_HOST, 'port': 20002}, 118 | 'Baichuan2-13B-Base': {'host': DEFAULT_BIND_HOST, 'port': 20003}, 119 | 'Baichuan2-13B-Chat': {'host': DEFAULT_BIND_HOST, 'port': 20004}, 120 | 'baichuan2-7b-base': {'host': DEFAULT_BIND_HOST, 'port': 20005}, 121 | 'baichuan2-7b-chat': {'host': DEFAULT_BIND_HOST, 'port': 20006}, 122 | 'internlm-7b-base': {'host': DEFAULT_BIND_HOST, 'port': 20007}, 123 | 'internlm-chat-7b': {'host': DEFAULT_BIND_HOST, 'port': 20008}, 124 | 'chatglm2-6b': {'host': DEFAULT_BIND_HOST, 'port': 20009}, 125 | 'qwen-14b-base': {'host': DEFAULT_BIND_HOST, 'port': 20010}, 126 | 'qwen-14b-chat': {'host': DEFAULT_BIND_HOST, 'port': 20011}, 127 | 'qwen-1-8B-Chat': {'host': DEFAULT_BIND_HOST, 'port': 20012}, 128 | 'Qwen-7B': {'host': DEFAULT_BIND_HOST, 'port': 20013}, 129 | 'Qwen-7B-Chat': {'host': DEFAULT_BIND_HOST, 'port': 20014}, 130 | 'qwen-7b-base-v1.1': {'host': DEFAULT_BIND_HOST, 'port': 20015}, 131 | 'qwen-7b-chat-v1.1': {'host': DEFAULT_BIND_HOST, 'port': 20016}, 132 | 'chatglm3-6b': {'host': DEFAULT_BIND_HOST, 'port': 20017}, 133 | 'chatglm3-6b-32k': {'host': DEFAULT_BIND_HOST, 'port': 20018}, 134 | 'chatglm3-6b-base': {'host': DEFAULT_BIND_HOST, 'port': 20019}, 135 | 'Qwen-72B-Chat-Int4': {'host': DEFAULT_BIND_HOST, 'port': 20020}, 136 | 'gpt-3.5-turbo': {'host': DEFAULT_BIND_HOST, 'port': 20021}, 137 | 'example': {'host': DEFAULT_BIND_HOST, 'port': 20022}, 138 | 'openai-api': {'host': DEFAULT_BIND_HOST, 'port': 20023} 139 | } 140 | # fastchat multi model worker server 141 | FSCHAT_MULTI_MODEL_WORKERS = { 142 | # todo 143 | } 144 | 145 | # fastchat controller server 146 | FSCHAT_CONTROLLER = { 147 | "host": DEFAULT_BIND_HOST, 148 | "port": 20001, 149 | "dispatch_method": "shortest_queue", 150 | } 151 | 152 | 153 
| # Do not change anything below 154 | def fschat_controller_address() -> str: 155 | host = FSCHAT_CONTROLLER["host"] 156 | port = FSCHAT_CONTROLLER["port"] 157 | return f"http://{host}:{port}" 158 | 159 | 160 | def fschat_model_worker_address(model_name: str = LLM_MODEL) -> str: 161 | if model := FSCHAT_MODEL_WORKERS.get(model_name): 162 | host = model["host"] 163 | port = model["port"] 164 | return f"http://{host}:{port}" 165 | 166 | 167 | def fschat_openai_api_address() -> str: 168 | host = FSCHAT_OPENAI_API["host"] 169 | port = FSCHAT_OPENAI_API["port"] 170 | return f"http://{host}:{port}" 171 | 172 | 173 | def api_address() -> str: 174 | host = API_SERVER["host"] 175 | port = API_SERVER["port"] 176 | return f"http://{host}:{port}" 177 | 178 | 179 | def webui_address() -> str: 180 | host = WEBUI_SERVER["host"] 181 | port = WEBUI_SERVER["port"] 182 | return f"http://{host}:{port}" 183 | -------------------------------------------------------------------------------- /configs/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | def is_running_in_docker(): 4 | """ 5 | Check whether the current code is running inside a Docker container 6 | """ 7 | # Check whether the /.dockerenv file exists 8 | if os.path.exists('/.dockerenv'): 9 | return True 10 | 11 | # Check whether the cgroup filesystem entries contain /docker/ 12 | if os.path.exists("/proc/1/cgroup"): 13 | with open('/proc/1/cgroup', 'rt') as f: 14 | return '/docker/' in f.read() 15 | return False -------------------------------------------------------------------------------- /docker_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | docker build -t devopsgpt:latest . -------------------------------------------------------------------------------- /env_start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pip install -r requirements.txt 4 | # torch-gpu installation depends on your specific setup 5 | # pip install torch==2.0.1+cu118 cudatoolkit --index-url https://download.pytorch.org/whl/cu118 6 | 7 | # pip3 uninstall crypto 8 | # pip3 uninstall pycrypto 9 | # pip3 install pycryptodome -------------------------------------------------------------------------------- /examples/agent_examples/baseGroupPhase_example.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH, LLM_MODEL 9 | from configs.server_config import SANDBOX_SERVER 10 | from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS 11 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 12 | from coagent.connector.phase import BasePhase 13 | from coagent.connector.schema import Message 14 | 15 | # 16 | tools = toLangchainTools([TOOL_DICT[i] for i in TOOL_SETS if i in TOOL_DICT]) 17 | # log level: print prompt and llm predict 18 | os.environ["log_verbose"] = "2" 19 | 20 | phase_name = "baseGroupPhase" 21 | llm_config = LLMConfig( 22 | model_name=LLM_MODEL, api_key=os.environ["OPENAI_API_KEY"], 23 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 24 | ) 25 | embed_config = EmbedConfig( 26 | embed_engine="model", embed_model="text2vec-base-chinese", 27 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 28 | ) 29 | 30 | phase = BasePhase( 31 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 32 
| embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 33 | ) 34 | # round-1 35 | query_content = "确认本地是否存在employee_data.csv,并查看它有哪些列和数据类型;然后画柱状图" 36 | # query_content = "帮我确认下127.0.0.1这个服务器的在10点是否存在异常,请帮我判断一下" 37 | query = Message( 38 | role_name="human", role_type="user", tools=[], 39 | role_content=query_content, input_query=query_content, origin_query=query_content, 40 | ) 41 | # phase.pre_print(query) 42 | output_message, output_memory = phase.step(query) 43 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) -------------------------------------------------------------------------------- /examples/agent_examples/baseTaskPhase_example.py: -------------------------------------------------------------------------------- 1 | import os, sys, requests 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH 9 | from configs.server_config import SANDBOX_SERVER 10 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 11 | 12 | from coagent.connector.phase import BasePhase 13 | from coagent.connector.schema import Message 14 | 15 | # log-level,print prompt or llm predict 16 | os.environ["log_verbose"] = "2" 17 | 18 | phase_name = "baseTaskPhase" 19 | llm_config = LLMConfig( 20 | model_name="gpt-3.5-turbo", api_key=os.environ["OPENAI_API_KEY"], 21 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 22 | ) 23 | embed_config = EmbedConfig( 24 | embed_engine="model", embed_model="text2vec-base-chinese", 25 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 26 | ) 27 | phase = BasePhase( 28 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 29 | embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 30 | ) 31 | # round-1 32 | query_content = "确认本地是否存在employee_data.csv,并查看它有哪些列和数据类型;然后画柱状图" 33 | query = Message( 34 | role_name="human", role_type="user", 35 | role_content=query_content, input_query=query_content, origin_query=query_content, 36 | ) 37 | 38 | output_message, output_memory = phase.step(query) 39 | 40 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) -------------------------------------------------------------------------------- /examples/agent_examples/codeChatPhaseLocal_example.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | ''' 3 | @author: 温进 4 | @file: codeChatPhaseLocal_example.py 5 | @time: 2024/1/31 下午4:32 6 | @desc: 7 | ''' 8 | import os, sys, requests 9 | from concurrent.futures import ThreadPoolExecutor 10 | from tqdm import tqdm 11 | 12 | import requests 13 | from typing import List 14 | 15 | src_dir = os.path.join( 16 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 17 | ) 18 | sys.path.append(src_dir) 19 | 20 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH, CB_ROOT_PATH 21 | from configs.server_config import SANDBOX_SERVER 22 | from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS 23 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 24 | from coagent.connector.phase import BasePhase 25 | from coagent.connector.schema import Message, Memory 26 | from coagent.codechat.codebase_handler.codebase_handler import CodeBaseHandler 27 | 28 | 29 | 30 | # log-level,print prompt和llm 
predict 31 | os.environ["log_verbose"] = "1" 32 | 33 | llm_config = LLMConfig( 34 | model_name="gpt-3.5-turbo", model_device="cpu", api_key=os.environ["OPENAI_API_KEY"], 35 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 36 | ) 37 | embed_config = EmbedConfig( 38 | embed_engine="model", embed_model="text2vec-base-chinese", 39 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 40 | ) 41 | 42 | 43 | # delete codebase 44 | codebase_name = 'client_nebula' 45 | code_path = '/Users/bingxu/Desktop/工作/大模型/chatbot/test_code_repo/client' 46 | code_path = "D://chromeDownloads/devopschat-bot/client_v2/client" 47 | use_nh = True 48 | do_interpret = False 49 | cbh = CodeBaseHandler(codebase_name, code_path, crawl_type='dir', use_nh=use_nh, local_graph_path=CB_ROOT_PATH, 50 | llm_config=llm_config, embed_config=embed_config) 51 | cbh.delete_codebase(codebase_name=codebase_name) 52 | 53 | # initialize codebase 54 | cbh = CodeBaseHandler(codebase_name, code_path, crawl_type='dir', use_nh=use_nh, local_graph_path=CB_ROOT_PATH, 55 | llm_config=llm_config, embed_config=embed_config) 56 | cbh.import_code(do_interpret=do_interpret) 57 | 58 | 59 | 60 | # chat with codebase 61 | phase_name = "codeChatPhase" 62 | phase = BasePhase( 63 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 64 | embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 65 | ) 66 | 67 | # "remove 这个函数是做什么的" => tag-based retrieval 68 | # "有没有函数已经实现了从字符串删除指定字符串的功能,使用的话可以怎么使用,写个java代码" => description-based retrieval 69 | # "有根据我以下的需求用 java 开发一个方法:输入为字符串,将输入中的 .java 字符串给删除掉,然后返回新的字符串" => description-based retrieval 70 | 71 | ## Requires nebula running in the container; build the codebase with use_nh=True, and then cypher queries become available 72 | # round-1 73 | query_content = "代码一共有多少类" 74 | query = Message( 75 | role_name="human", role_type="user", 76 | role_content=query_content, input_query=query_content, origin_query=query_content, 77 | code_engine_name="client_1", score_threshold=1.0, top_k=3, cb_search_type="cypher" 78 | ) 79 | 80 | output_message1, _ = phase.step(query) 81 | print(output_message1) 82 | 83 | # round-2 84 | query_content = "代码库里有哪些函数,返回5个就行" 85 | query = Message( 86 | role_name="human", role_type="user", 87 | role_content=query_content, input_query=query_content, origin_query=query_content, 88 | code_engine_name="client_1", score_threshold=1.0, top_k=3, cb_search_type="cypher" 89 | ) 90 | output_message2, _ = phase.step(query) 91 | print(output_message2) 92 | 93 | 94 | # round-3 95 | query_content = "remove 这个函数是做什么的" 96 | query = Message( 97 | role_name="user", role_type="human", 98 | role_content=query_content, input_query=query_content, origin_query=query_content, 99 | code_engine_name=codebase_name, score_threshold=1.0, top_k=3, cb_search_type="tag", 100 | use_nh=False, local_graph_path=CB_ROOT_PATH 101 | ) 102 | output_message3, output_memory3 = phase.step(query) 103 | print(output_memory3.to_str_messages(return_all=True, content_key="parsed_output_list")) 104 | 105 | # 106 | # # round-4 107 | query_content = "有没有函数已经实现了从字符串删除指定字符串的功能,使用的话可以怎么使用,写个java代码" 108 | query = Message( 109 | role_name="human", role_type="user", 110 | role_content=query_content, input_query=query_content, origin_query=query_content, 111 | code_engine_name=codebase_name, score_threshold=1.0, top_k=3, cb_search_type="description", 112 | use_nh=False, local_graph_path=CB_ROOT_PATH 113 | ) 114 | output_message4, output_memory4 = phase.step(query) 115 | print(output_memory4.to_str_messages(return_all=True, content_key="parsed_output_list")) 116 | 117 | 118 | # # round-5 
query_content = "有根据我以下的需求用 java 开发一个方法:输入为字符串,将输入中的 .java 字符串给删除掉,然后返回新的字符串" 120 | query = Message( 121 | role_name="human", role_type="user", 122 | role_content=query_content, input_query=query_content, origin_query=query_content, 123 | code_engine_name=codebase_name, score_threshold=1.0, top_k=3, cb_search_type="description", 124 | use_nh=False, local_graph_path=CB_ROOT_PATH 125 | ) 126 | output_message5, output_memory5 = phase.step(query) 127 | print(output_memory5.to_str_messages(return_all=True, content_key="parsed_output_list")) 128 | -------------------------------------------------------------------------------- /examples/agent_examples/codeChatPhase_example.py: -------------------------------------------------------------------------------- 1 | import os, sys, requests 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH 9 | from configs.server_config import SANDBOX_SERVER 10 | from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS 11 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 12 | from coagent.connector.phase import BasePhase 13 | from coagent.connector.schema import Message, Memory 14 | 15 | # log level: print prompt and llm predict 16 | os.environ["log_verbose"] = "2" 17 | 18 | phase_name = "codeChatPhase" 19 | llm_config = LLMConfig( 20 | model_name="gpt-3.5-turbo", api_key=os.environ["OPENAI_API_KEY"], 21 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 22 | ) 23 | embed_config = EmbedConfig( 24 | embed_engine="model", embed_model="text2vec-base-chinese", 25 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 26 | ) 27 | 28 | phase = BasePhase( 29 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 30 | embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 31 | ) 32 | # "代码一共有多少类" => cypher-based retrieval 33 | # "代码库里有哪些函数,返回5个就行" => cypher-based retrieval 34 | # "remove 这个函数是做什么的" => tag-based retrieval 35 | # "有没有函数已经实现了从字符串删除指定字符串的功能,使用的话可以怎么使用,写个java代码" => description-based retrieval 36 | # "有根据我以下的需求用 java 开发一个方法:输入为字符串,将输入中的 .java 字符串给删除掉,然后返回新的字符串" => description-based retrieval 37 | 38 | # round-1 39 | # query_content = "代码一共有多少类" 40 | # query = Message( 41 | # role_name="human", role_type="user", 42 | # role_content=query_content, input_query=query_content, origin_query=query_content, 43 | # code_engine_name="client_1", score_threshold=1.0, top_k=3, cb_search_type="cypher" 44 | # ) 45 | # 46 | # output_message1, _ = phase.step(query) 47 | # print(output_message1) 48 | 49 | # round-2 50 | # query_content = "代码库里有哪些函数,返回5个就行" 51 | # query = Message( 52 | # role_name="human", role_type="user", 53 | # role_content=query_content, input_query=query_content, origin_query=query_content, 54 | # code_engine_name="client_1", score_threshold=1.0, top_k=3, cb_search_type="cypher" 55 | # ) 56 | # output_message2, _ = phase.step(query) 57 | # print(output_message2) 58 | 59 | # 60 | # # round-3 61 | query_content = "remove 这个函数是做什么的" 62 | query = Message( 63 | role_name="user", role_type="human", 64 | role_content=query_content, input_query=query_content, origin_query=query_content, 65 | code_engine_name="client", score_threshold=1.0, top_k=3, cb_search_type="tag" 66 | ) 67 | output_message3, _ = phase.step(query) 68 | print(output_message3) 69 | 70 | # 71 | # # round-4 72 | # query_content = "有没有函数已经实现了从字符串删除指定字符串的功能,使用的话可以怎么使用,写个java代码" 73 | # query = Message( 74 | # role_name="human", 
role_type="user", 75 | # role_content=query_content, input_query=query_content, origin_query=query_content, 76 | # code_engine_name="client_1", score_threshold=1.0, top_k=3, cb_search_type="description" 77 | # ) 78 | # output_message4, _ = phase.step(query) 79 | # print(output_message4) 80 | # 81 | # # round-5 82 | # query_content = "有根据我以下的需求用 java 开发一个方法:输入为字符串,将输入中的 .java 字符串给删除掉,然后返回新的字符串" 83 | # query = Message( 84 | # role_name="human", role_type="user", 85 | # role_content=query_content, input_query=query_content, origin_query=query_content, 86 | # code_engine_name="client_1", score_threshold=1.0, top_k=3, cb_search_type="description" 87 | # ) 88 | # output_message5, output_memory5 = phase.step(query) 89 | # print(output_message5) 90 | # 91 | # print(output_memory5.to_str_messages(return_all=True, content_key="parsed_output_list")) -------------------------------------------------------------------------------- /examples/agent_examples/codeReactPhase_example.py: -------------------------------------------------------------------------------- 1 | import os, sys, requests 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH 9 | from configs.server_config import SANDBOX_SERVER 10 | from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS 11 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 12 | from coagent.connector.phase import BasePhase 13 | from coagent.connector.schema import Message 14 | 15 | # log-level,print prompt和llm predict 16 | os.environ["log_verbose"] = "2" 17 | 18 | phase_name = "codeReactPhase" 19 | llm_config = LLMConfig( 20 | model_name="gpt-3.5-turbo",api_key=os.environ["OPENAI_API_KEY"], 21 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 22 | ) 23 | embed_config = EmbedConfig( 24 | embed_engine="model", embed_model="text2vec-base-chinese", 25 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 26 | ) 27 | phase = BasePhase( 28 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 29 | embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 30 | ) 31 | # round-1 32 | query_content = "确认本地是否存在book_data.csv,并查看它有哪些列和数据类型;然后画柱状图" 33 | query = Message( 34 | role_name="human", role_type="user", 35 | role_content=query_content, input_query=query_content, origin_query=query_content, 36 | ) 37 | 38 | output_message, output_memory = phase.step(query) 39 | 40 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) -------------------------------------------------------------------------------- /examples/agent_examples/codeToolReactPhase_example.py: -------------------------------------------------------------------------------- 1 | import os, sys, requests 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH, LLM_MODEL 9 | from configs.server_config import SANDBOX_SERVER 10 | from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS 11 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 12 | 13 | from coagent.connector.phase import BasePhase 14 | from coagent.connector.schema import Message 15 | 16 | 17 | TOOL_SETS = [ 18 | "StockName", "StockInfo", 19 | ] 20 | tools = 
toLangchainTools([TOOL_DICT[i] for i in TOOL_SETS if i in TOOL_DICT]) 21 | 22 | # log-level,print prompt和llm predict 23 | os.environ["log_verbose"] = "2" 24 | 25 | phase_name = "codeToolReactPhase" 26 | llm_config = LLMConfig( 27 | model_name="gpt-3.5-turbo-0613", api_key=os.environ["OPENAI_API_KEY"], 28 | api_base_url=os.environ["API_BASE_URL"], temperature=0.7 29 | ) 30 | embed_config = EmbedConfig( 31 | embed_engine="model", embed_model="text2vec-base-chinese", 32 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 33 | ) 34 | phase = BasePhase( 35 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 36 | embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 37 | ) 38 | 39 | query_content = "查询贵州茅台的股票代码,并查询截止到当前日期(2023年12月24日)的最近10天的每日时序数据,然后用代码画出折线图并分析" 40 | 41 | query = Message(role_name="human", role_type="user", input_query=query_content, role_content=query_content, origin_query=query_content, tools=tools) 42 | 43 | output_message, output_memory = phase.step(query) 44 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) -------------------------------------------------------------------------------- /examples/agent_examples/docChatPhase_example.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | sys.path.append(os.path.join(src_dir, "examples")) 8 | 9 | from configs.model_config import EMBEDDING_MODEL, CB_ROOT_PATH 10 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH 11 | from configs.server_config import SANDBOX_SERVER 12 | from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS 13 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 14 | from coagent.connector.phase import BasePhase 15 | from coagent.connector.schema import Message, Memory 16 | 17 | 18 | tools = toLangchainTools([TOOL_DICT[i] for i in TOOL_SETS if i in TOOL_DICT]) 19 | llm_config = LLMConfig( 20 | model_name="gpt-3.5-turbo",api_key=os.environ["OPENAI_API_KEY"], 21 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 22 | ) 23 | embed_config = EmbedConfig( 24 | embed_engine="model", embed_model="text2vec-base-chinese", 25 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 26 | ) 27 | 28 | 29 | 30 | 31 | # create your knowledge base 32 | from io import BytesIO 33 | from pathlib import Path 34 | 35 | from coagent.service.kb_api import create_kb, upload_doc 36 | from coagent.service.service_factory import get_kb_details 37 | from coagent.utils.server_utils import run_async 38 | kb_list = {x["kb_name"]: x for x in get_kb_details(KB_ROOT_PATH)} 39 | 40 | # create a knowledge base 41 | kb_name = "example_test" 42 | data = { 43 | "knowledge_base_name": kb_name, 44 | "vector_store_type": "faiss", # default 45 | "kb_root_path": KB_ROOT_PATH, 46 | "embed_model": embed_config.embed_model, 47 | "embed_engine": embed_config.embed_engine, 48 | "embed_model_path": embed_config.embed_model_path, 49 | "model_device": embed_config.model_device, 50 | } 51 | run_async(create_kb(**data)) 52 | 53 | # add doc to knowledge base 54 | file = os.path.join("D://project/gitlab/llm/external/ant_code/Codefuse-chatbot/sources/docs/langchain_text_10.jsonl") 55 | files = [file] 56 | # if embedding init failed, you can use override = True 57 | data = 
[{"override": True, "file": f, 58 | "knowledge_base_name": kb_name, "not_refresh_vs_cache": False, 59 | "kb_root_path": KB_ROOT_PATH, "embed_model": embed_config.embed_model, 60 | "embed_engine": embed_config.embed_engine, "embed_model_path": embed_config.embed_model_path, 61 | "model_device": embed_config.model_device, 62 | } 63 | for f in files] 64 | 65 | for k in data: 66 | file = Path(file).absolute().open("rb") 67 | filename = file.name 68 | 69 | from fastapi import UploadFile 70 | from tempfile import SpooledTemporaryFile 71 | 72 | temp_file = SpooledTemporaryFile(max_size=10 * 1024 * 1024) 73 | temp_file.write(file.read()) 74 | temp_file.seek(0) 75 | 76 | k.update({"file": UploadFile(file=temp_file, filename=filename),}) 77 | run_async(upload_doc(**k)) 78 | 79 | 80 | 81 | ## start to chat with knowledge base 82 | 83 | # log-level,print prompt和llm predict 84 | os.environ["log_verbose"] = "2" 85 | 86 | # set chat phase 87 | phase_name = "docChatPhase" 88 | phase = BasePhase( 89 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 90 | embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 91 | ) 92 | # round-1 93 | query_content = "langchain有哪些模块" 94 | query = Message( 95 | role_name="human", role_type="user", 96 | origin_query=query_content, 97 | doc_engine_name=kb_name, score_threshold=1.0, top_k=3 98 | ) 99 | 100 | output_message, output_memory = phase.step(query) 101 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) 102 | 103 | # round-2 104 | query_content = "提示(prompts)有什么用?" 105 | query = Message( 106 | role_name="human", role_type="user", 107 | origin_query=query_content, 108 | doc_engine_name=kb_name, score_threshold=1.0, top_k=3 109 | ) 110 | output_message, output_memory = phase.step(query) 111 | 112 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) -------------------------------------------------------------------------------- /examples/agent_examples/metagpt_phase_example.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH 9 | from configs.server_config import SANDBOX_SERVER 10 | from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS 11 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 12 | 13 | from coagent.connector.phase import BasePhase 14 | from coagent.connector.schema import Message 15 | 16 | # log-level,print prompt和llm predict 17 | os.environ["log_verbose"] = "0" 18 | 19 | phase_name = "metagpt_code_devlop" 20 | llm_config = LLMConfig( 21 | model_name="gpt-3.5-turbo", api_key=os.environ["OPENAI_API_KEY"], 22 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 23 | ) 24 | embed_config = EmbedConfig( 25 | embed_engine="model", embed_model="text2vec-base-chinese", 26 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 27 | ) 28 | phase = BasePhase( 29 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 30 | embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 31 | ) 32 | 33 | query_content = "create a snake game" 34 | query = Message(role_name="human", role_type="user", input_query=query_content, role_content=query_content, origin_query=query_content) 
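# As in the other example scripts in this repo, step() drives the phase's agent chains
# to completion for this query and returns the final Message together with a Memory
# that records every intermediate parsed output.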
35 | 36 | output_message, output_memory = phase.step(query) 37 | 38 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) -------------------------------------------------------------------------------- /examples/agent_examples/searchChatPhase_example.py: -------------------------------------------------------------------------------- 1 | import os, sys, requests 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH 9 | from configs.server_config import SANDBOX_SERVER 10 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 11 | 12 | from coagent.connector.phase import BasePhase 13 | from coagent.connector.schema import Message, Memory 14 | 15 | 16 | 17 | # log-level,print prompt和llm predict 18 | os.environ["log_verbose"] = "2" 19 | 20 | phase_name = "searchChatPhase" 21 | llm_config = LLMConfig( 22 | model_name="gpt-3.5-turbo", api_key=os.environ["OPENAI_API_KEY"], 23 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 24 | ) 25 | embed_config = EmbedConfig( 26 | embed_engine="model", embed_model="text2vec-base-chinese", 27 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 28 | ) 29 | phase = BasePhase( 30 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 31 | embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 32 | ) 33 | 34 | # round-1 35 | query_content1 = "美国当前总统是谁?" 36 | query = Message( 37 | role_name="human", role_type="user", 38 | role_content=query_content1, input_query=query_content1, origin_query=query_content1, 39 | search_engine_name="duckduckgo", score_threshold=1.0, top_k=3 40 | ) 41 | 42 | output_message, output_memory = phase.step(query) 43 | 44 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) 45 | 46 | # round-2 47 | query_content2 = "美国上一任总统是谁,两个人有什么关系没?" 
48 | query = Message( 49 | role_name="human", role_type="user", 50 | role_content=query_content2, input_query=query_content2, origin_query=query_content2, 51 | search_engine_name="duckduckgo", score_threshold=1.0, top_k=3 52 | ) 53 | output_message, output_memory = phase.step(query) 54 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) -------------------------------------------------------------------------------- /examples/agent_examples/toolReactPhase_example.py: -------------------------------------------------------------------------------- 1 | import os, sys, requests 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | from configs.model_config import KB_ROOT_PATH, JUPYTER_WORK_PATH 9 | from configs.server_config import SANDBOX_SERVER 10 | from coagent.tools import toLangchainTools, TOOL_DICT, TOOL_SETS 11 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 12 | 13 | from coagent.connector.phase import BasePhase 14 | from coagent.connector.schema import Message 15 | 16 | # log-level,print prompt和llm predict 17 | os.environ["log_verbose"] = "2" 18 | 19 | phase_name = "toolReactPhase" 20 | llm_config = LLMConfig( 21 | model_name="gpt-3.5-turbo",api_key=os.environ["OPENAI_API_KEY"], 22 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 23 | ) 24 | embed_config = EmbedConfig( 25 | embed_engine="model", embed_model="text2vec-base-chinese", 26 | embed_model_path=os.path.join(src_dir, "embedding_models/text2vec-base-chinese") 27 | ) 28 | phase = BasePhase( 29 | phase_name, sandbox_server=SANDBOX_SERVER, jupyter_work_path=JUPYTER_WORK_PATH, 30 | embed_config=embed_config, llm_config=llm_config, kb_root_path=KB_ROOT_PATH, 31 | ) 32 | 33 | 34 | # round-1 35 | tools = toLangchainTools([TOOL_DICT[i] for i in TOOL_SETS if i in TOOL_DICT]) 36 | query_content = "帮我确认下127.0.0.1这个服务器的在10点是否存在异常,请帮我判断一下" 37 | query = Message( 38 | role_name="human", role_type="user", tools=tools, 39 | role_content=query_content, input_query=query_content, origin_query=query_content 40 | ) 41 | 42 | phase.pre_print(query) 43 | # output_message, output_memory = phase.step(query) 44 | 45 | # print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) -------------------------------------------------------------------------------- /examples/auto_examples/agentchat_RetrievalChat.py: -------------------------------------------------------------------------------- 1 | # more use cases see ~/examples/agent_examples/docChatPhase_example.py 2 | -------------------------------------------------------------------------------- /examples/auto_examples/agentchat_function_call.py: -------------------------------------------------------------------------------- 1 | import os, sys, requests 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | # from configs.model_config import * 9 | from coagent.connector.phase import BasePhase 10 | from coagent.connector.agents import BaseAgent 11 | from coagent.connector.chains import BaseChain 12 | from coagent.connector.schema import ( 13 | Message, Memory, load_role_configs, load_phase_configs, load_chain_configs 14 | ) 15 | from coagent.connector.configs import AGETN_CONFIGS, CHAIN_CONFIGS, PHASE_CONFIGS 16 | from coagent.connector.utils import parse_section 17 | import importlib 18 | 19 | 20 | # update 
new agent configs 21 | # tool learning 实现参考 ~/examples/agent_examples/toolReactPhase_example.py -------------------------------------------------------------------------------- /examples/auto_examples/agentchat_teachability.py: -------------------------------------------------------------------------------- 1 | # 暂未实现memory management相关操作 -------------------------------------------------------------------------------- /examples/auto_examples/agentchat_teaching.py: -------------------------------------------------------------------------------- 1 | import os, sys, requests 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | # from configs.model_config import * 9 | from coagent.connector.phase import BasePhase 10 | from coagent.connector.agents import BaseAgent 11 | from coagent.connector.chains import BaseChain 12 | from coagent.connector.schema import ( 13 | Message, Memory, load_role_configs, load_phase_configs, load_chain_configs 14 | ) 15 | from coagent.connector.configs import AGETN_CONFIGS, CHAIN_CONFIGS, PHASE_CONFIGS 16 | from coagent.connector.utils import parse_section 17 | import importlib 18 | 19 | 20 | # update new agent configs 21 | auto_feedback_from_code_execution_PROMPT = """#### Code React Assistance Guidance 22 | 23 | You are a helpful AI assistant. Solve tasks using your coding and language skills. 24 | In the following cases, suggest python code (in a python coding block) or shell script (in a sh coding block) for the user to execute. 25 | 1. When you need to collect info, use the code to output the info you need, for example, browse or search the web, download/read a file, print the content of a webpage or a file, get the current date/time, check the operating system. After sufficient info is printed and the task is ready to be solved based on your language skill, you can solve the task by yourself. 26 | 2. When you need to perform some task with code, use the code to perform the task and output the result. Finish the task smartly. 27 | Solve the task step by step if you need to. If a plan is not provided, explain your plan first. Be clear which step uses code, and which step uses your language skill. 28 | When using code, you must indicate the script type in the code block. The user cannot provide any other feedback or perform any other action beyond executing the code you suggest. The user can't modify your code. So do not suggest incomplete code which requires users to modify. Don't use a code block if it's not intended to be executed by the user. 29 | If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try. 30 | When you find an answer, verify the answer carefully. Include verifiable evidence in your response if possible. 31 | Reply "stopped" in the end when everything is done. 32 | 33 | #### Response Process 34 | 35 | **Question:** First, clarify the problem to be solved. 36 | 37 | **Thoughts:** Based on the question and observations above, provide the plan for executing this step. 38 | 39 | **Action Status:** Set to 'stopped' or 'code_executing'. If it's 'stopped', the action is to provide the final answer to the original question. 
If it's 'code_executing', the action is to write the code. 40 | 41 | **Action:** 42 | ```python 43 | # Write your code here 44 | import os 45 | ... 46 | ``` 47 | 48 | **Observation:** Check the results and effects of the executed code. 49 | 50 | ... (Repeat this Thoughts/Action/Observation cycle as needed) 51 | 52 | **Thoughts:** I now know the final answer 53 | 54 | **Action Status:** stopped 55 | 56 | **Action:** The final answer to the original input question 57 | 58 | 59 | """ 60 | 61 | 62 | AGETN_CONFIGS.update({ 63 | "auto_feedback_from_code_execution": { 64 | "role": { 65 | "role_prompt": auto_feedback_from_code_execution_PROMPT, 66 | "role_type": "assistant", 67 | "role_name": "auto_feedback_from_code_execution", 68 | "role_desc": "", 69 | "agent_type": "ReactAgent" 70 | # "agent_type": "BaseAgent" 71 | }, 72 | "chat_turn": 5, 73 | "stop": "\n**Observation:**", 74 | "focus_agents": [], 75 | "focus_message_keys": [], 76 | }, 77 | }) 78 | # update new chain configs 79 | CHAIN_CONFIGS.update({ 80 | "auto_feedback_from_code_executionChain": { 81 | "chain_name": "auto_feedback_from_code_executionChain", 82 | "chain_type": "BaseChain", 83 | "agents": ["auto_feedback_from_code_execution"], 84 | "chat_turn": 1, 85 | "do_checker": False, 86 | "chain_prompt": "" 87 | } 88 | }) 89 | 90 | # update phase configs 91 | PHASE_CONFIGS.update({ 92 | "auto_feedback_from_code_executionPhase": { 93 | "phase_name": "auto_feedback_from_code_executionPhase", 94 | "phase_type": "BasePhase", 95 | "chains": ["auto_feedback_from_code_executionChain"], 96 | "do_summary": False, 97 | "do_search": False, 98 | "do_doc_retrieval": False, 99 | "do_code_retrieval": False, 100 | "do_tool_retrieval": False, 101 | "do_using_tool": False 102 | }, 103 | }) 104 | 105 | 106 | 107 | 108 | role_configs = load_role_configs(AGETN_CONFIGS) 109 | chain_configs = load_chain_configs(CHAIN_CONFIGS) 110 | phase_configs = load_phase_configs(PHASE_CONFIGS) 111 | 112 | agent_module = importlib.import_module("coagent.connector.agents") 113 | 114 | # 115 | phase_name = "auto_feedback_from_code_executionPhase" 116 | phase = BasePhase(phase_name, task = None, 117 | base_phase_config= PHASE_CONFIGS, 118 | base_chain_config= CHAIN_CONFIGS, 119 | base_role_config= AGETN_CONFIGS, 120 | ) 121 | 122 | # round-1 123 | query_content = """ 124 | Find arxiv papers that show how are people studying trust calibration in AI based systems 125 | """ 126 | query = Message( 127 | role_name="human", role_type="user", 128 | role_content=query_content, input_query=query_content, origin_query=query_content, 129 | code_engine_name="client", score_threshold=1.0, top_k=3, cb_search_type="cypher" 130 | ) 131 | 132 | output_message1, _ = phase.step(query) 133 | 134 | 135 | # 重复auto_gen 其余task即可 136 | task2 = "analyze the above the results to list the application domains studied by these papers " 137 | 138 | task3 = """Use this data to generate a bar chart of domains and number of papers in that domain and save to a file 139 | """ 140 | 141 | task4 = """Reflect on the sequence and create a recipe containing all the steps 142 | necessary and name for it. Suggest well-documented, generalized python function(s) 143 | to perform similar tasks for coding steps in future. Make sure coding steps and 144 | non-coding steps are never mixed in one function. In the docstr of the function(s), 145 | clarify what non-coding steps are needed to use the language skill of the assistant. 
146 | """ -------------------------------------------------------------------------------- /examples/auto_examples/agentchat_web_info.py: -------------------------------------------------------------------------------- 1 | import os, sys, requests 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | # from configs.model_config import * 9 | from coagent.connector.phase import BasePhase 10 | from coagent.connector.agents import BaseAgent 11 | from coagent.connector.chains import BaseChain 12 | from coagent.connector.schema import ( 13 | Message, Memory, load_role_configs, load_phase_configs, load_chain_configs 14 | ) 15 | from coagent.connector.configs import AGETN_CONFIGS, CHAIN_CONFIGS, PHASE_CONFIGS 16 | from coagent.connector.utils import parse_section 17 | import importlib 18 | 19 | 20 | # update new agent configs 21 | auto_feedback_from_code_execution_PROMPT = """#### Code React Assistance Guidance 22 | 23 | You are a helpful AI assistant. Solve tasks using your coding and language skills. 24 | In the following cases, suggest python code (in a python coding block) or shell script (in a sh coding block) for the user to execute. 25 | 1. When you need to collect info, use the code to output the info you need, for example, browse or search the web, download/read a file, print the content of a webpage or a file, get the current date/time, check the operating system. After sufficient info is printed and the task is ready to be solved based on your language skill, you can solve the task by yourself. 26 | 2. When you need to perform some task with code, use the code to perform the task and output the result. Finish the task smartly. 27 | Solve the task step by step if you need to. If a plan is not provided, explain your plan first. Be clear which step uses code, and which step uses your language skill. 28 | When using code, you must indicate the script type in the code block. The user cannot provide any other feedback or perform any other action beyond executing the code you suggest. The user can't modify your code. So do not suggest incomplete code which requires users to modify. Don't use a code block if it's not intended to be executed by the user. 29 | If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try. 30 | When you find an answer, verify the answer carefully. Include verifiable evidence in your response if possible. 31 | Reply "stopped" in the end when everything is done. 32 | 33 | #### Response Process 34 | 35 | **Question:** First, clarify the problem to be solved. 36 | 37 | **Thoughts:** Based on the question and observations above, provide the plan for executing this step. 38 | 39 | **Action Status:** Set to 'stopped' or 'code_executing'. If it's 'stopped', the action is to provide the final answer to the original question. If it's 'code_executing', the action is to write the code. 40 | 41 | **Action:** 42 | ```python 43 | # Write your code here 44 | import os 45 | ... 46 | ``` 47 | 48 | **Observation:** Check the results and effects of the executed code. 49 | 50 | ... 
(Repeat this Thoughts/Action/Observation cycle as needed) 51 | 52 | **Thoughts:** I now know the final answer 53 | 54 | **Action Status:** stopped 55 | 56 | **Action:** The final answer to the original input question 57 | 58 | 59 | """ 60 | 61 | 62 | AGETN_CONFIGS.update({ 63 | "auto_feedback_from_code_execution": { 64 | "role": { 65 | "role_prompt": auto_feedback_from_code_execution_PROMPT, 66 | "role_type": "assistant", 67 | "role_name": "auto_feedback_from_code_execution", 68 | "role_desc": "", 69 | "agent_type": "ReactAgent" 70 | # "agent_type": "BaseAgent" 71 | }, 72 | "chat_turn": 5, 73 | "stop": "\n**Observation:**", 74 | "focus_agents": [], 75 | "focus_message_keys": [], 76 | }, 77 | }) 78 | # update new chain configs 79 | CHAIN_CONFIGS.update({ 80 | "auto_feedback_from_code_executionChain": { 81 | "chain_name": "auto_feedback_from_code_executionChain", 82 | "chain_type": "BaseChain", 83 | "agents": ["auto_feedback_from_code_execution"], 84 | "chat_turn": 1, 85 | "do_checker": False, 86 | "chain_prompt": "" 87 | } 88 | }) 89 | 90 | # update phase configs 91 | PHASE_CONFIGS.update({ 92 | "auto_feedback_from_code_executionPhase": { 93 | "phase_name": "auto_feedback_from_code_executionPhase", 94 | "phase_type": "BasePhase", 95 | "chains": ["auto_feedback_from_code_executionChain"], 96 | "do_summary": False, 97 | "do_search": False, 98 | "do_doc_retrieval": False, 99 | "do_code_retrieval": False, 100 | "do_tool_retrieval": False, 101 | "do_using_tool": False 102 | }, 103 | }) 104 | 105 | 106 | 107 | 108 | role_configs = load_role_configs(AGETN_CONFIGS) 109 | chain_configs = load_chain_configs(CHAIN_CONFIGS) 110 | phase_configs = load_phase_configs(PHASE_CONFIGS) 111 | 112 | agent_module = importlib.import_module("coagent.connector.agents") 113 | 114 | # 115 | phase_name = "auto_feedback_from_code_executionPhase" 116 | phase = BasePhase(phase_name, 117 | task = None, 118 | base_phase_config = PHASE_CONFIGS, 119 | base_chain_config = CHAIN_CONFIGS, 120 | base_role_config = AGETN_CONFIGS, 121 | ) 122 | 123 | # # round-1 124 | # query_content = """Reply TERMINATE if the task has been solved at full satisfaction. 125 | # Otherwise, reply CONTINUE, or the reason why the task is not solved yet.""" 126 | # query = Message( 127 | # role_name="human", role_type="user", 128 | # role_content=query_content, input_query=query_content, origin_query=query_content, 129 | # code_engine_name="client", score_threshold=1.0, top_k=3, cb_search_type="cypher" 130 | # ) 131 | 132 | # output_message1, _ = phase.step(query) 133 | 134 | # round-2 135 | # query_content = """Show me the YTD gain of 10 largest technology companies as of today.""" 136 | # query = Message( 137 | # role_name="human", role_type="user", 138 | # role_content=query_content, input_query=query_content, origin_query=query_content, 139 | # code_engine_name="client", score_threshold=1.0, top_k=3, cb_search_type="cypher" 140 | # ) 141 | 142 | # output_message1, _ = phase.step(query) 143 | 144 | -------------------------------------------------------------------------------- /examples/gptq.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | import os 3 | from os.path import isdir, isfile 4 | from pathlib import Path 5 | import sys 6 | 7 | from transformers import AutoTokenizer 8 | 9 | 10 | @dataclass 11 | class GptqConfig: 12 | ckpt: str = field( 13 | default=None, 14 | metadata={ 15 | "help": "Load quantized model. The path to the local GPTQ checkpoint." 
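# [Illustrative note, not part of the original file] Combined with the fields
# defined just below, a hypothetical 4-bit GPTQ setup would be built roughly as:
#
#   config = GptqConfig(ckpt="/models/llama-13b-gptq", wbits=4, groupsize=128, act_order=True)
#
# `wbits` is the quantization bit-width, `groupsize` is how many weights share one
# set of quantization parameters (-1 = a full row), and `act_order` toggles the
# activation-order heuristic from GPTQ.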
16 | }, 17 | ) 18 | wbits: int = field(default=16, metadata={"help": "#bits to use for quantization"}) 19 | groupsize: int = field( 20 | default=-1, 21 | metadata={"help": "Groupsize to use for quantization; default uses full row."}, 22 | ) 23 | act_order: bool = field( 24 | default=True, 25 | metadata={"help": "Whether to apply the activation order GPTQ heuristic"}, 26 | ) 27 | 28 | 29 | def load_quant_by_autogptq(model): 30 | # qwen-72b-int4 use these code 31 | from modelscope import AutoTokenizer, AutoModelForCausalLM 32 | # Note: The default behavior now has injection attack prevention off. 33 | tokenizer = AutoTokenizer.from_pretrained(model, revision='master', trust_remote_code=True) 34 | model = AutoModelForCausalLM.from_pretrained( 35 | model, device_map="auto", 36 | trust_remote_code=True 37 | ).eval() 38 | return model, tokenizer 39 | # codellama-34b-int4 use these code 40 | # from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig 41 | # tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, trust_remote_code=True) 42 | # model = AutoGPTQForCausalLM.from_quantized(model, inject_fused_attention=False,trust_remote_code=True, 43 | # inject_fused_mlp=False,use_cuda_fp16=True,disable_exllama=False,device_map='auto') 44 | # return model, tokenizer 45 | 46 | def load_gptq_quantized(model_name, gptq_config: GptqConfig): 47 | print("Loading GPTQ quantized model...") 48 | model, tokenizer = load_quant_by_autogptq(model_name) 49 | return model, tokenizer 50 | 51 | 52 | # def load_gptq_quantized(model_name, gptq_config: GptqConfig): 53 | # print("Loading GPTQ quantized model...") 54 | 55 | # try: 56 | # script_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 57 | # module_path = os.path.join(script_path, "repositories/GPTQ-for-LLaMa") 58 | 59 | # sys.path.insert(0, module_path) 60 | # from llama import load_quant 61 | # except ImportError as e: 62 | # print(f"Error: Failed to load GPTQ-for-LLaMa. 
{e}") 63 | # print("See https://github.com/lm-sys/FastChat/blob/main/docs/gptq.md") 64 | # sys.exit(-1) 65 | 66 | # tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False) 67 | # # only `fastest-inference-4bit` branch cares about `act_order` 68 | # if gptq_config.act_order: 69 | # model = load_quant( 70 | # model_name, 71 | # find_gptq_ckpt(gptq_config), 72 | # gptq_config.wbits, 73 | # gptq_config.groupsize, 74 | # act_order=gptq_config.act_order, 75 | # ) 76 | # else: 77 | # # other branches 78 | # model = load_quant( 79 | # model_name, 80 | # find_gptq_ckpt(gptq_config), 81 | # gptq_config.wbits, 82 | # gptq_config.groupsize, 83 | # ) 84 | 85 | # return model, tokenizer 86 | 87 | 88 | def find_gptq_ckpt(gptq_config: GptqConfig): 89 | if Path(gptq_config.ckpt).is_file(): 90 | return gptq_config.ckpt 91 | 92 | # for ext in ["*.pt", "*.safetensors",]: 93 | for ext in ["*.pt", "*.bin",]: 94 | matched_result = sorted(Path(gptq_config.ckpt).glob(ext)) 95 | if len(matched_result) > 0: 96 | return str(matched_result[-1]) 97 | 98 | print("Error: gptq checkpoint not found") 99 | sys.exit(1) 100 | -------------------------------------------------------------------------------- /examples/model_workers/SparkApi.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import datetime 3 | import hashlib 4 | import hmac 5 | from urllib.parse import urlparse 6 | from datetime import datetime 7 | from time import mktime 8 | from urllib.parse import urlencode 9 | from wsgiref.handlers import format_date_time 10 | 11 | 12 | class Ws_Param(object): 13 | # 初始化 14 | def __init__(self, APPID, APIKey, APISecret, Spark_url): 15 | self.APPID = APPID 16 | self.APIKey = APIKey 17 | self.APISecret = APISecret 18 | self.host = urlparse(Spark_url).netloc 19 | self.path = urlparse(Spark_url).path 20 | self.Spark_url = Spark_url 21 | 22 | # 生成url 23 | def create_url(self): 24 | # 生成RFC1123格式的时间戳 25 | now = datetime.now() 26 | date = format_date_time(mktime(now.timetuple())) 27 | 28 | # 拼接字符串 29 | signature_origin = "host: " + self.host + "\n" 30 | signature_origin += "date: " + date + "\n" 31 | signature_origin += "GET " + self.path + " HTTP/1.1" 32 | 33 | # 进行hmac-sha256进行加密 34 | signature_sha = hmac.new(self.APISecret.encode('utf-8'), signature_origin.encode('utf-8'), 35 | digestmod=hashlib.sha256).digest() 36 | 37 | signature_sha_base64 = base64.b64encode(signature_sha).decode(encoding='utf-8') 38 | 39 | authorization_origin = f'api_key="{self.APIKey}", algorithm="hmac-sha256", headers="host date request-line", signature="{signature_sha_base64}"' 40 | 41 | authorization = base64.b64encode(authorization_origin.encode('utf-8')).decode(encoding='utf-8') 42 | 43 | # 将请求的鉴权参数组合为字典 44 | v = { 45 | "authorization": authorization, 46 | "date": date, 47 | "host": self.host 48 | } 49 | # 拼接鉴权参数,生成url 50 | url = self.Spark_url + '?' 
+ urlencode(v) 51 | # 此处打印出建立连接时候的url,参考本demo的时候可取消上方打印的注释,比对相同参数时生成的url与自己代码生成的url是否一致 52 | return url 53 | 54 | 55 | def gen_params(appid, domain, question, temperature, max_token): 56 | """ 57 | 通过appid和用户的提问来生成请参数 58 | """ 59 | data = { 60 | "header": { 61 | "app_id": appid, 62 | "uid": "1234" 63 | }, 64 | "parameter": { 65 | "chat": { 66 | "domain": domain, 67 | "random_threshold": 0.5, 68 | "max_tokens": max_token, 69 | "auditing": "default", 70 | "temperature": temperature, 71 | } 72 | }, 73 | "payload": { 74 | "message": { 75 | "text": question 76 | } 77 | } 78 | } 79 | return data 80 | -------------------------------------------------------------------------------- /examples/model_workers/__init__.py: -------------------------------------------------------------------------------- 1 | ############################# Attention ######################## 2 | 3 | # The Code in model workers all copied from 4 | # https://github.com/chatchat-space/Langchain-Chatchat/blob/master/server/model_workers 5 | 6 | ################################################################# 7 | 8 | from .base import * 9 | from .zhipu import ChatGLMWorker 10 | from .minimax import MiniMaxWorker 11 | from .xinghuo import XingHuoWorker 12 | from .qianfan import QianFanWorker 13 | from .fangzhou import FangZhouWorker 14 | from .qwen import QwenWorker 15 | from .baichuan import BaiChuanWorker 16 | from .azure import AzureWorker 17 | from .tiangong import TianGongWorker 18 | from .openai import ExampleWorker 19 | 20 | 21 | IMPORT_MODEL_WORKERS = [ 22 | ChatGLMWorker, MiniMaxWorker, XingHuoWorker, QianFanWorker, FangZhouWorker, 23 | QwenWorker, BaiChuanWorker, AzureWorker, TianGongWorker, ExampleWorker 24 | ] 25 | 26 | MODEL_WORKER_SETS = [tool.__name__ for tool in IMPORT_MODEL_WORKERS] 27 | 28 | -------------------------------------------------------------------------------- /examples/model_workers/azure.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from fastchat.conversation import Conversation 3 | from .base import * 4 | # from server.utils import get_httpx_client 5 | from fastchat import conversation as conv 6 | import json, os 7 | from typing import List, Dict 8 | from loguru import logger 9 | # from configs import logger, log_verbose 10 | log_verbose = os.environ.get("log_verbose", False) 11 | 12 | 13 | class AzureWorker(ApiModelWorker): 14 | def __init__( 15 | self, 16 | *, 17 | controller_addr: str = None, 18 | worker_addr: str = None, 19 | model_names: List[str] = ["azure-api"], 20 | version: str = "gpt-35-turbo", 21 | **kwargs, 22 | ): 23 | kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr) 24 | kwargs.setdefault("context_len", 8000) #TODO 16K模型需要改成16384 25 | super().__init__(**kwargs) 26 | self.version = version 27 | 28 | def do_chat(self, params: ApiChatParams) -> Dict: 29 | params.load_config(self.model_names[0]) 30 | data = dict( 31 | messages=params.messages, 32 | temperature=params.temperature, 33 | max_tokens=params.max_tokens, 34 | stream=True, 35 | ) 36 | url = ("https://{}.openai.azure.com/openai/deployments/{}/chat/completions?api-version={}" 37 | .format(params.resource_name, params.deployment_name, params.api_version)) 38 | headers = { 39 | 'Content-Type': 'application/json', 40 | 'Accept': 'application/json', 41 | 'api-key': params.api_key, 42 | } 43 | 44 | text = "" 45 | if log_verbose: 46 | logger.info(f'{self.__class__.__name__}:url: {url}') 47 | 
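# [Illustrative note, not part of the original file] With hypothetical values
# resource_name="my-resource", deployment_name="gpt35" and api_version="2023-05-15",
# the URL assembled above expands to:
#   https://my-resource.openai.azure.com/openai/deployments/gpt35/chat/completions?api-version=2023-05-15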
logger.info(f'{self.__class__.__name__}:headers: {headers}') 48 | logger.info(f'{self.__class__.__name__}:data: {data}') 49 | 50 | with get_httpx_client() as client: 51 | with client.stream("POST", url, headers=headers, json=data) as response: 52 | for line in response.iter_lines(): 53 | if not line.strip() or "[DONE]" in line: 54 | continue 55 | if line.startswith("data: "): 56 | line = line[6:] 57 | resp = json.loads(line) 58 | if choices := resp["choices"]: 59 | if chunk := choices[0].get("delta", {}).get("content"): 60 | text += chunk 61 | yield { 62 | "error_code": 0, 63 | "text": text 64 | } 65 | else: 66 | self.logger.error(f"请求 Azure API 时发生错误:{resp}") 67 | 68 | def get_embeddings(self, params): 69 | # TODO: 支持embeddings 70 | print("embedding") 71 | print(params) 72 | 73 | def make_conv_template(self, conv_template: str = None, model_path: str = None) -> Conversation: 74 | # TODO: 确认模板是否需要修改 75 | return conv.Conversation( 76 | name=self.model_names[0], 77 | system_message="You are a helpful, respectful and honest assistant.", 78 | messages=[], 79 | roles=["user", "assistant"], 80 | sep="\n### ", 81 | stop_str="###", 82 | ) 83 | 84 | 85 | if __name__ == "__main__": 86 | import uvicorn 87 | from server.utils import MakeFastAPIOffline 88 | from fastchat.serve.base_model_worker import app 89 | 90 | worker = AzureWorker( 91 | controller_addr="http://127.0.0.1:20001", 92 | worker_addr="http://127.0.0.1:21008", 93 | ) 94 | sys.modules["fastchat.serve.model_worker"].worker = worker 95 | MakeFastAPIOffline(app) 96 | uvicorn.run(app, port=21008) 97 | -------------------------------------------------------------------------------- /examples/model_workers/baichuan.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | import hashlib 4 | 5 | from fastchat.conversation import Conversation 6 | from .base import * 7 | # from server.utils import get_httpx_client 8 | from fastchat import conversation as conv 9 | import sys, os 10 | import json 11 | from typing import List, Literal, Dict 12 | from loguru import logger 13 | # from configs import logger, log_verbose 14 | log_verbose = os.environ.get("log_verbose", False) 15 | 16 | def calculate_md5(input_string): 17 | md5 = hashlib.md5() 18 | md5.update(input_string.encode('utf-8')) 19 | encrypted = md5.hexdigest() 20 | return encrypted 21 | 22 | 23 | class BaiChuanWorker(ApiModelWorker): 24 | def __init__( 25 | self, 26 | *, 27 | controller_addr: str = None, 28 | worker_addr: str = None, 29 | model_names: List[str] = ["baichuan-api"], 30 | version: Literal["Baichuan2-53B"] = "Baichuan2-53B", 31 | **kwargs, 32 | ): 33 | kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr) 34 | kwargs.setdefault("context_len", 32768) 35 | super().__init__(**kwargs) 36 | self.version = version 37 | 38 | def do_chat(self, params: ApiChatParams) -> Dict: 39 | params.load_config(self.model_names[0]) 40 | 41 | url = "https://api.baichuan-ai.com/v1/stream/chat" 42 | data = { 43 | "model": params.version, 44 | "messages": params.messages, 45 | "parameters": {"temperature": params.temperature} 46 | } 47 | 48 | json_data = json.dumps(data) 49 | time_stamp = int(time.time()) 50 | signature = calculate_md5(params.secret_key + json_data + str(time_stamp)) 51 | headers = { 52 | "Content-Type": "application/json", 53 | "Authorization": "Bearer " + params.api_key, 54 | "X-BC-Request-Id": "your requestId", 55 | "X-BC-Timestamp": str(time_stamp), 56 | "X-BC-Signature": 
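# [Illustrative note, not part of the original file] Per calculate_md5 above, the
# signature sent here is MD5(secret_key + request_body + timestamp). For a
# hypothetical secret "sk", body '{"model": "Baichuan2-53B"}' and timestamp
# 1700000000, the header value would be:
#   calculate_md5("sk" + '{"model": "Baichuan2-53B"}' + "1700000000")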
signature, 57 | "X-BC-Sign-Algo": "MD5", 58 | } 59 | 60 | text = "" 61 | if log_verbose: 62 | logger.info(f'{self.__class__.__name__}:json_data: {json_data}') 63 | logger.info(f'{self.__class__.__name__}:url: {url}') 64 | logger.info(f'{self.__class__.__name__}:headers: {headers}') 65 | 66 | with get_httpx_client() as client: 67 | with client.stream("POST", url, headers=headers, json=data) as response: 68 | for line in response.iter_lines(): 69 | if not line.strip(): 70 | continue 71 | resp = json.loads(line) 72 | if resp["code"] == 0: 73 | text += resp["data"]["messages"][-1]["content"] 74 | yield { 75 | "error_code": resp["code"], 76 | "text": text 77 | } 78 | else: 79 | data = { 80 | "error_code": resp["code"], 81 | "text": resp["msg"], 82 | "error": { 83 | "message": resp["msg"], 84 | "type": "invalid_request_error", 85 | "param": None, 86 | "code": None, 87 | } 88 | } 89 | self.logger.error(f"请求百川 API 时发生错误:{data}") 90 | yield data 91 | 92 | def get_embeddings(self, params): 93 | # TODO: 支持embeddings 94 | print("embedding") 95 | print(params) 96 | 97 | def make_conv_template(self, conv_template: str = None, model_path: str = None) -> Conversation: 98 | # TODO: 确认模板是否需要修改 99 | return conv.Conversation( 100 | name=self.model_names[0], 101 | system_message="", 102 | messages=[], 103 | roles=["user", "assistant"], 104 | sep="\n### ", 105 | stop_str="###", 106 | ) 107 | 108 | 109 | if __name__ == "__main__": 110 | import uvicorn 111 | from server.utils import MakeFastAPIOffline 112 | from fastchat.serve.model_worker import app 113 | 114 | worker = BaiChuanWorker( 115 | controller_addr="http://127.0.0.1:20001", 116 | worker_addr="http://127.0.0.1:21007", 117 | ) 118 | sys.modules["fastchat.serve.model_worker"].worker = worker 119 | MakeFastAPIOffline(app) 120 | uvicorn.run(app, port=21007) 121 | # do_request() 122 | -------------------------------------------------------------------------------- /examples/model_workers/fangzhou.py: -------------------------------------------------------------------------------- 1 | from fastchat.conversation import Conversation 2 | from .base import * 3 | from fastchat import conversation as conv 4 | import sys, os 5 | from typing import List, Literal, Dict 6 | from loguru import logger 7 | # from configs import logger, log_verbose 8 | log_verbose = os.environ.get("log_verbose", False) 9 | 10 | 11 | class FangZhouWorker(ApiModelWorker): 12 | """ 13 | 火山方舟 14 | """ 15 | 16 | def __init__( 17 | self, 18 | *, 19 | model_names: List[str] = ["fangzhou-api"], 20 | controller_addr: str = None, 21 | worker_addr: str = None, 22 | version: Literal["chatglm-6b-model"] = "chatglm-6b-model", 23 | **kwargs, 24 | ): 25 | kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr) 26 | kwargs.setdefault("context_len", 16384) # TODO: 不同的模型有不同的大小 27 | super().__init__(**kwargs) 28 | self.version = version 29 | 30 | def do_chat(self, params: ApiChatParams) -> Dict: 31 | from volcengine.maas import MaasService 32 | 33 | params.load_config(self.model_names[0]) 34 | maas = MaasService('maas-api.ml-platform-cn-beijing.volces.com', 'cn-beijing') 35 | maas.set_ak(params.api_key) 36 | maas.set_sk(params.secret_key) 37 | 38 | # document: "https://www.volcengine.com/docs/82379/1099475" 39 | req = { 40 | "model": { 41 | "name": params.version, 42 | }, 43 | "parameters": { 44 | # 这里的参数仅为示例,具体可用的参数请参考具体模型的 API 说明 45 | "max_new_tokens": params.max_tokens, 46 | "temperature": params.temperature, 47 | }, 48 | "messages": params.messages, 49 | } 50 | 51 | 
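# [Illustrative note, not part of the original file] The loop below consumes
# maas.stream_chat(req), which yields incremental responses. The same pattern,
# reduced to a minimal sketch (assuming the volcengine SDK objects used below):
#
#   for resp in maas.stream_chat(req):
#       if not resp.error.code_n and resp.choice.message.content:
#           print(resp.choice.message.content, end="")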
text = "" 52 | if log_verbose: 53 | self.logger.info(f'{self.__class__.__name__}:maas: {maas}') 54 | for resp in maas.stream_chat(req): 55 | if error := resp.error: 56 | if error.code_n > 0: 57 | data = { 58 | "error_code": error.code_n, 59 | "text": error.message, 60 | "error": { 61 | "message": error.message, 62 | "type": "invalid_request_error", 63 | "param": None, 64 | "code": None, 65 | } 66 | } 67 | self.logger.error(f"请求方舟 API 时发生错误:{data}") 68 | yield data 69 | elif chunk := resp.choice.message.content: 70 | text += chunk 71 | yield {"error_code": 0, "text": text} 72 | else: 73 | data = { 74 | "error_code": 500, 75 | "text": f"请求方舟 API 时发生未知的错误: {resp}" 76 | } 77 | self.logger.error(data) 78 | yield data 79 | break 80 | 81 | def get_embeddings(self, params): 82 | # TODO: 支持embeddings 83 | print("embedding") 84 | print(params) 85 | 86 | def make_conv_template(self, conv_template: str = None, model_path: str = None) -> Conversation: 87 | return conv.Conversation( 88 | name=self.model_names[0], 89 | system_message="你是一个聪明、对人类有帮助的人工智能,你可以对人类提出的问题给出有用、详细、礼貌的回答。", 90 | messages=[], 91 | roles=["user", "assistant", "system"], 92 | sep="\n### ", 93 | stop_str="###", 94 | ) 95 | 96 | 97 | if __name__ == "__main__": 98 | import uvicorn 99 | from server.utils import MakeFastAPIOffline 100 | from fastchat.serve.model_worker import app 101 | 102 | worker = FangZhouWorker( 103 | controller_addr="http://127.0.0.1:20001", 104 | worker_addr="http://127.0.0.1:21005", 105 | ) 106 | sys.modules["fastchat.serve.model_worker"].worker = worker 107 | MakeFastAPIOffline(app) 108 | uvicorn.run(app, port=21005) 109 | -------------------------------------------------------------------------------- /examples/model_workers/openai.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | from fastchat.conversation import Conversation 3 | from .base import * 4 | from fastchat import conversation as conv 5 | import json 6 | from typing import List, Dict 7 | from loguru import logger 8 | # from configs import logger, log_verbose 9 | log_verbose = os.environ.get("log_verbose", False) 10 | import openai 11 | 12 | from langchain import PromptTemplate, LLMChain 13 | from langchain.prompts.chat import ChatPromptTemplate 14 | from langchain.chat_models import ChatOpenAI 15 | from langchain.schema import HumanMessage 16 | 17 | 18 | class ExampleWorker(ApiModelWorker): 19 | def __init__( 20 | self, 21 | *, 22 | controller_addr: str = None, 23 | worker_addr: str = None, 24 | model_names: List[str] = ["gpt-3.5-turbo"], 25 | version: str = "gpt-3.5", 26 | **kwargs, 27 | ): 28 | kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr) 29 | kwargs.setdefault("context_len", 16384) #TODO 16K模型需要改成16384 30 | super().__init__(**kwargs) 31 | self.version = version 32 | 33 | def do_chat(self, params: ApiChatParams) -> Dict: 34 | ''' 35 | yield output: {"error_code": 0, "text": ""} 36 | ''' 37 | params.load_config(self.model_names[0]) 38 | openai.api_key = params.api_key 39 | openai.api_base = params.api_base_url 40 | 41 | logger.error(f"{params.api_key}, {params.api_base_url}, {params.messages} {params.max_tokens},") 42 | # just for example 43 | prompt = "\n".join([f"{m['role']}:{m['content']}" for m in params.messages]) 44 | logger.error(f"{prompt}, {params.temperature}, {params.max_tokens}") 45 | try: 46 | model = ChatOpenAI( 47 | streaming=True, 48 | verbose=True, 49 | openai_api_key= params.api_key, 50 | openai_api_base=params.api_base_url, 
51 | model_name=params.version 52 | ) 53 | chat_prompt = ChatPromptTemplate.from_messages([("human", "{input}")]) 54 | chain = LLMChain(prompt=chat_prompt, llm=model) 55 | content = chain({"input": prompt}) 56 | logger.info(content) 57 | except Exception as e: 58 | logger.error(f"{e}") 59 | yield {"error_code": 500, "text": "request error"} 60 | 61 | # return the text by yield for stream 62 | try: 63 | yield {"error_code": 0, "text": content["text"]} 64 | except: 65 | yield {"error_code": 500, "text": "request error"} 66 | 67 | def get_embeddings(self, params): 68 | # TODO: 支持embeddings 69 | print("embedding") 70 | print(params) 71 | 72 | def make_conv_template(self, conv_template: str = None, model_path: str = None) -> Conversation: 73 | # TODO: 确认模板是否需要修改 74 | return conv.Conversation( 75 | name=self.model_names[0], 76 | system_message="You are a helpful, respectful and honest assistant.", 77 | messages=[], 78 | roles=["user", "assistant", "system"], 79 | sep="\n### ", 80 | stop_str="###", 81 | ) 82 | 83 | 84 | if __name__ == "__main__": 85 | import uvicorn 86 | from coagent.utils.server_utils import MakeFastAPIOffline 87 | from fastchat.serve.base_model_worker import app 88 | 89 | worker = ExampleWorker( 90 | controller_addr="http://127.0.0.1:20001", 91 | worker_addr="http://127.0.0.1:21008", 92 | ) 93 | sys.modules["fastchat.serve.model_worker"].worker = worker 94 | uvicorn.run(app, port=21008) 95 | -------------------------------------------------------------------------------- /examples/model_workers/qwen.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | import os 4 | from fastchat.conversation import Conversation 5 | from http import HTTPStatus 6 | from typing import List, Literal, Dict 7 | 8 | from fastchat import conversation as conv 9 | from .base import * 10 | from loguru import logger 11 | # from configs import logger, log_verbose 12 | log_verbose = os.environ.get("log_verbose", False) 13 | 14 | 15 | class QwenWorker(ApiModelWorker): 16 | DEFAULT_EMBED_MODEL = "text-embedding-v1" 17 | 18 | def __init__( 19 | self, 20 | *, 21 | version: Literal["qwen-turbo", "qwen-plus"] = "qwen-turbo", 22 | model_names: List[str] = ["qwen-api"], 23 | controller_addr: str = None, 24 | worker_addr: str = None, 25 | **kwargs, 26 | ): 27 | kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr) 28 | kwargs.setdefault("context_len", 16384) 29 | super().__init__(**kwargs) 30 | self.version = version 31 | 32 | def do_chat(self, params: ApiChatParams) -> Dict: 33 | import dashscope 34 | params.load_config(self.model_names[0]) 35 | if log_verbose: 36 | logger.info(f'{self.__class__.__name__}:params: {params}') 37 | 38 | gen = dashscope.Generation() 39 | responses = gen.call( 40 | model=params.version, 41 | temperature=params.temperature, 42 | api_key=params.api_key, 43 | messages=params.messages, 44 | result_format='message', # set the result is message format. 
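# [Illustrative note, not part of the original file] With stream=True (next line)
# gen.call() yields a sequence of responses. The consumer below emits
# choices[0]["message"]["content"] as the complete text so far, which suggests
# DashScope streams cumulative rather than incremental chunks here — an inference
# from this handler, not from DashScope documentation:
#
#   for resp in responses:
#       print(resp["output"]["choices"][0]["message"]["content"])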
45 | stream=True, 46 | ) 47 | 48 | for resp in responses: 49 | if resp["status_code"] == 200: 50 | if choices := resp["output"]["choices"]: 51 | yield { 52 | "error_code": 0, 53 | "text": choices[0]["message"]["content"], 54 | } 55 | else: 56 | data = { 57 | "error_code": resp["status_code"], 58 | "text": resp["message"], 59 | "error": { 60 | "message": resp["message"], 61 | "type": "invalid_request_error", 62 | "param": None, 63 | "code": None, 64 | } 65 | } 66 | self.logger.error(f"请求千问 API 时发生错误:{data}") 67 | yield data 68 | 69 | def do_embeddings(self, params: ApiEmbeddingsParams) -> Dict: 70 | import dashscope 71 | params.load_config(self.model_names[0]) 72 | if log_verbose: 73 | logger.info(f'{self.__class__.__name__}:params: {params}') 74 | result = [] 75 | i = 0 76 | while i < len(params.texts): 77 | texts = params.texts[i:i+25] 78 | resp = dashscope.TextEmbedding.call( 79 | model=params.embed_model or self.DEFAULT_EMBED_MODEL, 80 | input=texts, # 最大25行 81 | api_key=params.api_key, 82 | ) 83 | if resp["status_code"] != 200: 84 | data = { 85 | "code": resp["status_code"], 86 | "msg": resp.message, 87 | "error": { 88 | "message": resp["message"], 89 | "type": "invalid_request_error", 90 | "param": None, 91 | "code": None, 92 | } 93 | } 94 | self.logger.error(f"请求千问 API 时发生错误:{data}") 95 | return data 96 | else: 97 | embeddings = [x["embedding"] for x in resp["output"]["embeddings"]] 98 | result += embeddings 99 | i += 25 100 | return {"code": 200, "data": result} 101 | 102 | def get_embeddings(self, params): 103 | # TODO: 支持embeddings 104 | print("embedding") 105 | print(params) 106 | 107 | def make_conv_template(self, conv_template: str = None, model_path: str = None) -> Conversation: 108 | # TODO: 确认模板是否需要修改 109 | return conv.Conversation( 110 | name=self.model_names[0], 111 | system_message="你是一个聪明、对人类有帮助的人工智能,你可以对人类提出的问题给出有用、详细、礼貌的回答。", 112 | messages=[], 113 | roles=["user", "assistant", "system"], 114 | sep="\n### ", 115 | stop_str="###", 116 | ) 117 | 118 | 119 | if __name__ == "__main__": 120 | import uvicorn 121 | from server.utils import MakeFastAPIOffline 122 | from fastchat.serve.model_worker import app 123 | 124 | worker = QwenWorker( 125 | controller_addr="http://127.0.0.1:20001", 126 | worker_addr="http://127.0.0.1:20007", 127 | ) 128 | sys.modules["fastchat.serve.model_worker"].worker = worker 129 | MakeFastAPIOffline(app) 130 | uvicorn.run(app, port=20007) 131 | -------------------------------------------------------------------------------- /examples/model_workers/tiangong.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | import hashlib 4 | 5 | from fastchat.conversation import Conversation 6 | from .base import * 7 | from fastchat import conversation as conv 8 | import json 9 | from typing import List, Literal, Dict 10 | import requests 11 | 12 | 13 | 14 | class TianGongWorker(ApiModelWorker): 15 | def __init__( 16 | self, 17 | *, 18 | controller_addr: str = None, 19 | worker_addr: str = None, 20 | model_names: List[str] = ["tiangong-api"], 21 | version: Literal["SkyChat-MegaVerse"] = "SkyChat-MegaVerse", 22 | **kwargs, 23 | ): 24 | kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr) 25 | kwargs.setdefault("context_len", 32768) 26 | super().__init__(**kwargs) 27 | self.version = version 28 | 29 | def do_chat(self, params: ApiChatParams) -> Dict: 30 | params.load_config(self.model_names[0]) 31 | 32 | url = 
'https://sky-api.singularity-ai.com/saas/api/v4/generate' 33 | data = { 34 | "messages": params.messages, 35 | "model": "SkyChat-MegaVerse" 36 | } 37 | timestamp = str(int(time.time())) 38 | sign_content = params.api_key + params.secret_key + timestamp 39 | sign_result = hashlib.md5(sign_content.encode('utf-8')).hexdigest() 40 | headers={ 41 | "app_key": params.api_key, 42 | "timestamp": timestamp, 43 | "sign": sign_result, 44 | "Content-Type": "application/json", 45 | "stream": "true" # or change to "false" 不处理流式返回内容 46 | } 47 | 48 | # 发起请求并获取响应 49 | response = requests.post(url, headers=headers, json=data, stream=True) 50 | 51 | text = "" 52 | # 处理响应流 53 | for line in response.iter_lines(chunk_size=None, decode_unicode=True): 54 | if line: 55 | # 处理接收到的数据 56 | # print(line.decode('utf-8')) 57 | resp = json.loads(line) 58 | if resp["code"] == 200: 59 | text += resp['resp_data']['reply'] 60 | yield { 61 | "error_code": 0, 62 | "text": text 63 | } 64 | else: 65 | data = { 66 | "error_code": resp["code"], 67 | "text": resp["code_msg"] 68 | } 69 | self.logger.error(f"请求天工 API 时出错:{data}") 70 | yield data 71 | 72 | def get_embeddings(self, params): 73 | # TODO: 支持embeddings 74 | print("embedding") 75 | print(params) 76 | 77 | def make_conv_template(self, conv_template: str = None, model_path: str = None) -> Conversation: 78 | # TODO: 确认模板是否需要修改 79 | return conv.Conversation( 80 | name=self.model_names[0], 81 | system_message="", 82 | messages=[], 83 | roles=["user", "system"], 84 | sep="\n### ", 85 | stop_str="###", 86 | ) 87 | 88 | 89 | -------------------------------------------------------------------------------- /examples/model_workers/xinghuo.py: -------------------------------------------------------------------------------- 1 | from fastchat.conversation import Conversation 2 | from .base import * 3 | from fastchat import conversation as conv 4 | import sys 5 | import json 6 | from model_workers import SparkApi 7 | import websockets 8 | from muagent.utils.server_utils import run_async, iter_over_async 9 | from typing import List, Dict 10 | import asyncio 11 | 12 | 13 | 14 | async def request(appid, api_key, api_secret, Spark_url, domain, question, temperature, max_token): 15 | wsParam = SparkApi.Ws_Param(appid, api_key, api_secret, Spark_url) 16 | wsUrl = wsParam.create_url() 17 | data = SparkApi.gen_params(appid, domain, question, temperature, max_token) 18 | print(data) 19 | async with websockets.connect(wsUrl) as ws: 20 | await ws.send(json.dumps(data, ensure_ascii=False)) 21 | finish = False 22 | while not finish: 23 | chunk = await ws.recv() 24 | response = json.loads(chunk) 25 | if response.get("header", {}).get("status") == 2: 26 | finish = True 27 | if text := response.get("payload", {}).get("choices", {}).get("text"): 28 | yield text[0]["content"] 29 | 30 | 31 | class XingHuoWorker(ApiModelWorker): 32 | def __init__( 33 | self, 34 | *, 35 | model_names: List[str] = ["xinghuo-api"], 36 | controller_addr: str = None, 37 | worker_addr: str = None, 38 | version: str = None, 39 | **kwargs, 40 | ): 41 | kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr) 42 | kwargs.setdefault("context_len", 8000) # TODO: V1模型的最大长度为4000,需要自行修改 43 | super().__init__(**kwargs) 44 | self.version = version 45 | 46 | def do_chat(self, params: ApiChatParams) -> Dict: 47 | # TODO: 当前每次对话都要重新连接websocket,确认是否可以保持连接 48 | params.load_config(self.model_names[0]) 49 | 50 | version_mapping = { 51 | "v1.5": {"domain": "general", "url": 
"ws://spark-api.xf-yun.com/v1.1/chat","max_tokens": 4000}, 52 | "v2.0": {"domain": "generalv2", "url": "ws://spark-api.xf-yun.com/v2.1/chat","max_tokens": 8000}, 53 | "v3.0": {"domain": "generalv3", "url": "ws://spark-api.xf-yun.com/v3.1/chat","max_tokens": 8000}, 54 | } 55 | 56 | def get_version_details(version_key): 57 | return version_mapping.get(version_key, {"domain": None, "url": None}) 58 | 59 | details = get_version_details(params.version) 60 | domain = details["domain"] 61 | Spark_url = details["url"] 62 | text = "" 63 | try: 64 | loop = asyncio.get_event_loop() 65 | except: 66 | loop = asyncio.new_event_loop() 67 | params.max_tokens = min(details["max_tokens"], params.max_tokens or 0) 68 | for chunk in iter_over_async( 69 | request(params.APPID, params.api_key, params.APISecret, Spark_url, domain, params.messages, 70 | params.temperature, params.max_tokens), 71 | loop=loop, 72 | ): 73 | if chunk: 74 | text += chunk 75 | yield {"error_code": 0, "text": text} 76 | 77 | def get_embeddings(self, params): 78 | # TODO: 支持embeddings 79 | print("embedding") 80 | print(params) 81 | 82 | def make_conv_template(self, conv_template: str = None, model_path: str = None) -> Conversation: 83 | # TODO: 确认模板是否需要修改 84 | return conv.Conversation( 85 | name=self.model_names[0], 86 | system_message="你是一个聪明的助手,请根据用户的提示来完成任务", 87 | messages=[], 88 | roles=["user", "assistant"], 89 | sep="\n### ", 90 | stop_str="###", 91 | ) 92 | 93 | 94 | if __name__ == "__main__": 95 | import uvicorn 96 | from server.utils import MakeFastAPIOffline 97 | from fastchat.serve.model_worker import app 98 | 99 | worker = XingHuoWorker( 100 | controller_addr="http://127.0.0.1:20001", 101 | worker_addr="http://127.0.0.1:21003", 102 | ) 103 | sys.modules["fastchat.serve.model_worker"].worker = worker 104 | MakeFastAPIOffline(app) 105 | uvicorn.run(app, port=21003) 106 | -------------------------------------------------------------------------------- /examples/model_workers/zhipu.py: -------------------------------------------------------------------------------- 1 | from fastchat.conversation import Conversation 2 | import os 3 | from .base import * 4 | from fastchat import conversation as conv 5 | import sys 6 | from typing import List, Dict, Iterator, Literal 7 | from loguru import logger 8 | # from configs import logger, log_verbose 9 | log_verbose = os.environ.get("log_verbose", False) 10 | 11 | class ChatGLMWorker(ApiModelWorker): 12 | DEFAULT_EMBED_MODEL = "text_embedding" 13 | 14 | def __init__( 15 | self, 16 | *, 17 | model_names: List[str] = ["zhipu-api"], 18 | controller_addr: str = None, 19 | worker_addr: str = None, 20 | version: Literal["chatglm_turbo"] = "chatglm_turbo", 21 | **kwargs, 22 | ): 23 | kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr) 24 | kwargs.setdefault("context_len", 32768) 25 | super().__init__(**kwargs) 26 | self.version = version 27 | 28 | def do_chat(self, params: ApiChatParams) -> Iterator[Dict]: 29 | # TODO: 维护request_id 30 | import zhipuai 31 | 32 | params.load_config(self.model_names[0]) 33 | zhipuai.api_key = params.api_key 34 | 35 | if log_verbose: 36 | logger.info(f'{self.__class__.__name__}:params: {params}') 37 | 38 | response = zhipuai.model_api.sse_invoke( 39 | model=params.version, 40 | prompt=params.messages, 41 | temperature=params.temperature, 42 | top_p=params.top_p, 43 | incremental=False, 44 | ) 45 | for e in response.events(): 46 | if e.event == "add": 47 | yield {"error_code": 0, "text": e.data} 48 | elif e.event in ["error", 
"interrupted"]: 49 | data = { 50 | "error_code": 500, 51 | "text": str(e), 52 | "error": { 53 | "message": str(e), 54 | "type": "invalid_request_error", 55 | "param": None, 56 | "code": None, 57 | } 58 | } 59 | self.logger.error(f"请求智谱 API 时发生错误:{data}") 60 | yield data 61 | 62 | def do_embeddings(self, params: ApiEmbeddingsParams) -> Dict: 63 | import zhipuai 64 | 65 | params.load_config(self.model_names[0]) 66 | zhipuai.api_key = params.api_key 67 | 68 | embeddings = [] 69 | try: 70 | for t in params.texts: 71 | response = zhipuai.model_api.invoke(model=params.embed_model or self.DEFAULT_EMBED_MODEL, prompt=t) 72 | if response["code"] == 200: 73 | embeddings.append(response["data"]["embedding"]) 74 | else: 75 | self.logger.error(f"请求智谱 API 时发生错误:{response}") 76 | return response # dict with code & msg 77 | except Exception as e: 78 | self.logger.error(f"请求智谱 API 时发生错误:{data}") 79 | data = {"code": 500, "msg": f"对文本向量化时出错:{e}"} 80 | return data 81 | 82 | return {"code": 200, "data": embeddings} 83 | 84 | def get_embeddings(self, params): 85 | # TODO: 支持embeddings 86 | print("embedding") 87 | # print(params) 88 | 89 | def make_conv_template(self, conv_template: str = None, model_path: str = None) -> Conversation: 90 | # 这里的是chatglm api的模板,其它API的conv_template需要定制 91 | return conv.Conversation( 92 | name=self.model_names[0], 93 | system_message="你是一个聪明的助手,请根据用户的提示来完成任务", 94 | messages=[], 95 | roles=["Human", "Assistant", "System"], 96 | sep="\n###", 97 | stop_str="###", 98 | ) 99 | 100 | 101 | if __name__ == "__main__": 102 | import uvicorn 103 | from server.utils import MakeFastAPIOffline 104 | from fastchat.serve.model_worker import app 105 | 106 | worker = ChatGLMWorker( 107 | controller_addr="http://127.0.0.1:20001", 108 | worker_addr="http://127.0.0.1:21001", 109 | ) 110 | sys.modules["fastchat.serve.model_worker"].worker = worker 111 | MakeFastAPIOffline(app) 112 | uvicorn.run(app, port=21001) 113 | -------------------------------------------------------------------------------- /examples/sdfile_api.py: -------------------------------------------------------------------------------- 1 | import sys, os, json, traceback, uvicorn, argparse 2 | 3 | src_dir = os.path.join( 4 | os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 5 | ) 6 | sys.path.append(src_dir) 7 | 8 | from loguru import logger 9 | 10 | from fastapi import FastAPI 11 | from fastapi.middleware.cors import CORSMiddleware 12 | from fastapi import File, UploadFile 13 | 14 | from muagent.utils.server_utils import BaseResponse, ListResponse, DataResponse 15 | from configs.server_config import OPEN_CROSS_DOMAIN, SDFILE_API_SERVER 16 | from configs.model_config import JUPYTER_WORK_PATH 17 | 18 | 19 | VERSION = "v0.1.0" 20 | 21 | async def sd_upload_file(file: UploadFile = File(...), work_dir: str = JUPYTER_WORK_PATH): 22 | # 保存上传的文件到服务器 23 | try: 24 | content = await file.read() 25 | with open(os.path.join(work_dir, file.filename), "wb") as f: 26 | f.write(content) 27 | return {"data": True} 28 | except: 29 | return {"data": False} 30 | 31 | 32 | async def sd_download_file(filename: str, save_filename: str = "filename_to_download.ext", work_dir: str = JUPYTER_WORK_PATH): 33 | # 从服务器下载文件 34 | logger.debug(f"{os.path.join(work_dir, filename)}") 35 | return {"data": os.path.join(work_dir, filename), "filename": save_filename} 36 | # return {"data": FileResponse(os.path.join(work_dir, filename), filename=save_filename)} 37 | 38 | 39 | async def sd_list_files(work_dir: str = JUPYTER_WORK_PATH): 40 | # 去除目录 41 | return {"data": 
os.listdir(work_dir)} 42 | 43 | 44 | async def sd_delete_file(filename: str, work_dir: str = JUPYTER_WORK_PATH): 45 | # 去除目录 46 | try: 47 | os.remove(os.path.join(work_dir, filename)) 48 | return {"data": True} 49 | except: 50 | return {"data": False} 51 | 52 | 53 | def create_app(open_cross_domain, version=VERSION): 54 | app = FastAPI( 55 | title="DevOps-ChatBot API Server", 56 | version=version 57 | ) 58 | # MakeFastAPIOffline(app) 59 | # Add CORS middleware to allow all origins 60 | # 在config.py中设置OPEN_DOMAIN=True,允许跨域 61 | # set OPEN_DOMAIN=True in config.py to allow cross-domain 62 | if open_cross_domain: 63 | # if OPEN_CROSS_DOMAIN: 64 | app.add_middleware( 65 | CORSMiddleware, 66 | allow_origins=["*"], 67 | allow_credentials=True, 68 | allow_methods=["*"], 69 | allow_headers=["*"], 70 | ) 71 | 72 | app.post("/sdfiles/upload", 73 | tags=["files upload and download"], 74 | response_model=BaseResponse, 75 | summary="上传文件到沙盒" 76 | )(sd_upload_file) 77 | 78 | app.get("/sdfiles/download", 79 | tags=["files upload and download"], 80 | response_model=DataResponse, 81 | summary="从沙盒下载文件" 82 | )(sd_download_file) 83 | 84 | app.get("/sdfiles/list", 85 | tags=["files upload and download"], 86 | response_model=ListResponse, 87 | summary="从沙盒工作目录展示文件" 88 | )(sd_list_files) 89 | 90 | app.get("/sdfiles/delete", 91 | tags=["files upload and download"], 92 | response_model=BaseResponse, 93 | summary="从沙盒工作目录中删除文件" 94 | )(sd_delete_file) 95 | return app 96 | 97 | 98 | 99 | def run_api(host, port, open_cross_domain, **kwargs): 100 | app = create_app(open_cross_domain) 101 | if kwargs.get("ssl_keyfile") and kwargs.get("ssl_certfile"): 102 | uvicorn.run(app, 103 | host=host, 104 | port=port, 105 | ssl_keyfile=kwargs.get("ssl_keyfile"), 106 | ssl_certfile=kwargs.get("ssl_certfile"), 107 | ) 108 | else: 109 | uvicorn.run(app, host=host, port=port) 110 | 111 | 112 | if __name__ == "__main__": 113 | parser = argparse.ArgumentParser(prog='DevOps-ChatBot', 114 | description='About DevOps-ChatBot, local knowledge based LLM with langchain' 115 | ' | 基于本地知识库的 LLM 问答') 116 | parser.add_argument("--host", type=str, default="0.0.0.0") 117 | parser.add_argument("--port", type=int, default="7862") 118 | # parser.add_argument("--port", type=int, default=SDFILE_API_SERVER["port"]) 119 | parser.add_argument("--open_cross_domain", type=bool, default=False) 120 | parser.add_argument("--ssl_keyfile", type=str) 121 | parser.add_argument("--ssl_certfile", type=str) 122 | # 初始化消息 123 | args = parser.parse_args() 124 | args_dict = vars(args) 125 | run_api(host=args.host, 126 | port=args.port, 127 | open_cross_domain=args.open_cross_domain, 128 | ssl_keyfile=args.ssl_keyfile, 129 | ssl_certfile=args.ssl_certfile, 130 | ) -------------------------------------------------------------------------------- /examples/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | cp ../configs/model_config.py.example ../configs/model_config.py 5 | cp ../configs/server_config.py.example ../configs/server_config.py 6 | 7 | streamlit run webui_config.py --server.port 8510 8 | -------------------------------------------------------------------------------- /examples/stop.py: -------------------------------------------------------------------------------- 1 | import docker, sys, os 2 | from loguru import logger 3 | 4 | src_dir = os.path.join( 5 | os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 6 | ) 7 | sys.path.append(src_dir) 8 | 9 | from configs.server_config import ( 10 
| SANDBOX_CONTRAINER_NAME, CONTRAINER_NAME, SANDBOX_SERVER, DOCKER_SERVICE 11 | ) 12 | 13 | from start import check_docker, check_process 14 | 15 | try: 16 | client = docker.from_env() 17 | except: 18 | client = None 19 | 20 | 21 | def stop_main(): 22 | # 23 | check_docker(client, SANDBOX_CONTRAINER_NAME, do_stop=True, ) 24 | check_process(f"port={SANDBOX_SERVER['port']}", do_stop=True) 25 | check_process(f"port=5050", do_stop=True) 26 | 27 | # 28 | check_docker(client, CONTRAINER_NAME, do_stop=True, ) 29 | check_process("api.py", do_stop=True) 30 | check_process("sdfile_api.py", do_stop=True) 31 | check_process("llm_api.py", do_stop=True) 32 | check_process("webui.py", do_stop=True) 33 | 34 | 35 | if __name__ == "__main__": 36 | stop_main() -------------------------------------------------------------------------------- /examples/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from configs.model_config import ONLINE_LLM_MODEL 4 | from configs.server_config import FSCHAT_MODEL_WORKERS 5 | from configs.model_config import llm_model_dict, LLM_DEVICE 6 | 7 | from loguru import logger 8 | 9 | 10 | 11 | def get_model_worker_config( 12 | model_name: str = None, 13 | fastchat_mdoel_workers: dict = FSCHAT_MODEL_WORKERS, 14 | online_llm_model: dict = ONLINE_LLM_MODEL, 15 | llm_model_dict: dict = llm_model_dict, 16 | llm_device: str = LLM_DEVICE 17 | ) -> dict: 18 | ''' 19 | 加载model worker的配置项。 20 | 优先级:FSCHAT_MODEL_WORKERS[model_name] > ONLINE_LLM_MODEL[model_name] > FSCHAT_MODEL_WORKERS["default"] 21 | ''' 22 | import model_workers 23 | 24 | config = fastchat_mdoel_workers.get("default", {}).copy() 25 | config.update(online_llm_model.get(model_name, {}).copy()) 26 | config.update(fastchat_mdoel_workers.get(model_name, {}).copy()) 27 | 28 | if model_name in online_llm_model: 29 | config["online_api"] = True 30 | if provider := config.get("provider"): 31 | try: 32 | config["worker_class"] = getattr(model_workers, provider) 33 | except Exception as e: 34 | msg = f"在线模型 ‘{model_name}’ 的provider没有正确配置" 35 | logger.error(f'{e.__class__.__name__}: {msg}') 36 | # 本地模型 37 | if model_name in llm_model_dict: 38 | path = llm_model_dict[model_name]["local_model_path"] 39 | config["model_path"] = path 40 | if path and os.path.isdir(path): 41 | config["model_path_exists"] = True 42 | config["device"] = llm_device 43 | 44 | # logger.debug(f"config: {config}") 45 | return config -------------------------------------------------------------------------------- /examples/webui.py: -------------------------------------------------------------------------------- 1 | # 运行方式: 2 | # 1. 安装必要的包:pip install streamlit-option-menu streamlit-chatbox>=1.1.6 3 | # 2. 运行本机fastchat服务:python server\llm_api.py 或者 运行对应的sh文件 4 | # 3. 运行API服务器:python server/api.py。如果使用api = ApiRequest(no_remote_api=True),该步可以跳过。 5 | # 4. 
运行WEB UI:streamlit run webui.py --server.port 7860 6 | from loguru import logger 7 | import os 8 | import sys 9 | import streamlit as st 10 | from streamlit_option_menu import option_menu 11 | 12 | import multiprocessing 13 | 14 | src_dir = os.path.join( 15 | os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 16 | ) 17 | sys.path.append(src_dir) 18 | 19 | from webui import * 20 | from configs.model_config import VERSION, LLM_MODEL 21 | from configs.server_config import NO_REMOTE_API 22 | from configs.model_config import CB_ROOT_PATH 23 | 24 | from configs.model_config import embedding_model_dict, kbs_config, EMBEDDING_MODEL, DEFAULT_VS_TYPE, WEB_CRAWL_PATH 25 | 26 | 27 | api = ApiRequest(base_url="http://127.0.0.1:7861", no_remote_api=NO_REMOTE_API, cb_root_path=CB_ROOT_PATH) 28 | 29 | 30 | if __name__ == "__main__": 31 | st.set_page_config( 32 | "CodeFuse-ChatBot WebUI", 33 | os.path.join("../sources/imgs", "devops-chatbot.png"), 34 | initial_sidebar_state="expanded", 35 | menu_items={ 36 | 'Get Help': 'https://github.com/codefuse-ai/codefuse-chatbot', 37 | 'Report a bug': "https://github.com/codefuse-ai/codefuse-chatbot/issues", 38 | 'About': f"""欢迎使用 CodeFuse-ChatBot WebUI {VERSION}!""" 39 | } 40 | ) 41 | 42 | if not chat_box.chat_inited: 43 | st.toast( 44 | f"欢迎使用 [`CodeFuse-ChatBot`](https://github.com/codefuse-ai/codefuse-chatbot) ! \n\n" 45 | f"当前使用模型`{LLM_MODEL}`, 您可以开始提问了." 46 | ) 47 | 48 | pages = { 49 | "对话": { 50 | "icon": "chat", 51 | "func": dialogue_page, 52 | }, 53 | "知识库管理": { 54 | "icon": "hdd-stack", 55 | "func": knowledge_page, 56 | }, 57 | "代码知识库管理": { 58 | "icon": "hdd-stack", 59 | "func": code_page, 60 | }, 61 | # "Prompt管理": { 62 | # "icon": "hdd-stack", 63 | # "func": prompt_page, 64 | # }, 65 | } 66 | 67 | with st.sidebar: 68 | st.image( 69 | os.path.join( 70 | "../sources/imgs", 71 | "devops-chatbot.png" 72 | ), 73 | use_column_width=True 74 | ) 75 | st.caption( 76 | f"""

<p align="right">CodeFuse-ChatBot 当前版本:{VERSION}</p>

""", 77 | unsafe_allow_html=True, 78 | ) 79 | options = list(pages) 80 | icons = [x["icon"] for x in pages.values()] 81 | 82 | default_index = 0 83 | selected_page = option_menu( 84 | "", 85 | options=options, 86 | icons=icons, 87 | # menu_icon="chat-quote", 88 | default_index=default_index, 89 | ) 90 | 91 | if selected_page in pages: 92 | pages[selected_page]["func"](api) 93 | # pages["对话"]["func"](api, ) 94 | # pages["知识库管理"]["func"](api, embedding_model_dict, kbs_config, EMBEDDING_MODEL, DEFAULT_VS_TYPE, WEB_CRAWL_PATH) 95 | # pages["代码知识库管理"]["func"](api, ) 96 | -------------------------------------------------------------------------------- /examples/webui/__init__.py: -------------------------------------------------------------------------------- 1 | from .dialogue import dialogue_page, chat_box 2 | from .document import knowledge_page 3 | from .code import code_page 4 | from .prompt import prompt_page 5 | from .utils import ApiRequest 6 | 7 | __all__ = [ 8 | "dialogue_page", "chat_box", "prompt_page", "knowledge_page", 9 | "ApiRequest", "code_page" 10 | ] -------------------------------------------------------------------------------- /examples/webui/prompt.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import os 3 | import time 4 | from datetime import datetime 5 | import traceback 6 | from typing import Literal, Dict, Tuple 7 | from st_aggrid import AgGrid, JsCode 8 | from st_aggrid.grid_options_builder import GridOptionsBuilder 9 | import pandas as pd 10 | 11 | from .utils import * 12 | from muagent.utils.path_utils import * 13 | from muagent.service.service_factory import get_kb_details, get_kb_doc_details 14 | from muagent.orm import table_init 15 | 16 | 17 | 18 | def prompt_page(api: ApiRequest): 19 | # 判断表是否存在并进行初始化 20 | table_init() 21 | 22 | now = datetime.now() 23 | with st.sidebar: 24 | 25 | cols = st.columns(2) 26 | export_btn = cols[0] 27 | if cols[1].button( 28 | "清空prompt", 29 | use_container_width=True, 30 | ): 31 | st.experimental_rerun() 32 | 33 | export_btn.download_button( 34 | "导出记录", 35 | "测试prompt", 36 | file_name=f"{now:%Y-%m-%d %H.%M}_对话记录.md", 37 | mime="text/markdown", 38 | use_container_width=True, 39 | ) 40 | -------------------------------------------------------------------------------- /examples/webui/yamls/webui_en.yaml: -------------------------------------------------------------------------------- 1 | # This is an example of webui 2 | dialogue: 3 | mode_instruction: 请选择对话模式 4 | mode: 5 | - LLM Conversation 6 | - Knowledge Base Q&A 7 | - Code Knowledge Base Q&A 8 | - Search Engine Q&A 9 | - Agents Q&A 10 | history_length: History of Dialogue Turns 11 | text_mode_swtich: Switched to mode 12 | text_knowledgeBase_swtich: Current Knowledge Base" 13 | text_loaded_kbase: Loaded Knowledge Base 14 | text_loaded_cbase: Loaded Code Knowledge Base 15 | # Knowledge Base Q&A 16 | kbase_expander_name: 知识库配置 17 | kbase_selectbox_name: 请选择知识库: 18 | kbase_ninput_topk_name: 匹配知识条数: 19 | kbase_ninput_score_threshold_name: 知识匹配分数阈值: 20 | # Code Knowledge Base Q&A 21 | cbase_expander_name: 代码知识库配置 22 | cbase_selectbox_name: 请选择代码知识库: 23 | cbase_ninput_topk_name: 匹配代码条数: 24 | cbase_selectbox_type_name: 请选择查询模式: 25 | cbase_search_type_v1: 26 | - 基于 cypher 27 | - 基于标签 28 | - 基于描述 29 | cbase_search_type_v2: 30 | - 基于 cypher 31 | - 基于标签 32 | 33 | # Search Engine Q&A 34 | expander_search_name: 搜索引擎配置 35 | selectbox_search_name: 请选择搜索引擎 36 | ninput_search_topk_name: 匹配搜索结果条数: 37 | # Agents Q&A 38 | 
phase_expander_name: Phase Management 39 | phase_selectbox_name: Select the execution chain to use 40 | phase_toggle_detailed_name: Use detailed information for agent interaction 41 | phase_toggle_doToolUsing: Enable tool usage 42 | phase_multiselect_tools: Select the tools to use 43 | phase_toggle_doSearch: Enable search augmentation 44 | phase_toggle_doDocRetrieval: Enable knowledge base retrieval augmentation 45 | phase_toggle_doCodeRetrieval: Enable code retrieval augmentation 46 | 47 | sandbox: 48 | expander_name: Sandbox File Management 49 | file_upload_name: Upload sandbox file 50 | selectbox_name: Select a file to process 51 | button_upload_name: Upload 52 | button_download_name: Download 53 | button_delete_name: Delete 54 | toggle_doCodeInterpreter: Enable code interpreter 55 | toggle_doAutoCodeExec: Execute code automatically 56 | 57 | expander_code_name: Code Editor and Executor 58 | textArea_code_name: Code snippet 59 | button_modify_code_name: Modify dialogue 60 | text_modify_code: Dialogue modified successfully 61 | button_exec_code_name: Execute code 62 | text_execing_code: Executing code 63 | text_error_exec_code: code cannot be empty 64 | 65 | 66 | chat: 67 | chat_placeholder: Enter your message; use Ctrl+Enter for a line break 68 | chatbox_saying: Thinking... 69 | chatbox_doc_querying: Querying the knowledge base 70 | chatbox_code_querying: Querying the code knowledge base 71 | chatbox_searching: Searching 72 | chatbox_search_result: Web search results 73 | chatbox_doc_result: Knowledge base matches 74 | chatbox_code_result: Matched code base nodes 75 | 76 | export: 77 | button_clear_conversation_name: Clear conversation 78 | download_button_export_name: Export records -------------------------------------------------------------------------------- /examples/webui/yamls/webui_zh.yaml: -------------------------------------------------------------------------------- 1 | # This is an example of webui 2 | dialogue: 3 | mode_instruction: 请选择对话模式 4 | mode: 5 | - LLM 对话 6 | - 知识库问答 7 | - 代码知识库问答 8 | - 搜索引擎问答 9 | - Agent问答 10 | history_length: 历史对话轮数 11 | text_mode_swtich: 已切换到模式 12 | text_knowledgeBase_swtich: 当前知识库 13 | text_loaded_kbase: 已加载知识库 14 | text_loaded_cbase: 已加载代码知识库 15 | # Knowledge Base Q&A 16 | kbase_expander_name: 知识库配置 17 | kbase_selectbox_name: 请选择知识库: 18 | kbase_ninput_topk_name: 匹配知识条数: 19 | kbase_ninput_score_threshold_name: 知识匹配分数阈值: 20 | # Code Knowledge Base Q&A 21 | cbase_expander_name: 代码知识库配置 22 | cbase_selectbox_name: 请选择代码知识库: 23 | cbase_ninput_topk_name: 匹配代码条数: 24 | cbase_selectbox_type_name: 请选择查询模式: 25 | cbase_search_type_v1: 26 | - 基于 cypher 27 | - 基于标签 28 | - 基于描述 29 | cbase_search_type_v2: 30 | - 基于 cypher 31 | - 基于标签 32 | 33 | # Search Engine Q&A 34 | expander_search_name: 搜索引擎配置 35 | selectbox_search_name: 请选择搜索引擎 36 | ninput_search_topk_name: 匹配搜索结果条数: 37 | # Agents Q&A 38 | phase_expander_name: Phase管理 39 | phase_selectbox_name: 请选择待使用的执行链路 40 | phase_toggle_detailed_name: 是否使用明细信息进行agent交互 41 | phase_toggle_doToolUsing: 开启工具使用 42 | phase_multiselect_tools: 请选择待使用的工具 43 | phase_toggle_doSearch: 开启搜索增强 44 | phase_toggle_doDocRetrieval: 开启知识库检索增强 45 | phase_toggle_doCodeRetrieval: 开启代码检索增强 46 | 47 | sandbox: 48 | expander_name: 沙盒文件管理 49 | file_upload_name: 上传沙盒文件 50 | selectbox_name: 选择要处理文件 51 | button_upload_name: 点击上传 52 | button_download_name: 点击下载 53 | button_delete_name: 点击删除 54 | toggle_doCodeInterpreter: 开启代码解释器 55 | toggle_doAutoCodeExec: 自动执行代码 56 | 57 | expander_code_name: 代码编辑执行器 58 | textArea_code_name: 代码片段 59 | button_modify_code_name: 修改对话 60 | text_modify_code: 修改对话成功 61 | button_exec_code_name: 执行代码 62 | text_execing_code: 正在执行代码 63 | text_error_exec_code: code 不能为空 64 | 65 | 66 | chat: 67 | chat_placeholder: 请输入对话内容,换行请使用Ctrl+Enter 68 | chatbox_saying: 正在思考...
69 | chatbox_doc_querying: 正在查询知识库 70 | chatbox_code_querying: 正在查询代码知识库 71 | chatbox_searching: 正在执行搜索 72 | chatbox_search_result: 网络搜索结果 73 | chatbox_doc_result: 知识库匹配结果 74 | chatbox_code_result: 代码库匹配节点 75 | 76 | export: 77 | button_clear_conversation_name: 清空对话 78 | download_button_export_name: 导出记录 -------------------------------------------------------------------------------- /nltk_data/corpora/cmudict/README: -------------------------------------------------------------------------------- 1 | The Carnegie Mellon Pronouncing Dictionary [cmudict.0.7a] 2 | 3 | ftp://ftp.cs.cmu.edu/project/speech/dict/ 4 | https://cmusphinx.svn.sourceforge.net/svnroot/cmusphinx/trunk/cmudict/cmudict.0.7a 5 | 6 | Copyright (C) 1993-2008 Carnegie Mellon University. All rights reserved. 7 | 8 | File Format: Each line consists of an uppercased word, 9 | a counter (for alternative pronunciations), and a transcription. 10 | Vowels are marked for stress (1=primary, 2=secondary, 0=no stress). 11 | E.g.: NATURAL 1 N AE1 CH ER0 AH0 L 12 | 13 | The dictionary contains 127069 entries. Of these, 119400 words are assigned 14 | a unique pronunciation, 6830 words have two pronunciations, and 839 words have 15 | three or more pronunciations. Many of these are fast-speech variants. 16 | 17 | Phonemes: There are 39 phonemes, as shown below: 18 | 19 | Phoneme Example Translation Phoneme Example Translation 20 | ------- ------- ----------- ------- ------- ----------- 21 | AA odd AA D AE at AE T 22 | AH hut HH AH T AO ought AO T 23 | AW cow K AW AY hide HH AY D 24 | B be B IY CH cheese CH IY Z 25 | D dee D IY DH thee DH IY 26 | EH Ed EH D ER hurt HH ER T 27 | EY ate EY T F fee F IY 28 | G green G R IY N HH he HH IY 29 | IH it IH T IY eat IY T 30 | JH gee JH IY K key K IY 31 | L lee L IY M me M IY 32 | N knee N IY NG ping P IH NG 33 | OW oat OW T OY toy T OY 34 | P pee P IY R read R IY D 35 | S sea S IY SH she SH IY 36 | T tea T IY TH theta TH EY T AH 37 | UH hood HH UH D UW two T UW 38 | V vee V IY W we W IY 39 | Y yield Y IY L D Z zee Z IY 40 | ZH seizure S IY ZH ER 41 | 42 | (For NLTK, entries have been sorted so that, e.g. FIRE 1 and FIRE 2 43 | are contiguous, and not separated by FIRE'S 1.) 44 | 45 | Redistribution and use in source and binary forms, with or without 46 | modification, are permitted provided that the following conditions 47 | are met: 48 | 49 | 1. Redistributions of source code must retain the above copyright 50 | notice, this list of conditions and the following disclaimer. 51 | The contents of this file are deemed to be source code. 52 | 53 | 2. Redistributions in binary form must reproduce the above copyright 54 | notice, this list of conditions and the following disclaimer in 55 | the documentation and/or other materials provided with the 56 | distribution. 57 | 58 | This work was supported in part by funding from the Defense Advanced 59 | Research Projects Agency, the Office of Naval Research and the National 60 | Science Foundation of the United States of America, and by member 61 | companies of the Carnegie Mellon Sphinx Speech Consortium. We acknowledge 62 | the contributions of many volunteers to the expansion and improvement of 63 | this dictionary. 64 | 65 | THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 66 | ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 67 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 68 | PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 69 | NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 70 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 71 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 72 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 73 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 74 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 75 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 76 | 77 | -------------------------------------------------------------------------------- /nltk_data/taggers/averaged_perceptron_tagger/averaged_perceptron_tagger.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/taggers/averaged_perceptron_tagger/averaged_perceptron_tagger.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/czech.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/czech.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/danish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/danish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/dutch.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/dutch.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/english.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/english.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/estonian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/estonian.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/finnish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/finnish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/french.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/french.pickle 
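As an aside on the cmudict entry format documented in the README above (an uppercased word, a counter for alternative pronunciations, and space-separated phonemes with stress digits on the vowels), a minimal parser could look like the following sketch; the sample entry reuses the README's own example, and the function name is invented here:

```
def parse_cmudict_line(line):
    # Format per the README: WORD COUNTER PHONEME...,
    # e.g. "NATURAL 1 N AE1 CH ER0 AH0 L".
    word, counter, *phones = line.split()
    return word, int(counter), phones

word, variant, phones = parse_cmudict_line("NATURAL 1 N AE1 CH ER0 AH0 L")
# Stress digits ride on the vowels: AE1 = primary, ER0/AH0 = unstressed.
stresses = [p[-1] for p in phones if p[-1].isdigit()]
assert word == "NATURAL" and stresses == ["1", "0", "0"]
```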
-------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/german.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/german.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/greek.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/greek.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/italian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/italian.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/malayalam.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/malayalam.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/norwegian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/norwegian.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/polish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/polish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/portuguese.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/portuguese.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/russian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/russian.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/slovene.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/slovene.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/spanish.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/spanish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/swedish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/swedish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/PY3/turkish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/PY3/turkish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/czech.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/czech.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/danish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/danish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/dutch.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/dutch.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/estonian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/estonian.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/finnish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/finnish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/french.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/french.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/german.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/german.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/italian.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/italian.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/malayalam.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/malayalam.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/norwegian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/norwegian.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/polish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/polish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/portuguese.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/portuguese.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/russian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/russian.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/slovene.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/slovene.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/spanish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/spanish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/swedish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/swedish.pickle -------------------------------------------------------------------------------- /nltk_data/tokenizers/punkt/turkish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/nltk_data/tokenizers/punkt/turkish.pickle -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch<=2.0.1 2 | fschat==0.2.33 3 | 
nltk~=3.8.1 4 | uvicorn~=0.23.1 5 | starlette~=0.27.0 6 | # pydantic<=1.10.14 7 | unstructured[all-docs] 8 | pypdf 9 | duckduckgo-search 10 | pysocks 11 | accelerate 12 | websockets 13 | fake_useragent 14 | selenium 15 | jsonref 16 | 17 | # uncomment libs if you want to use corresponding vector store 18 | # pymilvus==2.1.3 # requires milvus==2.1.3 19 | # psycopg2 20 | # pgvector 21 | 22 | streamlit 23 | streamlit_option_menu 24 | streamlit-chatbox 25 | streamlit-aggrid 26 | # streamlit-antd-components>=0.1.11 27 | httpx 28 | tenacity<8.4.0 29 | 30 | codefuse-muagent 31 | # qwen model 32 | # protobuf==3.20.* 33 | transformers_stream_generator 34 | einops 35 | optimum 36 | # auto-gptq 37 | # modelscope 38 | 39 | # vllm model 40 | # vllm; sys_platform == "linux" 41 | 42 | # chatglm 43 | sentencepiece -------------------------------------------------------------------------------- /sources/docs/python_langchain_com_docs_get_started_introduction_text.jsonl: -------------------------------------------------------------------------------- 1 | {"url": "https://python.langchain.com/docs/get_started/introduction", "host_url": "https://python.langchain.com", "title": "Introduction | 🦜️🔗 Langchain", "all_text": "\n\nIntroduction | 🦜️🔗 Langchain\n\nSkip to main content🦜️🔗 LangChainDocsUse casesIntegrationsAPICommunityChat our docsLangSmithJS/TS DocsSearchCTRLKGet startedIntroductionInstallationQuickstartLangChain Expression LanguageInterfaceHow toCookbookLangChain Expression Language (LCEL)ModulesModel I/​ORetrievalChainsMemoryAgentsCallbacksModulesGuidesMoreGet startedIntroductionOn this pageIntroductionLangChain is a framework for developing applications powered by language models. It enables applications that:Are context-aware: connect a language model to sources of context (prompt instructions, few shot examples, content to ground its response in, etc.)Reason: rely on a language model to reason (about how to answer based on provided context, what actions to take, etc.)The main value props of LangChain are:Components: abstractions for working with language models, along with a collection of implementations for each abstraction. Components are modular and easy-to-use, whether you are using the rest of the LangChain framework or notOff-the-shelf chains: a structured assembly of components for accomplishing specific higher-level tasksOff-the-shelf chains make it easy to get started. For complex applications, components make it easy to customize existing chains and build new ones.Get started​Here’s how to install LangChain, set up your environment, and start building.We recommend following our Quickstart guide to familiarize yourself with the framework by building your first LangChain application.Note: These docs are for the LangChain Python package. 
For documentation on LangChain.js, the JS/TS version, head here.Modules​LangChain provides standard, extendable interfaces and external integrations for the following modules, listed from least to most complex:Model I/O​Interface with language modelsRetrieval​Interface with application-specific dataChains​Construct sequences of callsAgents​Let chains choose which tools to use given high-level directivesMemory​Persist application state between runs of a chainCallbacks​Log and stream intermediate steps of any chainExamples, ecosystem, and resources​Use cases​Walkthroughs and best-practices for common end-to-end use cases, like:Document question answeringChatbotsAnalyzing structured dataand much more...Guides​Learn best practices for developing with LangChain.Ecosystem​LangChain is part of a rich ecosystem of tools that integrate with our framework and build on top of it. Check out our growing list of integrations and dependent repos.Additional resources​Our community is full of prolific developers, creative builders, and fantastic teachers. Check out YouTube tutorials for great tutorials from folks in the community, and Gallery for a list of awesome LangChain projects, compiled by the folks at KyroLabs.Community​Head to the Community navigator to find places to ask questions, share feedback, meet other developers, and dream about the future of LLM’s.API reference​Head to the reference section for full documentation of all classes and methods in the LangChain Python package.PreviousGet startedNextInstallationGet startedModulesExamples, ecosystem, and resourcesUse casesGuidesEcosystemAdditional resourcesCommunityAPI referenceCommunityDiscordTwitterGitHubPythonJS/TSMoreHomepageBlogCopyright © 2023 LangChain, Inc.\n\n"} 2 | -------------------------------------------------------------------------------- /sources/docs_imgs/BaseAgent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/BaseAgent.png -------------------------------------------------------------------------------- /sources/docs_imgs/agent-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/agent-flow.png -------------------------------------------------------------------------------- /sources/docs_imgs/devops-chatbot-module-v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/devops-chatbot-module-v2.png -------------------------------------------------------------------------------- /sources/docs_imgs/devops-chatbot-module.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/devops-chatbot-module.png -------------------------------------------------------------------------------- /sources/docs_imgs/devopsgpt_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/devopsgpt_example.png 
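As a brief aside on the sources/docs records shown above: each *.jsonl file stores one JSON object per line with url, host_url, title, and all_text fields. A minimal reader might look like this sketch (iter_crawled_docs is not a function from this repo):

```
import json

def iter_crawled_docs(path):
    # One JSON object per line; fields observed in the sample record above:
    # url, host_url, title, all_text.
    with open(path, encoding="utf-8") as f:
        for line in f:
            if line.strip():
                yield json.loads(line)

# Example, using a path from the repo layout:
# for doc in iter_crawled_docs("sources/docs/python_langchain_com_docs_get_started_introduction_text.jsonl"):
#     print(doc["title"], len(doc["all_text"]))
```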
-------------------------------------------------------------------------------- /sources/docs_imgs/devopsgpt_example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/devopsgpt_example2.png -------------------------------------------------------------------------------- /sources/docs_imgs/luban.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/luban.png -------------------------------------------------------------------------------- /sources/docs_imgs/objective.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/objective.png -------------------------------------------------------------------------------- /sources/docs_imgs/objective_v4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/objective_v4.png -------------------------------------------------------------------------------- /sources/docs_imgs/roadmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/roadmap.png -------------------------------------------------------------------------------- /sources/docs_imgs/roadmap2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/roadmap2.png -------------------------------------------------------------------------------- /sources/docs_imgs/webui_config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/webui_config.png -------------------------------------------------------------------------------- /sources/docs_imgs/wechat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/docs_imgs/wechat.png -------------------------------------------------------------------------------- /sources/imgs/devops-chatbot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/imgs/devops-chatbot.png -------------------------------------------------------------------------------- /sources/imgs/devops-chatbot2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/imgs/devops-chatbot2.png -------------------------------------------------------------------------------- /sources/imgs/docker_logs.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/imgs/docker_logs.png -------------------------------------------------------------------------------- /sources/imgs/fastapi_docs_020_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codefuse-ai/codefuse-chatbot/d6932ecfc855035fdcb25140b80e339e6137652c/sources/imgs/fastapi_docs_020_0.png -------------------------------------------------------------------------------- /sources/readme_docs/coagent/agent-flow-en.md: -------------------------------------------------------------------------------- 1 | 2 | ## Attention 3 | Attention: The overall content is not yet complete, and further refinements to the flow and other Agent diagrams will be made in the future. 4 | 5 | ## Introduction to Core Connectors 6 | To facilitate everyone's understanding of the entire CoAgent link, we use a Flow format to explain in detail how to build it through configuration. 7 | 8 |
9 | [figure: CoAgent flow diagram] 10 |
11 | 12 | 13 |
Below, we first introduce the related core components.
14 | 15 | ### Agent 16 | At the Agent design level, we provide four basic Agent types; by giving them basic role settings, they can cover the interaction patterns of a variety of common scenarios. 17 | 1. BaseAgent: Provides basic question answering, tool usage, and code execution. It implements Input => Output according to the Prompt format. 18 | 19 |
20 | [figure: BaseAgent diagram] 21 |
22 | 23 | 2. ExecutorAgent: Executes the tasks in a task list in order, following the plan arranged by the User or the previous Agent, and completes the related tasks. 24 | 3. ReactAgent: Provides standard React functionality, carrying out the current task based on the question at hand. 25 | 4. SelectorAgent: Provides the functionality of choosing an Agent. 26 | 27 | It selects the appropriate Agent to respond based on the question from the User or the previous Agent. After output, the message is pushed into the memory pool, which is subsequently managed by the Memory Manager. 28 | 29 | ### Chain 30 | Basic Chain: BaseChain, which connects the interaction of agents and manages the related messages and memory. 31 | 32 | ### Phase 33 | Basic Phase: BasePhase, which connects the interaction of chains and manages the related messages and memory. 34 | 35 | ### Prompt Manager 36 | Creation of prompts for each agent in a Multi-Agent link: 37 | 38 | - By simply setting prompt_input_keys and prompt_output_keys, one can reuse the preset Prompt Context creation logic, thus achieving rapid configuration of the agent prompt. 39 | - The prompt manager module can also be redesigned with new key-context designs to implement a personalized Agent Prompt. 40 | 41 | ### Memory Manager 42 | Mainly used for the management of chat history; not yet complete: 43 | 44 | - Manages the reading and writing of chat history in the database, including user input, llm output, doc retrieval, code retrieval, search retrieval. 45 | - Summarizes key information from the chat history to form a summary context, which serves as prompt context. 46 | - Provides a retrieval function to fetch information related to the question from the chat history or the summary context, aiding question answering. 47 | -------------------------------------------------------------------------------- /sources/readme_docs/coagent/agent-flow.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## 注意 4 | 注意:整体内容未完善,后续还会完善flow和其它Agent的图例 5 | 6 | ## 核心Connector介绍 7 | 为了便于大家理解整个 CoAgent 的链路,我们采取 Flow 的形式来详细介绍如何通过配置构建 8 | 9 |
10 | [图示:CoAgent 流程图] 11 |
12 | 13 | 14 |
下面,我们先介绍相关的核心组件
15 | 16 | ### Agent 17 | 在Agent设计层面,我们提供了四种基本的Agent类型,对这些Agent进行Role的基础设定,可满足多种通用场景的交互和使用 18 | 1. BaseAgent:提供基础问答、工具使用、代码执行的功能,根据Prompt格式实现 输入 => 输出 19 | 20 |
21 | [图示:BaseAgent 图例] 22 |
23 | 24 | 2. ExecutorAgent:对任务清单进行顺序执行,根据 User 或 上一个Agent编排的计划,完成相关任务 25 | 3. ReactAgent:提供标准React的功能,根据问题实现当前任务 26 | 4. SelectorAgent:提供选择Agent的功能,根据User 或 上一个 Agent的问题选择合适的Agent来进行回答. 27 | 28 | 输出后将 message push 到 memory pool 之中,后续通过Memory Manager进行管理 29 | 30 | ### Chain 31 | 基础链路:BaseChain,串联agent的交互,完成相关message和memory的管理 32 | 33 | ### Phase 34 | 基础场景:BasePhase,串联chain的交互,完成相关message和memory的管理 35 | 36 | ### Prompt Manager 37 | Mutli-Agent链路中每一个agent的prompt创建 38 | - 通过对promtp_input_keys和promtp_output_keys对的简单设定,可以沿用预设 Prompt Context 创建逻辑,从而实现agent prompt快速配置 39 | - 也可以对prompt manager模块进行新的 key-context 设计,实现个性化的 Agent Prompt 40 | 41 | ### Memory Manager 42 | 主要用于 chat history 的管理,暂未完成 43 | - 将chat history在数据库进行读写管理,包括user input、 llm output、doc retrieval、code retrieval、search retrieval 44 | - 对 chat history 进行关键信息总结 summary context,作为 prompt context 45 | - 提供检索功能,检索 chat history 或者 summary context 中与问题相关信息,辅助问答 46 | -------------------------------------------------------------------------------- /sources/readme_docs/coagent/coagent-en.md: -------------------------------------------------------------------------------- 1 | 2 | ## 简介 3 | To enhance the performance of large language models (LLMs) in terms of inference accuracy, the industry has seen various innovative approaches to utilizing LLMs. From the earliest Chain of Thought (CoT), Text of Thought (ToT), to Graph of Thought (GoT), these methods have continually expanded the capability boundaries of LLMs. In dealing with complex problems, we can use the ReAct process to select, invoke, and execute tool feedback, achieving multi-round tool usage and multi-step execution. 4 | 5 | However, for more complex scenarios, such as the development of intricate code, single-function LLM Agents are clearly insufficient. Thus, the community has begun to develop combinations of multiple Agents, such as projects focused on metaGPT, GPT-Engineer, chatDev in the development domain, and AutoGen projects focused on automating the construction of Agents and Agent dialogue. 6 | 7 | After in-depth analysis of these frameworks, it has been found that most Agent frameworks are highly coupled, with poor usability and extensibility. They achieve specific scenarios in preset environments, but expanding these scenarios is fraught with difficulty. 8 | 9 | Therefore, we aim to build an extensible, user-friendly Multi-Agent framework to support ChatBots in retrieving knowledge base information while assisting with various common tasks such as daily office work, data analysis, and development operations. 10 | 11 | This Multi-Agent framework project incorporates excellent design elements from multiple frameworks, such as the message pool from metaGPT and the agent selector from autogen. 12 | 13 |
14 | [figure: Multi-Agent framework overview] 15 |
16 | 17 | The following modules introduce the necessary components of the Multi-Agent framework from five aspects: 18 | 19 | - **Agent Communication:** In the Multi-Agent framework, ensuring effective information exchange among Agents is crucial for managing context and improving Q&A efficiency. 20 | - Follow a straightforward and intuitive chain-based dialogue principle, arranging Agents in a linear fashion to form an execution chain. 21 | - Drawing from the Message Pool framework in metaGPT, Agents are allowed to push to and subscribe to the Message Pool, making the chain more flexible. This benefits fine-grained Prompt engineering, but it makes the relationships in complex chains harder to analyze (a toy sketch of the push/subscribe idea follows below). 22 | 23 | - **Standard Operation Process (SOP)**: Standardizing the parsing and handling of the LLM's generated results. 24 | - Define the input and output scope of an Agent, assembling and parsing the relevant Actions and Statuses to ensure the stability of the framework. 25 | - Encapsulate a variety of fundamental Action execution modules, such as Tool Using, Planning, Coding, Direct Answering, final answer, etc., to meet the basic work requirements of an Agent. 26 | 27 | - **Plan and Executor**: Enhance the LLM's tool usage, Agent scheduling, and code generation. Several basic chains have been set up, for example: 28 | - a. Single-round Q&A, which can also be expanded to forms like CoT, ToT, GoT, etc. 29 | - b. ReAct, a basic response decision-making process in which the model sets an SOP status to terminate the loop. 30 | - c. Task Planning - Executor, which ends once the planned tasks are completed. 31 | - **Long-short term memory Management**: The key difference between Multi-Agent and single-Agent setups is that Multi-Agent needs to handle a large amount of communication information, much like human teamwork. Add an Agent specifically responsible for content summarization (similar to a meeting assistant) to summarize long-term memories and pass more effective information to the next Agent, rather than passing all content along. 32 | - **Human-agent interaction**: Complex scenarios require human intervention in the Agent interaction process to provide feedback. Through the aforementioned Long-short term memory Management and Agent Communication processes, the LLM can accurately understand human intentions and thereby complete tasks more effectively. 33 | 34 | In summary, these five elements together construct a Multi-Agent framework, ensuring closer and more efficient cooperation between Agents while adapting to more complex task requirements and a wider variety of interaction scenarios. Multiple Agent chains can be combined to implement a complete, complex project launch scenario (Dev Phase), such as Demand Chain (CEO), Product Argument Chain (CPO, CFO, CTO), Engineer Group Chain (Selector, Developer1~N), QA Engineer Chain (Developer, Tester), Deploy Chain (Developer, Deployer).
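As a rough illustration of the push/subscribe communication described in the Agent Communication bullet above, a minimal message pool might look like the following. This is a toy sketch, not the framework's actual API; the class and method names (MessagePool, push, subscribe) are invented for illustration.

```
from collections import defaultdict

class MessagePool:
    """Toy push/subscribe pool in the spirit of the metaGPT-style
    Message Pool described above; the real framework's API differs."""
    def __init__(self):
        self.messages = []                    # full history, in push order
        self.subscribers = defaultdict(list)  # sender role -> callbacks

    def subscribe(self, sender, callback):
        # An agent registers interest in messages from a given sender.
        self.subscribers[sender].append(callback)

    def push(self, sender, content):
        msg = {"sender": sender, "content": content}
        self.messages.append(msg)
        for callback in self.subscribers[sender]:
            callback(msg)

pool = MessagePool()
pool.subscribe("planner", lambda m: print("executor saw:", m["content"]))
pool.push("planner", "step 1: load employee_data.csv")
```

Decoupling senders from receivers this way is what lets a chain be rearranged without rewriting each Agent, at the cost of making the overall message flow harder to trace.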
35 | 36 | ## 模块分类 37 | - [connector](/sources/readme_docs/coagent/connector/connector_agent.md) 38 | - document_loaders 39 | - embeddings 40 | - llm_models 41 | - orm 42 | - sandbox 43 | - service 44 | - text_splitter 45 | - tools 46 | - utils 47 | 48 | -------------------------------------------------------------------------------- /sources/readme_docs/coagent/coagent.md: -------------------------------------------------------------------------------- 1 | 2 | ## 📜 目录 3 | - [简介](#简介) 4 | - [模块分类](#模块分类) 5 | 6 | 7 | ## 简介 8 | 9 | 为了提高大型模型在推理准确性方面的表现,业界出现了多种创新的大型语言模型(LLM)玩法。从最早的CoT、ToT到GoT,这些方法不断拓展了LLM的能力边界。在处理复杂问题时,我们可以通过ReAct过程来选择、调用和执行工具反馈,同时实现多轮工具使用和多步骤执行。 10 | 11 | 但对于更复杂的场景,例如复杂代码的开发,单一功能的LLM Agent显然难以胜任。因此,社区开始发展出多Agent的组合玩法,比如专注于metaGPT、GPT-Engineer、chatDev等开发领域的项目,以及专注于自动化构建Agent和Agent对话的AutoGen项目。 12 | 13 | 经过对这些框架的深入分析,发现大多数的Agent框架整体耦合度较高,其易用性和可扩展性较差。在预设场景中实现特定场景,但想要进行场景扩展却困难重重。 14 | 15 | 因此,我们希望构建一个可扩展、易于使用的Multi-Agent框架,以支持ChatBot在获取知识库信息的同时,能够辅助完成日常办公、数据分析、开发运维等各种通用任务。 16 | 17 | 本项目的Mutli-Agent框架汲取兼容了多个框架的优秀设计,比如metaGPT中的消息池(message pool)、autogen中的代理选择器(agent selector)等。 18 | 19 |
20 | [图示:Multi-Agent 框架图] 21 |
22 | 23 | 以下模块将从5个方面介绍Multi Agent框架所需要素: 24 | - Agent Communication在Multi Agent框架中,确保Agent可以有效地进行信息交流对于管理上下文以及提高问答效率至关重要。 25 | a. 遵循简洁直观易于理解的链式对话原则,将Agent以线性方式排列串连成一个执行链路。 26 | b. 借鉴metaGPT中的Message Pool框架,允许Agent对Message Pool进行推送和订阅,使链路更加灵活。有利于精细化Prompt工程的场景,但难以把握复杂链路的关系分析。 27 | - Standard Operation Process(SOP):对LLM的生成结果进行标准化解析和处理。 28 | a. 定义Agent的 Input 和 Output 范围,能够组装和解析相关Action和Status,保证框架运行的稳定性 29 | b. 封装多种基础Action执行模块,如Tool Using、Planning、Coding、Direct Answering、final answer等SOP标识,以满足Agent的基本工作需求。 30 | - Plan and Executor:增加LLM的Tool使用、Agent调度、代码的生成。设置了几种基本链路,例如: 31 | a. 单轮问答,也可以扩展到CoT、ToT、GoT等形式。 32 | b. ReAct,基础的响应决策过程,模型设置SOP 状态以终止循环 33 | c. TaskPlaning - Executor,任务完成即可结束 34 | - Long-short term memory Management:Multi-Agent与单Agent的关键区别在于,Multi-Agent需要处理大量的交流信息,类似人类团队协作的过程。增加一个专门负责内容总结(类似于会议助理)的Agent,对长期记忆进行总结并提更有效信息传递给下一位Agent,而非传递所有内容给下一位Agent。 35 | - Human-agent interaction:面对复杂场景时,需要人类介入Agent交互过程并提供反馈。通过上述 Long-short term memory Management 和 Agent Communication 过程,使LLM能准确理解人类的意图,从而更有效地完成任务。 36 | 37 | 总的来说,这五个要素共同构建了一个Multi Agent框架,确保Agent之间的协作更加紧密和高效,同时也能够适应更复杂的任务需求和更多样的交互场景。通过组合多个Agent链路来实现一个完整且复杂的项目上线场景(Dev Phase),如Demand Chain(CEO)、Product Arguement Chain(CPO、CFO、CTO)、Engineer Group Chain(Selector、Developer1~N)、QA Engineer Chain(Developer、Tester)、Deploy Chain(Developer、Deploer)。 38 | 39 | 40 | ## 模块分类 41 | - [connector](/sources/readme_docs/coagent/connector/connector_agent.md) 42 | - document_loaders 43 | - embeddings 44 | - llm_models 45 | - orm 46 | - sandbox 47 | - service 48 | - text_splitter 49 | - tools 50 | - utils 51 | -------------------------------------------------------------------------------- /sources/readme_docs/coagent/connector/connector_agent.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Connector Agent 3 | slug: Connector Agent ZH 4 | url: "coagent/connector-agent-zh" 5 | aliases: 6 | - "/coagent/connector-agent-zh" 7 | --- 8 | 9 | 10 | ## 快速构建一个Agent 11 | - 首先增加openai配置,也可以是其它类似于openai接口的模型(通过fastchat启动) 12 | ``` 13 | from coagent.base_configs.env_config import JUPYTER_WORK_PATH, KB_ROOT_PATH 14 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 15 | from coagent.connector.configs import AGETN_CONFIGS 16 | from coagent.connector.agents import BaseAgent 17 | from coagent.connector.schema import Message, load_role_configs 18 | 19 | 20 | os.environ["API_BASE_URL"] = OPENAI_API_BASE 21 | os.environ["OPENAI_API_KEY"] = "sk-xx" 22 | openai.api_key = "sk-xxx" 23 | # os.environ["OPENAI_PROXY"] = "socks5h://127.0.0.1:13659" 24 | os.environ["DUCKDUCKGO_PROXY"] = os.environ.get("DUCKDUCKGO_PROXY") or "socks5://127.0.0.1:13659" 25 | ``` 26 | 27 | 28 | - 配置相关 LLM 和 Embedding Model 29 | ``` 30 | # LLM 和 Embedding Model 配置 31 | llm_config = LLMConfig( 32 | model_name="gpt-3.5-turbo", model_device="cpu",api_key=os.environ["OPENAI_API_KEY"], 33 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 34 | ) 35 | embed_config = EmbedConfig( 36 | embed_engine="model", embed_model="text2vec-base-chinese", 37 | embed_model_path="D://project/gitlab/llm/external/ant_code/Codefuse-chatbot/embedding_models/text2vec-base-chinese" 38 | ) 39 | ``` 40 | 41 | - 这里从已有的agent配置选一个role来做示例 42 | ``` 43 | # 从已有的配置中选择一个config,具体参数细节见下面 44 | role_configs = load_role_configs(AGETN_CONFIGS) 45 | agent_config = role_configs["general_planner"] 46 | # 生成agent实例 47 | base_agent = BaseAgent( 48 | role=agent_config.role, 49 | prompt_config = agent_config.prompt_config, 50 | 
prompt_manager_type=agent_config.prompt_manager_type, 51 | chat_turn=agent_config.chat_turn, 52 | focus_agents=[], 53 | focus_message_keys=[], 54 | llm_config=llm_config, 55 | embed_config=embed_config, 56 | jupyter_work_path=JUPYTER_WORK_PATH, 57 | kb_root_path=KB_ROOT_PATH, 58 | ) 59 | # round-1 60 | query_content = "确认本地是否存在employee_data.csv,并查看它有哪些列和数据类型;然后画柱状图" 61 | query = Message( 62 | role_name="human", role_type="user", 63 | role_content=query_content, input_query=query_content, origin_query=query_content, 64 | ) 65 | 66 | output_message = base_agent.step(query) 67 | print(output_message.to_str_content(content_key="parsed_output_list")) 68 | ``` 69 | 70 | ## Agent 参数配置 71 | ``` 72 | # 配置结构在这个目录 73 | from coagent.connector.schema import Role, PromptField 74 | ``` 75 | 76 | 77 | ### Agent Config 78 | |Config Key Name| Type| Description| 79 | | ------------------ | ---------- | ---------- | 80 | |role| Role |角色描述| 81 | |prompt_config |List[PromptField] |Enum:PromptManager 也可以继承以上几种Agent然后去构造相关的Agent| 82 | |prompt_manager_type |String |Enum:PromptManager 也可以继承以上几种Agent然后去构造自定义的Enum:PromptManager| 83 | |focus_agents |List[String] |metagpt的逻辑,关注哪些agent生成的message,可选值范围为:role_name 84 | |focus_message_keys |List[String]| 额外增加的逻辑,关注message里面具体的 key 信息可选值范围为:agent 的 output_keys| 85 | |chat_turn |int |只针对ReactAgent有效| 86 | |llm_config |LLMConfig |大语言模型配置| 87 | |embed_config |EmbedConfig |向量模型配置| 88 | |sandbox_server |Dict |沙盒环境即notebook启动配置| 89 | |jupyter_work_path |str |沙盒环境的工作目录| 90 | |kb_root_path |str |memory的存储路径| 91 | |log_verbose |str |agent prompt&predict的日志打印级别| 92 | 93 | ### Role 94 | 95 | | Config Key Name | Type | Description | 96 | |------------------|------|--------------------| 97 | | role_type | str | 角色类型, Enum: system、user、assistant、function、observation、summary | 98 | | role_name | str | 角色名称 | 99 | | role_desc | str | 角色描述 | 100 | | agent_type | str | 代理类型 | 101 | | role_prompt | str | 角色提示 | 102 | | template_prompt | str | 模板提示 | 103 | 104 | 105 | ### PromptField 106 | 107 | | Config Key Name | Type | Description | 108 | |-----------------|------|-------------| 109 | | field_name | str | | 110 | | function_name | str | | 111 | | title | str | | 112 | | description | str | | 113 | | is_context | bool | | 114 | | omit_if_empty | bool | | -------------------------------------------------------------------------------- /sources/readme_docs/coagent/connector/connector_chain.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Connector Chain 3 | slug: Connector Chain ZH 4 | url: "coagent/connector-chain-zh" 5 | aliases: 6 | - "/coagent/connector-chain-zh" 7 | --- 8 | 9 | ## 快速构建一个 agent chain 10 | - 首先增加openai配置,也可以是其它类似于openai接口的模型(通过fastchat启动) 11 | ``` 12 | # 设置openai的api-key 13 | import os, sys 14 | import openai 15 | import importlib 16 | 17 | os.environ["API_BASE_URL"] = OPENAI_API_BASE 18 | os.environ["OPENAI_API_KEY"] = "sk-xxxx" 19 | openai.api_key = "sk-xxxx" 20 | # os.environ["OPENAI_PROXY"] = "socks5h://127.0.0.1:13659" 21 | os.environ["DUCKDUCKGO_PROXY"] = os.environ.get("DUCKDUCKGO_PROXY") or "socks5://127.0.0.1:13659" 22 | ``` 23 | 24 | - 配置相关 LLM 和 Embedding Model 25 | ``` 26 | # LLM 和 Embedding Model 配置 27 | llm_config = LLMConfig( 28 | model_name="gpt-3.5-turbo", model_device="cpu",api_key=os.environ["OPENAI_API_KEY"], 29 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 30 | ) 31 | embed_config = EmbedConfig( 32 | embed_engine="model", embed_model="text2vec-base-chinese", 33 | 
embed_model_path="D://project/gitlab/llm/external/ant_code/Codefuse-chatbot/embedding_models/text2vec-base-chinese" 34 | ) 35 | ``` 36 | 37 | 38 | - 这里从已有的agent配置选多个role组合成 agent chain 39 | ``` 40 | from coagent.base_configs.env_config import JUPYTER_WORK_PATH, KB_ROOT_PATH 41 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 42 | from coagent.connector.configs import AGETN_CONFIGS 43 | from coagent.connector.chains import BaseChain 44 | from coagent.connector.schema import Message, load_role_configs 45 | 46 | # 构建 agent chain 链路 47 | role_configs = load_role_configs(AGETN_CONFIGS) 48 | agent_config = role_configs["general_planner"] 49 | role1 = role_configs["general_planner"] 50 | role2 = role_configs["executor"] 51 | agent_module = importlib.import_module("examples.connector.agents") 52 | agents = [ 53 | getattr(agent_module, role1.role.agent_type)( 54 | role=role1.role, 55 | prompt_config = role1.prompt_config, 56 | prompt_manager_type=role1.prompt_manager_type, 57 | chat_turn=role1.chat_turn, 58 | focus_agents=role1.focus_agents, 59 | focus_message_keys=role1.focus_message_keys, 60 | llm_config=llm_config, 61 | embed_config=embed_config, 62 | jupyter_work_path=JUPYTER_WORK_PATH, 63 | kb_root_path=KB_ROOT_PATH, 64 | ), 65 | getattr(agent_module, role2.role.agent_type)( 66 | role=role2.role, 67 | prompt_config = role2.prompt_config, 68 | prompt_manager_type=role2.prompt_manager_type, 69 | chat_turn=role2.chat_turn, 70 | focus_agents=role2.focus_agents, 71 | focus_message_keys=role2.focus_message_keys, 72 | llm_config=llm_config, 73 | embed_config=embed_config, 74 | jupyter_work_path=JUPYTER_WORK_PATH, 75 | kb_root_path=KB_ROOT_PATH, 76 | ), 77 | ] 78 | 79 | chain = BaseChain( 80 | agents, 81 | chat_turn=1, 82 | jupyter_work_path=JUPYTER_WORK_PATH, 83 | kb_root_path=KB_ROOT_PATH, 84 | llm_config=llm_config, 85 | embed_config=embed_config, 86 | ) 87 | ``` 88 | 89 | 90 | - 开始执行 91 | ``` 92 | # round-1 93 | query_content = "确认本地是否存在employee_data.csv,并查看它有哪些列和数据类型;然后画柱状图" 94 | query = Message( 95 | role_name="human", role_type="user", 96 | role_content=query_content, input_query=query_content, origin_query=query_content, 97 | ) 98 | 99 | output_message, output_memory = chain.step(query) 100 | print(output_memory.to_str_messages(content_key="parsed_output_list")) 101 | 102 | ``` 103 | 104 | 105 | ## Chain 参数配置 106 | |Config Key Name| Type |Description| 107 | | ------------------ | ---------- | ---------- | 108 | |agents| List[BaseAgent] | 109 | |llm_config |LLMConfig |大语言模型配置| 110 | |embed_config |EmbedConfig |向量模型配置| 111 | |sandbox_server |Dict |沙盒环境即notebook启动配置| 112 | |jupyter_work_path |str |沙盒环境的工作目录| 113 | |kb_root_path |str |memory的存储路径| 114 | |log_verbose |str |agent prompt&predict的日志打印级别| 115 | -------------------------------------------------------------------------------- /sources/readme_docs/coagent/connector/connector_memory.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Connector Memory 3 | slug: Connector Memory ZH 4 | url: "coagent/connector-memory-zh" 5 | aliases: 6 | - "/coagent/connector-memory-zh" 7 | --- 8 | 9 | 10 | ## Memory Manager 11 | 主要用于 chat history 的管理,暂未完成 12 | - 将chat history在数据库进行读写管理,包括user input、 llm output、doc retrieval、code retrieval、search retrieval 13 | - 对 chat history 进行关键信息总结 summary context,作为 prompt context 14 | - 提供检索功能,检索 chat history 或者 summary context 中与问题相关信息,辅助问答 15 | 16 | 17 | 18 | ## 使用示例 19 | 20 | ### 创建 memory manager 实例 21 | ``` 22 | import os 23 | import openai 24 | 
25 | from coagent.base_configs.env_config import KB_ROOT_PATH 26 | from coagent.connector.memory_manager import BaseMemoryManager, LocalMemoryManager 27 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 28 | from coagent.connector.schema import Message 29 | 30 | os.environ["API_BASE_URL"] = OPENAI_API_BASE 31 | os.environ["OPENAI_API_KEY"] = "sk-xx" 32 | openai.api_key = "sk-xxx" 33 | # os.environ["OPENAI_PROXY"] = "socks5h://127.0.0.1:13659" 34 | os.environ["DUCKDUCKGO_PROXY"] = os.environ.get("DUCKDUCKGO_PROXY") or "socks5://127.0.0.1:13659" 35 | 36 | # LLM 和 Embedding Model 配置 37 | llm_config = LLMConfig( 38 | model_name="gpt-3.5-turbo", model_device="cpu",api_key=os.environ["OPENAI_API_KEY"], 39 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 40 | ) 41 | embed_config = EmbedConfig( 42 | embed_engine="model", embed_model="text2vec-base-chinese", 43 | embed_model_path="D://project/gitlab/llm/external/ant_code/Codefuse-chatbot/embedding_models/text2vec-base-chinese" 44 | ) 45 | 46 | # 47 | phase_name = "test" 48 | memory_manager = LocalMemoryManager( 49 | unique_name=phase_name, 50 | do_init=True, 51 | kb_root_path = KB_ROOT_PATH, 52 | embed_config=embed_config, 53 | llm_config=llm_config 54 | ) 55 | ``` 56 | 57 | ### 支持Message管理 58 | 59 | ``` 60 | message1 = Message( 61 | role_name="test1", role_type="user", input_query="hello", origin_query="hello", 62 | parsed_output_list=[{"input": "hello"}] 63 | ) 64 | 65 | text = "hi! how can I help you?" 66 | message2 = Message( 67 | role_name="test2", role_type="assistant", input_query=text, origin_query=text, 68 | role_content=text, step_content=text, parsed_output_list=[{"answer": text}] 69 | ) 70 | 71 | text = "they say hello and hi to each other" 72 | message3 = Message( 73 | role_name="test3", role_type="summary", 74 | role_content=text, step_content=text, 75 | parsed_output_list=[{"summary": text}] 76 | ) 77 | 78 | ``` 79 | 80 | ### 支持 memory 检索 81 | ``` 82 | # embedding retrieval test 83 | text = "say hi, i want some help" 84 | print(memory_manager.router_retrieval(text=text, datetime="2024-01-08 20:22:00", n=4, top_k=5, retrieval_type= "datetime")) 85 | print(memory_manager.router_retrieval(text=text, datetime="2024-01-08 20:22:00", n=4, top_k=5, retrieval_type= "embedding")) 86 | print(memory_manager.router_retrieval(text=text, datetime="2024-01-08 20:22:00", n=4, top_k=5, retrieval_type= "text")) 87 | 88 | ``` 89 | ### 支持 memory 总结 90 | ``` 91 | # recursive_summary test 92 | print(memory_manager.recursive_summary(local_memory_manager.recall_memory.messages, split_n=1)) 93 | ``` -------------------------------------------------------------------------------- /sources/readme_docs/coagent/connector/connector_phase.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Connector Phase 3 | slug: Connector Phase ZH 4 | url: "coagent/connector-phase-zh" 5 | aliases: 6 | - "/coagent/connector-phase-zh" 7 | --- 8 | 9 | 10 | 11 | ## 快速构建一个 agent phase 12 | - 首先增加openai配置,也可以是其它类似于openai接口的模型(通过fastchat启动) 13 | ``` 14 | from coagent.base_configs.env_config import JUPYTER_WORK_PATH, KB_ROOT_PATH 15 | from coagent.llm_models.llm_config import EmbedConfig, LLMConfig 16 | from coagent.connector.configs import AGETN_CONFIGS 17 | from coagent.connector.phase import BasePhase 18 | from coagent.connector.schema import Message, load_role_configs 19 | 20 | 21 | os.environ["API_BASE_URL"] = OPENAI_API_BASE 22 | os.environ["OPENAI_API_KEY"] = "sk-xx" 23 | openai.api_key = "sk-xxx" 24 
| # os.environ["OPENAI_PROXY"] = "socks5h://127.0.0.1:13659" 25 | os.environ["DUCKDUCKGO_PROXY"] = os.environ.get("DUCKDUCKGO_PROXY") or "socks5://127.0.0.1:13659" 26 | ``` 27 | 28 | 29 | - 配置相关 LLM 和 Embedding Model 30 | ``` 31 | # LLM 和 Embedding Model 配置 32 | llm_config = LLMConfig( 33 | model_name="gpt-3.5-turbo", model_device="cpu", api_key=os.environ["OPENAI_API_KEY"], 34 | api_base_url=os.environ["API_BASE_URL"], temperature=0.3 35 | ) 36 | embed_config = EmbedConfig( 37 | embed_engine="model", embed_model="text2vec-base-chinese", 38 | embed_model_path="D://project/gitlab/llm/external/ant_code/Codefuse-chatbot/embedding_models/text2vec-base-chinese" 39 | ) 40 | ``` 41 | 42 | 43 | - 这里从已有的 phase 配置中选一个 phase 来做示例 44 | ``` 45 | # log-level:打印 prompt 和 llm predict 的日志 46 | os.environ["log_verbose"] = "2" 47 | 48 | phase_name = "searchChatPhase" 49 | phase = BasePhase( 50 | phase_name, embed_config=embed_config, llm_config=llm_config, 51 | ) 52 | 53 | # round-1 54 | query_content1 = "美国当前总统是谁?" 55 | query = Message( 56 | role_name="human", role_type="user", 57 | role_content=query_content1, input_query=query_content1, origin_query=query_content1, 58 | search_engine_name="duckduckgo", score_threshold=1.0, top_k=3 59 | ) 60 | 61 | output_message, output_memory = phase.step(query) 62 | 63 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) 64 | 65 | # round-2 66 | query_content2 = "美国上一任总统是谁,两个人有什么关系没?" 67 | query = Message( 68 | role_name="human", role_type="user", 69 | role_content=query_content2, input_query=query_content2, origin_query=query_content2, 70 | search_engine_name="duckduckgo", score_threshold=1.0, top_k=3 71 | ) 72 | output_message, output_memory = phase.step(query) 73 | print(output_memory.to_str_messages(return_all=True, content_key="parsed_output_list")) 74 | ``` 75 | 76 | 77 | 78 | ## Phase 参数配置 79 | |Config Key Name |Type |Description| 80 | | ------------------ | ---------- | ---------- | 81 | |phase_name| String| 场景名称| 82 | |phase_config|CompletePhaseConfig| 默认为 None,可直接指定完整的 phase config(暂未实现)| 83 | |llm_config |LLMConfig |大语言模型配置| 84 | |embed_config |EmbedConfig |向量模型配置| 85 | |sandbox_server |Dict |沙盒环境(即 notebook)的启动配置| 86 | |jupyter_work_path |str |沙盒环境的工作目录| 87 | |kb_root_path |str |memory 的存储路径| 88 | |log_verbose |str |agent prompt&predict 的日志打印级别| 89 | | base_phase_config | Union[dict, str] | 默认配置:PHASE_CONFIGS,可通过对该变量新增配置来实现自定义 | 90 | | base_chain_config | Union[dict, str] | 默认配置:CHAIN_CONFIGS,可通过对该变量新增配置来实现自定义 | 91 | | base_role_config | Union[dict, str] | 默认配置:AGETN_CONFIGS,可通过对该变量新增配置来实现自定义 | 92 | -------------------------------------------------------------------------------- /sources/readme_docs/contribution/contribute_guide.md: -------------------------------------------------------------------------------- 1 | 非常感谢您对 Codefuse 项目感兴趣,我们非常欢迎您对 Codefuse 项目的各种建议、意见(包括批评)、评论和贡献。 2 | 3 | 您对 Codefuse 的各种建议、意见、评论可以直接通过 GitHub 的 Issues 提出。 4 | 5 | 参与 Codefuse 项目并为其作出贡献的方法有很多:代码实现、测试编写、流程工具改进、文档完善等等。任何贡献我们都会非常欢迎,并将您加入贡献者列表。 6 | 7 | 进一步,有了足够的贡献后,您还可以有机会成为 Codefuse 的 Committer。 8 | 9 | 任何问题,您都可以联系我们得到及时解答,联系方式包括微信、Gitter(即时聊天工具)、邮件等等。 10 | 11 | 12 | ## 初次接触 13 | 初次来到 Codefuse 社区,您可以: 14 | 15 | - 关注 Codefuse Github 代码库 16 | - 加入 Codefuse 相关的微信群,随时提问; 17 | 通过以上方式及时了解 Codefuse 项目的开发动态并为您关注的话题发表意见。 18 | 19 | 20 | ## 贡献方式 21 | 这份贡献指南并不仅仅关于编写代码。我们重视并感激在各个领域的帮助。以下是一些您可以贡献的方式: 22 | - 文档 23 | - Issue 24 | - PR 25 | 26 | ### 改进文档 27 | 文档是您了解 Codefuse 的最主要的方式,也是我们最需要帮助的地方!
28 | 29 | 浏览文档,可以加深您对 Codefuse 的了解,也可以帮助您理解 Codefuse 的功能和技术细节,如果您发现文档有问题,请及时联系我们; 30 | 31 | 如果您对改进文档的质量感兴趣,不论是修订一个页面的地址、更正一个链接,还是写一篇更优秀的入门文档,我们都非常欢迎! 32 | 33 | 我们的文档大多数是使用 markdown 格式编写的,您可以直接通过在 GitHub 中的 docs/ 中修改并提交文档变更。如果提交代码变更,可以参阅 Pull Request。 34 | 35 | ### 如果发现了一个 Bug 或问题 36 | 如果发现了一个 Bug 或问题,您可以直接通过 GitHub 的 Issues 提一个新的 Issue,我们会有人定期处理。详情见[Issue Template](#issue-template) 37 | 38 | 您也可以通过阅读分析代码自己修复(当然在这之前最好能和我们交流下,或许已经有人在修复同样的问题了),然后提交一个 Pull Request。 39 | 40 | ### 修改代码和提交PR(Pull Request) 41 | 您可以下载代码,编译安装,部署运行试一试(可以参考编译文档),看看是否与您预想的一样工作。如果有问题,您可以直接联系我们,提 Issue 或者通过阅读和分析源代码自己修复。详情见[Contribution](#contribution) 42 | 43 | 无论是修复 Bug 还是增加 Feature,我们都非常欢迎。如果您希望给 Codefuse 提交代码,您需要从 GitHub 上 fork 代码库至您的项目空间下,为您提交的代码创建一个新的分支,添加源项目为 upstream,并提交 PR。提交 PR 的方式可以参考文档 Pull Request。 44 | 45 | 46 | 47 | 48 | ## Issue Type 49 | Issue 分为三种类型: 50 | - Bug: 代码或者执行示例存在 bug 或缺少依赖导致无法正确执行 51 | - Documentation:文档表述存在争议、文档内容与代码不一致等 52 | - Feature:在当前代码基础上继续演进的新功能 53 | 54 | ## Issue Template 55 | ### Issue: Bug Template 56 | 57 | **提交Issue前的确认清单** 58 |
要先确认是否查看 document、issue、discussion(github 功能) 等公开的文档信息 59 | - 我搜索了Codefuse相关的所有文档。 60 | - 我使用GitHub搜索寻找了一个类似的问题,但没有找到。 61 | - 我为这个问题添加了一个非常描述性的标题。 62 | 63 | **系统信息** 64 |
确认系统,如 mac-xx、windows-xx、linux-xx 64 | 65 | **代码版本** 66 |
确认代码版本或者分支,master、release等 68 | 69 | **问题描述** 70 |
描述您碰到的问题,想要实现的事情、或代码执行Bug 71 | 72 | **代码示例** 73 |
附上你的执行代码和相关配置,以便能够快速介入进行复现 74 | 75 | **报错信息、日志** 76 |
执行上述代码示例后的报错日志和相关信息 77 | 78 | **相关依赖的模块** 79 |
以chatbot项目为例 80 | - connector 81 | - codechat 82 | - sandbox 83 | - ... 84 | 85 | 86 | ### Issue: Documentation Template 87 | **Issue with current documentation:** 88 |
请帮忙指出当前文档中的问题、错别字或者令人困惑的地方 89 | 90 | **Idea or request for content** 91 |
您觉得合理的文档表述方式应该是什么样的 92 | 93 | 94 | ### Issue: Feature Template 95 | **提交Issue前的确认清单** 96 |
要先确认是否查看 document、issue、discussion(github 功能) 等公开的文档信息 97 | - 我搜索了Codefuse相关的所有文档。 98 | - 我使用GitHub Issue搜索寻找了一个类似的问题,但没有找到。 99 | - 我为这个问题添加了一个非常描述性的标题。 100 | 101 | **功能描述** 102 |
描述这个功能作何用途 103 | 104 | **相关示例** 105 |
提供参考的文档、仓库等信息,如相关的 GitHub 仓库、论文或其他资源链接。 106 | 107 | **动机** 108 |
描述下这个feature的动机,为什么需要这个功能,提供足够的上下文信息帮助理解这个feature的诉求 109 | 110 | **Contribution** 111 |
你如何参与到这个feature的构建(如果参与的话) 112 | 113 | 114 | 115 | ## Contribution 116 | 117 | ### Pre-Checklist 118 | - 要先确认是否查看 document、issue、discussion(github 功能) 等公开的文档信息 119 | - 找到你想处理的GitHub问题。如果不存在,创建一个问题或草案PR,并请求维护者进行检查。 120 | - 检查相关的、相似的或重复的拉取请求。 121 | - 创建一个草案拉取请求。 122 | - 完成PR模板中的描述。 123 | - 链接任何被你的PR解决的GitHub问题。 124 | 125 | ### Description 126 | PR的描述信息,用简洁的语言表达PR完成的事情,具体规范见[Commit 格式规范](#commit-格式规范) 127 | 128 | ### Related Issue 129 | `#xx`(如有) 130 | 131 | ### Test Code with Result 132 | 如有必要,请提供相关的测试代码。 133 | 134 | 135 | ## Commit 格式规范 136 | Commit 分为“标题”和“内容”。原则上标题全部小写。内容首字母大写。 137 | 138 | 139 | ### 标题 140 | commit message的标题:`[<type>](<scope>) <subject> (#pr)` 141 | 142 | 143 | ### type 可选值 144 | 145 | 本次提交的类型,限定在以下类型(全小写) 146 | - fix:bug修复 147 | - feature:新增功能 148 | - feature-wip:开发中的功能,比如某功能的部分代码。 149 | - improvement:原有功能的优化和改进 150 | - style:代码风格调整 151 | - typo:代码或文档勘误 152 | - refactor:代码重构(不涉及功能变动) 153 | - performance/optimize:性能优化 154 | - test:单元测试的添加或修复 155 | - deps:第三方依赖库的修改 156 | - community:社区相关的修改,如修改 Github Issue 模板等。 157 | 158 | 几点说明: 159 | 160 | 如在一次提交中出现多种类型,需增加多个类型。 161 | 如代码重构带来了性能提升,可以同时添加 [refactor][optimize] 162 | 不得出现如上所列类型之外的其他类型。如有必要,需要将新增类型添加到这个文档中。 163 | 164 | ### scope 可选值 165 | 本次提交涉及的模块范围。因为功能模块繁多,在此仅罗列部分,后续根据需求不断完善。 166 |
以 chatbot 的框架为例: 167 | - connector 168 | - codechat 169 | - sandbox 170 | - ... 171 | 172 | 几点说明: 173 | 174 | 尽量使用列表中已存在的选项。如需添加,请及时更新本文档。 175 | 176 | ### subject 内容 177 | 标题需尽量清晰表明本次提交的主要内容。 178 | 179 | 180 | ## 示例 181 | coming soon(一个符合上述规范的示意标题,如:`[fix](sandbox) fix file upload error (#pr)`) 182 | 183 | 184 | ## Reference 185 | [doris-commit-format](https://doris.apache.org/zh-CN/community/how-to-contribute/commit-format-specification) -------------------------------------------------------------------------------- /sources/readme_docs/fastchat.md: -------------------------------------------------------------------------------- 1 | # 本地私有化/大模型接口接入 2 | 3 | 依托于开源的 LLM 与 Embedding 模型,本项目可实现基于开源模型的离线私有部署。此外,本项目也支持 OpenAI API 的调用。 4 | 5 | ## 本地私有化模型接入 6 | 7 |
模型地址配置示例,model_config.py配置修改 8 | 9 | ```bash 10 | # 建议:走 huggingface 接入,尽量使用 chat 模型;不要使用 base 模型,否则无法获取正确输出 11 | # 注意:当llm_model_dict和VLLM_MODEL_DICT同时存在时,优先启动VLLM_MODEL_DICT中的模型配置 12 | 13 | # llm_model_dict 配置接入示例如下 14 | 15 | # 1、若把模型放到 ~/codefuse-chatbot/llm_models 路径下 16 | # 若模型地址如下 17 | model_dir: ~/codefuse-chatbot/llm_models/THUDM/chatglm-6b 18 | 19 | # 参考配置如下 20 | llm_model_dict = { 21 | "chatglm-6b": { 22 | "local_model_path": "THUDM/chatglm-6b", 23 | "api_base_url": "http://localhost:8888/v1", # 修改为 fastchat 服务实际的 api_base_url 24 | "api_key": "EMPTY" 25 | } 26 | } 27 | 28 | VLLM_MODEL_DICT = { 29 | 'chatglm-6b': "THUDM/chatglm-6b", 30 | } 31 | 32 | # or 若模型地址如下 33 | model_dir: ~/codefuse-chatbot/llm_models/chatglm-6b 34 | llm_model_dict = { 35 | "chatglm-6b": { 36 | "local_model_path": "chatglm-6b", 37 | "api_base_url": "http://localhost:8888/v1", # 修改为 fastchat 服务实际的 api_base_url 38 | "api_key": "EMPTY" 39 | } 40 | } 41 | 42 | VLLM_MODEL_DICT = { 43 | 'chatglm-6b': "chatglm-6b", 44 | } 45 | 46 | # 2、若不想移动相关模型到 ~/codefuse-chatbot/llm_models 47 | # 需同时删除 `模型路径重置` 以下的相关代码,具体见model_config.py 48 | # 若模型地址如下 49 | model_dir: ~/THUDM/chatglm-6b 50 | # 参考配置如下 51 | llm_model_dict = { 52 | "chatglm-6b": { 53 | "local_model_path": "your personal dir/THUDM/chatglm-6b", 54 | "api_base_url": "http://localhost:8888/v1", # 修改为 fastchat 服务实际的 api_base_url 55 | "api_key": "EMPTY" 56 | } 57 | } 58 | 59 | VLLM_MODEL_DICT = { 60 | 'chatglm-6b': "your personal dir/THUDM/chatglm-6b", 61 | } 62 | ``` 63 | 64 | ```bash 65 | # 3、指定启动的模型服务,两者保持一致 66 | LLM_MODEL = "chatglm-6b" 67 | LLM_MODELs = ["chatglm-6b"] 68 | ``` 69 | 70 | ```bash 71 | # server_config.py 配置修改;若 LLM_MODELs 无多个模型配置,则不需要额外设置 72 | # 修改server_config.py#FSCHAT_MODEL_WORKERS的配置 73 | "model_name": {'host': DEFAULT_BIND_HOST, 'port': 20057} 74 | ``` 75 | 76 | 77 | 78 |
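上述 llm_model_dict、VLLM_MODEL_DICT 与 LLM_MODEL / LLM_MODELs 三处配置必须相互一致,否则服务启动后会找不到模型。下面给出一个配置自检的小脚本(仅为示意,非项目自带;假设 configs/model_config.py 已按上文完成配置,变量名以实际文件为准):

```python
# 配置一致性自检(示意脚本,非项目自带)
from configs.model_config import LLM_MODEL, LLM_MODELs, llm_model_dict, VLLM_MODEL_DICT

# LLM_MODEL 必须在 llm_model_dict 中有对应配置
assert LLM_MODEL in llm_model_dict, f"{LLM_MODEL} 未在 llm_model_dict 中配置"
# LLM_MODEL 与 LLM_MODELs 需保持一致
assert LLM_MODEL in LLM_MODELs, "LLM_MODEL 与 LLM_MODELs 不一致"
# 注意:若该模型同时出现在 VLLM_MODEL_DICT 中,会优先以 vllm 方式启动
if LLM_MODEL in VLLM_MODEL_DICT:
    print(f"{LLM_MODEL} 将优先通过 vllm 启动,模型路径:{VLLM_MODEL_DICT[LLM_MODEL]}")
```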
量化模型接入 79 | 80 | ```bash 81 | # 若需要支撑codellama-34b-int4模型,需要给fastchat打一个补丁 82 | cp examples/gptq.py ~/site-packages/fastchat/modules/gptq.py 83 | 84 | # 若需要支撑qwen-72b-int4模型,需要给fastchat打一个补丁 85 | cp examples/gptq.py ~/site-packages/fastchat/modules/gptq.py 86 | # 量化需修改llm_api.py的配置 87 | # examples/llm_api.py#559 取消注释 kwargs["gptq_wbits"] = 4 88 | ``` 89 | 90 | ## 公开大模型接口接入 91 | 92 | ```bash 93 | # model_config.py配置修改 94 | # ONLINE_LLM_MODEL 95 | # 其它接口开发来自于langchain-chatchat项目,缺少相关账号未经测试 96 | 97 | # 指定启动的模型服务,两者保持一致 98 | LLM_MODEL = "gpt-3.5-turbo" 99 | LLM_MODELs = ["gpt-3.5-turbo"] 100 | ``` 101 | 102 | 外部大模型接口接入示例 103 | 104 | ```bash 105 | # 1、实现新的模型接入类 106 | # 参考 ~/examples/model_workers/openai.py#ExampleWorker 107 | # 实现do_chat函数即可使用LLM的能力 108 | 109 | class XXWorker(ApiModelWorker): 110 | def __init__( 111 | self, 112 | *, 113 | controller_addr: str = None, 114 | worker_addr: str = None, 115 | model_names: List[str] = ["gpt-3.5-turbo"], 116 | version: str = "gpt-3.5", 117 | **kwargs, 118 | ): 119 | kwargs.update(model_names=model_names, controller_addr=controller_addr, worker_addr=worker_addr) 120 | kwargs.setdefault("context_len", 16384) #TODO 16K模型需要改成16384 121 | super().__init__(**kwargs) 122 | self.version = version 123 | 124 | def do_chat(self, params: ApiChatParams) -> Dict: 125 | ''' 126 | 执行Chat的方法,默认使用模块里面的chat函数。 127 | :params.messages : [ 128 | {"role": "user", "content": "hello"}, 129 | {"role": "assistant", "content": "hello"} 130 | ] 131 | :params.xx: 详情见 ApiChatParams 132 | 要求返回形式:{"error_code": int, "text": str} 133 | ''' 134 | return {"error_code": 500, "text": f"{self.model_names[0]}未实现chat功能"} 135 | 136 | 137 | # 最后在 ~/examples/model_workers/__init__.py 中完成注册 138 | # from .xx import XXWorker 139 | 140 | # 2、通过已有模型接入类完成接入 141 | # 或者直接使用已有的相关大模型类进行使用(缺少相关账号测试,欢迎大家测试后提PR) 142 | ``` 143 | 144 | 145 | ```bash 146 | # model_config.py#ONLINE_LLM_MODEL 配置修改 147 | # 填写专属模型的 version、api_base_url、api_key、provider(与上述类名一致) 148 | ONLINE_LLM_MODEL = { 149 | # 线上模型。请在server_config中为每个在线API设置不同的端口 150 | 151 | "openai-api": { 152 | "model_name": "gpt-3.5-turbo", 153 | "api_base_url": "https://api.openai.com/v1", 154 | "api_key": "", 155 | "openai_proxy": "", 156 | }, 157 | "example": { 158 | "version": "gpt-3.5", # 采用openai接口做示例 159 | "api_base_url": "https://api.openai.com/v1", 160 | "api_key": "", 161 | "provider": "ExampleWorker", 162 | }, 163 | } 164 | ``` 165 | 166 | ## 启动大模型服务 167 | ```bash 168 | # start llm-service(可选) 单独启动大模型服务 169 | python examples/llm_api.py 170 | ``` 171 | 172 | ```bash 173 | # 启动测试 174 | import openai 175 | # openai.api_key = "EMPTY" # Not support yet 176 | openai.api_base = "http://127.0.0.1:8888/v1" 177 | 178 | # 选择你启动的模型 179 | model = "example" 180 | 181 | # create a chat completion 182 | completion = openai.ChatCompletion.create( 183 | model=model, 184 | messages=[{"role": "user", "content": "Hello! What is your name? 
"}], 185 | max_tokens=100, 186 | ) 187 | # print the completion 188 | print(completion.choices[0].message.content) 189 | 190 | # 正确输出后则确认LLM可正常接入 191 | ``` 192 | 193 | 194 | 195 | or 196 | 197 | ```bash 198 | # model_config.py#USE_FASTCHAT 判断是否进行fastchat接入本地模型 199 | USE_FASTCHAT = "gpt" not in LLM_MODEL 200 | python start.py #221 自动执行 python llm_api.py 201 | ``` -------------------------------------------------------------------------------- /sources/readme_docs/roadmap-en.md: -------------------------------------------------------------------------------- 1 | 2 | Roadmap Overview 3 | 4 | - [x] Sandbox Environment ✅ 5 | - [x] Isolated sandbox environment for code execution ✅ 6 | - [x] File upload and download ✅ 7 | - [ ] Support for Java execution environment ⬜ 8 | - [x] Vector Database & Retrieval ✅ 9 | - [x] Task retrieval ✅ 10 | - [x] Tool retrieval ✅ 11 | - [x] Prompt Management ✅ 12 | - [x] Memory Management ✅ 13 | - [x] Multi Agent Framework ✅ 14 | - [ ] PRD (Product Requirement Document), system analysis, interface design ⬜ 15 | - [ ] Generate code based on requirement documents, system analysis, and interface design ⬜ 16 | - [ ] Automated testing, automated debugger ⬜ 17 | - [ ] Operations process integration (ToolLearning) ⬜ 18 | - [ ] Fully automated end-to-end process ⬜ 19 | - [x] Integration with LLM based on fastchat ✅ 20 | - [x] Integration with Text Embedding based on sentencebert ✅ 21 | - [x] Improved vector loading speed ✅ 22 | - [x] Connector ✅ 23 | - [x] React Mode based on langchain ✅ 24 | - [x] Tool retrieval completed with langchain ✅ 25 | - [ ] General Capability for Web Crawl ⬜ 26 | - [x] Technical documentation: Zhihu, CSDN, Alibaba Cloud Developer Forum, Tencent Cloud Developer Forum, etc. ✅ 27 | - [ ] Issue document ⬜ 28 | - [ ] SDK Library Document ⬜ 29 | 30 | v0.0 31 | - [x] Sandbox Environment ✅ 32 | - [x] Isolated sandbox environment for code execution ✅ 33 | - [x] Integration with LLM based on fastchat ✅ 34 | - [x] Integration with Text Embedding based on sentencebert ✅ 35 | - [x] General Capability for Web Crawl: Technical documentation: Zhihu, CSDN, Alibaba Cloud Developer Forum, Tencent Cloud Developer Forum, etc. ✅ 36 | 37 | Done 38 |
39 | 40 | v0.1 41 | - [x] Sandbox Environment: File upload and download ✅ 42 | - [x] Vector Database & Retrieval ✅ 43 | - [x] Task retrieval ✅ 44 | - [x] Tool retrieval ✅ 45 | - [x] Connector ✅ 46 | - [x] React Mode based on langchain ✅ 47 | - [x] Integration with Text Embedding based on sentencebert: Improved vector loading speed ✅ 48 | 49 | Done 50 |
51 | 52 | v0.2 53 | - [x] Prompt Management ✅ 54 | - [x] Memory Management ✅ 55 | - [x] Vector Database & Retrieval ✅ 56 | 57 | Done 58 |
59 | 60 | v0.3 61 | - [x] Sandbox Environment ✅ 62 | - [ ] Support for Java execution environment ⬜ 63 | - [x] Multi Agent ✅ 64 | - [ ] PRD (Product Requirement Document), system analysis, interface design ⬜ 65 | - [ ] Generate code based on requirement documents, system analysis, and interface design ⬜ 66 | - [ ] Automated testing, automated debugger ⬜ 67 | - [ ] Operations process integration (ToolLearning) ⬜ 68 | - [ ] Fully automated end-to-end process ⬜ 69 | - [x] General Capability for Web Crawl ✅ 70 | - [ ] Issue document ⬜ 71 | - [ ] SDK Library Document ⬜ 72 | 73 | DDL: 2024.12.31 74 |
-------------------------------------------------------------------------------- /sources/readme_docs/roadmap.md: -------------------------------------------------------------------------------- 1 | 2 | ## RoadMap 3 | 4 |
5 | 图片 6 |
7 |
8 | 9 | 10 | 完整路线 11 | - [x] Sandbox 环境 ✅ 12 | - [x] 环境隔离的sandbox环境与代码执行 ✅ 13 | - [x] 上传、下载文件 ✅ 14 | - [ ] 支持java执行环境 ⬜ 15 | - [x] Vector Database & Retrieval ✅ 16 | - [x] task retrieval ✅ 17 | - [x] tool retrieval ✅ 18 | - [x] Prompt Management ✅ 19 | - [x] Memory Management ✅ 20 | - [x] Multi Agent ✅ 21 | - [ ] PRD需求文档、系分、接口设计 ⬜ 22 | - [ ] 根据需求文档、系分、接口设计生成代码 ⬜ 23 | - [ ] 自动测试、自动debugger ⬜ 24 | - [ ] 运维流程接入(ToolLearning)⬜ 25 | - [ ] 全流程自动 ⬜ 26 | - [x] 基于fastchat接入LLM ✅ 27 | - [x] 基于sentencebert接入Text Embedding ✅ 28 | - [x] 向量加载速度提升 ✅ 29 | - [x] Connector ✅ 30 | - [x] 基于langchain的react模式 ✅ 31 | - [x] 基于langchain完成tool检索 ✅ 32 | - [x] Web Crawl 通用能力 ✅ 33 | - [x] 技术文档: 知乎、csdn、阿里云开发者论坛、腾讯云开发者论坛等 ✅ 34 | - [ ] issue document ⬜ 35 | - [ ] SDK Library Document ⬜ 36 | 37 | 38 |

39 | 40 | - v0.0 41 | - [x] Sandbox 环境 ✅ 42 | - [x] 环境隔离的sandbox环境与代码执行 ✅ 43 | - [x] 基于fastchat接入LLM ✅ 44 | - [x] 基于sentencebert接入Text Embedding ✅ 45 | - [x] Web Crawl 通用能力:技术文档: 知乎、csdn、阿里云开发者论坛、腾讯云开发者论坛等 ✅ 46 |
47 | - v0.1 48 | - [x] Sandbox 环境: 上传、下载文件 ✅ 49 | - [x] Vector Database & Retrieval ✅ 50 | - [x] task retrieval ✅ 51 | - [x] tool retrieval ✅ 52 | - [x] Connector ✅ 53 | - [x] 基于langchain的react模式 ✅ 54 | - [x] 基于sentencebert接入Text Embedding: 向量加载速度提升 ✅ 55 | 56 | Done 57 |
58 | 59 | - v0.2 60 | - [x] Prompt Management ✅ 61 | - [x] Memory Management ✅ 62 | - [x] Vector Database & Retrieval ✅ 63 | 64 | Done 65 |
66 | 67 | - v0.3 68 | - [x] Sandbox 环境 ✅ 69 | - [ ] 支持java执行环境 ⬜ 70 | - [x] Multi Agent Framework ✅ 71 | - [ ] PRD需求文档、系分、接口设计 ⬜ 72 | - [ ] 根据需求文档、系分、接口设计生成代码 ⬜ 73 | - [ ] 自动测试、自动debugger ⬜ 74 | - [ ] 运维流程接入(ToolLearning) ⬜ 75 | - [ ] 全流程自动 ⬜ 76 | - [x] Web Crawl 通用能力 ✅ 77 | - [ ] issue document ⬜ 78 | - [ ] SDK Library Document ⬜ 79 | 80 | DDL: 2024.12.31 81 |
82 | -------------------------------------------------------------------------------- /sources/readme_docs/start-en.md: -------------------------------------------------------------------------------- 1 | 2 | If you need to deploy a privatized model, please install the NVIDIA driver yourself. 3 | 4 | ### Preparation of Python environment 5 | - It is recommended to use conda to manage the python environment (optional) 6 | ```bash 7 | # Prepare conda environment 8 | conda create --name Codefusegpt python=3.9 9 | conda activate Codefusegpt 10 | ``` 11 | 12 | - Install related dependencies 13 | ```bash 14 | cd Codefuse-ChatBot 15 | pip install -r requirements.txt 16 | ``` 17 | 18 | ### Sandbox Environment Preparation 19 | - Windows Docker installation: 20 | [Docker Desktop for Windows](https://docs.docker.com/desktop/install/windows-install/) supports 64-bit versions of Windows 10 Pro with Hyper-V enabled (Hyper-V is not required for versions v1903 and above), or 64-bit versions of Windows 10 Home v1903 and above. 21 | - [【全面详细】Windows10 Docker安装详细教程](https://zhuanlan.zhihu.com/p/441965046) 22 | - [Docker 从入门到实践](https://yeasy.gitbook.io/docker_practice/install/windows) 23 | - [Handling 'Docker Desktop requires the Server service to be enabled'](https://blog.csdn.net/sunhy_csdn/article/details/106526991) 24 | - [安装wsl或者等报错提示](https://learn.microsoft.com/zh-cn/windows/wsl/install) 25 | 26 | - Linux Docker installation: 27 | Linux installation is relatively simple, please search Baidu/Google for installation guides. 28 | 29 | - Mac Docker installation 30 | - [Docker 从入门到实践](https://yeasy.gitbook.io/docker_practice/install/mac) 31 | 32 | ```bash 33 | # Build the image for the sandbox environment, see above for notebook version issues 34 | bash docker_build.sh 35 | ``` 36 | 37 | ### Model Download (Optional) 38 | 39 | If you need to use open-source LLM and Embedding models, you can download them from HuggingFace. 
40 | Here we take THUDM/chatglm2-6b and text2vec-base-chinese as examples: 41 | 42 | ``` 43 | # install git-lfs 44 | git lfs install 45 | 46 | # install LLM-model 47 | git lfs clone https://huggingface.co/THUDM/chatglm2-6b 48 | cp ~/THUDM/chatglm2-6b ~/codefuse-chatbot/llm_models/ 49 | 50 | # install Embedding-model 51 | git lfs clone https://huggingface.co/shibing624/text2vec-base-chinese 52 | cp ~/shibing624/text2vec-base-chinese ~/codefuse-chatbot/embedding_models/ 53 | ``` 54 | 55 | 56 | 57 | ### Basic Configuration 58 | 59 | ```bash 60 | # Modify the basic configuration for service startup 61 | cd configs 62 | cp model_config.py.example model_config.py 63 | cp server_config.py.example server_config.py 64 | 65 | # model_config#11~12 If you need to use the OpenAI interface, the OpenAI interface key 66 | os.environ["OPENAI_API_KEY"] = "sk-xxx" 67 | # Replace with the api_base_url you need 68 | os.environ["API_BASE_URL"] = "https://api.openai.com/v1" 69 | 70 | # vi model_config#LLM_MODEL The language model you need to choose 71 | LLM_MODEL = "gpt-3.5-turbo" 72 | LLM_MODELs = ["gpt-3.5-turbo"] 73 | 74 | # vi model_config#EMBEDDING_MODEL The private vector model you need to choose 75 | EMBEDDING_ENGINE = 'model' 76 | EMBEDDING_MODEL = "text2vec-base" 77 | 78 | # Example of vector model access, modify model_config#embedding_model_dict 79 | # If the model directory is: 80 | model_dir: ~/codefuse-chatbot/embedding_models/shibing624/text2vec-base-chinese 81 | # Configure as follows 82 | "text2vec-base": "shibing624/text2vec-base-chinese" 83 | 84 | 85 | # vi server_config#8~14, It's recommended to use a container to start the service to prevent environment conflicts when installing other dependencies using the codeInterpreter feature 86 | DOCKER_SERVICE = True 87 | # Whether to use a container sandbox 88 | SANDBOX_DO_REMOTE = True 89 | ``` 90 | 91 | 92 | 93 | ### Starting the Service 94 | 95 | By default, only the webui-related services are started, and fastchat is not started (optional). 96 | 97 | ```bash 98 | # If you need to support the codellama-34b-int4 model, you need to patch fastchat 99 | # cp examples/gptq.py ~/site-packages/fastchat/modules/gptq.py 100 | # Modify examples/llm_api.py#258 to kwargs={"gptq_wbits": 4}, 101 | 102 | # start llm-service (optional) 103 | python examples/llm_api.py 104 | ``` 105 | For more LLM integration methods, see[more details...](sources/readme_docs/fastchat-en.md) 106 |
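If you started the optional llm service above, you can give the OpenAI-compatible endpoint a quick smoke test before moving on. The snippet below is an illustrative sketch, not part of the repo; it assumes the default port 8888 used in this project's examples and the pre-1.0 `openai` client style used elsewhere in these docs:

```python
# Illustrative smoke test for a locally started llm service (not part of the repo)
import openai

# openai.api_key = "EMPTY"  # not supported yet, see the fastchat guide
openai.api_base = "http://127.0.0.1:8888/v1"  # default port in this project's examples

completion = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",  # replace with the model name you set in LLM_MODEL
    messages=[{"role": "user", "content": "ping"}],
    max_tokens=16,
)
print(completion.choices[0].message.content)  # any reply means the service is reachable
```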
107 | 108 | ```bash 109 | # After completing the server_config.py configuration, you can start with one click 110 | cd examples 111 | python start.py 112 | ``` -------------------------------------------------------------------------------- /sources/readme_docs/start.md: -------------------------------------------------------------------------------- 1 | 2 | 如需使用私有化模型部署,请自行安装 nvidia 驱动程序。 3 | 4 | ### python 环境准备 5 | 6 | - 推荐采用 conda 对 python 环境进行管理(可选) 7 | ```bash 8 | # 准备 conda 环境 9 | conda create --name devopsgpt python=3.9 10 | conda activate devopsgpt 11 | ``` 12 | 13 | - 安装相关依赖 14 | ```bash 15 | cd codefuse-chatbot 16 | # python=3.9,notebook用最新即可,python=3.8用notebook=6.5.6 17 | pip install -r requirements.txt 18 | ``` 19 | 20 | ### 沙盒环境准备 21 | - windows Docker 安装: 22 | [Docker Desktop for Windows](https://docs.docker.com/desktop/install/windows-install/) 支持 64 位版本的 Windows 10 Pro,且必须开启 Hyper-V(若版本为 v1903 及以上则无需开启 Hyper-V),或者 64 位版本的 Windows 10 Home v1903 及以上版本。 23 | 24 | - [【全面详细】Windows10 Docker安装详细教程](https://zhuanlan.zhihu.com/p/441965046) 25 | - [Docker 从入门到实践](https://yeasy.gitbook.io/docker_practice/install/windows) 26 | - [Docker Desktop requires the Server service to be enabled 处理](https://blog.csdn.net/sunhy_csdn/article/details/106526991) 27 | - [安装wsl或者等报错提示](https://learn.microsoft.com/zh-cn/windows/wsl/install) 28 | 29 | - Linux Docker 安装: 30 | Linux 安装相对比较简单,请自行 baidu/google 相关安装 31 | 32 | - Mac Docker 安装 33 | - [Docker 从入门到实践](https://yeasy.gitbook.io/docker_practice/install/mac) 34 | 35 | ```bash 36 | # 构建沙盒环境的镜像,notebook版本问题见上述 37 | bash docker_build.sh 38 | ``` 39 | 40 | ### 模型下载(可选) 41 | 42 | 如需使用开源 LLM 与 Embedding 模型可以从 HuggingFace 下载。 43 | 此处以 THUDM/chatglm2-6bm 和 text2vec-base-chinese 为例: 44 | 45 | ``` 46 | # install git-lfs 47 | git lfs install 48 | 49 | # install LLM-model 50 | git lfs clone https://huggingface.co/THUDM/chatglm2-6b 51 | cp ~/THUDM/chatglm2-6b ~/codefuse-chatbot/llm_models/ 52 | 53 | # install Embedding-model 54 | git lfs clone https://huggingface.co/shibing624/text2vec-base-chinese 55 | cp ~/shibing624/text2vec-base-chinese ~/codefuse-chatbot/embedding_models/ 56 | ``` 57 | 58 | 59 | ### 基础配置 60 | 61 | ```bash 62 | # 修改服务启动的基础配置 63 | cd configs 64 | cp model_config.py.example model_config.py 65 | cp server_config.py.example server_config.py 66 | 67 | # model_config#11~12 若需要使用openai接口,openai接口key 68 | os.environ["OPENAI_API_KEY"] = "sk-xxx" 69 | # 可自行替换自己需要的api_base_url 70 | os.environ["API_BASE_URL"] = "https://api.openai.com/v1" 71 | 72 | # vi model_config#LLM_MODEL 你需要选择的语言模型 73 | LLM_MODEL = "gpt-3.5-turbo" 74 | LLM_MODELs = ["gpt-3.5-turbo"] 75 | 76 | # vi model_config#EMBEDDING_MODEL 你需要选择的私有化向量模型 77 | EMBEDDING_ENGINE = 'model' 78 | EMBEDDING_MODEL = "text2vec-base" 79 | 80 | # 向量模型接入示例,修改 model_config#embedding_model_dict 81 | # 若模型地址为: 82 | model_dir: ~/codefuse-chatbot/embedding_models/shibing624/text2vec-base-chinese 83 | # 配置如下 84 | "text2vec-base": "shibing624/text2vec-base-chinese" 85 | 86 | # vi server_config#8~14, 推荐采用容器启动服务 87 | DOCKER_SERVICE = True 88 | # 是否采用容器沙箱 89 | SANDBOX_DO_REMOTE = True 90 | ``` 91 | 92 | ### 启动服务 93 | 94 | 默认只启动webui相关服务,未启动fastchat(可选)。 95 | ```bash 96 | # 若需要支撑codellama-34b-int4模型,需要给fastchat打一个补丁 97 | # cp examples/gptq.py ~/site-packages/fastchat/modules/gptq.py 98 | # examples/llm_api.py#258 修改为 kwargs={"gptq_wbits": 4}, 99 | 100 | # start llm-service(可选) 101 | python examples/llm_api.py 102 | ``` 103 | 更多LLM接入方法见[详情...](sources/readme_docs/fastchat.md) 104 |
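向量模型配置是否生效,也可以用下面的小脚本快速确认(仅为示意,非项目自带;假设模型已按上文放置,变量名以实际 model_config.py 为准):

```python
# 校验 embedding 模型路径(示意脚本,非项目自带)
import os

from configs.model_config import EMBEDDING_MODEL, embedding_model_dict

path = embedding_model_dict[EMBEDDING_MODEL]
# 若 model_config.py 中做了“模型路径重置”,此处取到的可能已是绝对路径
if not os.path.isabs(path):
    path = os.path.join(os.path.expanduser("~/codefuse-chatbot/embedding_models"), path)
print(path, "存在" if os.path.exists(path) else "不存在,请检查 model_config.py 配置")
```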
105 | 106 | ```bash 107 | # 完成server_config.py配置后,可一键启动 108 | cd examples 109 | python start.py 110 | ``` -------------------------------------------------------------------------------- /tests/file_test.py: -------------------------------------------------------------------------------- 1 | import requests, os, sys 2 | # src_dir = os.path.join( 3 | # os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 4 | # ) 5 | # sys.path.append(src_dir) 6 | 7 | # from dev_opsgpt.utils.common_utils import st_load_file 8 | # from dev_opsgpt.sandbox.pycodebox import PyCodeBox 9 | # from examples.file_fastapi import upload_file, download_file 10 | # from pathlib import Path 11 | # import httpx 12 | # from loguru import logger 13 | # from io import BytesIO 14 | 15 | 16 | # def _parse_url(url: str, base_url: str) -> str: 17 | # if (not url.startswith("http") 18 | # and base_url 19 | # ): 20 | # part1 = base_url.strip(" /") 21 | # part2 = url.strip(" /") 22 | # return f"{part1}/{part2}" 23 | # else: 24 | # return url 25 | 26 | # base_url: str = "http://127.0.0.1:7861" 27 | # timeout: float = 60.0, 28 | # url = "/files/upload" 29 | # url = _parse_url(url, base_url) 30 | # logger.debug(url) 31 | # kwargs = {} 32 | # kwargs.setdefault("timeout", timeout) 33 | 34 | # import asyncio 35 | # file = "./torch_test.py" 36 | # upload_filename = st_load_file(file, filename="torch_test.py") 37 | # asyncio.run(upload_file(upload_filename)) 38 | 39 | import requests 40 | url = "http://127.0.0.1:7862/sdfiles/download?filename=torch_test.py&save_filename=torch_test.py" 41 | r = requests.get(url) 42 | print(type(r.text)) -------------------------------------------------------------------------------- /tests/torch_test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | print(torch.__version__) 3 | print(torch.cuda.is_available()) -------------------------------------------------------------------------------- /web_crawler/main_test.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from utils.WebCrawler import WebCrawler 3 | 4 | logging.basicConfig(level=logging.INFO) 5 | 6 | if __name__ == '__main__': 7 | # 保存地址,分别保存html源文件、处理后text文件 8 | html_dir = "data/html/tmp_csdn_122513786_html.jsonl" 9 | text_dir = "data/text/tmp_csdn_122513786_text.jsonl" 10 | # 下载网页数据 11 | # https://www.langchain.asia/ 12 | # https://blog.csdn.net/weixin_43791511/article/details/122513786 13 | # https://zhuanlan.zhihu.com/p/645400277 14 | # https://www.aliyun.com/?utm_content=se_1014243503 15 | # 'https://cloud.tencent.com/developer/article/1004500?from=15425' 16 | base_url = 'https://www.langchain.asia/' 17 | # 爬取方式: 18 | ## requests和selenium两种方式;requests为简单请求静态网址html内容,js动态数据无法获取; 19 | ## selenium为模拟人行为请求,可获取全部html数据,但请求时间较长10-20s单网页,尽量设置5s以上的time_sleep。 20 | reptile_lib = "requests" 21 | method = "get" # 目前只支持get请求 22 | time_sleep = 4 # 每两次请求间隔时间s 23 | wc = WebCrawler() 24 | # 爬取base_url单网址 25 | wc.webcrawler_single(html_dir=html_dir, 26 | text_dir=text_dir, 27 | base_url=base_url, 28 | reptile_lib=reptile_lib, 29 | method=method, 30 | time_sleep=time_sleep 31 | ) 32 | 33 | # # 爬取base_url页面所有网址,限制target_url_prefix为前缀,默认target_url_prefix=base_url 34 | # wc.webcrawler_1_degree(html_dir=html_dir, 35 | # text_dir=text_dir, 36 | # base_url=base_url, 37 | # reptile_lib=reptile_lib, 38 | # method=method, 39 | # time_sleep=time_sleep 40 | # ) 41 | -------------------------------------------------------------------------------- 
/web_crawler/utils/DocTokenizer.py: -------------------------------------------------------------------------------- 1 | 2 | import re 3 | 4 | # 非打印字符 5 | NON_PRINTING_CHARS_RE = re.compile( 6 | f"[{''.join(map(chr, list(range(0, 32)) + list(range(127, 160))))}]" 7 | ) 8 | 9 | class DocTokenizer(): 10 | ''' 11 | 文档text处理器。 12 | ''' 13 | 14 | def __init__(self): 15 | pass 16 | 17 | def doc_process(self, text): 18 | ''' 19 | 去除多余换行、去掉每行非打印字符和开头结尾空格 20 | ''' 21 | # 去除多余换行 22 | text = self.remove_excess_lines(text) 23 | # 将文本拆分成行 24 | lines = text.split("\n") 25 | # 去掉每一行的开头和结尾的空格 26 | lines = [self.remove_non_printing_char_line( 27 | line.strip()) for line in lines] 28 | # 将行重新组合成文本 29 | text_new = "\n".join(lines) 30 | return text_new 31 | 32 | def remove_excess_lines(self, text): 33 | ''' 34 | 将2个以上的换行符替换为2个,html解析text时会产生大量换行\n 35 | ''' 36 | pattern = r'\n\n+' 37 | return re.sub(pattern, '\n\n', text) 38 | 39 | def remove_non_printing_char_line(self, text): 40 | ''' 41 | 去除每一行的非打印字符 42 | ''' 43 | return NON_PRINTING_CHARS_RE.sub("", text) 44 | -------------------------------------------------------------------------------- /web_crawler/utils/Html2Text.py: -------------------------------------------------------------------------------- 1 | import time 2 | from bs4 import BeautifulSoup 3 | import logging 4 | import json 5 | import os 6 | from tqdm import tqdm 7 | import re 8 | from .DocTokenizer import DocTokenizer 9 | 10 | logging.basicConfig(level=logging.INFO) 11 | 12 | 13 | class Html2Text(): 14 | '''从html中提取text文本内容。 15 | ''' 16 | 17 | def __init__(self): 18 | pass 19 | 20 | def html2text(self, 21 | target_content_tag={}, 22 | target_tag_list=[], 23 | html_dir=None, 24 | text_dir=None, 25 | mode="w", 26 | is_get_all_text=False 27 | ): 28 | ''' 29 | 从html中提取text文本内容,需要指定提取html中的tag标签。输入为地址,html文件保存在jsonl文件中,输出也需要指定地址。 30 | :param target_content_tag: html中正文content所在tag,字典格式限制长度为1,key为选中便签类型name/class/id,vaule为标签取值如div/title/article等 31 | :param target_tag_list: 指定提取html对应的tag文本,列表,每个元素都与target_content_tag格式相同 32 | :param is_get_all_text: True则将html页面所有text内容保存到all_text字典中;False不保存all_text 33 | :param html_dir: html数据地址,注意需要时jsonl格式,一行为一个json字典,有text/url/host_url三个字段 34 | :param text_dir: 将提取的text内容保存的地址,同样是jsonl格式。 35 | :return: None 36 | ''' 37 | assert isinstance(target_content_tag,dict), "target_content_tag请输入字典格式!" 38 | assert len(target_content_tag.keys()) <= 1,"target_content_tag属性字典只能指定唯一元素!" 39 | for _ in target_tag_list: 40 | assert isinstance(_, dict), "target_tag_list列表元素需要字典格式!" 41 | assert len(_.keys()) <= 1, "target_tag_list列表中的属性字典只能指定唯一元素!" 
42 | # 创建保存目录 43 | os.makedirs(os.path.dirname(text_dir), exist_ok=True) 44 | # 读取文件 45 | logging.info("读取文件中……") 46 | html_dict_list = self.read_html_jsonl(html_dir) 47 | url_nums = len(html_dict_list) 48 | logging.info("共{url_nums}个html网址".format(url_nums=url_nums)) 49 | # 循环处理每行html数据:html提取content正文、指定tag内容 50 | text_dict_list = [] 51 | for html_dict in tqdm(html_dict_list, mininterval=1): 52 | # 是否获取全部text内容 53 | text_dict = self.get_text_dict( 54 | html_dict=html_dict, 55 | target_content_tag=target_content_tag, 56 | target_tag_list=target_tag_list, 57 | is_get_all_text=is_get_all_text 58 | ) 59 | text_dict_list.append(text_dict) 60 | logging.info("保存html提取的text内容……") 61 | self.save_text_jsonl(json_list=text_dict_list, 62 | file_path=text_dir, 63 | mode=mode) 64 | logging.info("保存成功!地址:%s" % text_dir) 65 | 66 | def get_text_dict(self, 67 | html_dict={}, 68 | target_content_tag={}, 69 | target_tag_list=[], 70 | is_get_all_text=True 71 | ): 72 | '''{"name":"div"} 73 | 提取html网页字符中的纯文本内容,采用BeautifulSoup.get_text()获取全部text文本,target_tag_list指定要提取文本的标签。 74 | :param html_dict: 网页返回的全部文本内容response.text和url 75 | :param target_content_tag: html中正文content所在tag,字典格式限制长度为1,key为选中便签类型name/class/id,vaule为标签取值如div/title/article等 76 | :param target_tag_list: 指定提取html对应的tag文本,列表,每个元素都与target_content_tag格式相同 77 | :return: text_content:{} 提取的text文本内容 78 | ''' 79 | # 格式定义 80 | assert isinstance(target_content_tag,dict), "target_content_tag请输入字典格式!" 81 | assert len(target_content_tag.keys()) <= 1,"target_content_tag属性字典只能指定唯一元素!" 82 | for _ in target_tag_list: 83 | assert isinstance(_, dict), "target_tag_list列表元素需要字典格式!" 84 | assert len(_.keys()) <= 1, "target_tag_list列表中的属性字典只能指定唯一元素!" 85 | # 提取html的内容 86 | html_content = html_dict['text'] 87 | url = html_dict['url'] 88 | host_url = html_dict['host_url'] 89 | # 创建BeautifulSoup对象 90 | soup = BeautifulSoup(html_content, 'html.parser') 91 | # 处理pre引用代码块,添```引用 92 | pre_tags = soup.find_all('code') 93 | for pre_tag in pre_tags: 94 | pre_tag.string = '\n```code\n' + pre_tag.get_text() + '\n```\n' 95 | # 提取HTML中的文本内容 96 | doc_tokenizer = DocTokenizer() 97 | text_dict = {} 98 | text_dict['url'] = url 99 | text_dict['host_url'] = host_url 100 | # 提取网页的title,不存在则置空 101 | try: 102 | text_dict['title'] = soup.title.text 103 | except: 104 | text_dict['title'] = None 105 | # 是否提取全部text,不区分标签 106 | if is_get_all_text: 107 | all_text = soup.get_text(separator="", strip=False) 108 | text_dict['all_text'] = doc_tokenizer.doc_process(all_text) 109 | # 提取正文tag,可以按照标签的class提取,或按照tag名提取 110 | if target_content_tag: 111 | text_dict["content"] = self.soup_find_all_text(soup=soup,doc_tokenizer=doc_tokenizer,attrs=target_content_tag) 112 | # 提取html中tag内容,每个tag独立作为字段保存 113 | for target_tag in target_tag_list: 114 | if target_tag: 115 | # 提取目标tag名 116 | tag_ = list(target_tag.values())[0] 117 | # 提取目标tag内容 118 | text_dict[tag_] = self.soup_find_all_text(soup,doc_tokenizer,attrs=target_tag) 119 | return text_dict 120 | 121 | def soup_find_all_text(self,soup,doc_tokenizer,attrs): 122 | assert isinstance(attrs,dict), "attrs请输入字典格式!" 123 | assert len(attrs.keys()) == 1,"attrs属性字典只能指定唯一元素!" 
124 | if list(attrs.keys())[0]=="name": 125 | _tags = soup.find_all(name=attrs["name"]) 126 | else: 127 | _tags = soup.find_all(attrs=attrs) 128 | tags_text = "" 129 | for _tag in _tags: 130 | tag_text = _tag.get_text(separator="", strip=False) 131 | tag_text = doc_tokenizer.doc_process(tag_text) 132 | tags_text += tag_text.strip() + "\n\n" 133 | return tags_text 134 | 135 | def read_html_jsonl(self, file_name=None): 136 | ''' 137 | 读取html的josnl文件 138 | ''' 139 | html_dict_list = [] 140 | with open(file_name, "r", encoding="utf-8") as f: 141 | for k, line in enumerate(f): 142 | line = json.loads(line) 143 | html_dict_list.append(line) 144 | return html_dict_list 145 | 146 | def save_text_jsonl(self, json_list=[], file_path=None, mode="w"): 147 | ''' 148 | 将json_list保存成jsonl格式文件 149 | ''' 150 | with open(file_path, mode, encoding="utf-8") as f: 151 | for line in json_list: 152 | f.write(json.dumps(line, ensure_ascii=False) + "\n") 153 | -------------------------------------------------------------------------------- /web_crawler/utils/WebCrawler.py: -------------------------------------------------------------------------------- 1 | from .WebHtmlExtractor import WebHtmlExtractor 2 | import logging 3 | from .Html2Text import Html2Text 4 | 5 | 6 | class WebCrawler(): 7 | '''爬取url内容,分为requests和selenium两种方式;selenium需提前下载chrome浏览器与chromedriver,并配置路径。 8 | 安装selenium模拟访问网站,需安装并调试chromedriver,版本与电脑chrome需一致,且正确配置路径。mac电脑路径:打开finder,再按command+shift+G进入/usr/local/bin;windows可配置路径。 9 | ''' 10 | 11 | def __init__(self): 12 | pass 13 | 14 | def webcrawler_single(self, 15 | html_dir=None, 16 | text_dir=None, 17 | base_url=None, 18 | reptile_lib="requests", 19 | method="get", 20 | mode="w", 21 | time_sleep=4, 22 | time_out=10, 23 | target_content_tag={}, 24 | target_tag_list=[] 25 | ): 26 | ''' 27 | 爬取base_url页网址,分别保存html与解析处理的text 28 | :param html_dir: 保存html地址,jsonl文件 29 | :param text_dir: 将提取的text内容保存的地址,同样是jsonl格式。 30 | :param base_url: 目标网址 31 | :param reptile_lib: requests和selenium两种方式;requests为简单请求静态网址html内容,js动态数据无法获取;selenium为模拟人行为请求,可获取全部html数据,但请求时间较长,尽量设置5s以上的time_sleep,selenium需提前下载chrome浏览器与chromedriver,并配置路径。。 32 | :param method: requests请求有get/post两种,selenium只支持get 33 | :param time_sleep: 等待时间s 34 | :param time_out: 超时时长s 35 | :param target_content_tag: html中正文content所在tag,字典格式限制长度为1,key为选中便签类型name/class/id,vaule为标签取值如div/title/article等 36 | :param target_tag_list: 指定提取html对应的tag文本,列表,每个元素都与target_content_tag格式相同 37 | :return: None 38 | ''' 39 | assert method=="get","只支持get请求!" 
40 | # 发送请求获取base_url结果:包含相关页面全部网址 41 | whe = WebHtmlExtractor(time_sleep=time_sleep, time_out=time_out) 42 | whe.save_url_html(base_url=base_url, reptile_lib=reptile_lib, method=method, html_dir=html_dir, mode=mode) 43 | # 读取文件 44 | h2t = Html2Text() 45 | # 读取并处理,只按照指定tag获取text,不获取全部text内容 46 | h2t.html2text(target_content_tag=target_content_tag, 47 | target_tag_list=target_tag_list, 48 | html_dir=html_dir, 49 | text_dir=text_dir, 50 | mode="w", 51 | is_get_all_text=True) 52 | 53 | def webcrawler_batch(self, 54 | html_dir=None, 55 | text_dir=None, 56 | target_url_list=[], 57 | reptile_lib="requests", 58 | method="get", 59 | mode="w", 60 | time_sleep=4, 61 | time_out=10, 62 | target_content_tag={}, 63 | target_tag_list=[] 64 | ): 65 | ''' 66 | 爬取base_url页网址,分别保存html与解析处理的text 67 | :param html_dir: 保存html地址,jsonl文件 68 | :param text_dir: 将提取的text内容保存的地址,同样是jsonl格式。 69 | :param base_url: 目标网址 70 | :param reptile_lib: requests和selenium两种方式;requests为简单请求静态网址html内容,js动态数据无法获取;selenium为模拟人行为请求,可获取全部html数据,但请求时间较长,尽量设置5s以上的time_sleep,selenium需提前下载chrome浏览器与chromedriver,并配置路径。。 71 | :param method: requests请求有get/post两种,selenium只支持get 72 | :param time_sleep: 等待时间s 73 | :param time_out: 超时时长s 74 | :param target_content_tag: html中正文content所在tag,字典格式限制长度为1,key为选中便签类型name/class/id,vaule为标签取值如div/title/article等 75 | :param target_tag_list: 指定提取html对应的tag文本,列表,每个元素都与target_content_tag格式相同 76 | :return: None 77 | ''' 78 | assert method=="get","只支持get请求!" 79 | # 发送请求获取base_url结果:包含相关页面全部网址 80 | whe = WebHtmlExtractor(time_sleep=time_sleep, time_out=time_out) 81 | # 循环调用 82 | try: 83 | for k,url in enumerate(target_url_list): 84 | mode_batch = mode if k==0 else "a" 85 | whe.save_url_html(base_url=url, reptile_lib=reptile_lib, method=method, html_dir=html_dir, mode=mode_batch) 86 | except: 87 | logging.warning("爬取停止!") 88 | # html中提取text信息,并对doc做基础处理 89 | h2t = Html2Text() 90 | h2t.html2text(target_content_tag=target_content_tag, 91 | target_tag_list=target_tag_list, 92 | html_dir=html_dir, 93 | text_dir=text_dir, 94 | mode="w", 95 | is_get_all_text=True) 96 | 97 | def webcrawler_1_degree(self, 98 | html_dir=None, 99 | text_dir=None, 100 | base_url=None, 101 | reptile_lib="requests", 102 | method="get", 103 | mode="w", 104 | time_sleep=4, 105 | time_out=10, 106 | target_content_tag={}, 107 | target_tag_list=[], 108 | target_url_prefix=None 109 | ): 110 | ''' 111 | 爬取base_url页面所有网址,限制target_url_prefix为前缀,默认target_url_prefix=base_url,分别保存html与解析处理的text。 112 | :param html_dir: 保存html地址,jsonl文件 113 | :param text_dir: 将提取的text内容保存的地址,同样是jsonl格式。 114 | :param base_url: 目标站点 115 | :param target_url_prefix: 基于base_url网址,1度跳转链接 且 以target_url_prefix开头。默认为target_url_prefix=base_url(请求返回的当前网址url,中文会自动转为编码)。 116 | :param reptile_lib: requests和selenium两种方式;requests为简单请求静态网址html内容,js动态数据无法获取;selenium为模拟人行为请求,可获取全部html数据,但请求时间较长,尽量设置5s以上的time_sleep。 117 | :param method: requests请求有get/post两种,selenium只支持get 118 | :param time_sleep: 等待时间s 119 | :param time_out: 超时时长s 120 | :param target_content_tag: html中正文content所在tag,字典格式限制长度为1,key为选中便签类型name/class/id,vaule为标签取值如div/title/article等 121 | :param target_tag_list: 指定提取html对应的tag文本,列表,每个元素都与target_content_tag格式相同 122 | :return: None 123 | ''' 124 | assert method == "get", "只支持get请求!" 
125 | # 发送请求获取base_url结果:包含相关页面全部网址 126 | whe = WebHtmlExtractor(time_sleep=time_sleep, time_out=time_out) 127 | try: 128 | whe.save_1_jump_url_in_base(base_url=base_url, target_url_prefix=target_url_prefix, reptile_lib=reptile_lib, 129 | method=method, html_dir=html_dir, mode=mode) 130 | except: 131 | logging.warning("爬取停止!") 132 | # 读取文件 133 | h2t = Html2Text() 134 | # 读取并处理,只按照指定tag获取text,不获取全部text内容 135 | h2t.html2text(target_content_tag=target_content_tag, 136 | target_tag_list=target_tag_list, 137 | html_dir=html_dir, 138 | text_dir=text_dir, 139 | mode="w", 140 | is_get_all_text=True) 141 | --------------------------------------------------------------------------------
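补充:main_test.py 中的示例没有用到 target_content_tag / target_tag_list 两个参数,下面给出一个示意用法(非仓库自带示例;其中 class 取值 "article" 仅为假设,请按目标网页的实际结构填写;需在 web_crawler 目录下运行):

```python
# target_content_tag / target_tag_list 用法示意(非仓库自带示例)
from utils.WebCrawler import WebCrawler

wc = WebCrawler()
wc.webcrawler_single(
    html_dir="data/html/tmp_demo_html.jsonl",
    text_dir="data/text/tmp_demo_text.jsonl",
    base_url="https://www.langchain.asia/",
    reptile_lib="requests",
    method="get",
    time_sleep=4,
    # 正文所在标签:字典只允许一个键,键为 name/class/id 之一
    target_content_tag={"class": "article"},
    # 额外单独抽取的标签,每个元素格式与 target_content_tag 相同
    target_tag_list=[{"name": "title"}],
)
```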