├── .github
    └── workflows
    │   ├── check.yml
    │   ├── milestone_release.yml
    │   └── release.yml
├── .gitignore
├── LICENSE
├── README.md
├── _quarto.yml
├── acm-sig-proceedings.csl
├── agent_intro.qmd
├── assistants.qmd
├── autogen.qmd
├── case1.qmd
├── code
    ├── functions_desc.py
    ├── langchain
    │   └── utils
    │   │   └── call_function.py
    ├── lc_010
    │   ├── test_conversation_agent.py
    │   ├── test_doc_agent.py
    │   ├── test_struct_agent.py
    │   ├── test_zero_shot_agent.py
    │   └── tools.py
    ├── retrieval_prompt.py
    ├── sk
    │   └── plugins
    │   │   ├── Math.py
    │   │   └── __init__.py
    ├── structed_chat_agent_base.py
    ├── test_chat_coversation_agent.py
    ├── test_chat_coversation_agent_1.py
    ├── test_conversation_agent.py
    ├── test_docstore_agent.py
    ├── test_embedding_query.py
    ├── test_embedding_visualization.py
    ├── test_ernie.py
    ├── test_ernie_fc.py
    ├── test_langchain_rag.py
    ├── test_milvus_embedding.py
    ├── test_qianfanendpoint.py
    ├── test_retrievalQA.py
    ├── test_sk_acrosticpoetry.py
    ├── test_sk_planner.py
    ├── test_wx_qianfan.py
    ├── test_zero_shot_agent copy.py
    └── test_zero_shot_agent.py
├── cover.png
├── diabetologia.csl
├── embedchain_intro.qmd
├── embedding.qmd
├── favicon.png
├── getuploadurl.sh
├── glossary.qmd
├── hallucination.qmd
├── images
    ├── LLMTree.jpeg
    ├── RAG_arch.png
    ├── RATG_overview.jpg
    ├── agent_concept.png
    ├── agent_seq.png
    ├── agent_tokens_demo.jpg
    ├── autogen_2.webp
    ├── autogen_agentchat.png
    ├── bd2023.jpg
    ├── bj_autumn.jpg
    ├── chain.png
    ├── code_freq_lc.jpg
    ├── code_freq_sk.jpg
    ├── copilot_stack_1.png
    ├── copilot_stack_2.png
    ├── diagram-assistant.webp
    ├── diagram-status.png
    ├── ernie_calc.jpg
    ├── fc_ls.png
    ├── function_calling_1.png
    ├── langchain_commit_counts.jpg
    ├── langchain_core_0113_qianfan_error.png
    ├── langchain_io.png
    ├── langchain_io_example.jpeg
    ├── langflow-demo.gif
    ├── langflow-demo.jpg
    ├── lc_milvus_coll_demo.jpg
    ├── llm_chatgpt_wb_hs.jpg
    ├── llm_decision_flow.jpg
    ├── llm_in_action_ways.png
    ├── milvus_cli_2.jpg
    ├── milvus_cli_case.jpg
    ├── obtuse_angle.jpg
    ├── pae_agent_seq.png
    ├── pe_arch.png
    ├── pe_wx_1.jpg
    ├── pe_wx_2.jpg
    ├── progandagents.png
    ├── rag_langchain_overview.jpeg
    ├── react.png
    ├── sk_application_process.jpg
    ├── sk_kernel.png
    ├── token_openai_demo_1.jpg
    ├── token_openai_demo_2.jpg
    ├── token_openai_demo_3.jpg
    ├── token_openai_demo_c_1.jpg
    ├── token_openai_demo_c_2.jpg
    ├── token_openai_demo_c_3.jpg
    ├── treand_lm.jpg
    ├── trend_llm.jpg
    ├── vector_stores.jpeg
    ├── vfe.png
    ├── waizg.jpg
    ├── weather_ernie.jpg
    ├── weather_gpt.jpg
    └── wenxi_tokenizer.jpg
├── index.qmd
├── langchain_agent_chat.qmd
├── langchain_agent_fc.qmd
├── langchain_agent_pae.qmd
├── langchain_agent_react.qmd
├── langchain_function_call.qmd
├── langchain_install.qmd
├── langchain_intro.qmd
├── langchain_openai_assistant.qmd
├── langchain_retrieval.qmd
├── langchain_serialization.qmd
├── langflow_intro.qmd
├── langsmith_intro.qmd
├── llm_intro.qmd
├── milvus_install.qmd
├── preface.qmd
├── prompt_engineer.qmd
├── rag_intro.qmd
├── references.bib
├── references.qmd
├── semantickernel_intro.qmd
├── semantickernel_plugins.qmd
├── semantickernel_prompt.qmd
├── semantickernel_promptflow.qmd
├── sft.qmd
├── theme.scss
└── tokens.qmd


/.github/workflows/check.yml:
--------------------------------------------------------------------------------
 1 | name: Build Book
 2 | # Renders the book on demand and on every push to main.
 3 | on:
 4 |   workflow_dispatch:
 5 |   push:
 6 |     branches:
 7 |       - main
 8 | 
 9 | jobs:
10 |   build:
11 |     name: Build
12 |     runs-on: ubuntu-latest
13 |     env:
14 |       MY_SECRET: ${{ secrets.GH_ACCESS_TOKEN }}
15 |     # Quarto is installed first, then the sources are checked out and rendered.
16 |     steps:
17 |       - name: Setup Quarto
18 |         uses: quarto-dev/quarto-actions/setup@v2
19 |         with:
20 |           version: "1.3.450"
21 |       - name: Checkout main source
22 |         uses: actions/checkout@v4
23 |       - name: Install Quarto Extensions
24 |         working-directory: ${{ github.workspace }}
25 |         run: |
26 |           quarto add --no-prompt quarto-ext/include-code-files
27 |           quarto install --no-prompt tinytex
28 |       - name: Render and Publish
29 |         run: |
30 |           quarto render
31 | 


--------------------------------------------------------------------------------
/.github/workflows/milestone_release.yml:
--------------------------------------------------------------------------------
 1 | # Generate release notes whenever a milestone is closed.
 2 | on:
 3 |   milestone:
 4 |     types: [closed]
 5 | name: Milestone Closure
 6 | jobs:
 7 |   create-release-notes:
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - uses: actions/checkout@v4  # release tag, as in check.yml/release.yml, not a moving branch
11 |       - name: Create Release Notes
12 |         uses: docker://decathlon/release-notes-generator-action:2.0.1
13 |         env:
14 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
15 |           OUTPUT_FOLDER: temp_release_notes  # NOTE(review): no later step consumes this folder - confirm intent
16 |           USE_MILESTONE_TITLE: "true"
17 | 


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: Build & Release Book
 2 | on:
 3 |   workflow_dispatch:  # manual trigger only; the release version is supplied as an input
 4 |     inputs:
 5 |       version:
 6 |         description: 'The release version of the gitbook, such as v1.1.1'
 7 |         required: true
 8 |         default: 'v1.1.0'
 9 | 
10 | env:
11 |   DEBIAN_FRONTEND: noninteractive
12 | 
13 | jobs:
14 |   get-upload-url:  # creates the GitHub release and exposes its asset upload URL
15 |     runs-on: ubuntu-latest
16 |     steps:
17 |       - uses: johnyherangi/create-release-notes@main
18 |         id: create-release-notes
19 |         env:
20 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
21 |       - uses: actions/create-release@v1
22 |         id: create-release
23 |         env:
24 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
25 |         with:
26 |           tag_name: ${{ github.event.inputs.version }}
27 |           release_name: Build for ${{ github.event.inputs.version }}
28 |           body: ${{ steps.create-release-notes.outputs.release-notes }}
29 |     outputs:
30 |       upload-url: ${{ steps.create-release.outputs.upload_url }}
31 | 
32 |   build:  # renders the book, packages the output directory, attaches the tarball to the release
33 |     needs: get-upload-url
34 |     name: Build & Release
35 |     runs-on: ubuntu-latest
36 |     steps:
37 |       - name: Setup Quarto
38 |         uses: quarto-dev/quarto-actions/setup@v2
39 |         with:
40 |           version: 1.3.450
41 |       - name: Checkout main source
42 |         uses: actions/checkout@v4
43 |       - name: Install Quarto Extensions
44 |         working-directory: ${{ github.workspace }}
45 |         run: |
46 |           quarto add --no-prompt quarto-ext/include-code-files
47 |           quarto install --no-prompt tinytex
48 |       - name: Render and Publish
49 |         run: |
50 |           quarto render
51 |       - name: package the book
52 |         working-directory: ${{ github.workspace }}
53 |         run: |
54 |           mv public LLM_in_Action && tar czvf LLM_in_Action.tar.gz LLM_in_Action/*
55 |           ls .
56 |       - uses: actions/upload-release-asset@v1
57 |         env:
58 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
59 |         with:
60 |           upload_url: ${{ needs.get-upload-url.outputs.upload-url }}
61 |           asset_path: LLM_in_Action.tar.gz
62 |           asset_name: LLM_in_Action.tar.gz
63 |           asset_content_type: application/octet-stream
64 |   trigger:  # notifies the site repository through a repository_dispatch event
65 |     needs: build
66 |     name: Trigger the wangwei1237.github.io_src deploy
67 |     runs-on: ubuntu-latest
68 |     steps:
69 |       - name: trigger the wangwei1237.github.io_src deploy
70 |         env:
71 |           # Passed through the environment so values are never spliced into the script text.
72 |           DISPATCH_TOKEN: ${{ secrets.TOKEN }}
73 |           RELEASE_VERSION: ${{ github.event.inputs.version }}
74 |         run: |
75 |           # workflow_dispatch events carry no head_commit, so the release version is reported as msg.
76 |           payload=$(jq -n --arg from "$GITHUB_REPOSITORY" --arg msg "Release $RELEASE_VERSION" \
77 |             '{event_type: "update", client_payload: {from: $from, msg: $msg}}')
78 |           # --fail makes the step fail when the API rejects the dispatch (e.g. expired token).
79 |           curl --fail -X POST \
80 |             -H "Accept: application/vnd.github.v3+json" \
81 |             -H "Authorization: token $DISPATCH_TOKEN" \
82 |             https://api.github.com/repos/wangwei1237/wangwei1237.github.io_src/dispatches \
83 |             -d "$payload"
84 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Prerequisites (*.d dependency files) and macOS Finder metadata
 2 | *.d
 3 | .DS_Store
 4 | 
 5 | # Editor / project configuration
 6 | .vscode
 7 | 
 8 | # Rendered book output (output-dir in _quarto.yml)
 9 | public/*
10 | # Quarto working directory and extensions (presumably those added by `quarto add` in CI - confirm)
11 | /.quarto/
12 | _extensions/*
13 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LLM_in_Action
2 | Large Language Model in Action
3 | 


--------------------------------------------------------------------------------
/_quarto.yml:
--------------------------------------------------------------------------------
  1 | project:
  2 |   type: book
  3 |   output-dir: public  # rendered site; the release workflow packages this directory
  4 | 
  5 | lang: zh
  6 | 
  7 | book:
  8 |   title: "Large Language Model in Action"
  9 |   favicon: favicon.png
 10 |   open-graph: true
 11 |   author: "VII QA"
 12 |   date: today
 13 |   date-format: "YYYY-MM-DD"
 14 |   cover-image: cover.png
 15 |   repo-url: https://github.com/wangwei1237/LLM_in_Action
 16 |   repo-actions: [edit, issue]
 17 |   search: true
 18 |   page-footer: Copyright VII-QA. All Rights Reserved.
 19 |   chapters:  # parts and chapters in reading order
 20 |     - index.qmd
 21 |     - preface.qmd
 22 |     - part: "LLM 基本概念"
 23 |       chapters:
 24 |         - llm_intro.qmd
 25 |         - tokens.qmd
 26 |         - embedding.qmd
 27 |         - sft.qmd
 28 |         - prompt_engineer.qmd
 29 |         - hallucination.qmd
 30 |         - rag_intro.qmd
 31 |         - agent_intro.qmd
 32 |         - assistants.qmd
 33 |     - part: "LangChain"
 34 |       chapters:
 35 |         - langchain_intro.qmd
 36 |         - langchain_serialization.qmd
 37 |         - langchain_retrieval.qmd
 38 |         - langchain_function_call.qmd
 39 |         - langchain_agent_react.qmd
 40 |         - langchain_agent_chat.qmd
 41 |         - langchain_openai_assistant.qmd
 42 |         - langchain_agent_fc.qmd
 43 |         - langchain_agent_pae.qmd
 44 |         - langsmith_intro.qmd
 45 |     - part: "Semantic Kernel"
 46 |       chapters:
 47 |         - semantickernel_intro.qmd
 48 |         - semantickernel_prompt.qmd
 49 |         - semantickernel_plugins.qmd
 50 |         - semantickernel_promptflow.qmd
 51 |     - part: "其他框架"
 52 |       chapters:
 53 |         - autogen.qmd
 54 |         - embedchain_intro.qmd
 55 |         - langflow_intro.qmd
 56 |     - part: "Case Study"
 57 |       chapters:
 58 |         - case1.qmd
 59 |     - references.qmd
 60 |   appendices:
 61 |     - glossary.qmd
 62 |     - langchain_install.qmd
 63 |     - milvus_install.qmd
 64 |   site-url: https://wangwei1237.github.io/LLM_in_Action/
 65 |   navbar:
 66 |     logo: favicon.png
 67 |     search: true
 68 |     left:
 69 |       - text: "博客"
 70 |         icon: "house-fill"
 71 |         href: "https://wangwei1237.github.io/"
 72 |       - text: "书籍"
 73 |         icon: "bookshelf"
 74 |         menu:
 75 |           - text: "动手学深度学习"
 76 |             href: "https://zh.d2l.ai/"
 77 |           - text: "Learn Prompting"
 78 |             href: "https://learnprompting.org/zh-Hans/docs/intro"
 79 |           - text: "Stable Diffusion 提示词手册"
 80 |             href: "https://pan.baidu.com/s/1dciwgbhO-lfKyRo8lOqW9Q?pwd=cm9q"
 81 |       - text: "关于"
 82 |         icon: "person-badge-fill"
 83 |         href: "https://wangwei1237.github.io/aboutme/"
 84 | 
 85 | bibliography: references.bib
 86 | csl: acm-sig-proceedings.csl
 87 | 
 88 | format:
 89 |   html:
 90 |     theme:
 91 |       light: [cosmo, theme.scss]
 92 |     code-copy: true
 93 |     code-overflow: wrap
 94 |     cover-image: cover.png  # same image as book.cover-image above
 95 | 
 96 | comments:
 97 |   utterances:
 98 |     repo: wangwei1237/LLM_in_Action
 99 |     label: comment
100 | 


--------------------------------------------------------------------------------
/acm-sig-proceedings.csl:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <style xmlns="http://purl.org/net/xbiblio/csl" class="in-text" version="1.0" demote-non-dropping-particle="sort-only" default-locale="en-US">
  3 |   <info>
  4 |     <title>ACM SIG Proceedings ("et al." for 3+ authors)</title>
  5 |     <id>http://www.zotero.org/styles/acm-sig-proceedings</id>
  6 |     <link href="http://www.zotero.org/styles/acm-sig-proceedings" rel="self"/>
  7 |     <link href="https://www.acm.org/publications/authors/reference-formatting" rel="documentation"/>
  8 |     <author>
  9 |       <name>Naeem Esfahani</name>
 10 |       <email>nesfaha2@gmu.edu</email>
 11 |       <uri>http://mason.gmu.edu/~nesfaha2/</uri>
 12 |     </author>
 13 |     <contributor>
 14 |       <name>Chris Horn</name>
 15 |       <email>chris.horn@securedecisions.com</email>
 16 |     </contributor>
 17 |     <contributor>
 18 |       <name>Patrick O'Brien</name>
 19 |     </contributor>
 20 |     <category citation-format="numeric"/>
 21 |     <category field="science"/>
 22 |     <category field="engineering"/>
 23 |     <updated>2023-07-13T21:22:38+00:00</updated>
 24 |     <rights license="http://creativecommons.org/licenses/by-sa/3.0/">This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License</rights>
 25 |   </info>
 26 |   <macro name="author"> <!-- Leading element of an entry: the title for webpages, otherwise the author list. -->
 27 |     <choose>
 28 |       <if type="webpage">
 29 |         <text variable="title" suffix=":"/>
 30 |       </if>
 31 |       <else>
 32 |         <names variable="author">
 33 |           <name name-as-sort-order="all" and="text" sort-separator=", " initialize-with="." delimiter-precedes-last="never" delimiter=", "/>
 34 |           <label form="short" prefix=" "/>
 35 |           <substitute> <!-- Used when the item has no author: editors first, then translators. -->
 36 |             <names variable="editor"/>
 37 |             <names variable="translator"/>
 38 |           </substitute>
 39 |         </names>
 40 |       </else>
 41 |     </choose>
 42 |   </macro>
 43 |   <macro name="editor"> <!-- Editor names with initials, followed by a short role label after ", ". -->
 44 |     <names variable="editor">
 45 |       <name initialize-with="." delimiter=", " and="text"/>
 46 |       <label form="short" prefix=", "/>
 47 |     </names>
 48 |   </macro>
 49 |   <macro name="access"> <!-- Appends the DOI as a doi.org link; only journal articles get one. -->
 50 |     <choose>
 51 |       <if type="article-journal" match="any">
 52 |         <text variable="DOI" prefix=". DOI:https://doi.org/"/>
 53 |       </if>
 54 |     </choose>
 55 |   </macro>
 56 |   <citation collapse="citation-number"> <!-- In-text citations: bracketed numbers, sorted and collapsed by citation number. -->
 57 |     <sort>
 58 |       <key variable="citation-number"/>
 59 |     </sort>
 60 |     <layout prefix="[" suffix="]" delimiter=", ">
 61 |       <text variable="citation-number"/>
 62 |     </layout>
 63 |   </citation>
 64 |   <bibliography entry-spacing="0" second-field-align="flush" et-al-min="3" et-al-use-first="1"> <!-- Reference list: "[n] Author Year. <type-specific body><DOI>." -->
 65 |     <sort> <!-- Entries are ordered by the author macro output, then by title. -->
 66 |       <key macro="author"/>
 67 |       <key variable="title"/>
 68 |     </sort>
 69 |     <layout suffix=".">
 70 |       <text variable="citation-number" prefix="[" suffix="]"/>
 71 |       <text macro="author" suffix=" "/>
 72 |       <date variable="issued" suffix=". ">
 73 |         <date-part name="year"/>
 74 |       </date>
 75 |       <choose> <!-- Type-specific body; the first matching branch is used. -->
 76 |         <if type="paper-conference">
 77 |           <group delimiter=". ">
 78 |             <text variable="title"/>
 79 |             <group delimiter=" ">
 80 |               <text variable="container-title" font-style="italic"/>
 81 |               <group delimiter=", ">
 82 |                 <group delimiter=", " prefix="(" suffix=")">
 83 |                   <text variable="publisher-place"/>
 84 |                   <date variable="issued">
 85 |                     <date-part name="month" form="short" suffix=" "/>
 86 |                     <date-part name="year"/>
 87 |                   </date>
 88 |                 </group>
 89 |                 <text variable="page"/>
 90 |               </group>
 91 |             </group>
 92 |           </group>
 93 |         </if>
 94 |         <else-if type="article-journal">
 95 |           <group delimiter=". ">
 96 |             <text variable="title"/>
 97 |             <text variable="container-title" font-style="italic"/>
 98 |             <group delimiter=", ">
 99 |               <text variable="volume"/>
100 |               <group delimiter=" ">
101 |                 <text variable="issue"/>
102 |                 <date variable="issued" prefix="(" suffix=")">
103 |                   <date-part name="month" form="short" suffix=" "/>
104 |                   <date-part name="year"/>
105 |                 </date>
106 |               </group>
107 |               <text variable="page"/>
108 |             </group>
109 |           </group>
110 |         </else-if>
111 |         <else-if type="patent">
112 |           <group delimiter=". ">
113 |             <text variable="title"/>
114 |             <text variable="number"/>
115 |             <date variable="issued">
116 |               <date-part name="month" form="short" suffix=" "/>
117 |               <date-part name="day" suffix=", "/>
118 |               <date-part name="year"/>
119 |             </date>
120 |           </group>
121 |         </else-if>
122 |         <else-if type="thesis">
123 |           <group delimiter=". ">
124 |             <text variable="title" font-style="italic"/>
125 |             <text variable="archive_location" prefix="Doctoral Thesis #"/>
126 |             <text variable="publisher"/>
127 |           </group>
128 |         </else-if>
129 |         <else-if type="report">
130 |           <group delimiter=". ">
131 |             <text variable="title" font-style="italic"/>
132 |             <text variable="number" prefix="Technical Report #"/>
133 |             <text variable="publisher"/>
134 |           </group>
135 |         </else-if>
136 |         <else-if type="webpage"> <!-- The author macro already printed the title, so only URL and access date follow. -->
137 |           <group delimiter=". ">
138 |             <text variable="URL" font-style="italic"/>
139 |             <date variable="accessed" prefix="Accessed: ">
140 |               <date-part name="year" suffix="-"/>
141 |               <date-part name="month" form="numeric-leading-zeros" suffix="-"/>
142 |               <date-part name="day" form="numeric-leading-zeros"/>
143 |             </date>
144 |           </group>
145 |         </else-if>
146 |         <else-if type="chapter paper-conference" match="any"> <!-- NOTE(review): paper-conference is already matched by the first <if>, so only chapter appears reachable here. -->
147 |           <group delimiter=". ">
148 |             <text variable="title"/>
149 |             <text variable="container-title" font-style="italic"/>
150 |             <text macro="editor"/>
151 |             <text variable="publisher"/>
152 |             <text variable="page"/>
153 |           </group>
154 |         </else-if>
155 |         <else-if type="bill book graphic legal_case legislation motion_picture report song" match="any"> <!-- NOTE(review): report is handled by an earlier branch, so it appears unreachable in this list. -->
156 |           <group delimiter=". ">
157 |             <text variable="title" font-style="italic"/>
158 |             <text variable="publisher"/>
159 |           </group>
160 |         </else-if>
161 |         <else> <!-- Fallback for every other item type. -->
162 |           <group delimiter=". ">
163 |             <text variable="title"/>
164 |             <text variable="container-title" font-style="italic"/>
165 |             <text variable="publisher"/>
166 |           </group>
167 |         </else>
168 |       </choose>
169 |       <text macro="access"/>
170 |     </layout>
171 |   </bibliography>
172 | </style>
173 | 


--------------------------------------------------------------------------------
/agent_intro.qmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | filters:
  3 |    - include-code-files
  4 | ---
  5 | 
  6 | # Agent {#sec-agent}
  7 | 
  8 | :::{.callout-tip}
  9 | LLM 也会有 阿克琉斯之踵。
 10 | :::
 11 | 
 12 | ## 阿克琉斯之踵
 13 | 虽然 LLM 非常强大，但在某些方面，与“最简单”的计算机程序的能力相比，LLM 并没有表现得更好，例如在 `计算` 和 `搜索` 这些计算机比较擅长的场景下，LLM 的表现却很吃力。
 14 | 
 15 | 
 16 | ```{#lst-ernie_calc .python include="./code/test_ernie.py" code-line-numbers="true" lst-cap="文心大模型的计算能力测试"}
 17 | ```
 18 | 
 19 | @lst-ernie_calc 的执行结果如下：
 20 | 
 21 | ```bash
 22 | ['4.1乘以7.9等于31.79。']
 23 | ```
 24 | 
 25 | 但实际上，$4.1 * 7.9 = 32.39$，很明显，文心给出了错误的结果。
 26 | 
 27 | ![一言的计算结果](./images/ernie_calc.jpg){#fig-yiyan_rst}
 28 | 
 29 | 计算机程序（例如 python 的 [numexpr](https://github.com/pydata/numexpr) 库）可以轻而易举的处理这种简单的计算，甚至处理比这更复杂的计算也不在话下。但是，面对这些计算，LLM 有时候却显得力不从心。
 30 | 
 31 | 在 @sec-RAG 中，我们提到，使用 RAG 可以解决训练数据的时效性问题、LLM 的幻觉问题、专有数据的安全性问题等问题，但是对于 @lst-ernie_calc 所示的问题，我们将如何解决？
 32 | 
 33 | 为了让 LLM 能更好的为我们赋能，我们必须解决这个问题，而接下来要介绍的 **Agent** 就是一种比较好的解决方案。
 34 | 
 35 | 利用 **Agent**，我们不但可以解决如上提到的 `计算` 的问题，我们还可以解决更多的问题。在我看来，**Agent** 可以解锁 LLM 的能力限制，让 LLM 具备无穷的力量，实现我们难以想象的事情。
 36 | 
 37 | ## 什么是 Agent
 38 | 在日常生活中，我们解决问题也不是仅依靠我们自己的能力，我们也会使用计算器进行数学计算，我们也会 `百度一下` 以获取相关信息，正所谓“君子生非异也，善假于物也”。同样，Agent 使得 LLM 可以像人一样做同样的事情。
 39 | 
 40 | ![Agent 就是能够使用各种外部工具的 LLM](./images/agent_concept.png){#fig-agent_concept}
 41 | 
 42 | 从本质上讲，Agent 是一种特殊的 LLM，这种特殊的 LLM 的特殊性在于它可以使用各种外部工具来完成我们给定的操作。
 43 | 
 44 | 与我们使用外部工具完成任务一样：
 45 | 
 46 | 1. 我们首先会对任务进行思考
 47 | 2. 然后判断我们有哪些工具可用
 48 | 3. 接下来再选择一种我们可用的工具来实施行动
 49 | 4. 然后我们会观察行动结果以判断如何采取下一步的行动
 50 | 5. 我们会重复 1-4 这个过程，直到我们认为我们完成了给定的任务
 51 | 
 52 | ![Agent 流程示意图](./images/agent_seq.png){#fig-agent_seq}
 53 | 
 54 | 如 @fig-agent_seq 所示，虽然 Agent 本质上是 LLM，但是其包含的 `Thought` 和 `Tools Set` 将 Agent 和 LLM 区别开来，并且这种逐步思考的方式也使得 LLM 可以通过多次推理或多次使用工具来获取更好的结果。
 55 | 
 56 | 根据 B 站 UP 主发布的[视频](https://www.bilibili.com/video/BV1A24y1c7mr/?spm_id_from=888.80997.embed_other.whitelist&t=82)：作为一款优秀的 Agent，[AutoGPT](https://github.com/Significant-Gravitas/AutoGPT) 可以实现自己查询文献、学习文献，并最终完成给定论文题目写作的整个过程，而整个过程中除了最开始需要给 AutoGPT 发布任务外，其他环节则全部由 AutoGPT 自动完成。
 57 | 
 58 | ## ReAct 模式 {#sec-agent_react}
 59 | 此处的 ReAct 既不是软件设计模式中的 `reactor` 模式[^1]，也不是 Meta 公司开发的前端开发框架 `react`[^2]，而是 Yao 等人在 [-@yao2022react_online]，[-@yao2022react] 中提出的：把 `Reasoning` 和 `Action` 与语言模型结合起来的通用范式，以解决各种语言推理和决策任务。
 60 | 
 61 | 
 62 | ReAct 使 LLM 能够以交错的方式生成 `reasoning traces` 和 `text actions`，ReAct 可以从上下文中进行推理并提取有用的信息来进行后续的 `reasoning` 和 `action`，从而影响模型的内部状态。正如 [-@yao2022react_online] 所述，ReAct 将推理阶段和行动阶段进行有效的结合，进一步提升了 LLM 的性能。
 63 | 
 64 | ![ReAct 模型](./images/react.png){#fig-react}
 65 | 
 66 | 实际上，@fig-react 所示的 ReAct 模式和 @fig-agent_seq 所示的流程是一致的。
 67 | 
 68 | :::{.callout-note}
 69 | ReAct 也称为 Action Agent，在 ReAct 模式下，Agent 的下一步动作由之前的输出来决定，其本质是对 Prompt 进行优化的结果，一般可以用于规模较小的任务。
 70 | :::
 71 | 
 72 | ## PlanAndExecute 模式 {#sec-agent_pae}
 73 | 如前所述，Action Agent 适用于规模较小的任务。当任务规模较大，而任务的解决又高度依赖 Agent 来驱动并完成时，Action Agent 就开始变得捉襟见肘。
 74 | 
 75 | 我们既希望 Agent 能够处理更加复杂的任务，又希望 Agent 具备较高的稳定性和可靠性。这种“既要又要”的目标导致 Agent 的提示词变得越来越大，越来越复杂。
 76 | 
 77 | * 为了解决更复杂的任务，我们需要更多的工具和推理步骤，这会导致 Agent 的提示词中包含了过多的历史推理信息
 78 | * 同时，为了提升 Agent 的可靠性，需要不断的优化/增加 Tool 的描述，以便 LLM 可以选择正确的工具
 79 | 
 80 | 在这种背景下，PlanAndExecute 模式应运而生。PlanAndExecute 将 `计划`（`plan`） 与 `执行`（`execute`） 分离开来。
 81 | 
 82 | 在 PlanAndExecute 模式下，`计划` 由一个 LLM 来驱动生成，而 `执行` 则可以由另外的 Agent 来完成:
 83 | 
 84 | * 首先，使用一个 LLM 创建一个用于解决当前请求的、具有明确步骤的计划。
 85 | * 然后，使用传统的 Action Agent 来解决每个步骤。
 86 | 
 87 | ![PlanAndExecute Agent 基本流程](./images/pae_agent_seq.png){#fig-agent_fae_seq}
 88 | 
 89 | 目前，BabyAGI 也采用了类似的模式[^3]，更多关于 PlanAndExecute 模式的底层细节，可以参考 [-@wang2023planandsolve]。
 90 | 
 91 | :::{.callout-note}
 92 | 该模式下，Agent 将大型任务分解为较小的、可管理的子目标，从而可以高效处理复杂任务。
 93 | 
 94 | 这种方式可以通过 `计划` 让 LLM 更加“按部就班”，更加可靠。但是其代价在于，这种方法需要更多的 LLM 交互，也必然具有更高的延迟。[^4]
 95 | :::
 96 | 
 97 | ## Multi-Agent {#sec-multiagent}
 98 | 到现在为止，我们所讲的 Agent 都是 Single-Agent，也就是说我们仅在这个单独的 Agent 中（没有和其他的 Agent 交互），就完成了用户提出的任务。在 [-@multiagentintro] 中提到，Multi-Agent 是分布式 AI 领域的一个分支，强调在不同的 Agent 之间进行协作以完成用户的任务，这个时候的 Multi-Agent 主要存在于强化学习和博弈论(game theory) 的相关研究中。[-@talebirad2023multiagent] 提出了一种新的框架，通过利用 Multi Agent 系统的能力来增强大型语言模型 LLM 的能力，在这个新的框架中，作者引入了一个协作环境，在这个环境中，Multi Agent 组件（每个组件具有独特的属性和角色，可以由不同的 LLM 来驱动）协同工作，从而可以更高效、更有效地处理复杂的任务。
 99 | 
100 | :::{.callout-note title="Multi Agent 的定义"}
101 | 从本质上讲，Multi LLM Agent 是涉及到多个 LLM 驱动的 Agent 协同工作的融合体。与传统的 Single Agent 不同，Multi Agent 系统由各种 AI Agent 组成，每个 Agent 专门研究不同的领域，有助于全面解决问题。这种协作、协同效应产生了更细致和有效的解决方案。
102 | :::
103 | 
104 | 正如 @fig-autogen 所示，Multi Agent 系统可以通过不同 Agent 之间的协作来完成更为复杂的事情。
105 | 
106 | ![AutoGen 的 Multi Agent 架构图[^5]](./images/autogen_2.webp){#fig-autogen}
107 | 
108 | Multi Agent 的优势如下[^6]：
109 | 
110 | * **专业技能更强：**在 Multi Agent 系统中，每个 Agent 都拥有各自领域的专业知识，使其能够提供深入、准确的响应。这种专业知识的广度确保了所生成的解决方案是全面和知情的。
111 | * **问题解决能力更强：**复杂的问题往往需要采用不同层面的、综合的方法，Multi Agent 通过整合各个 Agent 的集体智慧，通过利用不同 Agent 各自的优势，以解决单 LLM 或者单 Agent 难以解决的问题。正所谓：众人拾柴火焰高。
112 | * **稳定性更高：**冗余和可靠性是人工智能驱动解决方案的关键因素。从架构上讲，Multi Agent 降低了单点故障的风险，如果一个 Agent 遇到问题或限制，其他 Agent 则可以介入，以确保整体系统的稳定性。
113 | * **适应性更好：**在一个充满活力的世界里，适应性至关重要。Multi Agent 可以随着时间的推移而发展，新的 Agent 可以无缝集成以应对新出现的挑战。
114 | 
115 | ## 参考文献
116 | [^1]: [Reactor Pattern](https://en.wikipedia.org/wiki/Reactor_pattern)
117 | [^2]: [react 官网](https://react.dev/)
118 | [^3]: [BabyAGI](https://github.com/yoheinakajima/babyagi)
119 | [^4]: [Plan-and-Execute Agents](https://blog.langchain.dev/plan-and-execute-agents/)
120 | [^5]: [AutoGen](https://www.microsoft.com/en-us/research/project/autogen/)
121 | [^6]: [Revolutionizing AI: The Era of Multi-Agent Large Language Models](https://gafowler.medium.com/revolutionizing-ai-the-era-of-multi-agent-large-language-models-f70d497f3472#:~:text=Multi%2Dagent%20LLM%2C%20in%20essence,contributing%20to%20comprehensive%20problem%2Dsolving.)
122 | 
123 | 


--------------------------------------------------------------------------------
/assistants.qmd:
--------------------------------------------------------------------------------
 1 | # Assistant {#sec-assistant_intro}
 2 | 
 3 | 2023 年，11 月 6 日，OpenAI 召开了第一次开发者大会——[OpenAI DevDay](https://devday.openai.com/)，这次大会的相关内容可以参见：[OpenAI DevDay, Opening Keynote](https://www.bilibili.com/video/BV1au4y1a78B/?spm_id_from=333.337.search-card.all.click&vd_source=fbeb46d16d08ad900fac814e55c3f27f)。
 4 | 
 5 | 在这次大会上，为了简化使用大模型开发订制助理的步骤，OpenAI 正式发布了 [`Assistant API`](https://platform.openai.com/docs/assistants/overview)（视频中的第 1:02:47 处）。
 6 | 
 7 | 我们可以使用 `Assistant API` 在自己的应用程序中构建人工智能助手。助手由如下几部分构成：
 8 | 
 9 | * `name`：用于指定助理的名字。
10 | * `instructions`：用于指定助手的个性并定义其目标，和 `system message` 有些类似。
11 | * `tools`：用于指定助手可以访问的工具。助手可以访问多达128个工具，可以访问 OpenAI 托管的工具，也可以通过 `函数调用` 访问第三方的工具。目前 OpenAI 提供了三个工具：代码解释器、检索、函数调用。
12 | 
13 | :::{.callout-tip}
14 | 更详细的文档可以参考 [Assistant Overview](https://platform.openai.com/docs/assistants/overview)。
15 | :::
16 | 
17 | 助手可以根据说明并使用工具来响应用户的请求。实际上，OpenAI Assistant 其实和 @sec-agent 中介绍的 Agent 的概念非常相似。在 LangChain 的官方文档中，[OpenAI Assistant](https://python.langchain.com/docs/modules/agents/agent_types/openai_assistants) 也是作为一种 Agent 类型存在的，可见从 LangChain 的角度来看，OpenAI Assistant 本质上也是一种 Agent。而 `Assistant API` 的发布，则提高了我们开发 OpenAI Assistant 的效率。
18 | 
19 | 在 [OpenAI 的技术论坛](https://community.openai.com/)上，也有用户表示自己的疑惑：[`Assistant API` 和 LangChain 究竟有什么区别呢？](https://community.openai.com/t/the-difference-of-assistant-api-and-langchain/496223)。正如这个帖子下面的回复，Assistant API 和 LangChain Agent 都是在做同样的事情，只是在某些方面， Assistant API 更友好而已。
20 | 
21 | > `Assistant API` and LangChain are basically doing the same thing. Both require programming. The **only advantage of `Assistant API`** is that memory and context window are automatically managed where in LangChain you have explicitly set those things up.
22 | 
23 | ## Assistant API 的框架
24 | OpenAI Assistant API 的架构图如 @fig-d_assistant 所示。
25 | 
26 | ![OpenAI Assistant API 架构图](./images/diagram-assistant.webp){#fig-d_assistant}
27 | 
28 | 在 [Assistant Overview](https://platform.openai.com/docs/assistants/overview) 中，已经对该图做了非常多的解释，但是在我看来，最令人兴奋的能力是访问持久化线程的能力。
29 | 
30 | :::{.callout-note}
31 | 助理可以访问持久线程。线程通过存储消息历史记录来简化人工智能应用程序的开发，并在对话太长而超出模型的上下文长度时将其截断。
32 | 
33 | 使用 `Assistant API`，我们只需创建一次线程，然后我们可以在该线程内进行连续的多轮对话，而多轮对话需要的 `记忆` 功能，OpenAI 统统帮我们实现了（而如果使用 LangChain 或其他框架，这些都需要我们自己来实现）。我们可以轻松的实现如下的多轮会话：
34 | 
35 | * 1 + 1 =?
36 | * 那么，再加10的结果是？
37 | * ……
38 | :::
39 | 
40 | ## Assistant 运行状态
41 | 和进程类似，`Assistant API` 创建的线程在回答用户的问题时（对应 @fig-d_assistant 中的 Run 阶段），也会存在各种状态的转换。每一次执行的具体的状态转化如 @fig-openai_ass_status 所示。
42 | 
43 | ![Assistant API 线程每次执行的生命周期](./images/diagram-status.png){#fig-openai_ass_status}
44 | 
45 | | 状态 | 状态含义 |
46 | | --------- | --------- |
47 | | `queued` | 首次创建助理并执行或完成 `requires_action` 时，将转变为 `queued` 状态。`queued` 状态应该立即转到 `in_progress`。 |
48 | | `in_progress` | 在 `in_progress` 时，助理使用模型和工具执行相关操作。我们可以通过检查 [Run Step](https://platform.openai.com/docs/api-reference/runs/step-object) 来获取这次执行的具体进度。 |
49 | | `completed` | 一旦这次执行成功，就会转到该状态，此时，我们可以查看助理返回的所有消息。我们还可以通过该线程继续进行下一轮的对话。 |
50 | | `requires_action` | 当使用 `函数调用` 时，一旦模型确定了要调用的函数的名称和参数，线程将转变为 `requires_action` 状态。然后，我们必须运行这些函数并提交函数响应，才能继续运行。如果在 `expires_at` 时间戳达到时（创建后大约10分钟）还没有提交函数的运行结果，则此次执行将进入 `expired` 状态。 |
51 | | `expired` | 当 `函数调用` 的输出未在 `expires_at` 之前提交时，就会发生这种情况。此外，如果此次执行时间过长，超过`expires_at` 规定的时间时，也会转换到该状态。 |
52 | | `cancelling` | 我们可以使用 [Cancel Run API](https://platform.openai.com/docs/api-reference/runs/cancelRun) 取消 `in_progress` 中的某次执行。一旦取消成功，此次执行的状态将变为 `cancelled`。需要注意的是，`Assistant API` 仅仅是尝试取消，但不能保证取消成功。 |
53 | | `cancelled` | 如果某次执行已经成功取消，则转到该状态。|
54 | | `failed` | 执行失败时，转为该状态。可以通过 `last_error` 查看失败原因。 |
55 | 
56 | : Assistant API 中不同的状态及其含义 {#tbl-ass_status}
57 | 
58 | 因为 `Assistant API` 提供的线程是持久线程，因此，每当我们需要使用该线程处理用户需求时，我们最好及时的查询该线程当前的状态，以避免出现非预期的结果。
59 | 
60 | ## Assistant API 示例
61 | 
62 | ```{#lst-ass_code_demo .python code-line-numbers="true" lst-cap="Assistant API 示例"}
63 | from langchain.agents.openai_assistant import OpenAIAssistantRunnable
64 | 
65 | interpreter_assistant = OpenAIAssistantRunnable.create_assistant(
66 |     name="langchain assistant",
67 |     instructions="You are a personal math tutor. Write and run code to answer math questions.",
68 |     tools=[{"type": "code_interpreter"}],
69 |     model="gpt-4-1106-preview",
70 | ) # <1>
71 | 
72 | output = interpreter_assistant.invoke({"content": "What's 10 - 4 raised to the 2.7"}) # <2>
73 | print(output)
74 | 
75 | """
76 | [ThreadMessage(id='msg_6Gj48OdMV8dQrFUPTh17UvG4', assistant_id='asst_19av1lcBjSCQ5cEk4pWqugEU', content=[MessageContentText(text=Text(annotations=[], value='The result of the expression \\(10 - 4^{2.7}\\) is approximately \\(-32.224\\).'), type='text')], created_at=1700038489, file_ids=[], metadata={}, object='thread.message', role='assistant', run_id='run_4wK9GIK2W0iiJlLB86DKoMQQ', thread_id='thread_Ie874bQrsaakLMpOMZe2KUav')]
77 | """ #<3>
78 | 
79 | output_2 = interpreter_assistant.invoke({"content": "Then, Add 10 to the result", "thread_id": "thread_Ie874bQrsaakLMpOMZe2KUav"}) #<4>
80 | print(output_2) 
81 | 
82 | """
83 | [ThreadMessage(id='msg_xRoHzvCdtqW9NRWxmE36VMZG', assistant_id='asst_mEAcerOkTv1IyggYoU3jTNMn', content=[MessageContentText(text=Text(annotations=[], value='After adding 10 to the previous result, the new result is approximately -22.224.'), type='text')], created_at=1700038760, file_ids=[], metadata={}, object='thread.message', role='assistant', run_id='run_ubrOtRXh6ITIRQ4BbPMk5juV', thread_id='thread_Ie874bQrsaakLMpOMZe2KUav')]
84 | """ #<5>
85 | ```
86 | 
87 | 1. 创建 Assistant 线程
88 | 2. 计算 $10-4^{2.7}$
89 | 3. 获得结果 -32.224，同时返回线程 id 等其他信息
90 | 4. 在当前结果基础上，继续用同一个线程执行 $res + 10$
91 | 5. 获得结果 -22.224
92 | 
93 | 更多的 `Assistant API` 的使用例子，我们在 @sec-assistant 中再介绍。
94 | 


--------------------------------------------------------------------------------
/autogen.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | filters:
 3 |    - include-code-files
 4 | code-annotations: below
 5 | ---
 6 | 
 7 | # AutoGen
 8 | 在看了 [OpenAI DevDay](https://www.bilibili.com/video/BV1au4y1a78B/?spm_id_from=333.337.search-card.all.click&vd_source=fbeb46d16d08ad900fac814e55c3f27f) 的相关直播后，我曾经认为，Assistant API（@sec-assistant_intro） 完全可以秒杀 AutoGen 了。
 9 | 
10 | 在 AutoGen 的讨论区中，我发现大家也有同样的思考。在 [New OpenAI Assistants / Assistants API](https://github.com/microsoft/autogen/discussions/575) 的讨论中，有用户说到：
11 | 
12 | > I am trying to understand if the new OpenAI Assistants API is different conceptually or even technically from Autogen? Is it really the same idea and almost same approach, just packaged in a more accessible way? 
13 | 
14 | 后来，我慢慢的明白了，如 @sec-multiagent 所述，AutoGen 是一种 MultiAgent 框架，而 Assistant API 本质上是一种 Single Agent。AutoGen 引入了多 Agent 交互、协同的机制，并且还给人类干预提供了机会，使得人机结合成为可能，也更大程度的激发了 LLM 的能量。
15 | 
16 | 所以，Assistant API 的发布不但不会影响 AutoGen 的发展，反而会促进 AutoGen 的发展。
17 | 
18 | ## AutoGen 概述
19 | AutoGen 是由微软、宾夕法尼亚州立大学和华盛顿大学联合推出的 Multi Agent 框架，我们可以使用 AutoGen 管理多个 Agent 来开发 LLM 应用程序。在 AutoGen 框架下，多个不同的 Agent 之间可以通过交互来完成任务。
20 | 
21 | 我们可以在 [github/microsoft/autogen](https://github.com/microsoft/autogen) 上获取最新的 AutoGen 版本，我们也可以在这个项目的 [讨论区](https://github.com/microsoft/autogen/discussions) 来进行相关技术的探讨。
22 | 
23 | AutoGen 框架中的 Agent 是可定制的、可交互的、可人工干预的，AutoGen 框架下的 Agents 既可以是 LLM 模式，也可以是人工操作或者工具集模式。
24 | 
25 | 在我看来，AutoGen 的最大魅力在于其允许不同 Agent 之间的可交互性以及人工干预的能力，这最大程度的促进了人机结合的可能性，为实现最终的超级 AGI 智能体迈出了一大步。
26 | 
27 | ![AutoGen 示意图](./images/autogen_agentchat.png){#fig-autogen_autogen} 
28 | 
29 | 根据 @fig-autogen_autogen，AutoGen 有以下的几个特点：
30 | 
31 | * **构建效率高：**AutoGen 简化了开发复杂 LLM 工作流所涉及到的编排、自动化、优化等工作，能够以最小的开发成本构建基于 Multi Agents 对话的下一代 LLM 应用程序。
32 | * **可对话性：**AutoGen 支持多个 Agents 之间的交互协同，并且不同的 Agents 都可以根据实际需求进行定制，我们可以使用 AutoGen 构建广泛的应用（例如，不同的 Agent 数量，不同的 Agent 拓扑结构……），以完成不同的任务。
33 | 
34 | 除此之外，AutoGen 还提供了增强的 LLM 推理能力、统一的 API，以及错误处理、多配置推理、上下文编程等各种能力，以进一步提升开发 LLM 原生应用程序的效率。
35 | 
36 | ## AutoGen 生成股价趋势图
37 | 
38 | ## AutoGen Vs LangChain
39 | 像 AutoGen 这样的 Multi Agent 给了我们非常大的想象空间，但是这是否意味着像 LangChain 这样的 Single Agent 框架——直到现在，LangChain 还没有支持 Multi Agent——会过时呢？
40 | 
41 | LangChain 的 CEO & 联合创始人 Harrison Chase 在 Reddit 上开了一个 [AMA 讨论区](https://www.reddit.com/r/LangChain/comments/17ffvxo/im_harrison_chase_ceo_and_cofounder_of_langchain/)，并在这个讨论区和大家一起讨论和 LangChain 有关的话题。
42 | 
43 | 在这个讨论区中，有个用户提了一个 [LangChain 是否会支持 Multi Agent](https://www.reddit.com/r/LangChain/comments/17ffvxo/comment/k69qe2x/?utm_source=share&utm_medium=web3x&utm_name=web3xcss&utm_term=1&utm_content=share_button) 的问题：
44 | 
45 | > As far as I know, the library seems to provide support to single agents and some experimental support to other types of agent runtimes (eg. BabyAGI, AutoGPT). Do you have any plans to include multi-agent support like autogen?
46 | 
47 | 针对这一问题，Harrison Chase [回复道](https://www.reddit.com/r/LangChain/comments/17ffvxo/comment/k69qtoc/?utm_source=share&utm_medium=web3x&utm_name=web3xcss&utm_term=1&utm_content=share_button)：“LangChain 是否支持 Multi Agent 主要是看 Multi Agent 是否有具体的应用场景和案例。”。
48 | 
49 | > Yes we are considering it. The main thing blocking us from investing more is concrete use cases where multi agent frameworks are actually helpful. If you (or anyone) has ideas/suggestions we would love to hear so we can implement them!
50 | 
51 | 从这里可以看出，LangChain 完全是基于实用目的而开发。AutoGen 是个好东西，但是是否有合适的场景必须采用 AutoGen 才能实现呢？目前 AutoGen 给的可以应用的场景，是否用 LangChain 也可以完成呢？
52 | 
53 | 就像编程语言有面向函数编程和面向对象编程一样，LangChain 和 AutoGen 各自都有各自的战场，很难说谁会完全替代谁。
54 | 
55 | ![不同 Agents 之间的关系和编程语言元素之间关系的类比](./images/progandagents.png){#fig-prog_and_agents}
56 | 
57 | 从本质上讲，LangChain 是一个构建 Agent 的框架，它提供了创建和部署 Agent 所需的工具和基础设施；而 AutoGen 是一个可以与多个 Agent 进行对话、交互的 Agent。
58 | 
59 | 我更喜欢 LangChain，除了 Harrison Chase 提到的应用场景的问题之外，对我而言，LangChain 提供了多模型的统一接口，这使得其他模型接入起来非常方便。而 AutoGen 目前仅支持 GPTs 类模型，虽然可以使用 [FastChat](https://github.com/lm-sys/FastChat) [接入其他的模型](https://microsoft.github.io/autogen/blog/2023/07/14/Local-LLMs/)，但是整个的过程还是比较繁琐的。
60 | 
61 | 当问及 LangChain 的方向时，Harrison Chase 说到：[没有人可以确切的知道 LangChain 会走向何方？](https://www.reddit.com/r/LangChain/comments/17ffvxo/comment/k69uo54/?utm_source=share&utm_medium=web3x&utm_name=web3xcss&utm_term=1&utm_content=share_button)
62 | 
63 | > I think part of the fun is no one really knows where LangChain will go (or where the space will go) :)
64 | 
65 | 未来，当 Multi Agent 真的非常重要时，LangChain 或许也会引入 Multi Agent。
66 | 


--------------------------------------------------------------------------------
/case1.qmd:
--------------------------------------------------------------------------------
1 | # Case1


--------------------------------------------------------------------------------
/code/functions_desc.py:
--------------------------------------------------------------------------------
 1 | """
 2 | @discribe: functions description.
 3 | @author: wangwei1237@gmail.com
 4 | """
 5 | 
 6 | functions = [
 7 |     {
 8 |         "name": "get_current_news",
 9 |         "description": "Get the current news based on the location.",
10 |         "parameters": {
11 |             "type": "object",
12 |             "properties": {
13 |                 "location": {
14 |                     "type": "string",
15 |                     "description": "The city and state, e.g. San Francisco, CA",
16 |                 },
17 |             },
18 |             "required": ["location"],
19 |         },
20 |         "responses": {
21 |             "type": "object",
22 |             "properties": {
23 |                 "news": {
24 |                     "type": "string",
25 |                     "description": "The current news based on the location.",
26 |                 }
27 |             }
28 |         }
29 |     },
30 |     {
31 |         "name": "get_current_weather",
32 |         "description": "Get the current weather in a given location",
33 |         "parameters": {
34 |             "type": "object",
35 |             "properties": {
36 |                 "location": {
37 |                     "type": "string",
38 |                     "description": "The city and state, e.g. San Francisco, CA",
39 |                 },
40 |                 "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
41 |             },
42 |             "required": ["location"],
43 |         },
44 |         "responses": {
45 |             "type": "object",
46 |             "properties": {
47 |                 "temperature": {
48 |                     "type": "string",
49 |                     "description": "The temperature in the given location.",
50 |                 }
51 |             }
52 |         }
53 |     },
54 | ]


--------------------------------------------------------------------------------
/code/langchain/utils/call_function.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python3
 2 | """
 3 | @discribe: The function running for Ernie-Bot-4 Function Calling.
 4 | @author: wangwei1237@gmail.com
 5 | """
 6 | 
 7 | from typing import (
 8 |     Any,
 9 |     Callable,
10 |     Dict,
11 |     Sequence,
12 |     Type,
13 |     Union,
14 | )
15 | 
16 | from langchain.chains.ernie_functions import (
17 |     convert_to_ernie_function,
18 | )
19 | from langchain.pydantic_v1 import BaseModel
20 | 
21 | from langsmith.run_helpers import traceable
22 | 
23 | @traceable(run_type="tool")  #<1>
24 | def call_function(functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
25 |                   fc_by_llm: dict) -> str:
26 |     """Calling the function and return the result."""
27 |     if not fc_by_llm or "name" not in fc_by_llm or "arguments" not in fc_by_llm:
28 |         return ""
29 |     func_list = [f for f in functions if f.__name__ == fc_by_llm["name"]]
30 |     if len(func_list) != 1:
31 |         return ""
32 |     func = func_list[0]
33 |     func_args_keys = convert_to_ernie_function(func)["parameters"]["properties"].keys()
34 |     fc_args_by_llm = fc_by_llm["arguments"]
35 |     func_args = {
36 |         key: fc_args_by_llm[key] for key in func_args_keys if key in fc_args_by_llm
37 |     }
38 |     res = func(**func_args)
39 |     return res
40 | 


--------------------------------------------------------------------------------
/code/lc_010/test_conversation_agent.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | 
 3 | """
 4 | @discribe: example for conversation agent.
 5 | @author: wangwei1237@gmail.com
 6 | """
 7 | 
 8 | from langchain_community.chat_models import QianfanChatEndpoint
 9 | from langchain_core.prompts.chat import ChatPromptTemplate
10 | from langchain_core.prompts import PromptTemplate
11 | from langchain.chains import LLMChain
12 | from langchain.chains import LLMMathChain
13 | from langchain.agents import Tool
14 | from langchain.agents import AgentExecutor, create_react_agent
15 | 
16 | llm = QianfanChatEndpoint(model="ERNIE-Bot-4")
17 | 
18 | llm_math = LLMMathChain.from_llm(llm)
19 | 
20 | template = ChatPromptTemplate.from_messages([
21 |     ("user", "你是一个能力非凡的人工智能机器人。"),
22 |     ("assistant", "你好~"),
23 |     ("user", "{user_input}"),
24 | ])
25 | llm_chain = LLMChain(llm=llm, prompt=template)
26 | 
27 | # initialize the math tool
28 | math_tool = Tool(
29 |     name='Calculator',
30 |     func=llm_math.run,
31 |     description='Useful for when you need to answer questions about math.'
32 | )
33 | 
34 | # initialize the general LLM tool
35 | llm_tool = Tool(
36 |     name='Language Model',
37 |     func=llm_chain.run,
38 |     description='Use this tool for general purpose queries.'
39 | )
40 | 
41 | # when giving tools to LLM, we must pass as list of tools
42 | tools = [math_tool, llm_tool]
43 | 
44 | # get the prompt template string from: 
45 | # https://smith.langchain.com/hub/hwchase17/react-chat?organizationId=c4887cc4-1275-5361-82f2-b22aee75bad1
46 | prompt_template = """..."""   #<1>
47 | 
48 | prompt = PromptTemplate.from_template(prompt_template)
49 | 
50 | conversation_agent = create_react_agent(
51 |     llm=llm,
52 |     tools=tools,
53 |     prompt=prompt,
54 | )
55 | 
56 | history = ["Human: 今年是哪一年？，AI: 今年是 1768。"] #<2>
57 | querys = [
58 |     "这一年，中国有什么重大事件发生？",
59 |     "同年，其他国家有什么重大事件发生？",
60 | ]
61 | 
62 | agent_executor = AgentExecutor(agent=conversation_agent, tools=tools, verbose=True)
63 | 
64 | for query in querys:
65 |     try:
66 |         res = agent_executor.invoke({"input": query, "chat_history": "\n" . join(history)}) #<3>
67 |     except Exception as e:
68 |         res = {}
69 | 
70 |     history.append("Human: " + query + "\nAI: " + res.get("output", ""))
71 | 
72 |     print(res)


--------------------------------------------------------------------------------
/code/lc_010/test_doc_agent.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | 
 3 | """
 4 | @discribe: example for docstore agent.
 5 | @author: wangwei1237@gmail.com
 6 | """
 7 | 
 8 | from langchain_community.chat_models import QianfanChatEndpoint
 9 | from langchain_core.prompts import PromptTemplate
10 | from langchain.agents import Tool
11 | from langchain_community.tools.wikipedia.tool import WikipediaQueryRun
12 | from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
13 | from langchain.agents import AgentExecutor, create_react_agent
14 | 
15 | llm = QianfanChatEndpoint(model="ERNIE-Bot-4")
16 | wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
17 | 
18 | # initialize the docstore search tool
19 | search_tool = Tool(
20 |     name="Search Engine Tool",
21 |     func=wikipedia.run,
22 |     description='search wikipedia'
23 | )
24 | 
25 | # when giving tools to LLM, we must pass as list of tools
26 | tools = [search_tool] #<1>
27 | 
28 | # get the prompt template string from: 
29 | # https://smith.langchain.com/hub/hwchase17/react?organizationId=c4887cc4-1275-5361-82f2-b22aee75bad1
30 | prompt_template = """..."""   #<2>
31 | 
32 | prompt = PromptTemplate.from_template(prompt_template)
33 | 
34 | docstore_agent = create_react_agent(
35 |     llm=llm,
36 |     tools=tools,
37 |     prompt=prompt,
38 | )
39 | 
40 | agent_executor = AgentExecutor(agent=docstore_agent, tools=tools, verbose=True)
41 | res = agent_executor.invoke({"input": "What were Archimedes' last words?"})
42 | print(res)


--------------------------------------------------------------------------------
/code/lc_010/test_struct_agent.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | 
 3 | """
 4 | @discribe: example for struct agent.
 5 | @author: wangwei1237@gmail.com
 6 | """
 7 | 
 8 | from langchain.agents import AgentExecutor, create_structured_chat_agent
 9 | from langchain_community.chat_models import QianfanChatEndpoint
10 | from langchain_core.prompts.chat import (
11 |     AIMessage,
12 |     ChatPromptTemplate,
13 |     HumanMessage,
14 |     HumanMessagePromptTemplate,
15 |     MessagesPlaceholder,
16 |     SystemMessagePromptTemplate,
17 | )
18 | 
19 | from tools import PythagorasTool
20 | from tools import CircumferenceTool
21 | 
22 | llm = QianfanChatEndpoint(model="ERNIE-Bot-4")
23 | 
24 | # when giving tools to LLM, we must pass as list of tools
25 | tools = [CircumferenceTool(), PythagorasTool()]
26 | 
27 | # the prompt template can get from: 
28 | # https://smith.langchain.com/hub/hwchase17/structured-chat-agent?organizationId=c4887cc4-1275-5361-82f2-b22aee75bad1
29 | system_message_template = """..."""
30 | human_message_template = """..."""
31 | 
32 | messages = [
33 |     SystemMessagePromptTemplate.from_template(system_message_template),
34 |     MessagesPlaceholder(variable_name="chat_history"),
35 |     HumanMessagePromptTemplate.from_template(human_message_template),
36 | ]
37 | 
38 | input_variables = ["tools", "tool_names", "input", "chat_history", "agent_scratchpad"]
39 | prompt = ChatPromptTemplate(input_variables=input_variables, messages=messages)
40 | 
41 | struct_agent = create_structured_chat_agent(
42 |     llm=llm,
43 |     tools=tools,
44 |     prompt=prompt,
45 | )
46 | 
47 | 
48 | agent_executor = AgentExecutor(agent=struct_agent, tools=tools, verbose=True)
49 | 
50 | history = []
51 | querys = [
52 |     """If I have a triangle with the opposite side of length 51 and the adjacent side of 40,
53 |     what is the length of the hypotenuse?""",
54 | ]
55 | 
56 | for query in querys:
57 |     try:
58 |         res = agent_executor.invoke({"input": query, "chat_history": history})
59 |     except Exception as e:
60 |         res = {}
61 | 
62 |     history.append(HumanMessage(content=query))
63 |     history.append(AIMessage(content=res.get("output", "")))
64 |     
65 |     print(res)


--------------------------------------------------------------------------------
/code/lc_010/test_zero_shot_agent.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | 
 3 | """
 4 | @discribe: example for react agent.
 5 | @author: wangwei1237@gmail.com
 6 | """
 7 | 
 8 | from langchain_community.chat_models import QianfanChatEndpoint
 9 | from langchain_core.prompts.chat import ChatPromptTemplate
10 | from langchain_core.prompts import PromptTemplate
11 | from langchain.chains import LLMChain
12 | from langchain.chains import LLMMathChain
13 | from langchain.agents import Tool
14 | from langchain.agents import AgentExecutor, create_react_agent    #<1>
15 | 
16 | llm = QianfanChatEndpoint(model="ERNIE-Bot-4")
17 | 
18 | llm_math = LLMMathChain.from_llm(llm)
19 | 
20 | template = ChatPromptTemplate.from_messages([
21 |     ("user", "你是一个能力非凡的人工智能机器人。"),
22 |     ("assistant", "你好~"),
23 |     ("user", "{user_input}"),
24 | ])
25 | llm_chain = LLMChain(llm=llm, prompt=template)
26 | 
27 | # initialize the math tool
28 | math_tool = Tool(
29 |     name='Calculator',
30 |     func=llm_math.run,
31 |     description='Useful for when you need to answer questions about math.'
32 | )
33 | 
34 | # initialize the general LLM tool
35 | llm_tool = Tool(
36 |     name='Language Model',
37 |     func=llm_chain.run,
38 |     description='Use this tool for general purpose queries.'
39 | )
40 | 
41 | # when giving tools to LLM, we must pass as list of tools
42 | tools = [math_tool, llm_tool]
43 | 
44 | # get the prompt template string from: 
45 | # https://smith.langchain.com/hub/hwchase17/react?organizationId=c4887cc4-1275-5361-82f2-b22aee75bad1
46 | prompt_template = """..."""   #<2>
47 | prompt = PromptTemplate.from_template(prompt_template)
48 | 
49 | zero_shot_agent = create_react_agent(
50 |     llm=llm,
51 |     tools=tools,
52 |     prompt=prompt,
53 | ) #<3>
54 | 
55 | agent_executor = AgentExecutor(agent=zero_shot_agent, tools=tools, verbose=True) #<4>
56 | try:
57 |     res = agent_executor.invoke({"input": "what's 4.1*7.9=?"}) #<5>
58 | except Exception as e:
59 |     res = {}
60 | 
61 | print(res)


--------------------------------------------------------------------------------
/code/lc_010/tools.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python3
 2 | """
 3 | @discribe: example for tools.
 4 | @author: wangwei1237@gmail.com
 5 | """
 6 | from langchain.tools import BaseTool
 7 | from math import pi
 8 | from typing import Union
 9 | from typing import Optional
10 | from math import sqrt, cos, sin
11 |   
12 | 
13 | class CircumferenceTool(BaseTool):
14 |     """Tool that computes a circle's circumference from its radius."""
15 | 
16 |     name = "Circumference calculator"
17 |     description = "use this tool when you need to calculate a circumference using the radius of a circle"
18 | 
19 |     def _run(self, radius: Union[int, float]):
20 |         """Return the circumference (radius * 2 * pi) as a float."""
21 |         return float(radius) * 2.0 * pi
22 | 
23 |     def _arun(self, radius: int):
24 |         """Async execution is not supported; always raises NotImplementedError."""
25 |         raise NotImplementedError("This tool does not support async")
26 |     
27 | 
28 | desc = (
29 |     "use this tool when you need to calculate the length of a hypotenuse"
30 |     "given one or two sides of a triangle and/or an angle (in degrees). "
31 |     "To use the tool, you must provide at least two of the following parameters "
32 |     "['adjacent_side', 'opposite_side', 'angle']."
33 | )
34 | 
35 | class PythagorasTool(BaseTool):
36 |     """PythagorasTool"""
37 |     name = "Hypotenuse calculator"
38 |     description = desc
39 |     
40 |     def _run(
41 |         self,
42 |         adjacent_side: Optional[Union[int, float]] = None,
43 |         opposite_side: Optional[Union[int, float]] = None,
44 |         angle: Optional[Union[int, float]] = None
45 |     ):
46 |         """run"""
47 |         # check for the values we have been given
48 |         if adjacent_side and opposite_side:
49 |             return sqrt(float(adjacent_side)**2 + float(opposite_side)**2)
50 |         elif adjacent_side and angle:
51 |             return float(adjacent_side) / cos(float(angle))
52 |         elif opposite_side and angle:
53 |             return float(opposite_side) / sin(float(angle))
54 |         else:
55 |             return "Could not calculate the hypotenuse of the triangle. "
56 |     
57 |     def _arun(self, query: str):
58 |         """arun"""
59 |         raise NotImplementedError("This tool does not support async")
60 | 
if __name__ == "__main__":
    # Manual smoke test: run each tool once with a dict of arguments and
    # print what it returns.
    circle_tool = CircumferenceTool()
    print(circle_tool.run({'radius': 1}))

    triangle_tool = PythagorasTool()
    print(triangle_tool.run({'adjacent_side': 3, 'opposite_side': 4}))


--------------------------------------------------------------------------------
/code/retrieval_prompt.py:
--------------------------------------------------------------------------------
 1 | # flake8: noqa
 2 | 
 3 | """
 4 | @discribe: prompt for test_retrievalQA.py.
 5 | @author: wangwei1237@gmail.com
 6 | """
 7 | 
 8 | from langchain.chains.prompt_selector import ConditionalPromptSelector, is_chat_model
 9 | from langchain.prompts import PromptTemplate
10 | from langchain.prompts.chat import (
11 |     ChatPromptTemplate,
12 |     HumanMessagePromptTemplate,
13 |     AIMessagePromptTemplate,
14 | )
15 | 
# Single-string prompt: the retrieved context is followed by the question.
prompt_template = (
    "Use the following pieces of context to answer the question at the end. "
    "If you don't know the answer, just say that you don't know, "
    "don't try to make up an answer.\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "Helpful Answer:"
)
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Chat prompt: the instructions and context are sent as a human message,
# answered by a fixed AI acknowledgement, and only then comes the question.
# The trailing space after "users question." is part of the original text.
system_template = (
    "Use the following pieces of context to answer the users question. \n"
    "If you don't know the answer, just say that you don't know, "
    "don't try to make up an answer.\n"
    "----------------\n"
    "{context}"
)
messages = [
    HumanMessagePromptTemplate.from_template(system_template),  # <1>
    AIMessagePromptTemplate.from_template("OK!"),  # <2>
    HumanMessagePromptTemplate.from_template("{question}"),
]
CHAT_PROMPT = ChatPromptTemplate.from_messages(messages)


# Selects CHAT_PROMPT when ``is_chat_model`` matches the LLM, PROMPT otherwise.
PROMPT_SELECTOR = ConditionalPromptSelector(
    conditionals=[(is_chat_model, CHAT_PROMPT)],
    default_prompt=PROMPT,
)
41 | 


--------------------------------------------------------------------------------
/code/sk/plugins/Math.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python3
 2 | """
 3 | @discribe: Math Plugin.
 4 | @author: wangwei1237@gmail.com
 5 | """
 6 | 
 7 | import math
 8 | from semantic_kernel.skill_definition import (
 9 |     sk_function,
10 |     sk_function_context_parameter,
11 | )
12 | from semantic_kernel.orchestration.sk_context import SKContext
13 | 
14 | 
class MathPlugin:
    """Arithmetic functions exposed to Semantic Kernel.

    ``Sqrt`` receives its operand as a plain string argument. The binary
    operations read their operands from the ``input`` and ``number2``
    context variables. Operands are parsed with ``float()`` and every
    result is returned as a string.
    """

    @sk_function(
        name="Sqrt",
        description="Takes the square root of a number",
        input_description="The value to take the square root of",
    )
    def square_root(self, number: str) -> str:
        """Return the square root of ``number`` as a string."""
        value = float(number)
        return str(math.sqrt(value))

    @sk_function(
        name="Add",
        description="Adds two numbers together",
    )
    @sk_function_context_parameter(
        name="input",
        description="The first number to add",
    )
    @sk_function_context_parameter(
        name="number2",
        description="The second number to add",
    )
    def add(self, context: SKContext) -> str:
        """Return ``input + number2`` as a string."""
        first = float(context["input"])
        second = float(context["number2"])
        return str(first + second)

    @sk_function(
        name="Subtract",
        description="Subtract two numbers",
    )
    @sk_function_context_parameter(
        name="input",
        description="The first number to subtract from",
    )
    @sk_function_context_parameter(
        name="number2",
        description="The second number to subtract away",
    )
    def subtract(self, context: SKContext) -> str:
        """Return ``input - number2`` as a string."""
        first = float(context["input"])
        second = float(context["number2"])
        return str(first - second)

    @sk_function(
        name="Multiply",
        description="Multiply two numbers. When increasing by a percentage, don't forget to add 1 to the percentage.",
    )
    @sk_function_context_parameter(
        name="input",
        description="The first number to multiply",
    )
    @sk_function_context_parameter(
        name="number2",
        description="The second number to multiply",
    )
    def multiply(self, context: SKContext) -> str:
        """Return ``input * number2`` as a string."""
        first = float(context["input"])
        second = float(context["number2"])
        return str(first * second)

    @sk_function(
        name="Divide",
        description="Divide two numbers",
    )
    @sk_function_context_parameter(
        name="input",
        description="The first number to divide from",
    )
    @sk_function_context_parameter(
        name="number2",
        description="The second number to divide by",
    )
    def divide(self, context: SKContext) -> str:
        """Return ``input / number2`` as a string."""
        first = float(context["input"])
        second = float(context["number2"])
        return str(first / second)


--------------------------------------------------------------------------------
/code/sk/plugins/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/code/sk/plugins/__init__.py


--------------------------------------------------------------------------------
/code/structed_chat_agent_base.py:
--------------------------------------------------------------------------------
  1 | # !/usr/bin/env python3
  2 | 
  3 | """
  4 | @discribe: update StructuredChatAgent for the ErnieChatBot.
  5 | @author: wangwei1237@gmail.com
  6 | """
  7 | 
  8 | import re
  9 | from typing import Any, List, Optional, Sequence, Tuple
 10 | 
 11 | from langchain.agents.agent import Agent, AgentOutputParser
 12 | from langchain.agents.structured_chat.output_parser import (
 13 |     StructuredChatOutputParserWithRetries,
 14 | )
 15 | from langchain.agents.structured_chat.prompt import FORMAT_INSTRUCTIONS, PREFIX, SUFFIX
 16 | from langchain.callbacks.base import BaseCallbackManager
 17 | from langchain.chains.llm import LLMChain
 18 | from langchain.prompts.chat import (
 19 |     ChatPromptTemplate,
 20 |     HumanMessagePromptTemplate,
 21 |     SystemMessagePromptTemplate,
 22 |     AIMessagePromptTemplate,
 23 | )
 24 | from langchain.pydantic_v1 import Field
 25 | from langchain.schema import AgentAction, BasePromptTemplate
 26 | from langchain.schema.language_model import BaseLanguageModel
 27 | from langchain.tools import BaseTool
 28 | 
 29 | HUMAN_MESSAGE_TEMPLATE = "{input}\n\n{agent_scratchpad}"
 30 | 
 31 | 
 32 | class StructuredChatAgent(Agent):
 33 |     """Structured Chat Agent."""
 34 | 
 35 |     output_parser: AgentOutputParser = Field(
 36 |         default_factory=StructuredChatOutputParserWithRetries
 37 |     )
 38 |     """Output parser for the agent."""
 39 | 
 40 |     @property
 41 |     def observation_prefix(self) -> str:
 42 |         """Prefix to append the observation with."""
 43 |         return "Observation: "
 44 | 
 45 |     @property
 46 |     def llm_prefix(self) -> str:
 47 |         """Prefix to append the llm call with."""
 48 |         return "Thought:"
 49 | 
 50 |     def _construct_scratchpad(
 51 |         self, intermediate_steps: List[Tuple[AgentAction, str]]
 52 |     ) -> str:
 53 |         agent_scratchpad = super()._construct_scratchpad(intermediate_steps)
 54 |         if not isinstance(agent_scratchpad, str):
 55 |             raise ValueError("agent_scratchpad should be of type string.")
 56 |         if agent_scratchpad:
 57 |             return (
 58 |                 f"This was your previous work "
 59 |                 f"(but I haven't seen any of it! I only see what "
 60 |                 f"you return as final answer):\n{agent_scratchpad}"
 61 |             )
 62 |         else:
 63 |             return agent_scratchpad
 64 | 
 65 |     @classmethod
 66 |     def _validate_tools(cls, tools: Sequence[BaseTool]) -> None:
 67 |         pass
 68 | 
 69 |     @classmethod
 70 |     def _get_default_output_parser(
 71 |         cls, llm: Optional[BaseLanguageModel] = None, **kwargs: Any
 72 |     ) -> AgentOutputParser:
 73 |         return StructuredChatOutputParserWithRetries.from_llm(llm=llm)
 74 | 
 75 |     @property
 76 |     def _stop(self) -> List[str]:
 77 |         return ["Observation:"]
 78 | 
 79 |     @classmethod
 80 |     def create_prompt(
 81 |         cls,
 82 |         tools: Sequence[BaseTool],
 83 |         prefix: str = PREFIX,
 84 |         suffix: str = SUFFIX,
 85 |         human_message_template: str = HUMAN_MESSAGE_TEMPLATE,
 86 |         format_instructions: str = FORMAT_INSTRUCTIONS,
 87 |         input_variables: Optional[List[str]] = None,
 88 |         memory_prompts: Optional[List[BasePromptTemplate]] = None,
 89 |     ) -> BasePromptTemplate:
 90 |         """Create prompt template."""
 91 |         tool_strings = []
 92 |         for tool in tools:
 93 |             args_schema = re.sub("}", "}}}}", re.sub("{", "{{{{", str(tool.args)))
 94 |             tool_strings.append(f"{tool.name}: {tool.description}, args: {args_schema}")
 95 |         formatted_tools = "\n".join(tool_strings)
 96 |         tool_names = ", ".join([tool.name for tool in tools])
 97 |         format_instructions = format_instructions.format(tool_names=tool_names)
 98 |         template = "\n\n".join([prefix, formatted_tools, format_instructions, suffix])
 99 |         if input_variables is None:
100 |             input_variables = ["input", "agent_scratchpad"]
101 |         _memory_prompts = memory_prompts or []
102 |         messages = [
103 |             SystemMessagePromptTemplate.from_template(template),
104 |             *_memory_prompts,
105 |             HumanMessagePromptTemplate.from_template(human_message_template),
106 |         ]
107 |         return ChatPromptTemplate(input_variables=input_variables, messages=messages)
108 | 
109 |     @classmethod
110 |     def create_prompt_for_ernie(
111 |         cls,
112 |         tools: Sequence[BaseTool],
113 |         prefix: str = PREFIX,
114 |         suffix: str = SUFFIX,
115 |         human_message_template: str = HUMAN_MESSAGE_TEMPLATE,
116 |         format_instructions: str = FORMAT_INSTRUCTIONS,
117 |         input_variables: Optional[List[str]] = None,
118 |         memory_prompts: Optional[List[BasePromptTemplate]] = None,
119 |     ) -> BasePromptTemplate:
120 |         """Create prompt template."""
121 |         tool_strings = []
122 |         for tool in tools:
123 |             args_schema = re.sub("}", "}}}}", re.sub("{", "{{{{", str(tool.args)))
124 |             tool_strings.append(f"{tool.name}: {tool.description}, args: {args_schema}")
125 |         formatted_tools = "\n".join(tool_strings)
126 |         tool_names = ", ".join([tool.name for tool in tools])
127 |         format_instructions = format_instructions.format(tool_names=tool_names)
128 |         template = "\n\n".join([prefix, formatted_tools, format_instructions, suffix])
129 |         if input_variables is None:
130 |             input_variables = ["input", "agent_scratchpad"]
131 |         _memory_prompts = memory_prompts or []
132 |         messages = [
133 |             HumanMessagePromptTemplate.from_template(template),
134 |             AIMessagePromptTemplate.from_template("YES, I Know."),
135 |             *_memory_prompts,
136 |             HumanMessagePromptTemplate.from_template(human_message_template),
137 |         ]
138 |         return ChatPromptTemplate(input_variables=input_variables, messages=messages)
139 | 
140 |     @classmethod
141 |     def from_llm_and_tools(
142 |         cls,
143 |         llm: BaseLanguageModel,
144 |         tools: Sequence[BaseTool],
145 |         callback_manager: Optional[BaseCallbackManager] = None,
146 |         output_parser: Optional[AgentOutputParser] = None,
147 |         prefix: str = PREFIX,
148 |         suffix: str = SUFFIX,
149 |         human_message_template: str = HUMAN_MESSAGE_TEMPLATE,
150 |         format_instructions: str = FORMAT_INSTRUCTIONS,
151 |         input_variables: Optional[List[str]] = None,
152 |         memory_prompts: Optional[List[BasePromptTemplate]] = None,
153 |         **kwargs: Any,
154 |     ) -> Agent:
155 |         """Construct an agent from an LLM and tools."""
156 |         cls._validate_tools(tools)
157 |         if "ERNIE" in llm.model_name:
158 |             prompt = cls.create_prompt_for_ernie(
159 |                 tools,
160 |                 prefix=prefix,
161 |                 suffix=suffix,
162 |                 human_message_template=human_message_template,
163 |                 format_instructions=format_instructions,
164 |                 input_variables=input_variables,
165 |                 memory_prompts=memory_prompts,
166 |             )
167 |         else:
168 |             prompt = cls.create_prompt(
169 |                 tools,
170 |                 prefix=prefix,
171 |                 suffix=suffix,
172 |                 human_message_template=human_message_template,
173 |                 format_instructions=format_instructions,
174 |                 input_variables=input_variables,
175 |                 memory_prompts=memory_prompts,
176 |             )
177 |         llm_chain = LLMChain(
178 |             llm=llm,
179 |             prompt=prompt,
180 |             callback_manager=callback_manager,
181 |         )
182 |         tool_names = [tool.name for tool in tools]
183 |         _output_parser = output_parser or cls._get_default_output_parser(llm=llm)
184 |         return cls(
185 |             llm_chain=llm_chain,
186 |             allowed_tools=tool_names,
187 |             output_parser=_output_parser,
188 |             **kwargs,
189 |         )
190 | 
191 |     @property
192 |     def _agent_type(self) -> str:
193 |         raise ValueError
194 | 


--------------------------------------------------------------------------------
/code/test_chat_coversation_agent.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | 
 3 | """
 4 | @discribe: example for chat conversation agent.
 5 | @author: wangwei1237@gmail.com
 6 | """
 7 | 
 8 | from langchain.chat_models import ErnieBotChat
 9 | from langchain.chains import LLMMathChain
10 | from langchain.agents import Tool
11 | from langchain.memory import ConversationBufferMemory
12 | from langchain.agents import initialize_agent
13 | 
# Conversation memory stored under the "chat_history" key.
memory = ConversationBufferMemory(memory_key="chat_history")

llm = ErnieBotChat(model_name="ERNIE-Bot-4")

# The only tool: a calculator backed by an LLM math chain.
llm_math = LLMMathChain(llm=llm)
math_tool = Tool(
    func=llm_math.run,
    name='Calculator',
    description='Useful for when you need to answer questions about math.'
)

# Tools are always handed to the agent as a list, even a single one.
tools = [math_tool]

chat_conversation_agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent="chat-conversational-react-description",
    memory=memory,
    max_iterations=3,
    verbose=True
)

# Ask two questions in sequence within the same conversation.
for question in ("4.1*7.9=?", "2 * 2"):
    chat_conversation_agent(question)


--------------------------------------------------------------------------------
/code/test_chat_coversation_agent_1.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | 
 3 | """
 4 | @discribe: example for chat conversation agent.
 5 | @author: wangwei1237@gmail.com
 6 | """
 7 | 
 8 | from langchain.chat_models import ErnieBotChat
 9 | from langchain.chains import LLMMathChain
10 | from langchain.agents import Tool
11 | from langchain.memory import ConversationBufferMemory
12 | from langchain.agents import initialize_agent
13 | 
# Conversation memory stored under "chat_history", returned as message objects.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True) #<1>

llm = ErnieBotChat(model_name="ERNIE-Bot-4")

# The only tool: a calculator backed by an LLM math chain.
llm_math = LLMMathChain(llm=llm)
math_tool = Tool(
    func=llm_math.run,
    name='Calculator',
    description='Useful for when you need to answer questions about math.'
)

# Tools are always handed to the agent as a list, even a single one.
tools = [math_tool]

chat_conversation_agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent="chat-conversational-react-description",
    memory=memory,
    max_iterations=3,
    verbose=True
)

# Ask two questions in sequence within the same conversation.
for question in ("4.1*7.9=?", "2 * 2"):
    chat_conversation_agent(question)
40 | 


--------------------------------------------------------------------------------
/code/test_conversation_agent.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | 
 3 | """
 4 | @discribe: example for conversation agent.
 5 | @author: wangwei1237@gmail.com
 6 | """
 7 | 
 8 | from langchain.chat_models import ErnieBotChat
 9 | from langchain.prompts import ChatPromptTemplate
10 | from langchain.chains import LLMChain
11 | from langchain.chains import LLMMathChain
12 | from langchain.agents import Tool
13 | from langchain.memory import ConversationBufferMemory        # <1>
14 | from langchain.agents import initialize_agent
15 | 
# Conversation memory stored under the "chat_history" key.
memory = ConversationBufferMemory(memory_key="chat_history") # <2>

llm = ErnieBotChat()

# General-purpose chain: a fixed opening exchange followed by the user's input.
template = ChatPromptTemplate.from_messages([
    ("user", "你是一个能力非凡的人工智能机器人。"),
    ("assistant", "你好~"),
    ("user", "{user_input}"),
])
llm_chain = LLMChain(prompt=template, llm=llm)

# Math chain used by the calculator tool.
llm_math = LLMMathChain(llm=llm)

# The agent receives its tools as a list: calculator first, general LLM second.
tools = [
    Tool(
        name='Calculator',
        func=llm_math.run,
        description='Useful for when you need to answer questions about math.'
    ),
    Tool(
        name='Language Model',
        func=llm_chain.run,
        description='Use this tool for general purpose queries.'
    ),
]

conversation_agent = initialize_agent(
    agent="conversational-react-description",     # <3>
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    memory=memory                                 # <4>
)

# The second question refers back to the first ("the same year").
for question in (
    "1768年，中国有什么重大事件发生？",
    "同年，其他国家有什么重大事件发生？",
):
    res = conversation_agent(question)
    print(res)


--------------------------------------------------------------------------------
/code/test_docstore_agent.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | 
 3 | """
 4 | @discribe: example for docstore agent.
 5 | @author: wangwei1237@gmail.com
 6 | """
 7 | 
 8 | from langchain.chat_models import ErnieBotChat
 9 | from langchain.agents import Tool
10 | from langchain import Wikipedia
11 | from langchain.agents.react.base import DocstoreExplorer
12 | from langchain.agents import initialize_agent
13 | 
# Wikipedia-backed document store explorer providing search and lookup.
docstore = DocstoreExplorer(Wikipedia())

# Initialize the docstore search tool.
search_tool = Tool(
    func=docstore.search,
    name="Search",
    description='search wikipedia'
)

# Initialize the docstore lookup tool.
lookup_tool = Tool(
    func=docstore.lookup,
    name="Lookup",
    description='lookup a term in wikipedia'
)

# Tools are always handed to the agent as a list.
tools = [search_tool, lookup_tool]  # <1>


llm = ErnieBotChat()
docstore_agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent="react-docstore",
    max_iterations=3,
    verbose=True,
)

docstore_agent("What were Archimedes' last words?")


--------------------------------------------------------------------------------
/code/test_embedding_query.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | 
 3 | """
 4 | @discribe: example for milvus embedding query
 5 | @author: wangwei1237@gmail.com
 6 | """
 7 | 
 8 | from langchain.embeddings import QianfanEmbeddingsEndpoint
 9 | from langchain.vectorstores import Milvus
10 | 
# Open the Milvus store at 127.0.0.1:8081 without inserting anything
# (empty document list), using Qianfan embeddings.
milvus_connection = {"host": "127.0.0.1", "port": "8081"}
vector_db = Milvus.from_documents(
    [],
    QianfanEmbeddingsEndpoint(),
    connection_args=milvus_connection,
)

# Run a similarity search for the question and print the matching documents.
query = "什么是 RD曲线？"
docs = vector_db.similarity_search(query)
print(docs)
20 | 
21 | 


--------------------------------------------------------------------------------
/code/test_embedding_visualization.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | 
 3 | """
 4 | @discribe: demo for the embedding visualization.
 5 | @author: wangwei1237@gmail.com
 6 | """
 7 | 
 8 | import matplotlib.pyplot as plt
 9 | import numpy as np
10 | from pymilvus import connections
11 | from pymilvus import Collection
12 | from sklearn.manifold import TSNE
13 | 
# Connect to the Milvus server.
connections.connect(
  host='127.0.0.1',
  port='8081'
)  # <1>

collection = Collection("LangChainCollection") # <2>

# Fetch up to 500 rows together with their stored vectors and metadata.
res = collection.query(
  expr = "pk >= 0",
  offset = 0,
  limit = 500,
  output_fields = ["vector", "text", "source", "title"],
) # <3>

vector_list = [row["vector"] for row in res] # <4>

matrix = np.array(vector_list) # <5>

# Project the embeddings to two dimensions with t-SNE.
tsne = TSNE(
    n_components=2,
    perplexity=15,
    random_state=42,
    init='random',
    learning_rate=200,
)
vis_dims = tsne.fit_transform(matrix) # <6>

plt.scatter(vis_dims[:, 0], vis_dims[:, 1]) # <7>
plt.title("embedding visualized using t-SNE")
plt.show()
38 | 


--------------------------------------------------------------------------------
/code/test_ernie.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | 
 3 | """
 4 | @discribe: example for Ernie's calculate ability. 
 5 | @author: wangwei1237@gmail.com
 6 | """
 7 | 
 8 | from langchain.chat_models import ErnieBotChat
 9 | from langchain.prompts import ChatPromptTemplate
10 | from langchain.chains import LLMChain
11 | 
# Fixed opening exchange followed by the user's question.
template = ChatPromptTemplate.from_messages([
    ("user", "你是一个能力非凡的人工智能机器人"),
    ("assistant", "你好~"),
    ("user", "{user_input}"),
])

# Ask the model to do the arithmetic itself and print its answer.
chat = ErnieBotChat()
chain = LLMChain(prompt=template, llm=chat)
res = chain.run(user_input="4.1*7.9=?")
print(res)


--------------------------------------------------------------------------------
/code/test_ernie_fc.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python3
 2 | """
 3 | @discribe: demo for the Ernie-Bot-4 Function Calling.
 4 | @author: wangwei1237@gmail.com
 5 | """
 6 | 
 7 | import json
 8 | import uuid
 9 | 
10 | from langchain.chains import LLMChain
11 | from langchain.chains.ernie_functions import (
12 |     create_ernie_fn_chain,
13 | )
14 | from langchain_community.chat_models import QianfanChatEndpoint
15 | from langchain_core.prompts.chat import (
16 |     ChatPromptTemplate,
17 | )
18 | 
19 | from utils.call_function import call_function
20 | 
# Random id attached as metadata to every chain invocation in this script.
run_id = f"{uuid.uuid4()}"
print(run_id)
23 | 
24 | 
def get_current_news(location: str) -> str:
    """Get the current news based on the location.

    Args:
        location (str): The location to query.

    Returns:
        str: Current news based on the location.
    """
    # NOTE(review): this function is passed to create_ernie_fn_chain, which
    # presumably derives the function description from the docstring above,
    # so keep it clean; confirm against the LangChain implementation.

    # Canned demo payload; no real news source is queried.
    news_info = {
        "location": location,
        "news": [
            "I have a Book.",
            "It's a nice day, today."
        ]
    }

    return json.dumps(news_info)
44 | 
def get_current_weather(location: str, unit: str="celsius") -> str:
    """Get the current weather in a given location

    Args:
        location (str): location of the weather.
        unit (str): unit of the temperature.

    Returns:
        str: weather in the given location.
    """
    # NOTE(review): this function is passed to create_ernie_fn_chain, which
    # presumably derives the function description from the docstring above,
    # so keep it clean; confirm against the LangChain implementation.

    # Canned demo payload; no real weather service is queried.
    weather_info = {
        "location": location,
        "temperature": "27",
        "unit": unit,
        "forecast": ["sunny", "windy"],
    }
    return json.dumps(weather_info)
63 | 
64 | 
llm = QianfanChatEndpoint(model="ERNIE-Bot-4")

# The same question is asked in both rounds.
question = "北京今天的新闻是什么？"

# Round 1: let the model decide which function to call for the question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{query}"),
    ]
)
chain = create_ernie_fn_chain(
    [get_current_weather, get_current_news],
    llm,
    prompt,
    verbose=True,
)
res = chain.invoke(
    {"query": question},
    config={"metadata": {"run_id": run_id}},
)
print(res)
res = res["function"]

if res:
    # Execute the selected function locally.
    res_cf = call_function([get_current_news, get_current_weather], res)
    print(res_cf)

    # Round 2: feed the function result back and ask the question again.
    prompt_2 = ChatPromptTemplate.from_messages(
        [
            ("human", "从 {function} 中，我们得到如下信息：{function_res}，那么 {query}"),
        ]
    )
    chain_2 = LLMChain(llm=llm, prompt=prompt_2, verbose=True)
    res_2 = chain_2.invoke(
        {"function": res["name"], "function_res": res_cf, "query": question},
        config={"metadata": {"run_id": run_id}},
    )
    print(res_2)
88 | 
89 | 


--------------------------------------------------------------------------------
/code/test_langchain_rag.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | 
 3 | """
 4 | @discribe: example for RAG 
 5 | @author: wangwei1237@gmail.com
 6 | """
 7 | 
 8 | from langchain.chains.qa_with_sources import load_qa_with_sources_chain
 9 | from langchain.chat_models import ErnieBotChat
10 | from langchain.embeddings import QianfanEmbeddingsEndpoint
11 | from langchain.vectorstores import Milvus
12 | 
llm = ErnieBotChat()
# "refine" QA chain that also returns its intermediate steps.
chain = load_qa_with_sources_chain(
    llm=llm,
    chain_type="refine",
    return_intermediate_steps=True,
)

query = "什么是度知了?"

# Open the Milvus store at 127.0.0.1:8081 without inserting anything
# (empty document list), using Qianfan embeddings.
milvus_connection = {"host": "127.0.0.1", "port": "8081"}
vector_db = Milvus.from_documents(
    [],
    QianfanEmbeddingsEndpoint(),
    connection_args=milvus_connection,
)

# Retrieve the documents most similar to the question.
docs = vector_db.similarity_search(query)
print(len(docs))

# Answer the question from the retrieved documents.
chain_inputs = {"input_documents": docs, "question": query}
res = chain(chain_inputs, return_only_outputs=True)
print(res)
28 | 
29 | 


--------------------------------------------------------------------------------
/code/test_milvus_embedding.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | 
 3 | """
 4 | @discribe: example for milvus embedding 
 5 | @author: wangwei1237@gmail.com
 6 | """
 7 | 
 8 | from langchain.document_loaders import WebBaseLoader
 9 | from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
10 | from langchain.text_splitter import RecursiveCharacterTextSplitter
11 | from langchain.document_loaders import WebBaseLoader
12 | from langchain.embeddings import QianfanEmbeddingsEndpoint
13 | from langchain.text_splitter import RecursiveCharacterTextSplitter
14 | from langchain.vectorstores import Milvus
15 | import time
16 | 
URL_ROOT = "https://wangwei1237.github.io/2023/02/13/duzhiliao/"

# Crawl from the root page (max_depth=2) and collect each document's source URL.
loader = RecursiveUrlLoader(url=URL_ROOT, max_depth=2)
docs = loader.load()
URLS = [doc.metadata["source"] for doc in docs]

print("URLS length: ", len(URLS))

# Split pages into 200-character chunks with a 20-character overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20,
    length_function=len,
    add_start_index=True,
)

# Load, split, embed and store every page, pausing between pages.
for url in URLS:
    print('-------------', url, '----------------')
    doc = WebBaseLoader([url]).load()
    texts = text_splitter.split_documents(doc)
    vector_db = Milvus.from_documents(
        texts,
        QianfanEmbeddingsEndpoint(),
        connection_args={"host": "127.0.0.1", "port": "8081"},
    )
    print("    . insert ", len(texts), " texts embeddings successful")
    time.sleep(5)


--------------------------------------------------------------------------------
/code/test_qianfanendpoint.py:
--------------------------------------------------------------------------------
 1 | """
 2 | @discribe: demo for the QianfanChatEndpoint.
 3 | @author: wangwei1237@gmail.com
 4 | """
 5 | 
 6 | from langchain_community.chat_models import QianfanChatEndpoint
 7 | from langchain.chains import LLMChain
 8 | from langchain_core.prompts.chat import ChatPromptTemplate
 9 | 
# System message that gives the bot its persona and name.
system = "你是一个能力很强的机器人，你的名字叫 小叮当。"
prompt_messages = [
    ('system', system),
    ("human", "{query}"),
]
prompt = ChatPromptTemplate.from_messages(prompt_messages)

# Ask the model who it is and print the chain output.
llm = QianfanChatEndpoint(model="ERNIE-Bot-4")
chain = LLMChain(prompt=prompt, llm=llm, verbose=True)
res = chain.invoke(input={"query": "你是谁？"})
print(res)


--------------------------------------------------------------------------------
/code/test_retrievalQA.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | 
 3 | """
 4 | @discribe: example for RetrivalQA.
 5 | @author: wangwei1237@gmail.com
 6 | """
 7 | 
 8 | from langchain.chat_models import ErnieBotChat
 9 | from langchain.embeddings import QianfanEmbeddingsEndpoint
10 | from langchain.vectorstores import Milvus
11 | from langchain.chains import RetrievalQA
12 | from langchain.vectorstores.base import VectorStoreRetriever
13 | from retrieval_prompt import PROMPT_SELECTOR
14 | 
# Retriever over the Milvus store at 127.0.0.1:8081, using Qianfan embeddings.
milvus_store = Milvus(
    embedding_function=QianfanEmbeddingsEndpoint(),
    connection_args={"host": "127.0.0.1", "port": "8081"},
)
retriever = VectorStoreRetriever(vectorstore=milvus_store) # <1>

llm = ErnieBotChat()
# Pick the prompt variant that matches this LLM.
prompt = PROMPT_SELECTOR.get_prompt(llm)  # <2>
retrievalQA = RetrievalQA.from_llm(llm=llm, prompt=prompt, retriever=retriever) # <3>

query = "什么是度知了?"

res = retrievalQA.run(query) # <4>
print(res)
26 | 


--------------------------------------------------------------------------------
/code/test_sk_acrosticpoetry.py:
--------------------------------------------------------------------------------
 1 | """
 2 | @discribe: Semantic Kernel Function.
 3 | @author: wangwei1237@gmail.com
 4 | """
 5 | 
 6 | import semantic_kernel as sk
 7 | from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion
 8 | import asyncio
 9 | 
# Build the prompt from the topic typed by the user.
topic = input("Your Request: ")
prompt = f"写一首包含 {topic} 的藏头诗"  #<1>

kernel = sk.Kernel()  #<2>

# Read the OpenAI credentials from the .env file and register the model.
api_key, org_id = sk.openai_settings_from_dot_env()
chat_service = OpenAIChatCompletion(ai_model_id="gpt-3.5-turbo", api_key=api_key)
kernel.add_text_completion_service("chat-gpt", chat_service)  #<3>

semantic_function = kernel.create_semantic_function(prompt) #<4>
# The kernel API is asynchronous; drive it to completion and show the output.
result = asyncio.run(kernel.run_async(semantic_function))
print(result)
23 | 
24 | 


--------------------------------------------------------------------------------
/code/test_sk_planner.py:
--------------------------------------------------------------------------------
 1 | """
 2 | @discribe: Semantic Kernel Planner.
 3 | @author: wangwei1237@gmail.com
 4 | """
 5 | 
 6 | import asyncio
 7 | import semantic_kernel as sk
 8 | from plugins.Math import MathPlugin
 9 | from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion
10 | from semantic_kernel.planning.sequential_planner import SequentialPlanner
11 | 
12 | kernel = sk.Kernel()
13 | 
14 | api_key, org_id = sk.openai_settings_from_dot_env()
15 | kernel.add_chat_service("chat-gpt", 
16 |                         OpenAIChatCompletion(ai_model_id="gpt-3.5-turbo", 
17 |                                              api_key=api_key))
18 | 
19 | math_plugins = kernel.import_skill(MathPlugin(), "MathPlugin")  #<1>
20 | planner = SequentialPlanner(kernel) #<2>
21 | 
22 | ask = "If my investment of 2130.23 dollars increased by 23%, how much would I have after I spent $5 on a latte?"
23 | plan = asyncio.run(planner.create_plan_async(ask)) #<3>
24 | result = plan.invoke() #<4>


--------------------------------------------------------------------------------
/code/test_wx_qianfan.py:
--------------------------------------------------------------------------------
 1 | """
 2 | @describe: demo for the ErnieBotChat.
 3 | @author: wangwei1237@gmail.com
 4 | 
 5 | Sends a single question through an LLMChain backed by ErnieBotChat and prints
 6 | the reply.
 7 | """
 8 | 
 9 | from langchain.chains import LLMChain
10 | from langchain.chat_models import ErnieBotChat
11 | from langchain.prompts import ChatPromptTemplate
12 | 
13 | # The persona text is handed to the model through ErnieBotChat's `system`
14 | # argument; the prompt template below carries only the human message.
15 | system = "你是一个能力很强的机器人，你的名字叫 小叮当。"
16 | prompt = ChatPromptTemplate.from_messages(
17 |     [
18 |         ("human", "{query}"),
19 |     ]
20 | )
21 | llm = ErnieBotChat(model_name="ERNIE-Bot-4", system=system)
22 | # verbose=True is passed to the chain; the question fills the {query} slot.
23 | chain = LLMChain(llm=llm, prompt=prompt, verbose=True)
24 | res = chain.run(query="你是谁？")
25 | print(res)


--------------------------------------------------------------------------------
/code/test_zero_shot_agent copy.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | 
 3 | """
 4 | @describe: example for zero shot agent.
 5 | @author: wangwei1237@gmail.com
 6 | 
 7 | Builds a "zero-shot-react-description" agent on top of ErnieBotChat with two
 8 | tools (an LLMMathChain calculator and a general-purpose LLMChain), asks it a
 9 | multiplication question and prints the response.
10 | 
11 | NOTE(review): the code in this file is identical to
12 | code/test_zero_shot_agent.py -- confirm whether this copy is still needed.
13 | """
14 | 
15 | from langchain.chat_models import ErnieBotChat
16 | from langchain.prompts import ChatPromptTemplate
17 | from langchain.chains import LLMChain
18 | from langchain.chains import LLMMathChain
19 | from langchain.agents import Tool
20 | from langchain.agents import initialize_agent
21 | 
22 | # One ErnieBotChat instance is shared by the math chain, the chat chain and
23 | # the agent itself.
24 | llm = ErnieBotChat()
25 | llm_math = LLMMathChain(llm=llm)
26 | 
27 | # The chat prompt opens with a fixed user/assistant exchange; the final user
28 | # message carries the {user_input} placeholder.
29 | template = ChatPromptTemplate.from_messages([
30 |     ("user", "你是一个能力非凡的人工智能机器人。"),
31 |     ("assistant", "你好~"),
32 |     ("user", "{user_input}"),
33 | ])
34 | llm_chain = LLMChain(llm=llm, prompt=template)
35 | 
36 | # initialize the math tool
37 | # (its callable is the run method of the LLMMathChain built above)
38 | math_tool = Tool(
39 |     name='Calculator',
40 |     func=llm_math.run,
41 |     description='Useful for when you need to answer questions about math.'
42 | )
43 | 
44 | # initialize the general LLM tool
45 | # (its callable is the run method of the plain chat chain built above)
46 | llm_tool = Tool(
47 |     name='Language Model',
48 |     func=llm_chain.run,
49 |     description='use this tool for general purpose queries.'
50 | )
51 | 
52 | # when giving tools to LLM, we must pass as list of tools
53 | tools = [math_tool, llm_tool]
54 | 
55 | # The agent type is selected by name; verbose=True and max_iterations=3 are
56 | # passed straight to initialize_agent.
57 | zero_shot_agent = initialize_agent(
58 |     agent="zero-shot-react-description",
59 |     tools=tools,
60 |     llm=llm,
61 |     verbose=True,
62 |     max_iterations=3
63 | )
64 | 
65 | # The agent object is called directly with the question string.
66 | res = zero_shot_agent("what's 4.1*7.9=?")
67 | print(res)


--------------------------------------------------------------------------------
/code/test_zero_shot_agent.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | 
 3 | """
 4 | @describe: example for zero shot agent.
 5 | @author: wangwei1237@gmail.com
 6 | 
 7 | Builds a "zero-shot-react-description" agent on top of ErnieBotChat with two
 8 | tools (an LLMMathChain calculator and a general-purpose LLMChain), asks it a
 9 | multiplication question and prints the response.
10 | """
11 | 
12 | from langchain.chat_models import ErnieBotChat
13 | from langchain.prompts import ChatPromptTemplate
14 | from langchain.chains import LLMChain
15 | from langchain.chains import LLMMathChain
16 | from langchain.agents import Tool
17 | from langchain.agents import initialize_agent
18 | 
19 | # One ErnieBotChat instance is shared by the math chain, the chat chain and
20 | # the agent itself.
21 | llm = ErnieBotChat()
22 | llm_math = LLMMathChain(llm=llm)
23 | 
24 | # The chat prompt opens with a fixed user/assistant exchange; the final user
25 | # message carries the {user_input} placeholder.
26 | template = ChatPromptTemplate.from_messages([
27 |     ("user", "你是一个能力非凡的人工智能机器人。"),
28 |     ("assistant", "你好~"),
29 |     ("user", "{user_input}"),
30 | ])
31 | llm_chain = LLMChain(llm=llm, prompt=template)
32 | 
33 | # initialize the math tool
34 | # (its callable is the run method of the LLMMathChain built above)
35 | math_tool = Tool(
36 |     name='Calculator',
37 |     func=llm_math.run,
38 |     description='Useful for when you need to answer questions about math.'
39 | )
40 | 
41 | # initialize the general LLM tool
42 | # (its callable is the run method of the plain chat chain built above)
43 | llm_tool = Tool(
44 |     name='Language Model',
45 |     func=llm_chain.run,
46 |     description='use this tool for general purpose queries.'
47 | )
48 | 
49 | # when giving tools to LLM, we must pass as list of tools
50 | tools = [math_tool, llm_tool]
51 | 
52 | # The agent type is selected by name; verbose=True and max_iterations=3 are
53 | # passed straight to initialize_agent.
54 | zero_shot_agent = initialize_agent(
55 |     agent="zero-shot-react-description",
56 |     tools=tools,
57 |     llm=llm,
58 |     verbose=True,
59 |     max_iterations=3
60 | )
61 | 
62 | # The agent object is called directly with the question string.
63 | res = zero_shot_agent("what's 4.1*7.9=?")
64 | print(res)


--------------------------------------------------------------------------------
/cover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/cover.png


--------------------------------------------------------------------------------
/diabetologia.csl:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <style xmlns="http://purl.org/net/xbiblio/csl" class="in-text" default-locale="en-US" version="1.0" demote-non-dropping-particle="sort-only">
  3 |   <info>
  4 |     <title>Diabetologia</title>
  5 |     <id>http://www.zotero.org/styles/diabetologia</id>
  6 |     <link href="http://www.zotero.org/styles/diabetologia" rel="self"/>
  7 |     <link href="http://www.zotero.org/styles/springer-basic-brackets" rel="template"/>
  8 |     <link href="http://diabetologia-journal.org/for-authors/instructions-to-authors/" rel="documentation"/>
  9 |     <author>
 10 |       <name>Patrick O'Brien</name>
 11 |       <uri>https://twitter.com/patobrien333/</uri>
 12 |     </author>
 13 |     <category citation-format="numeric"/>
 14 |     <category field="biology"/>
 15 |     <category field="medicine"/>
 16 |     <issn>0012-186X</issn>
 17 |     <eissn>1432-0428</eissn>
 18 |     <updated>2019-02-06T16:38:10+00:00</updated>
 19 |     <rights license="http://creativecommons.org/licenses/by-sa/3.0/">This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License</rights>
 20 |   </info>
 21 |   <locale>
 22 |     <terms>
 23 |       <term name="et-al">et al</term>
 24 |     </terms>
 25 |   </locale>
 26 |   <macro name="author">
 27 |     <names variable="author">
 28 |       <name sort-separator=" " initialize-with="" name-as-sort-order="all" delimiter=", " delimiter-precedes-last="always"/>
 29 |       <substitute>
 30 |         <names variable="editor"/>
 31 |         <names variable="translator"/>
 32 |       </substitute>
 33 |     </names>
 34 |   </macro>
 35 |   <macro name="edition">
 36 |     <choose>
 37 |       <if is-numeric="edition">
 38 |         <group delimiter=" ">
 39 |           <number variable="edition" form="ordinal"/>
 40 |           <text term="edition" form="short" strip-periods="true"/>
 41 |         </group>
 42 |       </if>
 43 |       <else>
 44 |         <text variable="edition"/>
 45 |       </else>
 46 |     </choose>
 47 |   </macro>
 48 |   <macro name="title">
 49 |     <choose>
 50 |       <if type="book">
 51 |         <group delimiter=", ">
 52 |           <text variable="title"/>
 53 |           <text macro="edition"/>
 54 |         </group>
 55 |       </if>
 56 |       <else>
 57 |         <text variable="title"/>
 58 |       </else>
 59 |     </choose>
 60 |   </macro>
 61 |   <macro name="year-parenth">
 62 |     <date prefix="(" suffix=")" variable="issued">
 63 |       <date-part name="year"/>
 64 |     </date>
 65 |   </macro>
 66 |   <citation collapse="citation-number">
 67 |     <sort>
 68 |       <key variable="citation-number"/>
 69 |     </sort>
 70 |     <layout prefix="[" suffix="]" delimiter=", ">
 71 |       <text variable="citation-number"/>
 72 |     </layout>
 73 |   </citation>
 74 |   <bibliography et-al-min="7" et-al-use-first="3" second-field-align="flush">
 75 |     <layout>
 76 |       <text variable="citation-number" suffix=". "/>
 77 |       <group delimiter=" ">
 78 |         <text macro="author"/>
 79 |         <text macro="year-parenth"/>
 80 |         <text macro="title"/>
 81 |       </group>
 82 |       <choose>
 83 |         <if type="chapter paper-conference" match="any">
 84 |           <group delimiter=" " prefix=". ">
 85 |             <text term="in" text-case="capitalize-first" suffix=":"/>
 86 |             <names variable="editor">
 87 |               <name sort-separator=" " initialize-with="" name-as-sort-order="all" delimiter=", " delimiter-precedes-last="always"/>
 88 |               <label form="short" strip-periods="true" prefix=" (" suffix=")"/>
 89 |             </names>
 90 |             <group delimiter=", ">
 91 |               <text variable="container-title"/>
 92 |               <text macro="edition"/>
 93 |             </group>
 94 |           </group>
 95 |           <group prefix=". " delimiter=", ">
 96 |             <text variable="publisher"/>
 97 |             <text variable="publisher-place"/>
 98 |             <group delimiter=" ">
 99 |               <label variable="page" form="short" strip-periods="true"/>
100 |               <text variable="page"/>
101 |             </group>
102 |           </group>
103 |         </if>
104 |         <else-if type="article-journal">
105 |           <choose>
106 |             <if variable="page volume" match="any">
107 |               <group prefix=". " delimiter=". ">
108 |                 <group delimiter=" ">
109 |                   <text variable="container-title" form="short" strip-periods="true"/>
110 |                   <group delimiter=":">
111 |                     <group>
112 |                       <text variable="volume"/>
113 |                       <text variable="issue" prefix="(" suffix=")"/>
114 |                     </group>
115 |                     <text variable="page"/>
116 |                   </group>
117 |                 </group>
118 |                 <text prefix="https://doi.org/" variable="DOI"/>
119 |               </group>
120 |             </if>
121 |             <else>
122 |               <group prefix=". " delimiter=". ">
123 |                 <text variable="container-title" form="short" strip-periods="true"/>
124 |                 <text prefix="https://doi.org/" variable="DOI"/>
125 |               </group>
126 |             </else>
127 |           </choose>
128 |         </else-if>
129 |         <else-if type="bill book graphic legal_case legislation motion_picture report song" match="any">
130 |           <group prefix=". " delimiter=", ">
131 |             <text variable="publisher"/>
132 |             <text variable="publisher-place"/>
133 |           </group>
134 |         </else-if>
135 |         <else-if type="webpage post-weblog" match="any">
136 |           <group prefix=". " delimiter=". ">
137 |             <text prefix="In: " variable="container-title" form="short"/>
138 |             <text variable="URL"/>
139 |             <date variable="accessed">
140 |               <date-part prefix="Accessed " name="day" suffix=" "/>
141 |               <date-part name="month" form="short" suffix=" " strip-periods="true"/>
142 |               <date-part name="year"/>
143 |             </date>
144 |           </group>
145 |         </else-if>
146 |         <else-if type="thesis">
147 |           <group prefix=". " delimiter=", ">
148 |             <text variable="genre" text-case="capitalize-first"/>
149 |             <text variable="publisher"/>
150 |           </group>
151 |         </else-if>
152 |         <else>
153 |           <group prefix=". " delimiter=" ">
154 |             <text variable="container-title" form="short"/>
155 |             <group delimiter=":">
156 |               <text variable="volume"/>
157 |               <text variable="page"/>
158 |             </group>
159 |           </group>
160 |         </else>
161 |       </choose>
162 |     </layout>
163 |   </bibliography>
164 | </style>
165 | 


--------------------------------------------------------------------------------
/embedchain_intro.qmd:
--------------------------------------------------------------------------------
1 | # Embedchain 简介
2 | 


--------------------------------------------------------------------------------
/embedding.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | filters:
 3 |    - include-code-files
 4 | ---
 5 | 
 6 | # Embedding {#sec-embedding_intro}
 7 | 
 8 | 在机器学习和自然语言处理中，embedding 是指将高维度的数据（例如文字、图片、音频）映射到低维度空间的过程。embedding 向量通常是一个由实数构成的向量，它将输入的数据表示成一个连续的数值空间中的点。简单来说，embedding 就是一个N维的实值向量，它几乎可以用来表示任何事情，如文本、音乐、视频等。
 9 | 
10 | 对数据进行 embedding 的目的在于保留数据的内容或者其含义的各个特征。和不相关的数据相比，相似数据的 embedding 的大小和方向更接近，因此可以用于表述文本的相关性。
11 | 
12 | Embedding 的应用场景：
13 | 
14 | * 搜索：根据与查询字符串的相关性对结果进行排序
15 | * 聚类：对数据按相似性分组
16 | * 推荐：推荐具有相关内容的数据项
17 | * 分类：对数据按其最相似的标签进行分类
18 | * 异常检测：识别出相关性很小的异常值
19 | 
20 | Embedding 是一个浮点数类型的向量或列表。可以用向量之间的距离来测量它们的相关性：距离越小，表示相关性越高；距离越大，相关性越低。
21 | 
22 | ## 获取 Embedding
23 | 可以根据 [Embedding-V1 API 文档](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/alj562vvu) 的介绍，来获取基于百度文心大模型的字符串 Embedding。
24 | 
25 | 还可以使用 @lst-langchain_embed_query_wx 的方式来获取相同的基于文心大模型的 Embedding。
26 | 
27 | ```python
28 | embeddings = QianfanEmbeddingsEndpoint()
29 | query_result = embeddings.embed_query("你是谁？")
30 | ```
31 | 
32 | ```bash
33 | [0.02949424833059311, -0.054236963391304016, -0.01735987327992916, 
34 |  0.06794580817222595, -0.00020318820315878838, 0.04264984279870987, 
35 |  -0.0661700889468193, ……
36 |  ……]
37 | ```
38 | 
39 | ## 可视化
40 | Embedding 一般是一种高维数据，为了将这种高维数据可视化，我们可以使用 t-SNE [-@tsne_online] 算法将数据进行降维，然后再做可视化处理。
41 | 
42 | 利用 @lst-langchain_milvus_embedding 对文档进行向量化，然后将向量数据存储于 Milvus 向量数据库中（默认采用的 Collection 为 LangChainCollection）。
43 | 
44 | 可以通过 Milvus 提供的 HTTP API 来查看指定的 Collection 的结构：
45 | 
46 | ```bash
47 | http://{{MILVUS_URI}}/v1/vector/collections/describe?collectionName=LangChainCollection
48 | ```
49 | 
50 | ![Milvus向量数据的结构](./images/lc_milvus_coll_demo.jpg){#fig-lc_milvus_coll_demo}
51 | 
52 | ```{#lst-embedding_visualization .python include="./code/test_embedding_visualization.py" code-line-numbers="true" lst-cap="向量数据可视化"}
53 | ```
54 | 
55 | 1. 初始化 Milvus 链接
56 | 2. 选择 LangChainCollection
57 | 3. 从 LangChainCollection 中检索特定数据
58 | 4. 只提取结果中的 vector 字段，并生成新的列表
59 | 5. 将 python 列表转换成矩阵
60 | 6. 对向量数据进行降维
61 | 7. 对低维数据进行可视化
62 | 
63 | 结果如 @fig-vfe 所示：
64 | 
65 | ![向量数据可视化结果](./images/vfe.png){#fig-vfe}
66 | 


--------------------------------------------------------------------------------
/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/favicon.png


--------------------------------------------------------------------------------
/getuploadurl.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Print the asset upload URL of the GitHub release tagged <version>.
 4 | # When the lookup by tag yields no upload_url, a release is created for that
 5 | # tag and the upload_url from the creation response is used instead.
 6 | #
 7 | # Usage: getuploadurl.sh <version> <token>
 8 | #   version  release tag name
 9 | #   token    GitHub token; sent only with the create-release request
10 | #
11 | # Output: the upload_url on stdout. Exits with status 1 on wrong argument
12 | # count or when no upload_url could be obtained.
13 | 
14 | # NOTE(review): -x traces every command to stderr, including the token
15 | # assignment and the Authorization header below -- confirm the token is
16 | # masked wherever these logs end up.
17 | set -ex
18 | 
19 | if [ $# -ne 2 ]
20 | then
21 |     # GitHub workflow commands use the form "::error::<message>"; without
22 |     # the second "::" the line is printed as plain text, not as an error.
23 |     echo "::error::the parameters error, please check!!!"
24 |     exit 1
25 | fi
26 | 
27 | URL_PREFIX="https://api.github.com/repos/wangwei1237/LLM_in_Action/releases"
28 | 
29 | version=$1
30 | token=$2
31 | 
32 | # Look the release up by tag. The pipeline ends in cut, so a grep miss does
33 | # not trip `set -e`; upload_url is simply left empty.
34 | get_release_url="${URL_PREFIX}/tags/${version}"
35 | upload_url=$(curl -H "Accept: application/vnd.github.v3+json" "${get_release_url}" | grep 'upload_url' | cut -d'"' -f4)
36 | 
37 | create_release_url="${URL_PREFIX}"
38 | 
39 | # No release for this tag yet: create one and read upload_url from the reply.
40 | if [ -z "$upload_url" ]
41 | then
42 |     upload_url=$(curl -X POST -H "Accept: application/vnd.github.v3+json" "${create_release_url}" -H "Authorization: token ${token}" -d "{\"tag_name\":\"${version}\", \"name\":\"Build for ${version}\"}" | grep 'upload_url' | cut -d'"' -f4)
43 | fi
44 | 
45 | if [ -z "$upload_url" ]
46 | then
47 |     echo "::error::create release error, please check!!!"
48 |     exit 1
49 | fi
50 | 
51 | #release_note_url="${URL_PREFIX}/generate-notes"
52 | #curl -X POST -H "Accept: application/vnd.github.v3+json" -H "Authorization: token ${token}" "${release_note_url}" -d "{\"tag_name\":\"${version}\"}"
53 | 
54 | # Quoted so the URL is emitted verbatim (it may contain glob characters
55 | # such as "?").
56 | echo "$upload_url"
57 | 


--------------------------------------------------------------------------------
/glossary.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "术语表"
 3 | ---
 4 | AGI(Artificial General Intelligence)：通用人工智能
 5 | 
 6 | Auto-Encoder LM：自编码语言模型
 7 | 
 8 | Auto-Regressive LM：自回归语言模型
 9 | 
10 | Embedding：向量
11 | 
12 | Emergent Ability：涌现
13 | 
14 | Hallucination：幻觉
15 | 
16 | ICL(In Context Learning)：上下文学习
17 | 
18 | LM(Language Model)：语言模型
19 | 
20 | LLM(Large Language Model)：大语言模型
21 | 
22 | NSFW(Not Safe for Work)：用于提醒内容不适合公开场合浏览
23 | 
24 | Prompt：提示词
25 | 
26 | Prompt Engineering：提示词工程
27 | 
28 | RAG(Retrieval Augmented Generation)：检索增强生成
29 | 
30 | RATG(Retrieval Augmented Text Generation)：检索增强式文本生成
31 | 
32 | SD(Stable Diffusion)：一种基于潜在扩散模型的文本生成图像模型
33 | 
34 | SFT(Supervised Fine Tuning)：有监督微调
35 | 
36 | t-SNE(t-Distributed Stochastic Neighbor Embedding)：t 分布随机近邻嵌入
37 | 


--------------------------------------------------------------------------------
/hallucination.qmd:
--------------------------------------------------------------------------------
 1 | # 幻觉 {#sec-hallucination}
 2 | 
 3 | 如其他技术一样，即便当前 LLM 在各个领域中有着惊人的表现，但是 LLM 也存在着缺陷和局限。而 “幻觉（Hallucination）”就是一种非常常见的缺陷。
 4 | 
 5 | :::{.callout-note title="幻觉"}
 6 | **幻觉**是自然语言生成领域的一个术语，是指模型生成了看似合理但实际上并不存在的内容。这些内容可能包含虚构的信息、存在前后矛盾的逻辑、甚至是毫无意义的内容。
 7 | 
 8 | **幻觉**原本是心理学领域的专有名词，用于描述一种特殊类型的知觉体验——在没有外部刺激的情况下，清醒的个体的虚假感觉。
 9 | 
10 | **幻觉**是一种不真实的、却又非常真实的虚幻感知。模型容易生成流畅但缺乏真实性的内容，这种现象与心理学中的**幻觉**极为相似，因此在 LLM 领域，我们把 LLM 的这种缺陷称之为 **幻觉**。
11 | :::
12 | 
13 | 幻觉会严重影响依赖 LLM 的下游业务的表现，导致这些业务在真实场景中无法满足用户需求。大语言模型生成内容的真实性是生成式模型接下来面临的重要科学问题之一。
14 | 
15 | 幻觉分为两类：
16 | 
17 | * 内在幻觉（Intrinsic Hallucinations）：生成的内容与输入的源信息冲突。
18 | 
19 |     ![内在幻觉的例子](./images/obtuse_angle.jpg){#fig-hal_botuse}
20 | 
21 | * 外在幻觉（Extrinsic Hallucinations）：生成了与源信息无关的内容。外在幻觉可能与事实冲突，也可能不冲突。在有些场景下，事实正确的外在幻觉可能会更好，但是事情往往并非总是如此。
22 | 
23 |     ![外在幻觉的例子](./images/bj_autumn.jpg){#fig-bj_autumn}
24 | 
25 | :::{.callout-important}
26 | 幻觉，大模型的阿克琉斯之踵。
27 | :::
28 | 
29 | 更多关于幻觉的详细内容可以参见：[-@NLPHallucination]，[-@LLMHallucination]。


--------------------------------------------------------------------------------
/images/LLMTree.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/LLMTree.jpeg


--------------------------------------------------------------------------------
/images/RAG_arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/RAG_arch.png


--------------------------------------------------------------------------------
/images/RATG_overview.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/RATG_overview.jpg


--------------------------------------------------------------------------------
/images/agent_concept.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/agent_concept.png


--------------------------------------------------------------------------------
/images/agent_seq.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/agent_seq.png


--------------------------------------------------------------------------------
/images/agent_tokens_demo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/agent_tokens_demo.jpg


--------------------------------------------------------------------------------
/images/autogen_2.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/autogen_2.webp


--------------------------------------------------------------------------------
/images/autogen_agentchat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/autogen_agentchat.png


--------------------------------------------------------------------------------
/images/bd2023.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/bd2023.jpg


--------------------------------------------------------------------------------
/images/bj_autumn.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/bj_autumn.jpg


--------------------------------------------------------------------------------
/images/chain.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/chain.png


--------------------------------------------------------------------------------
/images/code_freq_lc.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/code_freq_lc.jpg


--------------------------------------------------------------------------------
/images/code_freq_sk.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/code_freq_sk.jpg


--------------------------------------------------------------------------------
/images/copilot_stack_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/copilot_stack_1.png


--------------------------------------------------------------------------------
/images/copilot_stack_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/copilot_stack_2.png


--------------------------------------------------------------------------------
/images/diagram-assistant.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/diagram-assistant.webp


--------------------------------------------------------------------------------
/images/diagram-status.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/diagram-status.png


--------------------------------------------------------------------------------
/images/ernie_calc.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/ernie_calc.jpg


--------------------------------------------------------------------------------
/images/fc_ls.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/fc_ls.png


--------------------------------------------------------------------------------
/images/function_calling_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/function_calling_1.png


--------------------------------------------------------------------------------
/images/langchain_commit_counts.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/langchain_commit_counts.jpg


--------------------------------------------------------------------------------
/images/langchain_core_0113_qianfan_error.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/langchain_core_0113_qianfan_error.png


--------------------------------------------------------------------------------
/images/langchain_io.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/langchain_io.png


--------------------------------------------------------------------------------
/images/langchain_io_example.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/langchain_io_example.jpeg


--------------------------------------------------------------------------------
/images/langflow-demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/langflow-demo.gif


--------------------------------------------------------------------------------
/images/langflow-demo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/langflow-demo.jpg


--------------------------------------------------------------------------------
/images/lc_milvus_coll_demo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/lc_milvus_coll_demo.jpg


--------------------------------------------------------------------------------
/images/llm_chatgpt_wb_hs.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/llm_chatgpt_wb_hs.jpg


--------------------------------------------------------------------------------
/images/llm_decision_flow.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/llm_decision_flow.jpg


--------------------------------------------------------------------------------
/images/llm_in_action_ways.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/llm_in_action_ways.png


--------------------------------------------------------------------------------
/images/milvus_cli_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/milvus_cli_2.jpg


--------------------------------------------------------------------------------
/images/milvus_cli_case.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/milvus_cli_case.jpg


--------------------------------------------------------------------------------
/images/obtuse_angle.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/obtuse_angle.jpg


--------------------------------------------------------------------------------
/images/pae_agent_seq.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/pae_agent_seq.png


--------------------------------------------------------------------------------
/images/pe_arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/pe_arch.png


--------------------------------------------------------------------------------
/images/pe_wx_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/pe_wx_1.jpg


--------------------------------------------------------------------------------
/images/pe_wx_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/pe_wx_2.jpg


--------------------------------------------------------------------------------
/images/progandagents.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/progandagents.png


--------------------------------------------------------------------------------
/images/rag_langchain_overview.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/rag_langchain_overview.jpeg


--------------------------------------------------------------------------------
/images/react.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/react.png


--------------------------------------------------------------------------------
/images/sk_application_process.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/sk_application_process.jpg


--------------------------------------------------------------------------------
/images/sk_kernel.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/sk_kernel.png


--------------------------------------------------------------------------------
/images/token_openai_demo_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/token_openai_demo_1.jpg


--------------------------------------------------------------------------------
/images/token_openai_demo_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/token_openai_demo_2.jpg


--------------------------------------------------------------------------------
/images/token_openai_demo_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/token_openai_demo_3.jpg


--------------------------------------------------------------------------------
/images/token_openai_demo_c_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/token_openai_demo_c_1.jpg


--------------------------------------------------------------------------------
/images/token_openai_demo_c_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/token_openai_demo_c_2.jpg


--------------------------------------------------------------------------------
/images/token_openai_demo_c_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/token_openai_demo_c_3.jpg


--------------------------------------------------------------------------------
/images/treand_lm.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/treand_lm.jpg


--------------------------------------------------------------------------------
/images/trend_llm.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/trend_llm.jpg


--------------------------------------------------------------------------------
/images/vector_stores.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/vector_stores.jpeg


--------------------------------------------------------------------------------
/images/vfe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/vfe.png


--------------------------------------------------------------------------------
/images/waizg.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/waizg.jpg


--------------------------------------------------------------------------------
/images/weather_ernie.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/weather_ernie.jpg


--------------------------------------------------------------------------------
/images/weather_gpt.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/weather_gpt.jpg


--------------------------------------------------------------------------------
/images/wenxi_tokenizer.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangwei1237/LLM_in_Action/80e065576d9562a20e2ccf7e2a69cc7b5153d2ba/images/wenxi_tokenizer.jpg


--------------------------------------------------------------------------------
/index.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | description-meta: |
 3 |   这是一本关于 `大语言模型` 实践的书籍，如果您想大概了解一下相关的概念，并以此来指导自己的实践，想了解目前在应用开发中有哪些工具以及这些工具的具体实践用法，那么这本书正是为您而作。
 4 | ---
 5 | 
 6 | ## 欢迎阅读 {.unnumbered}
 7 | 这是一本关于 `大语言模型` **实践**的书籍，而不是一本深入研究 `大语言模型` 的运行原理和底层算法的书籍。
 8 | 
 9 | 如果您是一位想深入学习框架、算法并对其进行优化改进的研究者，本书可能并不适合您。
10 | 
11 | 如果您想大概了解一下相关的概念，并以此来指导自己的实践，想了解目前在应用开发中有哪些工具以及这些工具的具体实践用法，那么这本书正是为您而作。
12 | 
13 | 正如这本书的题目，这本书会更偏向**实践、应用**，但是我们也会在书中介绍大模型相关的概念，这些概念会让您对大模型有一个初步的认识，仅此而已。如果您想深入了解相关概念的底层细节，我们也提供了对应的文献，您可以深入阅读相关的文献。
14 | 
15 | 这是一个飞速发展的时代，技术、工具的发展亦是如此——每天有新的工具产生，也会有部分技术过时——因此我们以在线书籍的方式来构建这本书以保持内容的与时俱进。这也是一本开放的书籍，如果您希望为本书贡献自己的力量，您可以点击导航栏右上角的图标进入本书的代码仓库，提交您的内容。
16 | 
17 | 这本书整体会分为三大部分：
18 | 
19 | * PART 1：基本概念篇，主要介绍大模型相关的基本概念
20 | * PART 2：相关工具篇，主要介绍大模型相关的工具，LangChain, Semantic Kernel, Langflow, AutoGen……
21 | * PART 3：具体实践篇，主要介绍应用大模型的具体案例
22 | 
23 | ## 版权声明
24 | 本书采用“保持署名—非商用”创意共享 4.0 许可证。只要保持署名和非商用，您可以自由地阅读、分享本书。
25 | 
26 | 您可以：
27 | 
28 | * 下载、保存以及打印本书
29 | * 网络链接、转载本书的部分或者全部内容，但是必须在明显处提供读者访问本书发布网站的链接
30 | 
31 | **您不可以：**
32 | 
33 | * 以任何形式出售本书的电子版或者打印版
34 | * 擅自印刷、出版本书
35 | * 以纸媒出版为目的，改写、改编以及摘抄本书的内容
36 | 


--------------------------------------------------------------------------------
/langchain_agent_chat.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | filters:
 3 |    - include-code-files
 4 | code-annotations: below
 5 | ---
 6 | 
 7 | # LangChain Chat Agent
 8 | 
 9 | 在 @sec-lc_react 中，除了 `Structured Chat Agent` 外，我们所介绍的 Agent 都是非 `Chat` 模式的 Agent。这里的 `Chat` 模式类似 @sec-LLMs 中介绍的 LLM 的 `Chat` 模式，其最主要的特点就是：他们以聊天消息列表格式的提示词作为输入。
10 | 
11 | ## 修改 Chat Agent 的提示词逻辑
12 | 因为文心大模型 `Chat` 模式的 message 消息类型和 `OpenAI` 的不同——缺少 `SystemMessage` 类型，因此，如果要让 `Chat Agent` 支持文心，需要按照 @lst-lc_struct_agent_fixed_for_ernine 的思路对其 Prompt 的生成方式进行修改。
13 | 
14 | ```python
15 | @classmethod
16 | def create_prompt_for_ernie(
17 |     ......
18 | ) -> BasePromptTemplate:
19 |     ......
20 |     messages = [
21 |         HumanMessagePromptTemplate.from_template(template),     # <1>
22 |         AIMessagePromptTemplate.from_template("YES, I Know."),  # <2>
23 |         *_memory_prompts,
24 |         HumanMessagePromptTemplate.from_template(human_message_template),
25 |     ]
26 |     return ChatPromptTemplate(input_variables=input_variables, messages=messages)
27 | ```
28 | 
29 | ## ChatConversationAgent
30 | 参照 @lst-map_agent_type_class 和 @lst-la_conversation_demo_n，我们可以构建一个 `chat-conversational-react-description`。
31 | ```{#lst-la_chat_conversation_demo .python include="./code/test_chat_coversation_agent.py" code-line-numbers="true" lst-cap="Chat Conversation Agent"}
32 | ```
33 | 
34 | 但是，执行 @lst-la_chat_conversation_demo 时，却报错了：
35 | 
36 | ```bash
37 | Traceback (most recent call last):
38 |   File "code/test_chat_coversation_agent.py", line 38, in <module>
39 |     chat_conversation_agent("4.1*7.9=?")
40 |   ...
41 | ValueError: variable chat_history should be a list of base messages, got  #<1>
42 | ```
43 | 
44 | 1. chat_history 变量必须是一个消息列表
45 | 
46 | ## MessagesPlaceholder
47 | 我们说过，Agent 本质上就是 LLM，既然 @lst-la_chat_conversation_demo 执行有异常，那我们就看下他的提示词究竟是怎么实现的（具体实现位于 `langchain/agents/conversation_chat/base.py` 的 `create_prompt()`）。
48 | 
49 | ```python
50 | messages = [
51 |     SystemMessagePromptTemplate.from_template(system_message),
52 |     MessagesPlaceholder(variable_name="chat_history"),  #<1>
53 |     HumanMessagePromptTemplate.from_template(final_prompt),
54 |     MessagesPlaceholder(variable_name="agent_scratchpad"),
55 | ]
56 | ```
57 | 
58 | 1. 从提示词的构造方式上，`chat_history` 是通过 `MessagesPlaceholder` 构造的。而此处的 `chat_history` 又是通过 `ConversationBufferMemory` 获取的。
59 | 
60 | ```python
61 | memory = ConversationBufferMemory(memory_key="chat_history")
62 | ```
63 | 
64 | `ConversationBufferMemory` 返回的内容逻辑如下所示：
65 | 
66 | ```python
67 | @property
68 | def buffer(self) -> Any:
69 |     """String buffer of memory."""
70 |     return self.buffer_as_messages if self.return_messages else self.buffer_as_str #<1>
71 | 
72 | ```
73 | 
74 | 1. 根据 `return_messages` 来返回不同格式的记忆。
75 | 
76 | 而在 LangChain 中，`return_messages` 默认值为 `False`，因此，实际上 `buffer()` 返回的是**字符串格式**的内容。这就是导致执行异常的根本原因。
77 | 
78 | 为了解决这个问题，我们需要在初始化 `ConversationBufferMemory` 时，配置 `return_messages` = `True`。 
79 | 
80 | ```{#lst-la_chat_conversation_demo_1 .python include="./code/test_chat_coversation_agent_1.py" code-line-numbers="true" lst-cap="Chat Conversation Agent"}
81 | ```
82 | 
83 | 1. 通过设置 `return_messages` 为 `True` 以返回消息列表格式的记忆内容。
84 | 
85 | :::{.callout-warning}
86 | 例如本节中提到的 LangChain 相关基建对文心大模型支持不够友好的问题，最好的修复方案还是给 LangChain 提交 `PR` 来解决。我们给 LangChain 提交了 PR [12921](https://github.com/langchain-ai/langchain/pull/12921)，还在等待官方的审核。如果您有比较好的想法，可以直接给 [LangChain](https://github.com/langchain-ai/langchain/pulls) 提交 `PR`。
87 | :::
88 | 


--------------------------------------------------------------------------------
/langchain_agent_fc.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | filters:
3 |    - include-code-files
4 | code-annotations: below
5 | ---
6 | 
7 | # LangChain OpenAI Function Agent {#sec-lc_fc}


--------------------------------------------------------------------------------
/langchain_agent_pae.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | filters:
3 |    - include-code-files
4 | code-annotations: below
5 | ---
6 | 
7 | # LangChain PlanAndExecute Agent {#sec-lc_pae}


--------------------------------------------------------------------------------
/langchain_function_call.qmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | filters:
  3 |    - include-code-files
  4 | code-annotations: below
  5 | ---
  6 | 
  7 | # LangChain 函数调用
  8 | 
  9 | ::: {.callout-tip title="要点提示"} 
 10 | * OpenAI LLMs 中的 `函数调用（Function Calling）` 使得开发者可以对函数进行描述，而 `模型` 则可以用这些函数描述来生成函数调用参数，并与外部工具和 APIs 建立更为可靠、结构化的连接。[^1]
 11 | * 开发者可以使用 `JSON Schema` 定义函数，指导 `模型` 如何根据用户的输入信息来生成调用 `函数` 所需的参数，并调用函数。
 12 | * `函数调用` 会有丰富多样的应用场景，例如：
 13 |   * 构建与外部工具或 APIs 交互的聊天机器人
 14 |   * 把自然语言查询转换为 API 调用，以便和现有的 `服务` 和 `数据库` 无缝整合
 15 |   * 从非结构化的文本中提取结构化数据
 16 | * `函数调用` 会涉及到如下的步骤：
 17 |   * 调用包含 `函数` 的 `模型`
 18 |   * 处理 `函数` 响应
 19 |   * 将 `函数` 响应返回给 `模型`，以进行进一步的处理或者生成更友好的用户响应
 20 | * 根据 [文心开发者文档](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/xlmokikxe)，在文心 4.0 中，也增加了 `函数调用` 的能力，其原理和使用和 OpenAI 相似。
 21 | :::
 22 | 
 23 | ## 大模型的时效性
 24 | 当我们问大模型“明天天气怎么样”时，因为大模型训练语料的时效性问题，如果不依赖外部信息，大模型是很难回答这种问题的，如 @fig-weather 所示。
 25 | 
 26 | ::: {#fig-weather layout-ncol=2}
 27 | 
 28 | ![ChatGPT](images/weather_gpt.jpg){#fig-weather_gpt}
 29 | 
 30 | ![文心一言](images/weather_ernie.jpg){#fig-weather_ernie}
 31 | 
 32 | 明天天气怎么样？
 33 | :::
 34 | 
 35 | 而 OpenAI 大语言模型提供的 `函数调用` 能力，恰恰非常完美的解决了类似的问题，从而使得大语言模型可以通过 `函数调用` 与外部系统通信，并获取更实时的信息，以解决类似的问题。
 36 | 
 37 | ## 函数调用流程
 38 | OpenAI 开发的大语言模型（例如GPT-3.5-turbo-0613，GPT-4-0613）提供了一种名为 `Function Calling(函数调用)` 的创新功能。`函数调用` 使得开发人员能够在模型中对函数进行描述，然后模型可以利用这些描述来巧妙地为函数生成调用参数。
 39 | 
 40 | 在 OpenAI 中，函数调用的步骤可以参考：@fig-function_calling_step
 41 | 
 42 | ![OpenAI 的函数调用流程](images/function_calling_1.png){#fig-function_calling_step}
 43 | 
 44 | ::: {.callout-caution title="注意"}
 45 | 需要特别注意的是，大语言模型本身并不会调用我们预定的 `函数`，大语言模型仅仅是生成我们所要调用的函数的调用参数而已，具体调用函数的动作，需要我们在自己的应用代码中来实现。[^2]
 46 | :::
 47 | 
 48 | ::: {.callout-important title="思考"}
 49 | 为什么模型不能直接调用函数？
 50 | :::
 51 | 
 52 | 利用 `函数调用`，LLMs 可以很方便的将自然语言指令转变为相关的函数调用，例如：可以把“给张三发一封邮件询问下他下周五下午是否需要一杯咖啡” 这样的提示转换为 `send_email(to: string, body: string)` 函数调用。
 53 | 
 54 | ## OpenAI 函数调用
 55 | ### OpenAI API
 56 | 
 57 | ```{#lst-fc_openai .python lst-cap="使用 OpenAI API 进行函数调用示例"}
 58 | import openai
 59 | import json
 60 | 
 61 | # Example dummy function hard coded to return the same weather
 62 | # In production, this could be your backend API or an external API
 63 | def get_current_weather(location, unit="celsius"):
 64 |     """Get the current weather in a given location"""
 65 |     weather_info = {
 66 |         "location": location,
 67 |         "temperature": "27",
 68 |         "unit": unit,
 69 |         "forecast": ["sunny", "windy"],
 70 |     }
 71 |     return json.dumps(weather_info)
 72 | 
 73 | def run_conversation():
 74 |     # Step 1: send the conversation and available functions to GPT
 75 |     messages = [{"role": "user", "content": "北京明天天气怎么样?"}]
 76 |     functions = [
 77 |         {
 78 |             "name": "get_current_weather",
 79 |             "description": "Get the current weather in a given location",
 80 |             "parameters": {
 81 |                 "type": "object",
 82 |                 "properties": {
 83 |                     "location": {
 84 |                         "type": "string",
 85 |                         "description": "The city and state, e.g. San Francisco, CA",
 86 |                     },
 87 |                     "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
 88 |                 },
 89 |                 "required": ["location"],
 90 |             },
 91 |         }
 92 |     ]
 93 |     response = openai.ChatCompletion.create(
 94 |         model="gpt-3.5-turbo-0613",
 95 |         messages=messages,
 96 |         functions=functions,
 97 |         function_call="auto",  # auto is default, but we'll be explicit
 98 |     )
 99 | 
100 |     print("---------step 1. the 1st LLMs response-----------")
101 |     print(response)
102 | 
103 |     response_message = response["choices"][0]["message"]
104 | 
105 |     # Step 2: check if GPT wanted to call a function
106 |     if response_message.get("function_call"):
107 |         # Step 3: call the function
108 |         # Note: the JSON response may not always be valid; be sure to handle errors
109 |         available_functions = {
110 |             "get_current_weather": get_current_weather,
111 |         }  # only one function in this example, but you can have multiple
112 |         function_name = response_message["function_call"]["name"]
113 |         fuction_to_call = available_functions[function_name]
114 |         function_args = json.loads(response_message["function_call"]["arguments"])
115 |         function_response = fuction_to_call(
116 |             location=function_args.get("location"),
117 |             #unit=function_args.get("unit"),
118 |         )
119 | 
120 |         print("---------step 2. function response-----------")
121 |         print(function_response)
122 | 
123 |         # Step 4: send the info on the function call and function response to GPT
124 |         messages.append(response_message)  # extend conversation with assistant's reply
125 |         messages.append(
126 |             {
127 |                 "role": "function",
128 |                 "name": function_name,
129 |                 "content": function_response,
130 |             }
131 |         )  # extend conversation with function response
132 | 
133 |         print("---------step 3. final messages-----------")
134 |         print(messages)
135 | 
136 |         second_response = openai.ChatCompletion.create(
137 |             model="gpt-3.5-turbo-0613",
138 |             messages=messages,
139 |         )  # get a new response from GPT where it can see the function response
140 |         return second_response
141 | 
142 | res = run_conversation()
143 | print("---------step 4. final LLMs response-----------")
144 | print(res)
145 | ```
146 | 
147 | @lst-fc_openai 的运行结果如 @lst-openai_fc_res：
148 | 
149 | ```{#lst-openai_fc_res .javascript lst-cap="运行结果"}
150 | ---------step 1. the 1st LLMs response-----------
151 | {
152 |   "id": "chatcmpl-7xnsEW2rSsec7Qd1FC60cKIT7TtuR",
153 |   "object": "chat.completion",
154 |   "created": 1694487422,
155 |   "model": "gpt-3.5-turbo-0613",
156 |   "choices": [
157 |     {
158 |       "index": 0,
159 |       "message": {
160 |         "role": "assistant",
161 |         "content": null,
162 |         "function_call": {
163 |           "name": "get_current_weather",
164 |           "arguments": "{\n  \"location\": \"北京\"\n}"
165 |         }
166 |       },
167 |       "finish_reason": "function_call"
168 |     }
169 |   ],
170 |   "usage": {
171 |     "prompt_tokens": 85,
172 |     "completion_tokens": 16,
173 |     "total_tokens": 101
174 |   }
175 | }
176 | ---------step 2. function response-----------
177 | {"location": "北京", "temperature": "27", "unit": null, "forecast": ["sunny", "windy"]}
178 | ---------step 3. final messages-----------
179 | [{'role': 'user', 'content': '北京明天天气怎么样?'}, <OpenAIObject at 0x1082907c0> JSON: {
180 |   "role": "assistant",
181 |   "content": null,
182 |   "function_call": {
183 |     "name": "get_current_weather",
184 |     "arguments": "{\n  \"location\": \"北京\"\n}"
185 |   }
186 | }, {'role': 'function', 'name': 'get_current_weather', 'content': '{"location": "\\u5317\\u4eac", "temperature": "27", "unit": null, "forecast": ["sunny", "windy"]}'}]
187 | ---------step 4. final LLMs response-----------
188 | {
189 |   "id": "chatcmpl-7xnsFw2dssMs3R0aGVMmjB0cjLugZ",
190 |   "object": "chat.completion",
191 |   "created": 1694487423,
192 |   "model": "gpt-3.5-turbo-0613",
193 |   "choices": [
194 |     {
195 |       "index": 0,
196 |       "message": {
197 |         "role": "assistant",
198 |         "content": "北京明天的天气预报是晴天，有很大的风。气温为27°C。"
199 |       },
200 |       "finish_reason": "stop"
201 |     }
202 |   ],
203 |   "usage": {
204 |     "prompt_tokens": 77,
205 |     "completion_tokens": 30,
206 |     "total_tokens": 107
207 |   }
208 | }
209 | ```
210 | 
211 | ### OpenAI 函数调用 LLMChain
212 | 
213 | 可以参考 LangChain 官方文档以在 LangChain 中使用 OpenAI `函数调用` 的能力。[^3]
214 | 
215 | ```{#lst-fc_langchain .python lst-cap="使用 LangChain 实现函数调用"}
216 | from langchain.chat_models import ChatOpenAI
217 | from langchain.prompts import ChatPromptTemplate
218 | from langchain.chains.openai_functions import (
219 |     create_openai_fn_chain,
220 | )
221 | from langchain.chains import LLMChain
222 | import json
223 | 
224 | def get_current_weather(location: str, unit: str="celsius") -> str:
225 |     """Get the current weather in a given location
226 | 
227 |     Args:
228 |         location (str): location of the weather.
229 |         unit (str): unit of the tempuature.
230 |     
231 |     Returns:
232 |         str: weather in the given location.
233 |     """
234 | 
235 |     weather_info = {
236 |         "location": location,
237 |         "temperature": "27",
238 |         "unit": unit,
239 |         "forecast": ["sunny", "windy"],
240 |     }
241 |     return json.dumps(weather_info)
242 | 
243 | llm = ChatOpenAI(model="gpt-3.5-turbo-0613")
244 | prompt = ChatPromptTemplate.from_messages(
245 |     [
246 |         ("human", "{query}"),
247 |     ]
248 | )
249 | 
250 | chain = create_openai_fn_chain([get_current_weather], llm, prompt, verbose=True)
251 | res = chain.run("What's the weather like in Beijing tomorrow?")
252 | print("-------------The 1-st langchain result-------------")
253 | print(res)
254 | 
255 | res_func = get_current_weather(res['location'])
256 | 
257 | chain = LLMChain(llm=llm, prompt=prompt, verbose=True)
258 | res = chain.run("extract the tomorrow weather infomation from ：%s， and answer the question: %s" % (res_func, "What's the weather like in Beijing tomorrow?"))
259 | print(res)
260 | ```
261 | 
262 | @lst-fc_langchain 的运行结果如下所示：
263 | 
264 | ```{#lst-fc_langchain_res .javascript lst-cap="运行结果"}
265 | > Entering new LLMChain chain...
266 | Prompt after formatting:
267 | Human: What's the weather like in Beijing tomorrow?
268 | 
269 | > Finished chain.
270 | -------------The 1-st langchain result-------------
271 | {'location': 'Beijing', 'unit': 'metric'}
272 | 
273 | > Entering new LLMChain chain...
274 | Prompt after formatting:
275 | Human: extract the tomorrow weather infomation from ：{"location": "Beijing", "temperature": "27", "unit": "celsius", "forecast": ["sunny", "windy"]}， and answer the question: What's the weather like in Beijing tomorrow?
276 | 
277 | > Finished chain.
278 | The weather in Beijing tomorrow is sunny and windy.
279 | ```
280 | 
281 | ::: {.callout-note}
282 | 在 `create_openai_fn_chain` 中，其第一个参数是一个函数列表，如果该列表只有 1 个函数时，则 `create_openai_fn_chain` 仅会返回大语言模型构造的调用该函数对应的参数。例如如上的例子，`create_openai_fn_chain` 仅返回了 `{'location': 'Beijing', 'unit': 'metric'}`。
283 | 而如果函数列表存在多个函数时，则会返回大语言模型分析之后所需要调用的函数名以及对应的参数，例如： `{'name': 'get_current_weather', 'arguments': {'location': 'Beijing'}}`。
284 | ::: 
285 | 
286 | ```{#lst-fc_multi_fcs .python lst-cap="create_openai_fn_chain() 传递多个函数调用示例"}
287 | # ...
288 | def get_current_news(location: str) -> str:
289 |     """Get the current news based on the location.'
290 | 
291 |     Args:
292 |         location (str): The location to query.
293 |     
294 |     Returs:
295 |         str: Current news based on the location.
296 |     """
297 | 
298 |     news_info = {
299 |         "location": location,
300 |         "news": [
301 |             "I have a Book.",
302 |             "It's a nice day, today."
303 |         ]
304 |     }
305 | 
306 |     return json.dumps(news_info)
307 | # ...
308 | 
309 | chain = create_openai_fn_chain([get_current_weather, get_current_news], llm, prompt, verbose=True)
310 | res = chain.run("What's the weather like in Beijing tomorrow?")
311 | print("-------------The 1-st langchain result-------------")
312 | print(res)
313 | ```
314 | 
315 | @lst-fc_multi_fcs 的运行结果如 @lst-fc_multi_res 所示：
316 | 
317 | ```{#lst-fc_multi_res .javascript lst-cap="运行结果"}
318 | > Entering new LLMChain chain...
319 | Prompt after formatting:
320 | Human: What's the weather like in Beijing tomorrow?
321 | 
322 | > Finished chain.
323 | -------------The 1-st langchain result-------------
324 | {'name': 'get_current_weather', 'arguments': {'location': 'Beijing'}}
325 | ```
326 | 
327 | ## 文心 4.0 函数调用
328 | ### 文心 API
329 | 在使用 文心 4.0 的函数调用之前，首先需要安装 `qianfan` 库：
330 | 
331 | ```bash
332 | pip install qianfan
333 | ```
334 | 
335 | 我们首先对本章前面提到的 `get_current_news` 和 `get_current_weather` 这两个函数实现其 JSON-Schema 描述：
336 | 
337 | ```{#lst-la_fc_fd .python include="./code/functions_desc.py" code-line-numbers="true" lst-cap="待调用函数的函数描述"}
338 | ```
339 | 
340 | ```{#lst-la_fc_fd_qf_demo .python code-line-numbers="true" lst-cap="使用千帆 API 实现文心大模型的函数调用"}
341 | import qianfan
342 | 
343 | chat_comp = qianfan.ChatCompletion()
344 | 
345 | resp = chat_comp.do(model="ERNIE-Bot-4",  #<1>
346 |                     messages=[{"role": "user", "content": "北京的新闻是什么？"}], #<2>
347 |                     functions=functions) #<3>
348 |  
349 | print(resp)
350 | ```
351 | 
352 | 1. 指定采用的模型名称
353 | 2. 和大模型交互的消息列表
354 | 3. 告诉大模型我们有哪些函数可以调用，以及对应函数的具体描述，具体参见 @lst-la_fc_fd
355 | 
356 | @lst-la_fc_fd_qf_demo 的运行结果如下：
357 | 
358 | ```bash
359 | QfResponse(code=200, headers={...}, body={'id': 'as-cvbbn9t0vq', 'object': 'chat.completion', 'created': 1699708273, 'result': '', 'is_truncated': False, 'need_clear_history': False, 'function_call': {'name': 'get_current_news', 'thoughts': '用户想要知道北京的新闻。我可以使用get_current_news工具来获取这些信息。', 'arguments': '{"location":"北京"}'}, 'usage': {...})
360 | ```
361 | 
362 | 通过结果我们可以发现，文心大模型的 `函数调用` 和 OpenAI 的 `函数调用` 虽然不完全一致，但是还是非常相似的。对于有可以调用的函数时，文心大模型的返回结果中的 `resp.result` 为空，同时用 `resp.function_call` 存储针对当前问题，经过大模型分析后可以调用的函数以及调用函数时所用到的参数。具体接下来的函数调用过程，就和 OpenAI 一致了，可以参考 @lst-fc_openai。
363 | 
364 | ### 文心函数调用 LLMChain
365 | 目前，LangChain 并不支持像 @lst-fc_multi_fcs 那样，通过 `create_openai_fn_chain()` 来进行函数调用。如果要实现该功能，需要对 LangChain 进行扩展，增加 `create_ernie_fn_chain()`。可以参照 `create_openai_fn_chain()` 来实现 `create_ernie_fn_chain()`，具体需要修改的代码参考：[feat: add ERNIE-Bot-4 Function Calling](https://github.com/langchain-ai/langchain/pull/13320)。
366 | 
367 | :::{.callout-tip title="GOOD NEWS"}
368 | [feat: add ERNIE-Bot-4 Function Calling](https://github.com/langchain-ai/langchain/pull/13320) 已经合入 LangChain 的代码，LangChain 已经原生支持文心大模型的 `函数调用` 功能。为了兼容 `QianfanChatEndpoint`，我们对 `create_ernie_fn_chain()` 进行了升级，具体参见：[langchain/pull/14275](https://github.com/langchain-ai/langchain/pull/14275)。
369 | :::
370 | 
371 | :::{.callout-note}
372 | 因为文心大模型的返回有自己的特性，在调用文心 API 时，对于存在 `functions` 参数的场景，其请求结果中的 `function_call` 字段是独立于 `result` 字段单独存在的。
373 | 
374 | ```python
375 | QfResponse(code=200, headers={...}, body={'id': 'as-cvbbn9t0vq', 'object': 'chat.completion', 'created': 1699708273, 'result': '', 'is_truncated': False, 'need_clear_history': False, 'function_call': {'name': 'get_current_news', 'thoughts': '用户想要知道北京的新闻。我可以使用get_current_news工具来获取这些信息。', 'arguments': '{"location":"北京"}'}, 'usage': {...})
376 | ```
377 | 
378 | 而当前 LangChain 中对 LLM 返回的解析一般是对结果中的 `result` 字段进行解析。因此，要使用文心大模型的 `函数调用` 能力，同时还需要对 `ErnieBotChat` 进行升级。
379 | 
380 | ::: {.panel-tabset group="ernie_update"}
381 | 
382 | ## 方式一
383 | ```python
384 | def _create_chat_result(self, response: Mapping[str, Any]) -> ChatResult:
385 |     if 'function_call' in response:
386 |         function_call_str = '{{"function_call": {}}}'.format(
387 |             json.dumps(response.get("function_call")))
388 |         generations = [
389 |             ChatGeneration(message=AIMessage(content=function_call_str))
390 |         ]
391 |     else:
392 |         generations = [
393 |             ChatGeneration(message=AIMessage(content=response.get("result")))
394 |         ]
395 |     #...
396 | ```
397 | 
398 | ## 方式二
399 | 
400 | ```python
401 | def _create_chat_result(self, response: Mapping[str, Any]) -> ChatResult:
402 |     if "function_call" in response:
403 |          additional_kwargs = {
404 |                 "function_call": dict(response.get("function_call", {}))
405 |             }
406 |     else:
407 |         additional_kwargs = {}
408 |     generations = [
409 |         ChatGeneration(
410 |             message=AIMessage(
411 |                 content=response.get("result"),
412 |                 additional_kwargs={**additional_kwargs},
413 |             )
414 |         )
415 |     ]
416 |     # ...
417 | ```
418 | :::
419 | 
420 | :::
421 | 
422 | 完成如上的修改之后，可以像 @lst-fc_multi_fcs 那样来简化大语言模型的 `函数调用` 过程。
423 | 
424 | ::: {.panel-tabset group="ernie_fc_d"}
425 | 
426 | ## ErnieBotChat
427 | ```{#lst-fc_multi_fcs_ernie .python code-line-numbers="true" lst-cap="使用 ErnieBotChat 执行文心大模型的函数调用"}
428 | from langchain.chat_models import ErnieBotChat
429 | from langchain.prompts import ChatPromptTemplate
430 | from langchain.chains.ernie_functions import (
431 |     create_ernie_fn_chain,
432 | )
433 | 
434 | llm = ErnieBotChat(model_name="ERNIE-Bot-4")
435 | prompt = ChatPromptTemplate.from_messages(
436 |     [
437 |         ("human", "{query}"),
438 |     ]
439 | )
440 | 
441 | chain = create_ernie_fn_chain([get_current_weather, get_current_news], llm, prompt, verbose=True)
442 | res = chain.run("北京今天新闻是什么？")
443 | print(res)
444 | ```
445 | 
446 | ## QianfanChatEndpoint
447 | 
448 | ```{#lst-fc_multi_fcs_qianfan .python code-line-numbers="true" lst-cap="使用 QianfanChatEndpoint 执行文心大模型的函数调用"}
449 | from langchain_community.chat_models import QianfanChatEndpoint
450 | from langchain_core.prompts.chat import (
451 |     ChatPromptTemplate,
452 | )
453 | from langchain.chains.ernie_functions import (
454 |     create_ernie_fn_chain,
455 | )
456 | 
457 | llm = QianfanChatEndpoint(model="ERNIE-Bot-4")
458 | prompt = ChatPromptTemplate.from_messages(
459 |     [
460 |         ("human", "{query}"),
461 |     ]
462 | )
463 | 
464 | chain = create_ernie_fn_chain([get_current_weather, get_current_news], llm, prompt, verbose=True)
465 | res = chain.run("北京今天新闻是什么？")
466 | print(res)
467 | ```
468 | 
469 | :::
470 | @lst-fc_multi_fcs_ernie，@lst-fc_multi_fcs_qianfan 的运行结果如下：
471 | 
472 | ```bash
473 | > Entering new LLMChain chain...
474 | Prompt after formatting:
475 | Human: 北京今天新闻是什么？
476 | 
477 | > Finished chain.
478 | {'name': 'get_current_news', 'thoughts': '用户想要知道北京今天的新闻。我可以使用get_current_news工具来获取这些信息。', 'arguments': {'location': '北京'}}
479 | ```
480 | 
481 | 接下来，根据文心大模型的返回内容，同时根据之前所述的 OpenAI 的 `函数调用` 方式来调用大模型返回的函数并获取对应信息即可。
482 | 
483 | ## 根据 LLM 的返回调用对应函数
484 | 如前所述，LLMs 会根据当前的信息返回它认为我们应该调用的函数以及函数对应的参数，具体的函数执行还是需要我们手动执行。为了进一步简化该过程，我们对这个过程进行了抽象，具体如 @lst-la_wx_fc_run。
485 | 
486 | ```{#lst-la_wx_fc_run .python include="./code/langchain/utils/call_function.py" code-line-numbers="true" lst-cap="utils.call_function.call_function()"}
487 | ```
488 | 
489 | 1. 方便 LangSmith 可以追踪到函数调用，方便 DEBUG。
490 | 
491 | 通过文心大模型的函数调用解决我们的问题的完整代码如 @lst-la_wx_fc_demo_all 所示。
492 | 
493 | ```{#lst-la_wx_fc_demo_all .python include="./code/test_ernie_fc.py" code-line-numbers="true" lst-cap="文心大模型利用函数调用解决问题"}
494 | ```
495 | 
496 | @lst-la_wx_fc_demo_all 的执行结果如下所示：
497 | 
498 | ```bash
499 | > Entering new LLMChain chain...
500 | Prompt after formatting:
501 | Human: 北京今天的新闻是什么？
502 | 
503 | > Finished chain.
504 | 
505 | 
506 | > Entering new LLMChain chain...
507 | Prompt after formatting:
508 | Human: 从 get_current_news 中，我们得到如下信息：{"location": "\u5317\u4eac", "news": ["I have a Book.", "It's a nice day, today."]}，那么 北京今天的新闻是什么？
509 | 
510 | > Finished chain.
511 | 根据提供的信息，`get_current_news` 返回的数据中，"北京"的新闻有两条，分别是 "I have a Book." 和 "It's a nice day, today."。所以，北京今天的新闻包括这两条信息。
512 | ```
513 | 
514 | 函数调用的整个过程如 @fig-fc_ls 所示。
515 | 
516 | ![函数调用的 Trace 图](./images/fc_ls.png){#fig-fc_ls}
517 | 
518 | ## 参考文献
519 | [^1]: [Function calling and other API updates](https://openai.com/blog/function-calling-and-other-api-updates)
520 | 
521 | [^2]: [Guides: Function calling](https://platform.openai.com/docs/guides/gpt/function-calling)
522 | 
523 | [^3]: [Using OpenAI functions](https://python.langchain.com/docs/modules/chains/how_to/openai_functions)
524 | 
525 | 


--------------------------------------------------------------------------------
/langchain_install.qmd:
--------------------------------------------------------------------------------
1 | # LangChain 安装指南 {#sec-langchain_install}
2 | 
3 | :::{.callout-tip title="版本建议"}
4 | 使用 LangChain，建议使用 Python 3.10版本。
5 | 
6 | 因为基于 LangChain 的生态对 Python 版本也会有不同的要求，例如 Langflow 要求 Python 版本在 3.9~3.11。因此，如果想使用 LangChain，最好采用 Python 3.10.10 版本。
7 | :::
8 | 
9 | 


--------------------------------------------------------------------------------
/langchain_intro.qmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | filters:
  3 |    - include-code-files
  4 | code-annotations: below
  5 | ---
  6 | 
  7 | # LangChain 简介 {#sec-LC_intro}
  8 | 
  9 | ::: {.callout-tip title="要点提示"} 
 10 | 我们不想花更多的力气来实现和LLM交互的流程，而是更关注于业务逻辑的实现；我们也不想重复编写相似的流程代码，而是可以共享我们的成果，别人只需要键入 `docker pull` 就可以使用我们的成果……如果你有这样的想法，那么 LangChain 正是你的菜~
 11 | 
 12 | * LangChain 的目标
 13 | * LangChain 的基本概念
 14 | * 使用 LangChain 和文心大模型交互
 15 | :::
 16 | 
 17 | 随着大型语言模型（LLM）的引入，自然语言处理已经成为互联网上的热门话题。LangChain 是一个开源 Python 框架，利用 LangChain，开发人员能够非常方便的开发基于大型语言模型的应用程序（AI 原生应用），例如：聊天机器人，摘要，生成式问答……。
 18 | 
 19 | LangChain 虽然是一个非常年轻的框架，但又是一个发展速度非常快的框架。自从 2022 年 10 月 25 日在 GitHub [第一次提交](https://github.com/langchain-ai/langchain/commit/18aeb7201)以来，在 11 个月的时间里，累计发布了 200 多次，累计提交 [4000](https://github.com/langchain-ai/langchain/graphs/commit-activity) 多次代码。2023 年 3 月，ChatGPT 的 API 因升级降价大受欢迎，LangChain 的使用也随之爆炸式增长。
 20 | 
 21 | ![LangChain 代码提交趋势](images/langchain_commit_counts.jpg){#fig-langchain_github_commit_history}
 22 | 
 23 | 之后，LangChain 在没有任何收入也没有任何明显的创收计划的情况下，获得了 1000 万美元的种子轮融资和 2000-2500 万美元的 A 轮融资，估值达到 2 亿美元左右。[^7]
 24 | 
 25 | 作为一个年轻而又充满活力的框架，LangChain 正在彻底改变工业和技术，改变我们与技术的每一次互动。
 26 | 
 27 | 2024 年 1 月 8 日，经过了 1 年多的迭代，LangChain 发布了第一个稳定版本 0.1.0 版本，这个版本完全向后兼容，并且有 `python` 和 `javascript` 两个版本。稳定版本的发布，无疑为社区广大开发者注入了 LLM 开发的强劲信心。0.1.0 版本的主要改动大致如下[^8]：
 28 | 
 29 | * 架构上分为两大部分：
 30 |   * 主要的概念抽象、接口、核心功能，也就是 [`langchain-core`](https://github.com/langchain-ai/langchain/tree/master/libs/core?ref=blog.langchain.dev) 部分，该部分的代码是稳定的，并且会保持着非常严格的版本控制策略。
 31 |   * 第三方软件包，主要位于 [`langchain-community`](https://github.com/langchain-ai/langchain/tree/master/libs/community?ref=blog.langchain.dev) 和 [langchain-partners](https://github.com/langchain-ai/langchain/tree/master/libs/partners?ref=blog.langchain.dev) 部分，这种方式对于第三方的集成会更加友好。
 32 | * 通过引入 `LangSmith`（@sec-LS_intro） 增加 LangChain 的可观测性，这一点我认为是非常大的突破，这会更加便于我们使用 LangChain 开发 LLM 应用。
 33 | * Stream 响应的支持。LLM 有时可能需要一段时间才能做出响应，通过 Stream 方式的响应，LangChain 可以向用户展示结果的生成过程，就像我们使用 `文心一言` 或者 `ChatGPT` 显示的那样。
 34 | * Agent 架构的调整，Agent 的初始化方式以及调用方式都做了升级，在使用时大家要特别注意。
 35 | 
 36 | :::{.callout-tip}
 37 | 可以使用 @sec-lf_langflow 中介绍的 Langflow 来对 LangChain 进行可视化操作。
 38 | :::
 39 | 
 40 | ## LangChain 的目标
 41 | 不同的大语言模型都有各自的优势，我们可能会用 A 模型来进行自然语言理解，然后用 B 模型进行逻辑推理并获取结果……此时，如果使用大语言模型各自提供的 API 来和模型交互，那么就会存在非常多的重复工作。
 42 | 
 43 | 虽然大语言模型有很多，但是和大语言模型的交互流程又是非常类似（如 @fig-interact_with_llm 所示），如果每次和模型交互都需要重复如上的步骤，那听起来也是一件非常繁琐的事情。对于相同的提示词，我们不想每次都 `ctrl+c`、`ctrl+v`，这真是一件非常可怕的事情。
 44 | ```{mermaid}
 45 | %%| fig-cap: "和模型交互的流程"
 46 | %%| label: fig-interact_with_llm
 47 | 
 48 | flowchart LR
 49 |   A(构造提示词) --> B(LLMs)
 50 |   B --> C(模型生成结果)
 51 |   C --> D(结果处理)
 52 |   D --> E(最终结果)
 53 | ```
 54 | 
 55 | 和 FFmpeg 对视频的处理一样，FFmpeg 提供的 `filtergraph` [^1]机制大大增强了其音视频的处理能力，奠定其在视音频领域的地位。`filtergraph` 可以将不同的音视频处理能力以链条的形式组合起来，不但简化了音视频的处理流程，更让 FFmpeg 可以实现复杂的音视频处理。
 56 | 
 57 | 同理，和 LLMs 的单次交互并不会形成什么惊人的能量，而如果可以使用类似 `filtergraph` 的机制，将与 LLMs 的多次交互整合起来，那么其所释放的能量将是无穷的。
 58 | 
 59 | 而 LangChain 就是为了解决如上的问题而产生的。LangChain 可以提供给我们的最主要的价值如下[^2]：
 60 | 
 61 | * 组件化：LangChain 对与 LLMs 交互的流程进行了统一的抽象，同时也提供了不同 LLMs 的实现。这极大的提升了我们使用 LLMs 的效率。
 62 | * 序列化：LangChain 提供的序列化的能力，可以将`提示词`、`chain`等以文件的形式而不是以代码的形式进行存储，这样可以极大的方便我们共享 `提示词`，并对 `提示词` 进行版本管理。[^3]
 63 | * 丰富的 chains 套件：LangChain 提供了丰富、用于完成特定目的、开箱即用的 chains 套件，例如用于总结文档的 `StuffDocumentsChain` 和 `MapReduceDocumentsChain`，这些套件将会降低我们使用 LLMs 的门槛。
 64 | 
 65 | 更具体的， LangChain 可以在如下的 6 大方向上给我们提供非常大的便利：
 66 | 
 67 | 1. **LLMs & Prompt**：LangChain 提供了目前市面上几乎所有 LLM 的通用接口，同时还提供了 `提示词` 的管理和优化能力，同时也提供了非常多的相关适用工具，以方便开发人员利用 LangChain 与 LLMs 进行交互。
 68 | 2. **Chains**：LangChain 把 `提示词`、`大语言模型`、`结果解析` 封装成 `Chain`，并提供标准的接口，以便允许不同的 `Chain` 形成交互序列，为 AI 原生应用提供了端到端的 `Chain`。
 69 | 3. **Data Augmented Generation**[^4]：`数据增强生成式` 是一种解决预训练语料数据无法及时更新而带来的回答内容陈旧的方式。LangChain 提供了支持 `数据增强生成式` 的 `Chain`，在使用时，这些 `Chain` 会首先与外部数据源进行交互以获得对应数据，然后再利用获得的数据与 `LLMs` 进行交互。典型的应用场景如：基于特定数据源的问答机器人。
 70 | 4. **Agent**：对于一个任务，`代理` 主要涉及让 `LLMs` 来对任务进行拆分、执行该行动、并观察执行结果，`代理` 会重复执行这个过程，直到该任务完成为止。LangChain 为 `代理` 提供了标准接口，可供选择的代理，以及一些端到端的 `代理` 的示例。
 71 | 5. **Memory**：`内存` 指的是 chain 或 agent 调用之间的状态持久化。LangChain 为 `内存` 提供了标准接口，并提供了一系列的 `内存` 实现。
 72 | 6. **Evaluation**：LangChain 还提供了非常多的评估能力以允许我们可以更方便的对 `LLMs` 进行评估。
 73 | 
 74 | :::{.callout-tip title="LangChain 安装"}
 75 | LangChain 的安装可以参见 @sec-langchain_install。
 76 | :::
 77 | 
 78 | ## LangChain 的基本概念
 79 | 使用 LLMs 和使用电脑一样，需要一些基本的架构体系。LangChain 把整体架构体系分为两部分：输入/输出系统，大语言模型。其中，输入部分为 `Prompt` 相关组件，输出为 `Output Parser` 相关组件。具体参见 @fig-langchain_io。
 80 | 
 81 | ![LangChain I/O](./images/langchain_io.png){#fig-langchain_io}
 82 | 
 83 | LangChain 提供了与 LLMs 交互的通用构件：
 84 | 
 85 | * `Prompts`：提示词模版，提示词动态选择，提示词序列化。
 86 | * `LLMs`：与 LLM 交互的通用接口。
 87 | * `Output Parsers`：对模型的输出信息进行解析，以输出符合特定格式的响应。
 88 | 
 89 | ![LangChain I/O 示例](./images/langchain_io_example.jpeg){#fig-langchain_io_example}
 90 | 
 91 | ### Prompt Templates
 92 | 提示词模版为不同的提示词提供预定义格式。就好像目前超市售卖的洗净切好、配好相关配菜源材料的预制菜一样，提示词模版可以简化我们和 LLMs 交互的效率。
 93 | 
 94 | 模版会包含：指令，少量的样本示例，相关的上下文信息。如 @sec-LLMs 所述，LLMs 会分为 `大语言模型` 和 `聊天模型` 两种类型，因此，LangChain 提供了两种类型的提示词模版：`prompt template`、`chat prompt template`。
 95 | 
 96 | * `prompt template`：提供字符串格式的提示词。
 97 | * `chat prompt template`：提供聊天消息格式的提示词。
 98 | 
 99 | ::: {.panel-tabset group="lc_i_prompt_demo"}
100 | 
101 | ## 0.1.0 版本
102 | ```{#lst-prompt_n .python lst-cap="PromptTemplate 示例"}
103 | from langchain_core.prompts import PromptTemplate
104 | 
105 | prompt_template = PromptTemplate.from_template(
106 |     "请以轻松欢快的语气写一篇描写 {topic} 的文章，字数不超过 {count} 字。"
107 | )
108 | res = prompt_template.format(topic="北京的秋天", count="100")
109 | 
110 | print(res)
111 | # 请以轻松欢快的语气写一篇描写 北京的秋天 的文章，字数不超过 100 字。
112 | ```
113 | 
114 | ## 0.0.xxx 版本
115 | ```{#lst-prompt .python lst-cap="PromptTemplate 示例"}
116 | from langchain import PromptTemplate
117 | 
118 | prompt_template = PromptTemplate.from_template(
119 |     "请以轻松欢快的语气写一篇描写 {topic} 的文章，字数不超过 {count} 字。"
120 | )
121 | res = prompt_template.format(topic="北京的秋天", count="100")
122 | 
123 | print(res)
124 | # 请以轻松欢快的语气写一篇描写 北京的秋天 的文章，字数不超过 100 字。
125 | ```
126 | :::
127 | 
128 | ::: {.panel-tabset group="lc_i_chat_prompt_demo"}
129 | ## 0.1.0 版本
130 | ```{#lst-chat_prompt_n .python lst-cap="ChatPromptTemplate 示例"}
131 | from langchain_core.prompts.chat import ChatPromptTemplate
132 | 
133 | template = ChatPromptTemplate.from_messages([
134 |     ("system", "你是一个能力非凡的人工智能机器人，你的名字是 {name}。"),
135 |     ("human", "你好！"),
136 |     ("ai", "你好~"),
137 |     ("human", "{user_input}"),
138 | ])
139 | 
140 | messages = template.format_messages(
141 |     name="小明",
142 |     user_input="你是谁？"
143 | )
144 | 
145 | print(messages)
146 | # [SystemMessage(content='你是一个能力非凡的人工智能机器人，你的名字是 小明。', 
147 | #                additional_kwargs={}), 
148 | # HumanMessage(content='你好！', additional_kwargs={}, example=False), 
149 | # AIMessage(content='你好~', additional_kwargs={}, example=False), 
150 | # HumanMessage(content='你是谁？', additional_kwargs={}, example=False)]
151 | ```
152 | 
153 | ## 0.0.xxx 版本
154 | ```{#lst-chat_prompt .python lst-cap="ChatPromptTemplate 示例"}
155 | from langchain.prompts import ChatPromptTemplate
156 | 
157 | template = ChatPromptTemplate.from_messages([
158 |     ("system", "你是一个能力非凡的人工智能机器人，你的名字是 {name}。"),
159 |     ("human", "你好！"),
160 |     ("ai", "你好~"),
161 |     ("human", "{user_input}"),
162 | ])
163 | 
164 | messages = template.format_messages(
165 |     name="小明",
166 |     user_input="你是谁？"
167 | )
168 | 
169 | print(messages)
170 | # [SystemMessage(content='你是一个能力非凡的人工智能机器人，你的名字是 小明。', 
171 | #                additional_kwargs={}), 
172 | # HumanMessage(content='你好！', additional_kwargs={}, example=False), 
173 | # AIMessage(content='你好~', additional_kwargs={}, example=False), 
174 | # HumanMessage(content='你是谁？', additional_kwargs={}, example=False)]
175 | ```
176 | :::
177 | 
178 | ### LLMs {#sec-LLMs}
179 | LangChain 提供了两种模型的通用接口：
180 | 
181 | * `LLMs`：模型以字符串格式的提示词作为输入，并返回字符串格式的结果。
182 | * `Chat models`：其背后也是由某种 LLM 来支撑，但是以聊天消息列表格式的提示词作为输入，并返回聊天消息格式的结果。
183 | 
184 | :::{.callout-note title="LLMs & Chat Models"}
185 | LLM 和 聊天模型 之间的区别虽然很微妙，但二者并不相同。
186 | 
187 | LangChain 中的 LLM 指的是纯文本 I/O 的模型，其包装的 API 将字符串提示作为输入，并输出字符串。OpenAI 的 GPT-3 就是 LLM。
188 | 
189 | 聊天模型通常由 LLM 支持，但专门针对对话进行了调整，其 API 采用聊天消息列表作为输入，而不是单个字符串。通常，这些消息都标有角色（例如，“System”，“AI”，“Human”）。聊天模型会返回一条 AI 聊天消息作为输出。OpenAI 的 GPT-4，Anthropic 的 Claude，百度的 Ernie-Bot 都是聊天模型。
190 | :::
191 | 
192 | 在 LangChain 中，LLM 和 聊天模型两者都实现了 `BaseLanguageModel` 接口，因此一般情况下，这两种模型可以混用。例如，两种模型都实现了常见的方法 `predict()` 和 `predict_messages()`。`predict()` 接受字符串并返回字符串，`predict_messages()` 接受消息并返回消息。
193 | 
194 | ```{#lst-basellm .python lst-cap="LLM 模式"}
195 | class OpenAI(BaseOpenAI):
196 |     # ...
197 | 
198 | class BaseOpenAI(BaseLLM):
199 |     # ...
200 | 
201 | class BaseLLM(BaseLanguageModel[str], ABC):
202 |     # ...
203 | ```
204 | 
205 | ```{#lst-chatmodel .python lst-cap="聊天模型"}
206 | class ErnieBotChat(BaseChatModel):
207 |     # ...
208 | 
209 | class BaseChatModel(BaseLanguageModel[BaseMessageChunk], ABC):
210 |     # ...
211 | ```
212 | 
213 | 接下来，我们将 `Prompt` 和 `LLM` 整合起来，实现和大语言模型交互。
214 | 
215 | ::: {.panel-tabset group="llm_example_demo"}
216 | 
217 | ## 0.1.0 版本
218 | ```{#lst-llm_example_n .python lst-cap="LLM 模型示例"}
219 | from langchain_core.prompts import PromptTemplate
220 | from langchain_openai import OpenAI
221 | 
222 | prompt_template = PromptTemplate.from_template(
223 |     "请以轻松欢快的语气写一篇描写 {topic} 的文章，字数不超过 {count} 字。"
224 | )
225 | llm = OpenAI()
226 | 
227 | prompt = prompt_template.format(topic="北京的秋天", count="100")
228 | res = llm.invoke(prompt)
229 | print(res)
230 | 
231 | # 秋天来到了北京，一片金黄色的枫叶，漫山遍野。
232 | # 湖面上的微风，吹起柔和的秋意，空气中弥漫着淡淡的枫香。
233 | # 这时，每一个角落都洋溢着秋日的温馨，令人心旷神怡。
234 | # 古老的长城上披着红叶，熙熙攘攘的人群中，也多了几分热闹与欢畅，这就是北京的秋天
235 | ```
236 | 
237 | ## 0.0.xxx 版本
238 | ```{#lst-llm_example .python lst-cap="LLM 模型示例"}
239 | from langchain import PromptTemplate
240 | from langchain.llms import OpenAI
241 | 
242 | prompt_template = PromptTemplate.from_template(
243 |     "请以轻松欢快的语气写一篇描写 {topic} 的文章，字数不超过 {count} 字。"
244 | )
245 | llm = OpenAI()
246 | 
247 | prompt = prompt_template.format(topic="北京的秋天", count="100")
248 | res = llm.predict(prompt)
249 | print(res)
250 | 
251 | # 秋天来到了北京，一片金黄色的枫叶，漫山遍野。
252 | # 湖面上的微风，吹起柔和的秋意，空气中弥漫着淡淡的枫香。
253 | # 这时，每一个角落都洋溢着秋日的温馨，令人心旷神怡。
254 | # 古老的长城上披着红叶，熙熙攘攘的人群中，也多了几分热闹与欢畅，这就是北京的秋天
255 | ```
256 | :::
257 | 
258 | 由于文心聊天模型对 `message` 角色和条数有限制[^5] [^6]，因此我们需要对 `提示词` 做一些修改。
259 | 
260 | ```{#lst-chatmodel_example .python lst-cap="聊天模型示例"}
261 | from langchain.chat_models import ErnieBotChat
262 | from langchain.prompts import ChatPromptTemplate
263 | 
264 | template = ChatPromptTemplate.from_messages([
265 |     ("user", "你是一个能力非凡的人工智能机器人，你的名字是 {name}。"),
266 |     ("assistant", "你好~"),
267 |     ("user", "{user_input}"),
268 | ])
269 | chat = ErnieBotChat()
270 | 
271 | messages = template.format_messages(
272 |     name="小明",
273 |     user_input="你是谁？"
274 | )
275 | 
276 | res = chat.predict_messages(messages)
277 | print(res)
278 | # content='我是你的新朋友小明，一个拥有先进人工智能技术的人工智能机器人。' 
279 | # additional_kwargs={} example=False
280 | ```
281 | 
282 | #### 文心 4.0
283 | 在 LangChain 中，要使用 文心 4.0 模型，可以在初始化 LLM 时设置 `model_name` 参数为 `ERNIE-Bot-4`。
284 | 
285 | ```python
286 | llm = ErnieBotChat(model_name="ERNIE-Bot-4")
287 | ```
288 | 
289 | #### 百度千帆
290 | 根据 LangChain 官网的 [ErnieBotChat 文档](https://python.langchain.com/docs/integrations/chat/ernie)，已经不建议再使用 `ErnieBotChat` 进行文心大模型的调用，并且建议使用 [百度千帆 `QianfanChatEndpoint`](https://python.langchain.com/docs/integrations/chat/baidu_qianfan_endpoint)。
291 | 
292 | 建议使用百度千帆（`QianfanChatEndpoint`）主要基于如下的因素：
293 | 
294 | * QianfanChatEndpoint 支持千帆平台中的更多LLM
295 | * QianfanChatEndpoint 支持 Stream 传输
296 | * QianfanChatEndpoint 支持函数调用
297 | 
298 | 但是，除了 Stream 传输外，其余的两个优势目前 `ErnieBotChat` 也都具备了，并且 `ErnieBotChat` 的优点还在于不需要引入额外的 `qianfan` 库。所以在二者的使用上大家根据自己的具体需求来选择就好。
299 | 
300 | 在使用 `QianfanChatEndpoint` 时，需要将 `ernie_client_id` 改为 `qianfan_ak`，把 `ernie_client_secret` 改为 `qianfan_sk`。
301 | 
302 | ```bash
303 | export ERNIE_CLIENT_ID="……"
304 | export ERNIE_CLIENT_SECRET="……"
305 | export QIANFAN_AK="${ERNIE_CLIENT_ID}"
306 | export QIANFAN_SK="${ERNIE_CLIENT_SECRET}"
307 | ```
308 | 
309 | :::{.callout-caution}
310 | `langchain-core` 0.1.13 版本对于 `chunk meta` 做了特殊处理，因此在该版本下使用 `QianfanChatEndpoint` 会导致异常。目前，社区也已经有相关 [PR](https://github.com/langchain-ai/langchain/pull/16464) 在解决该问题，我们静待花开。
311 | 
312 | ```bash
313 | > Entering new AgentExecutor chain...
314 | Additional kwargs key created already exists in left dict and value has unsupported type <class 'int'>.
315 | {}
316 | ```
317 | 
318 | ![langchain-core 0.1.13 版本下千帆异常](./images/langchain_core_0113_qianfan_error.png){#fig-qianfan_error}
319 | 
320 | 如果遇到这种情况，可以对 `langchain-core` 降级到 0.1.12 版本来解决该问题。
321 | ```bash
322 | pip install -U "langchain-core==0.1.12"
323 | ```
324 | 
325 | :::
326 | 
327 | ::: {.panel-tabset group="ernie_and_qianfan"}
328 | ## QianfanChatEndpoint
329 | 
330 | ```{#lst-lc_intro_qianfan .python include="./code/test_qianfanendpoint.py" code-line-numbers="true" lst-cap="使用 QianfanChatEndpoint 调用文心大模型"}
331 | ```
332 | 
333 | ## ErnieBotChat
334 | ```{#lst-lc_intro_wx_qianfan .python include="./code/test_wx_qianfan.py" code-line-numbers="true" lst-cap="使用 ErnieBotChat 调用文心大模型"}
335 | ```
336 | :::
337 | 
338 | ### Output Parsers
339 | 大语言模型一般会输出文本内容作为响应，当然更高级的大语言模型（例如文心大模型）还可以输出图片、视频作为响应。但是，很多时候，我们希望可以获得更结构化的信息，而不仅仅是回复一串字符串文本。
340 | 
341 | 我们可以使用 `提示词工程` 来提示 LLMs 输出特定的格式，如 @lst-llm-prompt-parser 所示：
342 | 
343 | ```{#lst-llm-prompt-parser .python lst-cap="使用提示词工程来格式化输出内容"}
344 | from langchain_community.chat_models import QianfanChatEndpoint
345 | from langchain_core.prompts.chat import ChatPromptTemplate
346 | 
347 | template = ChatPromptTemplate.from_messages([
348 |     ("user", "你是一个能力非凡的人工智能机器人，你的名字是 {name}。"),
349 |     ("assistant", "你好~"),
350 |     ("user", "{user_input}"),
351 | ])
352 | 
353 | chat = QianfanChatEndpoint(model="ERNIE-Bot-4")
354 | 
355 | messages = template.format_messages(
356 |     name="小明",
357 |     user_input="请给出 10 个表示快乐的成语，并输出为 JSON 格式"
358 | )
359 | 
360 | res = chat.invoke(input=messages)
361 | print(res)
362 | 
363 | # content='```json\n[\n    "乐不可支",
364 | #                    \n    "喜从天降",
365 | #                    \n    "笑逐颜开",
366 | #                    \n    "手舞足蹈",
367 | #                     ......
368 | #                    \n    "弹冠相庆"\n]\n```' 
369 | # additional_kwargs={} example=False
370 | ```
371 | 
372 | 但是，使用 LangChain 提供的 `Output Parsers` 能力，会更加的方便。
373 | 
374 | ```{#lst-prompt-llm-parse .python lst-cap="使用 Output Parser 解析 LLM 结果"}
375 | from langchain_community.chat_models import QianfanChatEndpoint
376 | from langchain_core.prompts.chat import ChatPromptTemplate
377 | from langchain.output_parsers import CommaSeparatedListOutputParser
378 | 
379 | template = ChatPromptTemplate.from_messages([
380 |     ("user", "你是一个能力非凡的人工智能机器人，你的名字是 {name}。"),
381 |     ("assistant", "你好~"),
382 |     ("user", "{user_input}"),
383 | ])
384 | 
385 | chat = QianfanChatEndpoint(model="ERNIE-Bot-4")
386 | 
387 | messages = template.format_messages(
388 |     name="小明",
389 |     user_input="请给出 10 个表示快乐的成语，并输出为 JSON 格式"
390 | )
391 | 
392 | res = chat.invoke(input=messages)
393 | print(res)
394 | 
395 | # content='```json\n[\n    "乐不可支",
396 | #                    \n    "喜从天降",
397 | #                    \n    "笑逐颜开",
398 | #                    \n    "手舞足蹈",
399 | #                     ......
400 | #                    \n    "弹冠相庆"\n]\n```' 
401 | # additional_kwargs={} example=False
402 | 
403 | output_parser = CommaSeparatedListOutputParser()
404 | res =  output_parser.parse(res.content.replace('，', ', '))
405 | print(res)
406 | # ['乐不可支', '喜从天降', '笑逐颜开', '手舞足蹈', '弹冠相庆']
407 | ```
408 | 
409 | :::{.callout-warning}
410 | 由于文心大模型的指令遵循能力还有进一步提升的空间，因此这里的演示可能需要进行一些额外的操作，例如需要对模型返回的内容进行一些简单的字符串替换。
411 | 
412 | 2023 年 10 月 17 日，百度世界大会上发布了 文心 4.0，我们发现 文心 4.0 在 ICL、指令遵循、推理能力上都有比较大的提升。
413 | 
414 | 在 LangChain 中，要使用 文心 4.0 模型，可以在初始化 LLM 时设置 `model` 参数为 `ERNIE-Bot-4`。
415 | 
416 | ```python
417 | llm = QianfanChatEndpoint(model="ERNIE-Bot-4")
418 | ```
419 | :::
420 | 
421 | ### LLMChain
422 | 虽然一台独立的计算机也能实现很强大的功能，但是通过网络将更多的计算机链接起来，可能发挥出更大的性能。同样的，单独使用 LLMs 已经可以实现强大的功能，但是如果可以将更多次的交互有效的链接起来，则能发挥 LLMs 更大的能量。为了实现这个目标，LangChain 提供了 `Chain` 的概念，以实现对不同组件的一系列调用。
423 | 
424 | 在 LangChain 中，`提示词`、`LLM`、`输出解析` 这三者构成了 `Chain`，而不同的 `Chain` 则可以通过一定的方式链接起来，以实现强大的功能。具体如 @fig-chain_concept 所示。
425 | 
426 | ![LangChain 中 Chain 的概念](./images/chain.png){#fig-chain_concept}
427 | 
428 | 利用 `Chain` 的概念，我们可以对 @lst-prompt-llm-parse 的代码进行重构，
429 | 
430 | ```{#lst-chain_example .python lst-cap="使用 chain 与文心大模型进行交互"}
431 | from langchain_community.chat_models import QianfanChatEndpoint
432 | from langchain_core.prompts.chat import ChatPromptTemplate
433 | from langchain.output_parsers import CommaSeparatedListOutputParser
434 | from langchain.chains import LLMChain
435 | 
436 | template = ChatPromptTemplate.from_messages([
437 |     ("system", "你是一个能力非凡的人工智能机器人，你的名字是 {name}。"),
438 |     ("user", "{user_input}"),
439 | ])
440 | chat = QianfanChatEndpoint(model="ERNIE-Bot-4")
441 | 
442 | chain =  LLMChain(llm=chat, prompt=template, output_parser=CommaSeparatedListOutputParser())
443 | 
444 | query = "请仅给5个表示快乐的成语并以 , 分隔，除了成语外不要输出任何其他内容"
445 | res =  chain.invoke(input={"name": "小明", "user_input": query})
446 | 
447 | print(res)
448 | # ['以下是五个表示快乐的成语：\n\n1. 喜出望外\n2. 乐不可支\n3. 心花怒放\n4. 满心欢喜\n5. 手舞足蹈']
449 | ```
450 | 
451 | ## LangChain 的学习资料
452 | 
453 | * LangChain 官方文档：[https://python.langchain.com/docs/get_started](https://python.langchain.com/docs/get_started)
454 | * LangChain 的典型应用场景：[https://python.langchain.com/docs/use_cases](https://python.langchain.com/docs/use_cases)
455 | * LangChain 目前集成的能力：[https://python.langchain.com/docs/integrations](https://python.langchain.com/docs/integrations)
456 | * LangChain AI Handbook：[https://www.pinecone.io/learn/series/langchain/](https://www.pinecone.io/learn/series/langchain/)
457 | * LangChain Dart：[https://langchaindart.com/#/](https://langchaindart.com/#/)
458 | * 百度智能云千帆大模型平台：[https://cloud.baidu.com/product/wenxinworkshop](https://cloud.baidu.com/product/wenxinworkshop)
459 | * Langflow 官方文档：[https://docs.langflow.org/](https://docs.langflow.org/)
460 | 
461 | ## 参考文献
462 | [^1]: [FFmpeg Filters Documentation](https://ffmpeg.org/ffmpeg-filters.html)
463 | [^2]: [LangChain Introduction](https://python.langchain.com/docs/get_started/introduction)
464 | [^3]: [Prompt Serialization](https://python.langchain.com/docs/modules/model_io/prompts/prompt_templates/prompt_serialization)
465 | [^4]: [A Complete Guide to Data Augmentation](https://www.datacamp.com/tutorial/complete-guide-data-augmentation)
466 | [^5]: [ERNIE-Bot-turbo](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/4lilb2lpf)
467 | [^6]: [百度智能云千帆大模型平台](https://cloud.baidu.com/product/wenxinworkshop)
468 | [^7]: [LangChain 估值](https://ecosystem.lafrenchtech.com/companies/langchain)
469 | [^8]: [LangChain v0.1.0](https://blog.langchain.dev/langchain-v0-1-0/)
470 | 


--------------------------------------------------------------------------------
/langchain_openai_assistant.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | filters:
3 |    - include-code-files
4 | code-annotations: below
5 | ---
6 | 
7 | # LangChain OpenAI Assistant {#sec-assistant}


--------------------------------------------------------------------------------
/langchain_retrieval.qmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | filters:
  3 |    - include-code-files
  4 | ---
  5 | 
  6 | # LangChain Retrieval 
  7 | 
  8 | 在 @sec-RAG 中，我们介绍了基于检索增强的生成式技术，这一章，我们重点介绍如何使用 LangChain 实现 RAG。
  9 | 
 10 | 无论是简单的 RAG 应用，还是复杂的 RAG 应用，LangChain 都为我们提供了相应的构建能力。在 LangChain 中，RAG 的整个过程涉及到如 @fig-rag_langchain_overview 的模块和步骤：
 11 | 
 12 | ![LangChain 中 RAG 的关键模块](./images/rag_langchain_overview.jpeg){#fig-rag_langchain_overview}
 13 | 
 14 | ## Document loaders
 15 | LangChain 提供了[100多种不同的文档加载器](https://python.langchain.com/docs/integrations/document_loaders)，并与该领域的其他主要供应商（如 [AirByte](https://airbyte.com/)、[Unstructured](https://unstructured.io/)）进行了集成，从而可以从任何地方（私有 s3 存储、网站）加载任何类型的文档（HTML、PDF、代码）。
 16 | 
 17 | 文档加载器提供了一个 `load()` 方法来从指定的加载源加载文档数据。文档加载器还提供了一个 `lazy_load()` 方法来实现“延迟加载”，以避免一次将太多的数据加载到内存之中。
 18 | 
 19 | ```{#lst-langchain_loader .python lst-cap="加载远程网页"}
 20 | from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
 21 | 
 22 | URL_ROOT = "https://wangwei1237.github.io/"
 23 | loader = RecursiveUrlLoader(url=URL_ROOT, max_depth=2)
 24 | docs = loader.load()
 25 | 
 26 | print(len(docs))
 27 | 
 28 | URLS = []
 29 | for doc in docs:
 30 |     url   =  doc.metadata["source"]
 31 |     title = doc.metadata["title"]
 32 |     print(url, "->", title)
 33 | ```
 34 | 
 35 | :::{.callout-warning}
 36 | `RecursiveUrlLoader()` 对中文的抓取看起来不是非常友好，中文内容显示成了乱码。可以使用 @lst-langchain_loader_2 所示的方法来解决中文乱码的问题，不过这种方式的缺点是需要 `load()` 两次。更好的方式后续再思考。
 37 | :::
 38 | 
 39 | ```{#lst-langchain_loader_2 .python lst-cap="解决中文乱码的方法"}
 40 | from langchain.document_loaders import WebBaseLoader
 41 | from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
 42 | 
 43 | URL_ROOT = "https://wangwei1237.github.io/"
 44 | loader = RecursiveUrlLoader(url=URL_ROOT, max_depth=2)
 45 | docs = loader.load()
 46 | 
 47 | print(len(docs))
 48 | 
 49 | URLS = []
 50 | for doc in docs:
 51 |     url   =  doc.metadata["source"]
 52 |     URLS.append(url)
 53 | 
 54 | loader = WebBaseLoader(URLS)
 55 | docs = loader.load()
 56 | 
 57 | print(len(docs))
 58 | 
 59 | for doc in docs:
 60 |     url   =  doc.metadata["source"]
 61 |     title =  doc.metadata["title"]
 62 |     print(url, "->", title)
 63 | 
 64 | ```
 65 | 
 66 | ## Document transformers
 67 | 检索的一个关键部分是**只获取文档的相关部分**而非获取全部文档。为了为最终的检索提供最好的文档，我们需要对文档进行很多的转换，这里的主要方法之一是将一个大文档进行拆分。LangChain 提供了[多种不同的拆分算法](https://python.langchain.com/docs/integrations/document_transformers)，并且还针对特定文档类型（代码、标记等）的拆分提供对应的优化逻辑。
 68 | 
 69 | 文档加载后，我们通常会对文档进行一系列的转换，以更好地适应我们的应用程序。最简单的文档转换的场景就是将文档拆分成小块，以便可以满足模型的上下文窗口（不同模型的每次交互的最大 token 数可能不同）。
 70 | 
 71 | 尽管文档拆分听起来很简单，但实际应用中却有很多潜在的复杂性。理想情况下，我们希望将语义相关的文本片段放在一起。“语义相关”的含义会取决于文本的类型，例如：
 72 | 
 73 | * 对于代码文件而言，我们需要将一个函数置于一个完整的拆分块中；
 74 | * 对于普通的文本而言，可能需要将一个段落置于一个完整的拆分块中；
 75 | * ……
 76 | 
 77 | 我们利用 `RecursiveCharacterTextSplitter` 对 @lst-langchain_loader_2 的文档进行拆分。
 78 | 
 79 | ```{#lst-langchain_transfer .python lst-cap="使用 RecursiveCharacterTextSplitter 拆分文档"}
 80 | # ...
 81 | # ...
 82 | text_splitter = RecursiveCharacterTextSplitter(
 83 |     chunk_size = 1000,
 84 |     chunk_overlap  = 20,
 85 |     length_function = len,
 86 |     add_start_index = True,
 87 | )
 88 | 
 89 | for doc in docs:
 90 |     url   =  doc.metadata["source"]
 91 |     title =  doc.metadata["title"]
 92 |     print(url, "-->", title)
 93 |     texts = text_splitter.create_documents([doc.page_content])
 94 |     print(texts)
 95 | ```
 96 | 
 97 | LangChain 也可以对不同的编程语言进行拆分，例如 cpp，go，markdown，……，具体支持的语言可以参见 @lst-langchain_transfer_2。
 98 | 
 99 | ```{#lst-langchain_transfer_2 .python lst-cap="LangChain 支持拆分的语言类型"}
100 | from langchain.text_splitter import Language
101 | 
102 | [e.value for e in Language]
103 | 
104 | #['cpp',
105 | # 'go',
106 | # 'java',
107 | # 'js',
108 | # 'php',
109 | # 'proto',
110 | # 'python',
111 | # 'rst',
112 | # 'ruby',
113 | # 'rust',
114 | # 'scala',
115 | # 'swift',
116 | # 'markdown',
117 | # 'latex',
118 | # 'html',
119 | # 'sol']
120 | ```
121 | 
122 | ## Text embedding models
123 | 检索的另一个关键部分是为文档创建其向量（embedding）表示。Embedding 捕获文本的语义信息，使我们能够快速、高效地查找其他相似的文本片段。LangChain 集成了 [25 种不同的 embedding 供应商和方法](https://python.langchain.com/docs/integrations/text_embedding)，我们可以根据我们的具体需求从中进行选择。LangChain 还提供了一个标准接口，允许我们可以便捷的在不同的 embedding 之间进行交换。
124 | 
125 | 在 LangChain 中，`Embeddings` 类是用于文本向量模型的接口。目前，有很多的向量模型供应商，例如：OpenAI，Cohere，Hugging Face，……`Embeddings` 类的目的就是为所有这些向量模型提供统一的、标准的接口。
126 | 
127 | `Embeddings` 类可以为一段文本创建对应的向量表示，从而允许我们可以在向量空间中去考虑文本。在向量空间中，我们还可以执行语义搜索，从而允许我们在向量空间中检索最相似的文本片段。
128 | 
129 | 因为不同的向量模型供应商对文档和查询采用了不同的向量方法，`Embeddings` 提供了两个方法：
130 | 
131 | * `embed_documents()`：用于文档向量化
132 | * `embed_query()`：用于查询向量化
133 | 
134 | ```{#lst-langchain_embed_query_wx .python lst-cap="使用文心大模型的 Embedding-V1 查询向量化"}
135 | from langchain.embeddings import QianfanEmbeddingsEndpoint 
136 | 
137 | embeddings = QianfanEmbeddingsEndpoint()
138 | query_result = embeddings.embed_query("你是谁？")
139 | print(query_result)
140 | print(len(query_result))
141 | 
142 | # [0.02949424833059311, -0.054236963391304016, -0.01735987327992916, 
143 | #  0.06794580817222595, -0.00020318820315878838, 0.04264984279870987, 
144 | #  -0.0661700889468193, ……
145 | # ……]
146 | # 
147 | # 384
148 | ```
149 | 
150 | ```{#lst-langchain_embed_docs_wx .python lst-cap="使用文心大模型的 Embedding-V1 文档向量化"}
151 | from langchain.embeddings import QianfanEmbeddingsEndpoint 
152 | 
153 | embeddings = QianfanEmbeddingsEndpoint()
154 | docs_result = embeddings.embed_documents([
155 |     "你谁谁？",
156 |     "我是百度的智能助手，小度"
157 | ])
158 | print(len(docs_result), ":" , len(docs_result[0]))
159 | 
160 | # 2 : 384
161 | ```
162 | 
163 | :::{.callout-warning title="使用 QianfanEmbeddingsEndpoint 的注意事项"}
164 | LangChain 在 `0.0.300` 版本之后才支持 `QianfanEmbeddingsEndpoint`，并且 `QianfanEmbeddingsEndpoint` 还依赖 `qianfan` python 库的支持。
165 | 
166 | 因此，在使用 `QianfanEmbeddingsEndpoint` 之前，需要：
167 | 
168 | * 升级 LangChain 的版本：`pip install -U langchain`。
169 | * 安装 `qianfan` 库：`pip install qianfan`。
170 | :::
171 | 
172 | ## Vector stores {#sec-LC_RAG_vector}
173 | 为文档创建 embedding 之后，需要对其进行存储并实现对这些 embedding 的有效搜索，此时我们需要**向量数据库**的支持。LangChain 集成了 [50 多种不同的向量数据库](https://python.langchain.com/docs/integrations/vectorstores)，还提供了一个标准接口，允许我们轻松的在不同的向量存储之间进行切换。
174 | 
175 | ![向量数据库检索的基本流程](./images/vector_stores.jpeg){#fig-vector_stores}
176 | 
177 | 这里，我们使用 [Milvus](https://milvus.io/) 向量数据库来进行相关的演示。Milvus 安装和使用方式可以参见：@sec-milvus_install。
178 | 
179 | 利用 Milvus 对 @lst-langchain_embed_docs_wx 进行优化：
180 | 
181 | ```{#lst-langchain_embed_docs_wx_2 .python lst-cap="使用 Milvus 存储千帆 Embedding-V1 的结果"}
182 | from langchain.document_loaders import WebBaseLoader
183 | from langchain.embeddings import QianfanEmbeddingsEndpoint
184 | from langchain.text_splitter import RecursiveCharacterTextSplitter
185 | from langchain.vectorstores import Milvus
186 | 
187 | url = 'https://wangwei1237.github.io/2023/02/13/duzhiliao/'
188 | loader = WebBaseLoader([url])
189 | docs  = loader.load()
190 | 
191 | text_splitter = RecursiveCharacterTextSplitter(
192 |     chunk_size = 200,
193 |     chunk_overlap  = 20,
194 |     length_function = len,
195 |     add_start_index = True,
196 | )
197 | texts = text_splitter.create_documents([docs[0].page_content])
198 | 
199 | vector_db = Milvus.from_documents(
200 |     texts,
201 |     QianfanEmbeddingsEndpoint(),
202 |     connection_args ={"host": "127.0.0.1", "port": "8081"},
203 | )
204 | 
205 | query = "什么是度知了？"
206 | docs = vector_db.similarity_search(query)
207 | print(docs)
208 | 
209 | ```
210 | 
211 | @lst-langchain_embed_docs_wx_2 的运行结果中，之所以会有两条重复的结果，是因为在执行文档向量化的时候，执行了两遍。在初始化 Milvus 实例时，如果只是查询操作，可以使用如下的方式：
212 | 
213 | ```{#lst-langchain_milvus_search_2 .python lst-cap="Milvus 实例初始化"}
214 | vector_db = Milvus.from_documents(
215 |     [],
216 |     QianfanEmbeddingsEndpoint(),
217 |     connection_args ={"host": "127.0.0.1", "port": "8081"},
218 | )
219 | ```
220 | 
221 | `Milvus.from_documents` 会创建一个名为 `LangChainCollection` 的 `Collection`。可以使用 milvus_cli 工具来查看该 `Collection` 的信息，也可以使用 Milvus 提供的 http 端口来查看相关信息：
222 | 
223 | ```html
224 | http://127.0.0.1:8081/v1/vector/collections/describe?collectionName=LangChainCollection
225 | ```
226 | 
227 | :::{.callout-note title="修改 Collection 名称"}
228 | 为了方便使用，可以使用 `collection_name` 参数以实现将不同的专有数据源存储在不同的 Collection。
229 | 
230 | ```python
231 | vector_db = Milvus.from_documents(
232 |     texts,
233 |     QianfanEmbeddingsEndpoint(),
234 |     connection_args={"host": "127.0.0.1", "port": "8081"},
235 |     collection_name="test", # <1>
236 | )
237 | ```
238 | 
239 | 1. 设置数据存储的 Collection，类似于在关系数据库中，将数据存储在不同的表中。
240 | 
241 | :::
242 | 
243 | :::{.callout-warning}
244 | 使用千帆进行 Embedding 时，每次 Embedding 的 token 是有长度限制的，目前的最大限制是 384 个 token。因此，我们在使用 `RecursiveCharacterTextSplitter` 进行文档拆分的时候要特别注意拆分后文档的长度。
245 | 
246 | ```javascript
247 | qianfan.errors.APIError: api return error, 
248 | code: 336003, 
249 | msg: embeddings max tokens per batch size is 384
250 | ```
251 | :::
252 | 
253 | 在使用时，为了方便，我们可以把 embedding 和 query 拆分为两个部分：
254 | 
255 | * 先将数据源进行向量化，然后存储到 Milvus 中
256 | * 检索的时候，直接从 Milvus 中检索相关信息
257 | 
258 | 对 @lst-langchain_embed_docs_wx 的代码进行优化：
259 | 
260 | ```{#lst-langchain_milvus_embedding .python include="./code/test_milvus_embedding.py" code-line-numbers="true" lst-cap="文档向量化后存入 Milvus"}
261 | ```
262 | 
263 | 检索相似内容的代码可以简化为：
264 | 
265 | ```{#lst-langchain_milvus_embedding_search .python include="./code/test_embedding_query.py" code-line-numbers="true" lst-cap="内容检索"}
266 | ```
267 | 
268 | :::{.callout-warning}
269 | 因为千帆向量化的 API 有 QPS 限制，因此，在使用千帆进行 embedding 时尽量控制一下 QPS。
270 | :::
271 | 
272 | ## Retrievers
273 | 检索是 LangChain 花费精力最大的环节，LangChain 提供了[许多不同的检索算法](https://python.langchain.com/docs/integrations/retrievers)，LangChain 不但支持简单的语义检索，而且还增加了很多算法以提高语义检索的性能。
274 | 
275 | 一旦我们准备好了相关的数据，并且将这些数据存储到向量数据库（例如 Milvus），我们就可以配置一个 `chain`，并在 `提示词` 中包含这些相关数据，以便 LLM 在回答我们的问题时可以利用这些数据作为参考。
276 | 
277 | 对于参考外部数据源的 QA 而言，LangChain 提供了 4 种 `chain`：**stuff**，**map_reduce**，**refine**，**map_rerank**。`stuff chain` 把文档作为整体包含到 `提示词` 中，这只适用于小型文档。由于大多数 LLM 对 `提示词` 可以包含的 token 最大数量存在限制，因此建议使用其他三种类型的 `chain`。对于非 `stuff chain`，LangChain 将输入文档分割成更小的部分，并以不同的方式将它们提供给 LLM。这 4 种 `chain` 的具体信息和区别可以参见：[docs/modules/chains/document](https://python.langchain.com/docs/modules/chains/document)。
278 | 
279 | 我们利用 `QAWithSourcesChain` 对 @lst-langchain_milvus_embedding_search 进行优化，以实现一个完整的利用外部数据源的 **Retrieval Augmented Generation**（需要配合 @lst-langchain_milvus_embedding）。
280 | 
281 | ```{#lst-langchain_rag_demo .python include="./code/test_langchain_rag.py" code-line-numbers="true" lst-cap="基于 LangChain 和 Milvus 的 RAG"}
282 | ```
283 | 
284 | @lst-langchain_rag_demo 的运行结果如下，结果包括 `intermediate_steps` 和 `output_text`：
285 | 
286 | * `intermediate_steps` 表示搜索过程中所指的文档
287 | * `output_text` 表示是问题的最终答案
288 | 
289 | ```javascript
290 | 4
291 | 
292 | {'intermediate_steps': 
293 |     [
294 |         '根据提供的上下文信息，回答问题：\n\n「度知了」是一个在线问答平台，使用指南是由作者严丽编写的。该平台供了一个问答系统，用户可以在其中提出问题和获取答案。「度知了」的目的是帮助用户更好地理解和掌握知识，并提供了一个方便的途径来获取所需的信息。', 
295 |         '根据提供的上下文信息，「度知了」是一个在线问答平台，使用指南是由作者严丽编写的。该平台提供了一个问答系统，用户可以在其中提出问题和获取答案。「度知了」的目的是帮助用户更好地理解和掌握知识，并提供了一个方便的途径来获取所需的信息。度知了基于ITU标准，依托自研的10+项专利技术，在不断实践的基础之上而形成的一款支持多端（PC，Android，iOS）评测的视频画质评测服务。\n\n因此，「度知了」是一个在线问答平台，提供视频画质评测服务。', 
296 |         '根据提供的上下文信息，「度知了」是一个在线问答平台，提供视频画质评测服务。它基于ITU标准，依托自研的10+项专利技术，支持多端（PC，Android，iOS）评测。该平台旨在帮助用户更好地理解和掌握知识，并提供了一个方便的途径来获取所需的信息。「度知了」已上架各大商店应用市场，安卓端可通过华为应用商店、百度手机助手、小米应用商店、oppo应用商店、vivo应用商店直接搜索「度知了」进行安装。在APP端，用户可以通过快捷创建创建一个评测任务。', 
297 |         "Based on the new context, the existing answer is still accurate. The 'duzhiliao' in the original answer refers to the online platform 'Du Zhili', which provides video quality evaluation services. It is a multi-platform application (PC, Android, iOS) that uses 10+ self-developed patent technologies based on ITU standards to help users better understand and master knowledge, and provide a convenient way to obtain needed information. The platform has been uploaded to various store application markets, and users can install it through search for 'Du Zhili' on Huawei App Store, Baidu App Store, Xiaomi App Store, OPPO App Store, Vivo App Store. In the app, users can quickly create a review task."
298 |     ], 
299 |     'output_text': "Based on the new context, the existing answer is still accurate. The 'duzhiliao' in the original answer refers to the online platform 'Du Zhili', which provides video quality evaluation services. It is a multi-platform application (PC, Android, iOS) that uses 10+ self-developed patent technologies based on ITU standards to help users better understand and master knowledge, and provide a convenient way to obtain needed information. The platform has been uploaded to various store application markets, and users can install it through search for 'Du Zhili' on Huawei App Store, Baidu App Store, Xiaomi App Store, OPPO App Store, Vivo App Store. In the app, users can quickly create a review task."
300 | }
301 | ```
302 | 
303 | 为了显示 RAG 的优点，我们可以利用 @lst-chain_example 所示的代码向 LLM 问同样的问题：
304 | 
305 | ```python
306 | res =  chain.run(name="小明", user_input="什么是度知了?")
307 | print(res)
308 | 
309 | # ['度知了是一款智能问答产品，它能够理解并回答问题，提供信息和建议，主要应用在搜索、智能问答、智能语音交互等领域。\n\n度知了运用了文心大模型的能力，涵盖了海量数据，可以更好地理解和回答各种各样的问题。文心大模型是中国的一个大规模语言模型，它可以用于各种自然语言处理任务，包括文本分类、问答、文本摘要等。']
310 | ```
311 | 
312 | ## RetrievalQA
313 | 使用 RetrievalQA 也可以实现 @lst-langchain_rag_demo 同样的功能，并且代码整体会更简洁。
314 | 
315 | ```{#lst-langchain_rag_retrievalQA .python include="./code/test_retrievalQA.py" code-line-numbers="true" lst-cap="基于 RetrievalQA 和 Milvus 的 RAG"}
316 | ```
317 | 
318 | 1. 使用 Milvus 初始化向量检索器
319 | 2. 因为文心对 MessageList 的限制，所以此处要重写 Prompt，否则执行时会报 Message 类型错误。具体提示词的修改可以参考：@lst-langchain_rag_retrievalQA_prompt。
320 | 3. 使用向量检索器初始化 RetrievalQA 实例
321 | 4. 执行 RAG 检索并提炼最终结果
322 | 
323 | ```{#lst-langchain_rag_retrievalQA_prompt .python include="./code/retrieval_prompt.py" code-line-numbers="true" lst-cap="RetrievalQA 的提示词"}
324 | ```
325 | 
326 | 1. 修改 `SystemMessagePromptTemplate` 为 `HumanMessagePromptTemplate`。
327 | 2. 增加一条 `AIMessagePromptTemplate` 消息。
328 | 
329 | @lst-langchain_rag_retrievalQA 的运行结果如下所示：
330 | 
331 | ```bash
332 | 度知了是一款视频画质评测服务，基于ITU标准，依托自研的10+项专利技术，支持多端（PC、Android、iOS）评测，提供画质评测工具。
333 | ```
334 | 


--------------------------------------------------------------------------------
/langchain_serialization.qmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | filters:
  3 |    - include-code-files
  4 | code-annotations: below
  5 | ---
  6 | 
  7 | # LangChain 序列化 {#sec-LS}
  8 | 使用 [Docker Hub](https://dockerhub.p2hp.com/)，我们可以非常方便的查找、使用和共享容器，这简直是开发者的福音。
  9 | 
 10 | ```bash
 11 | $ docker pull ubuntu
 12 | $ docker run -it ubuntu
 13 | ```
 14 | 
 15 | 正如 @sec-prompt_engineering 所述，prompt 在 AI 原生应用中具有非常重要的地位。为了能够方便 prompt 的查找、使用和共享，LangChain 为我们提供了一系列的序列化的能力，包括 prompt 序列化，chain 序列化……
 16 | 
 17 | 序列化能力可以让我们实现代码模块化，并且会大大简化在团队内部或更广泛的组织之间共享 prompt 的过程，也更有利于 prompt 的壮大和发展。
 18 | 
 19 | ## prompt 序列化
 20 | 在 LangChain 中，prompt 的序列化支持两种格式：`YAML`，`JSON`，可以使用文件扩展名来标识序列化的文件格式。
 21 | 
 22 | 我们可以用 `PromptTemplate.save()` 对 @lst-prompt 所示的 prompt 进行序列化，并使用 `load_prompt` 从序列化文件中加载已经存储好的 prompt。
 23 | 
 24 | ```python
 25 | from langchain import PromptTemplate
 26 | 
 27 | prompt_files = ["prompt_template.json", "prompt_template.yaml"]
 28 | prompt_template = PromptTemplate.from_template(
 29 |     "请以轻松欢快的语气写一篇描写 {topic} 的文章，字数不超过 {count} 字。"
 30 | )
 31 | [prompt_template.save(f) for f in prompt_files]
 32 | ```
 33 | 
 34 | 生成的序列化 prompt 文件如下所示：
 35 | 
 36 | ::: {.panel-tabset group="prompt_save_types"}
 37 | 
 38 | ## YAML
 39 | ```yaml
 40 | _type: prompt #<1>
 41 | input_types: {}
 42 | input_variables: #<2>
 43 | - count
 44 | - topic
 45 | output_parser: null
 46 | partial_variables: {}
 47 | template: "\u8BF7\u4EE5\u8F7B\u677E\u6B22\u5FEB\u7684\u8BED\u6C14\u5199\u4E00\u7BC7\
 48 |   \u63CF\u5199 {topic} \u7684\u6587\u7AE0\uFF0C\u5B57\u6570\u4E0D\u8D85\u8FC7 {count}\
 49 |   \ \u5B57\u3002" #<3>
 50 | template_format: f-string
 51 | validate_template: false
 52 | ```
 53 | 
 54 | 1. 该序列化的类型 
 55 | 2. prompt 模版中的变量名
 56 | 3. prompt 模版内容
 57 | 
 58 | ## JSON
 59 | ```json
 60 | {
 61 |     "input_variables": [
 62 |         "count",
 63 |         "topic"
 64 |     ],  
 65 |     "input_types": {},
 66 |     "output_parser": null,
 67 |     "partial_variables": {},
 68 |     "template": "\u8bf7\u4ee5\u8f7b\u677e\u6b22\u5feb\u7684\u8bed\u6c14\u5199\u4e00\u7bc7\u63cf\u5199 {topic} \u7684\u6587\u7ae0\uff0c\u5b57\u6570\u4e0d\u8d85\u8fc7 {count} \u5b57\u3002",
 69 |     "template_format": "f-string",
 70 |     "validate_template": false,
 71 |     "_type": "prompt"
 72 | }
 73 | ```
 74 | :::
 75 | 
 76 | 从 `prompt_template.json` 文件中加载其中存储的 prompt。
 77 | 
 78 | ```python
 79 | from langchain.prompts import load_prompt
 80 | 
 81 | prompt = load_prompt("prompt_template.json")
 82 | res = prompt.format(topic="秋天", count=100)
 83 | print(res)
 84 | # 请以轻松欢快的语气写一篇描写 秋天 的文章，字数不超过 100 字。
 85 | ```
 86 | 
 87 | ## LangChain Hub
 88 | 如果我们希望在团队内部或者和其他人共享我们的 prompt，那么仅依靠序列化还是远远不够的。和 Docker Hub 类似，[LangChain Hub](https://docs.smith.langchain.com/hub/quickstart) 为我们共享、查找 prompt 提供了非常好的能力支撑。
 89 | 
 90 | 但是，比较遗憾的是，目前 LangChain Hub 还处于内测期，非内测用户无法获取 `LANGCHAIN_HUB_API_KEY`，因此也无法把自己的 prompt 上传到 LangChain Hub 中，也无法使用 `hub.pull()` 加载 prompt。
 91 | 
 92 | 但是，好消息是，我们可以通过 LangChain Hub 的 [web 页面](https://smith.langchain.com/hub?organizationId=c4887cc4-1275-5361-82f2-b22aee75bad1) 访问现存的所有开放 prompt，这对于我们学习 prompt 还是有很大帮助的。
 93 | 
 94 | ## LangChainHub
 95 | 虽然 LangChain Hub 还在内测中，但是这点困难毫不影响我们分享 prompt 的决心。[hwchase17/langchain-hub](https://github.com/hwchase17/langchain-hub) 这个项目就实现了 LangChain Hub 的功能，并且目前该项目也已经集成到了 LangChain，LangChain 可以原生支持从 [hwchase17/langchain-hub](https://github.com/hwchase17/langchain-hub) 仓库中拉取 prompt。我们可以非常方便的将我们自己的 prompt 提交到该仓库以供其他人使用。
 96 | 
 97 | 为了研究 LangChain 是如何使用 `hwchase17/langchain-hub` 的，我们需要分析 `load_prompt()` 的底层原理。
 98 | 
 99 | ```{#lst-load_prompt_function .python lst-cap="load_prompt() 的实现"}
100 | HUB_PATH_RE = re.compile(r"lc(?P<ref>@[^:]+)?://(?P<path>.*)") #<1>
101 | 
102 | def try_load_from_hub(
103 |     path: Union[str, Path],
104 |     loader: Callable[[str], T],
105 |     valid_prefix: str,
106 |     valid_suffixes: Set[str],
107 |     **kwargs: Any,
108 | ) -> Optional[T]:
109 |     """Load configuration from hub.  Returns None if path is not a hub path."""
110 |     if not isinstance(path, str) or not (match := HUB_PATH_RE.match(path)): #<2>
111 |         return None
112 |     #……
113 |     #……
114 | 
115 | def load_prompt(path: Union[str, Path]) -> BasePromptTemplate:
116 |     """Unified method for loading a prompt from LangChainHub or local fs."""
117 |     if hub_result := try_load_from_hub(
118 |         path, _load_prompt_from_file, "prompts", {"py", "json", "yaml"}
119 |     ): #<3>
120 |         return hub_result
121 |     else:
122 |         return _load_prompt_from_file(path)
123 | 
124 | ```
125 | 
126 | 1. 加载的 `hwchase17/langchain-hub` 文件名的正则表达式
127 | 2.  如果 path 不符合 `lc://prompts/path/to/file.json` 的格式，直接返回
128 | 3. 如果 path 符合 `lc://prompts/path/to/file.json` 的格式，则尝试从 `hwchase17/langchain-hub` 下载对应文件，并通过 `_load_prompt_from_file()` 加载 prompt
129 | 
130 | ```{#lst-load_prompt_from_lc .python lst-cap="加载 hwchase17/langchain-hub 中的 prompt"}
131 | from langchain.prompts import load_prompt
132 | prompt = load_prompt('lc://prompts/hello-world/prompt.yaml')
133 | res = prompt.format()
134 | print(res)
135 | 
136 | # No `_type` key found, defaulting to `prompt`.
137 | # Say hello world.
138 | ```
139 | 
140 | ## 自定义 LangChainHub
141 | 从 `try_load_from_hub()` 的代码实现我们发现，LangChain 默认从 `{URL_BASE}` 下加载我们给定的资源，也就是加载 `{URL_BASE}/prompt_file` 文件。
142 | 
143 | ```python
144 | DEFAULT_REF = os.environ.get("LANGCHAIN_HUB_DEFAULT_REF", "master")
145 | URL_BASE = os.environ.get(
146 |     "LANGCHAIN_HUB_URL_BASE",
147 |     "https://raw.githubusercontent.com/hwchase17/langchain-hub/{ref}/",
148 | )
149 | 
150 | def try_load_from_hub(): 
151 |     #……
152 |     full_url = urljoin(URL_BASE.format(ref=ref), PurePosixPath(remote_path).__str__())
153 | ```
154 | 
155 | 而 `URL_BASE` 是通过环境变量的方式来配置的，这使得我们自定义一个 LangChainHub 变得非常简单。我们只需要搭建一个自己的文件服务器（`my_file_server_domain`），然后用 `my_file_server_domain` 替换 `URL_BASE` 即可。
156 | 
157 | ```bash
158 | $ export LANGCHAIN_HUB_URL_BASE=${my_file_server_domain}
159 | ```
160 | 
161 | 


--------------------------------------------------------------------------------
/langflow_intro.qmd:
--------------------------------------------------------------------------------
 1 | # Langflow {#sec-lf_langflow}
 2 | 
 3 | [Langflow](https://github.com/logspace-ai/langflow) 是 LangChain 的非官方的 UI。使用 Langflow，我们可以更简便的以可视化的方式来体验 LangChain 并为基于 LangChain 的大语言模型应用提供原型设计能力。
 4 | 
 5 | ::: {.content-visible when-format="html"}
 6 | 
 7 | ![Langflow 示例](./images/langflow-demo.gif){#fig-langflow_demo}
 8 | 
 9 | :::
10 | 
11 | ::: {.content-visible when-format="pdf"}
12 | 
13 | ![Langflow 示例](./images/langflow-demo.jpg){#fig-langflow_demo}
14 | 
15 | :::
16 | 
17 | 想要深入了解 Langflow，可以阅读 Langflow 的[官方文档](https://docs.langflow.org/)。
18 | 


--------------------------------------------------------------------------------
/langsmith_intro.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | filters:
3 |    - include-code-files
4 | code-annotations: below
5 | ---
6 | 
7 | # LangSmith 简介 {#sec-LS_intro}


--------------------------------------------------------------------------------
/llm_intro.qmd:
--------------------------------------------------------------------------------
  1 | # LLM 的前世今生
  2 | 
  3 | 2022 年 11 月 30 日，OpenAI 正式发布了其面向消费用户的产品——ChatGPT。ChatGPT 一经发布便激起了圈内、圈外的广泛讨论——毕竟已经很长时间没有一种类似的技术可以引起如此广泛的讨论，ChatGPT 的发布也标志着大语言模型（LLM: Large Language Model） 时代的到来。看起来，刚刚要崛起的`元宇宙`，在这股风潮之下，也失去了往日的喧嚣。
  4 | 
  5 | 根据 [热搜引擎](https://weibo.zhaoyizhe.com/superInfo.html?topic=ChatGPT) 提供的微博热搜历史数据，我们发现，2022 年 12 月 5 日，ChatGPT 第一次登上微博热搜榜，其最后的在榜时间为 2023 年 3 月 31 日，累计在榜时长达到了 1391 分钟。
  6 | 
  7 | ![ChatGPT登上微博热搜](./images/llm_chatgpt_wb_hs.jpg){#fig-llm_chatgpt_wb_hs}
  8 | 
  9 | 根据 [百科星图](https://baike.baidu.com/starmap/view?nodeId=e0a309bf5b0f017891c7b859) 可知，目前谷歌、亚马逊、百度、阿里等多个科技巨头都加入到了 *对话式大语言模型* 的研发中。
 10 | 
 11 | * 2023 年 2 月 6 日，谷歌宣布将推出一款聊天机器人——Bard，2 月 9 日，谷歌 Bard 发布会试演翻车，回答内容出现错误，当日市值暴跌1000亿美元。
 12 | * 2023 年 2 月 24 日，Meta 官宣 SOTA 大语言模型 LLaMA，对非商用的研究用例开源。
 13 | * 2023 年 3 月 14 日，斯坦福发布了一个由 LLaMA 7B 微调的模型 Alpaca，性能和 GPT-3.5 不相上下。
 14 | * 2023 年 3 月 14 日，OpenAI 发布 GPT-4。
 15 | * 2023 年 3 月 16 日，百度举办“百度文心一言新闻发布会”，正式发布 *文心一言*。
 16 | * 2023 年 4 月 11 日，阿里在阿里云峰会上，正式宣布推出大语言模型 *通义千问*。
 17 | * 2023 年 7 月 18 日，Meta 官宣发布 LLaMA2。
 18 | * 2023 年 10 月 17 日，百度在 2023 年的百度世界大会上宣布发布 *文心 4.0*。
 19 | * ……
 20 | 
 21 | 根据 [-@zhao2023survey]，在学术界，ChatGPT 发布之后，和大模型相关的论文的数量也呈现出爆发式增长。
 22 | 
 23 | ::: {#fig-trend_for_llm layout-ncol=2}
 24 | 
 25 | ![Query = "Language Model"](./images/treand_lm.jpg){#fig-trend_lm}
 26 | 
 27 | ![Query = "Large Language Model"](./images/trend_llm.jpg){#fig-trend_llm}
 28 | 
 29 | [arXiv](https://arxiv.org/) 论文库中“大语言模型”的论文数量趋势图
 30 | :::
 31 | 
 32 | ## 大语言模型族谱
 33 | [-@vaswani2023attention] 可以称之为大语言模型的鼻祖和源泉，在 [-@vaswani2023attention] 中，谷歌机器翻译团队提出了由多组 Encoder/Decoder 构成的机器翻译模型 Transformer，而 Transformer 模型也成为了一切的起点。之后，大模型的发展大致走上了两条路：
 34 | 
 35 | * 一条路是舍弃 Decoder 部分，仅仅使用 Encoder 部分的自编码语言模型[^1]，其最出名的代表就是 Bert 家族。
 36 | * 一条路是舍弃 Encoder 部分，仅仅基于 Decoder 部分的自回归语言模型[^2]，而 ChatGPT 背后的 GPT[^3] 家族则属于 Decoder-only 的分支。
 37 | 
 38 | [-@yang2023harnessing] 给出了如 @fig-llm_family_tree 所示的大语言模型的族谱。
 39 | 
 40 | ![大语言模型族谱](./images/LLMTree.jpeg){#fig-llm_family_tree}
 41 | 
 42 | 在大语言模型发展的早期，以 Bert 为代表的自编码模型突飞猛进，但是由于没有突破 Scaling Laws[^4]，因此其发展也逐渐凋零。反之，由于 GPT 的研究人员发现：扩大语言模型的规模可以显著提高零样本与小样本的学习的能力——也即突破了 Scaling Laws，以 GPT 为代表的自回归分支则更加枝繁叶茂，成为了当下大模型发展的主流分支。
 43 | 
 44 | ## GPT 的贡献
 45 | [-@zhao2023survey] 对 LLM 的相关能力和 GPT 的相关进展做了详细的描述，这里我们重点说一下 GPT 对 LLM 发展的核心贡献。
 46 | 
 47 | ### 预训练+微调的模型架构
 48 | 2018 年，OpenAI 发表了论文 [-@radford2018improving]，这就是 GPT-1。GPT-1 提出的预训练+微调的方法可以更好的利用大量的预训练数据，从而让模型能够更好的适应各种特定任务。虽然当时还存在一些局限性，例如当时还不能根据一个给定的标题来生成一篇新闻报道，但是 GPT-1 所开创的这种 预训练+微调 的模型架构，对 NLP 的后续发展具有深远的影响。
 49 | 
 50 | * 在预训练阶段，模型会在大规模无标注文本上进行无监督学习，提取通用的语言特征。
 51 | * 在微调阶段，模型会在特定任务上进行有监督的学习，以适应不同的任务需求。
 52 | 
 53 | ### 迁移学习能力
 54 | 为了解决 GPT-1 的问题，2019 年，OpenAI 发布了 GPT-2，论文 [-@Radford2019LanguageMA] 对 GPT-2 进行了详细的阐述。通过增加模型参数和数据量，GPT-2 极大的提高了模型的泛化能力和生成能力。除了在特定任务上表现较好（例如根据标题生成文章）之外，GPT-2 还初步表现出一定的零样本或少量样本学习能力。这使得 GPT-2 能够适用于多种自然语言处理任务，例如：翻译，问答，摘要生成，文本生成等，而在 GPT-2 之前，这些特殊任务需要设计专门的模型来分别实现。GPT-2 通过实践证明通过海量数据和大量参数训练出来的词向量模型在不经过额外的特殊训练下就可以迁移到不同类别的任务。
 55 | 
 56 | GPT-2 最大的贡献也在于它通过实践验证了大模型的迁移学习能力。
 57 | 
 58 | ### 上下文学习能力和涌现
 59 | 2020 年，OpenAI 发布了 1750 亿参数规模的、性能更加强大的 GPT-3。[-@NEURIPS2020_1457c0d6] 中提到，GPT-3 提出了上下文学习（ICL：in-context learning）的概念。ICL 可以指导 LLM 理解以自然语言形式呈现的任务，利用 ICL 的能力，我们可以通过优化给 LLM 的输入以获取更好的结果。在 ICL 的加持下，@sec-prompt_engineering 中介绍的提示词工程才得以成为可能。
 60 | 
 61 | GPT-3 在多种自然语言处理任务上展现出了惊人的性能，甚至可以仅通过简单的提示词来适应不同的处理任务。研究人员并未在 GPT-3 训练完成之前预测到该模型具备如此强大的能力。GPT-3 的实践证明，LLM 可以具备涌现能力（Emergent Ability）。
 62 | 
 63 | ### 代码能力和指令遵循能力
 64 | 为了进一步提升模型的性能，OpenAI 继续探索了两种主要方法：基于代码数据训练，以及与人类偏好保持一致。
 65 | 
 66 | 2021 年，OpenAI 在 [-@chen2021evaluating] 中推出了在大量 GitHub 代码语料库上微调的 GPT 模型——Codex。Codex 可以解决非常复杂的编程问题，并且还可以显著提高解决数学问题的性能。目前，大名鼎鼎的 [Github Copilot](https://github.com/features/copilot) 就是基于 [Codex](https://openai.com/blog/openai-codex) 模型而研发。
 67 | 
 68 | 2022 年，OpenAI 在 [-@ouyang2022training] 中推出了基于 RLHF 技术的增强版 GPT-3——InstructGPT。InstructGPT 在指令遵循方面对 GPT-3 模型做了微调，使得其更善于遵循用户的意图。
 69 | 
 70 | 代码能力和指令遵循能力进一步增强了 GPT-3 模型的能力，OpenAI 将其称之为 GPT-3.5。而 ChatGPT 刚刚推出的时候，其背后默认的模型就是 GPT-3.5。
 71 | 
 72 | 所以，从整个的 GPT 的历程看，从 2018 年 ~ 2022 年，在长达 5 年多的时间里，OpenAI 一步一步通过探索和实践，让大模型应该具备的相关能力一点一点的浮出水面，进入我们的视野。
 73 | 
 74 | ## 如何使用 LLM
 75 | 大模型虽然好，但是我们该如何使用大模型呢？是自己训练一个大模型，还是微调（@sec-sft），亦或是用提示词工程（@sec-prompt_engineering），还是随便选择一种方案？
 76 | 
 77 | ![使用 LLM 的不同方式，每种方式对应不同的成本，可以解决的问题也不相同](./images/llm_in_action_ways.png){#fig-llm_action_w}
 78 | 
 79 | [-@yang2023harnessing] 中给出了一种决策流程以帮助我们决策具体采用哪种方案：
 80 | 
 81 | ![用户选择 LLM 或微调模型的决策流程。该决策流程帮助用户评估其应用场景是否满足特定条件，并根据评估结果确定 LLM 或微调模型是否最适合其应用场景。在图中的决策过程中，Y 表示满足条件，N 表示不满足条件。最后一个条件的 Y 的黄色圆圈表示没有模型在这种应用上运行良好。](./images/llm_decision_flow.jpg){#fig-llm_decision_flow}
 82 | 
 83 | ### 传统自然语言理解任务
 84 | 对于大多数传统自然语言理解的任务，微调模型的效果更好。
 85 | 
 86 | * 文本分类
 87 | * 情感分析
 88 | * 信息检索
 89 | 
 90 | 当然 LLMs 的潜力受限于 Prompt 工程可能仍未完全释放。在一些小众的领域，如 Miscellaneous Text Classification，Adversarial NLI 等任务中 ，LLMs 由于更强的泛化能力因而具有更好的性能，但是在目前而言，对于有成熟标注的数据而言，微调模型可能仍然是对传统任务的最优解。
 91 | 
 92 | ### 自然语言生成任务
 93 | 相较于自然语言理解，自然语言生成就是大模型的战场了。自然语言生成的目标主要是创建连贯、通顺、有意义的序列，LLM 对这种场景有天然的优势，例如：机器翻译、段落信息摘要、写作、画图……。有时候，我们使用简单的 `提示词工程`（@sec-prompt_engineering）就可以实现强大的内容生成工作。
 94 | 
 95 | ### 知识密集型任务
 96 | 知识密集型任务一般指强烈依赖背景知识、领域专业知识或者一般世界知识的任务，知识密集型任务区别于简单的模式识别与句法分析，需要对我们的现实世界拥有“常识”并能正确的使用。在涉及这类场景时，虽然大模型不是“百灵鸟”，但是我们可以采用 RAG（@sec-RAG） 的模式来增强大语言模型的性能。
 97 | 
 98 | ### 推理任务
 99 | LLM 的扩展能力可以极大的增强预训练语言模型的能力，当模型规模指数增加时，一些关键能力（如推理的能力）会逐渐随参数的扩展而被激活，LLM 的算术推理与常识推理的能力肉眼可见的异常强大。当然，随着模型规模的增长，模型还会表现出一些 Emergent Ability，例如符号操作、逻辑推导、概念理解等等。当然，虽然 LLM 具备一定的算术推理能力，但是在涉及到数学计算等场景时，我们最好还是采用 Agent（@sec-agent） 调用外部工具的方式避免大模型的幻觉（@sec-hallucination）以获得始终精确的结果。
100 | 
101 | ## LLM 的缺陷
102 | 虽然 LLM 目前已经具备非常强大的性能，虽然我们已经开始在尝试着和 LLM 协作，虽然 LLM 已经开始在提升我们的工作质效方面发挥着强大的作用，但是我们还是要认识到：LLM 并非完美无瑕。
103 | 
104 | 除了性能、效率、成本等问题外，LLM 的安全问题几乎是大模型所面对的所有挑战之中的重中之重。另外，机器幻觉（@sec-hallucination）也是大模型目前还没有特别好的解决方案的主要问题，大模型输出的有偏差或有害的幻觉将会对使用者造成严重后果。
105 | 
106 | ![幻觉的例子](./images/obtuse_angle.jpg){#fig-huanjue_llm}
107 | 
108 | 目前，LLM 面临的主要挑战可以分为：
109 | 
110 | * 实践验证：当前针对大模型的评估数据集往往是更像“玩具”的学术数据集，但是这些学术数据集无法完全反映现实世界中形形色色的问题与挑战，因此亟需实际的数据集在多样化、复杂的现实问题上对模型进行评估，确保模型可以应对现实世界的挑战。更多的大模型评估的内容可以参见 [-@LLMEvaluationSurvey]。
111 | * 模型对齐：大模型的强大也引出了另一个问题——模型应该与人类的价值观对齐，确保模型行为符合预期，不会“强化”不良结果。作为一个高级的复杂系统，如果不认真处理这种道德问题，有可能会为人类酝酿一场灾难。
112 | * 安全隐患：大模型的研究要进一步强调安全问题，消除安全隐患，需要具体的研究确保大模型的安全研发，需要更多的做好模型的可解释性、监督管理工作，安全问题应该是模型开发的重要组成部分，而非锦上添花可有可无的装饰。
113 | * 模型的可解释性：我们针对大模型神奇现象的了解仍然十分有限，针对大模型原理性的见解仍然十分珍贵。
114 | 
115 | [^1]: 自编码模型：自编码语言模型通过随机Mask输入的部分单词，然后预训练的目标是预测被Mask的单词，不仅可以融入上文信息，还可以自然的融入下文信息。
116 | [^2]: 自回归模型：自回归语言模型根据输入序列中的前面的内容来预测序列中的下一个词。自回归模型只能利用上文或者下文的信息，不能同时利用上文和下文的信息。
117 | [^3]: GPT，Generative Pre-trained Transformer，基于 Transformer 的生成式预训练模型。
118 | [^4]: Scaling Laws：随着模型大小、数据集大小和用于训练的计算浮点数的增加，模型的性能会提高。为了获得模型的最佳性能，所有三个因素必须同时放大。当不受其他两个因素的制约时，模型性能与每个单独的因素都有幂律关系。
119 | 


--------------------------------------------------------------------------------
/milvus_install.qmd:
--------------------------------------------------------------------------------
  1 | # Milvus Beginner {#sec-milvus_install}
  2 | 
  3 | ## Milvus 安装
  4 | ### 1. 安装 docker-ce
  5 | 
  6 | <https://docs.docker.com/engine/install/ubuntu/#install-using-the-repository>。
  7 | 
  8 | ### 2. 安装 docker-compose
  9 | 
 10 | ```bash
 11 | $ sudo curl -L "https://github.com/docker/compose/releases/download/v2.22.0/docker-compose-$(uname -s | tr 'A-Z' 'a-z')-$(uname -m)" -o /usr/local/bin/docker-compose
 12 | 
 13 | $ sudo chmod +x /usr/local/bin/docker-compose
 14 | 
 15 | $ docker-compose --version
 16 | ```
 17 | 
 18 | ### 3. 安装 docker-milvus 并启动
 19 | 
 20 | ```bash
 21 | $ mkdir milvus && cd milvus 
 22 | 
 23 | $ wget https://github.com/milvus-io/milvus/releases/download/v2.3.1/milvus-standalone-docker-compose.yml -O docker-compose.yml
 24 | 
 25 | $ sudo docker compose up -d
 26 | 
 27 | $ sudo docker compose ps
 28 | 
 29 | ```
 30 | 
 31 | ## Milvus 测试
 32 | 
 33 | :::{.callout-warning}
 34 | 为了避免不同网络环境下的端口限制，可以使用 Nginx 的 TCP Proxy 功能代理 Milvus 默认的 `19530` 端口和 `9091` 端口。具体配置参见：@lst-milvus_port。
 35 | 
 36 | ```{#lst-milvus_port .bash lst-cap="Nginx 反向代理配置"}
 37 | stream {
 38 |     server {
 39 |         listen 8081;
 40 |         proxy_pass 127.0.0.1:19530;
 41 |     }
 42 | 
 43 |     server {
 44 |         listen 8082;
 45 |         proxy_pass 127.0.0.1:9091;
 46 |     }
 47 | }
 48 | ```
 49 | :::
 50 | 
 51 | ### 安装 Milvus SDK
 52 | 
 53 | ```bash
 54 | python3 -m pip install pymilvus
 55 | ```
 56 | 
 57 | ### 测试 Milvus
 58 | 
 59 | ```python
 60 | from pymilvus import connections,db
 61 | 
 62 | res = connections.connect(
 63 |   host='127.0.0.1',
 64 |   port='8081'
 65 | )
 66 | 
 67 | # database = db.create_database("test")
 68 | res = db.list_database()
 69 | print(res)
 70 | 
 71 | # ['default', 'test']
 72 | ```
 73 | 
 74 | 执行 `docker-compose logs -f | grep 'test'` 可以看到 Milvus 创建 `test` 数据库的日志：
 75 | 
 76 | ```{#lst-milvus_test .bash lst-cap="创建数据库日志"}
 77 | milvus-standalone  | [2023/09/26 05:30:03.922 +00:00] [INFO] [proxy/impl.go:174] ["CreateDatabase received"] [traceID=91fb5dbbd0a5a8028b7c048552bbbbb9] [role=proxy] [dbName=test]
 78 | milvus-standalone  | [2023/09/26 05:30:03.922 +00:00] [INFO] [proxy/impl.go:182] ["CreateDatabase enqueued"] [traceID=91fb5dbbd0a5a8028b7c048552bbbbb9] [role=proxy] [dbName=test]
 79 | milvus-standalone  | [2023/09/26 05:30:03.923 +00:00] [INFO] [rootcoord/root_coord.go:772] ["received request to create database"] [traceID=91fb5dbbd0a5a8028b7c048552bbbbb9] [role=rootcoord] [dbName=test] [msgID=444519207108608004]
 80 | milvus-standalone  | [2023/09/26 05:30:03.925 +00:00] [INFO] [rootcoord/meta_table.go:272] ["create database"] [traceID=91fb5dbbd0a5a8028b7c048552bbbbb9] [db=test] [ts=444519207108608005]
 81 | milvus-standalone  | [2023/09/26 05:30:03.925 +00:00] [INFO] [rootcoord/root_coord.go:804] ["done to create database"] [traceID=91fb5dbbd0a5a8028b7c048552bbbbb9] [role=rootcoord] [dbName=test] [msgID=444519207108608004] [ts=444519207108608005]
 82 | milvus-standalone  | [2023/09/26 05:30:03.925 +00:00] [INFO] [proxy/impl.go:190] ["CreateDatabase done"] [traceID=91fb5dbbd0a5a8028b7c048552bbbbb9] [role=proxy] [dbName=test]
 83 | ```
 84 | 
 85 | ## Milvus CLI
 86 | 很多时候，使用类似 [mysql](https://dev.mysql.com/doc/refman/8.0/en/mysql.html) 这样的客户端工具来连接数据库并进行相关操作会更便捷。Milvus 也提供了类似的客户端工具 `milvus_cli`，来方便我们对 Milvus 进行相关操作。
 87 | 
 88 | 可以采用如下命令来安装 `milvus_cli` 客户端：
 89 | 
 90 | ```bash
 91 | pip install milvus-cli
 92 | ```
 93 | 
 94 | 具体的使用如图：@fig-milvus_cli。
 95 | 
 96 | ![使用 milvus_cli 连接 Milvus](./images/milvus_cli_case.jpg){#fig-milvus_cli}
 97 | 
 98 | `milvus_cli` 的使用命令参考：[Milvus Client Commands](https://milvus.io/docs/cli_commands.md)。
 99 | 
100 | :::{.callout-warning}
101 | 在安装 `milvus_cli` 的时候，可能会存在依赖库的版本冲突，这可能会导致安装的 `milvus_cli` 无法正常使用，如图 @fig-milvus_cli_2 所示。此时，更新相关依赖的版本，并重新安装 `milvus_cli` 即可。
102 | 
103 | ![milvus_cli 连接超时](./images/milvus_cli_2.jpg){#fig-milvus_cli_2}
104 | :::


--------------------------------------------------------------------------------
/preface.qmd:
--------------------------------------------------------------------------------
 1 | # 序言 {.unnumbered}
 2 | 这本书将教大家了解大模型相关的基本概念，在应用大模型解决自己的具体问题时，对这些基本概念的熟悉是非常必要的。了解我们的工具，才会最大限度的发挥出工具的性能。但是，作为应用者，我们没有必要了解大模型的全部细节，“不求甚解”可能是一种更好的学习方法。就像我们无需了解操作系统的底层实现，也可以基于操作系统开发出丰富的上层应用一样。
 3 | 
 4 | 本书的大部分实践会基于百度的文心大模型，当然也会有部分基于 ChatGPT 的实践内容。
 5 | 
 6 | 作为大模型领域的非原住民，我们将自己在实践中探索的过程整理到本书之中，希望帮助和我们一样正在实践的大模型领域的爱好者。这本书从整体上会划分为三大部分：
 7 | 
 8 | * 介绍大模型领域的相关概念
 9 | * 介绍大模型相关的工具&平台
10 | * 介绍具体的应用大模型的案例
11 | 
12 | ## 书中的样式惯例
13 | 在本书中，遵循以下的排版约定：
14 | 
15 | * *斜体*：表示新术语、URL、电子邮件地址、文件名和文件扩展名。
16 | * `固定宽度`：用于程序列表、段落中引用程序元素，如变量或函数名称、数据库、数据类型、环境变量、语句和关键字。
17 | * **粗体**：突出显示重要文本。
18 | * :::{.callout-note}
19 | 需要注意的事项或额外的内容补充将如此表示，包括需要注意的事情，因为它可能会影响到我们的操作。
20 | :::
21 | 
22 | * :::{.callout-tip}
23 | 技巧类信息将如此表示， 并提供可能对我们有用或简化操作方式的内容。
24 | :::
25 | 
26 | * :::{.callout-warning}
27 | 警告信息将如此表示，并介绍如果不注意此处的信息则可能导致损失的事项。
28 | :::
29 | 
30 | ## 致谢
31 | 
32 | * 本书通过 [quarto](https://quarto.org/) 构建，quarto 是一款非常优秀的科学、技术内容发布系统，感谢 quarto 社区的努力，加速了本书的构建进程。
33 | * 感谢在探索之路上，一起打怪升级的队友和朋友们~
34 | 


--------------------------------------------------------------------------------
/prompt_engineer.qmd:
--------------------------------------------------------------------------------
 1 | # 提示词工程 {#sec-prompt_engineering}
 2 | 
 3 | 在和 LLM 的交互中，`提示词` 发挥着至关重要的作用，`提示词` 是我们和 LLM 沟通的桥梁。
 4 | 
 5 | ## 什么是提示词
 6 | 在 NLP 领域，`提示` 是一种用于引导预训练语言模型解决特定任务的方法。`提示` 通常是一段文本，用于构建问题或任务的描述，以便预训练语言模型可以根据其内在知识生成合适的输出。
 7 | 
 8 | 如果大语言模型是一个 5 岁的小孩，那么他基本上可以做非常多的事情了，关键是我们如何与一个 5 岁的小孩进行良好的沟通，以让他可以完成我们期望的任务？其中的关键就是 `提示词`。你是否还能记起你和孩子在一起的很多互动场景：
 9 | 
10 | * 出门的场景
11 |   * 快点，要迟到嘞，别磨蹭了
12 |   * 紧急呼叫汪汪队到门口集合
13 | * 打扫卫生的场景
14 |   * 帮爸爸收拾一下桌子
15 |   * 呼叫无敌小帮手，爸爸需要支援
16 | * ……
17 | 
18 | 不同的 `提示词`，既可以让孩子帮我们完成工作，又可以让孩子在这个过程中得到成长和锻炼。对于 LLM 而言，也是如此。
19 | 
20 | 从 @fig-bd_2023 也能看出，`提示词` 的重要性。
21 | 
22 | ![2023年百度世界大会主题](./images/bd2023.jpg){#fig-bd_2023}
23 | 
24 | ## 什么是提示词工程
25 | 提示工程起源于对预训练模型如何将知识应用于具体任务的探讨。
26 | 
27 | 预训练语言模型通常在大规模语料库上进行预训练，从而学习到大量的语言知识。然而，将这些知识应用于具体任务时，往往需要对模型进行微调（SFT：Supervised Fine Tuning）。微调过程中，模型需要根据标注的任务数据学习任务相关的知识。
28 | 
29 | 这种根据专有数据进行微调的方法，在许多情况下取得了很好的效果，但仍然存在一些问题。例如：
30 | 
31 | * 微调过程可能需要大量的标注数据，而这些数据往往难以获得。
32 | * 微调后的模型可能会存在过拟合现象，导致模型的泛化能力下降。
33 | 
34 | 为了解决这些问题，研究人员开始关注如何通过优化输入和问题表述来引导模型产生更好的输出结果，而无须进行昂贵的微调，这种方法被称为 `提示词工程`，如 @fig-pe_arch 所示。
35 | 
36 | ![微调和提示词工程的关系](./images/pe_arch.png){#fig-pe_arch}
37 | 
38 | 通过精心设计 `提示词`，我们可以引导模型关注输入数据中的关键信息，从而提高模型在各种自然语言处理任务上的性能。`提示词工程` 的核心思想是: **将问题表述为一种容易被模型理解和解答的形式**。可以通过多种方式来实现 `提示词工程`，例如：重述问题，给出示例或采用渐进式提示等。`提示词工程` 的关键在于找到一种能够充分发挥模型潜力的问题表述方式。
39 | 
40 | `提示词工程` 是一种优化和设计 `提示词` 的技术，从而可以更好的应用预训练大语言模型，使其可以更好的解决各种任务。
41 | 
42 | ## 一种新的职业
43 | `提示词工程` 对于任何使用 LLM 的人来说都是一项关键技能。随着越来越多的组织采用 LLM 来实现任务自动化并提高其生产效能，`提示词工程` 更是一项需求量很大的技能和职业。作为一个新兴的领域，`提示词工程` 特别需要创造力和对细节的关注。`提示词工程` 包括但不限于：选择正确的单词、短语、符号和格式，以指导模型生成高质量的、相关的文本。一个好的提示工程师可以通过设计产生所需输出的提示来帮助组织充分利用 LLM。
44 | 
45 | 如果大家已经使用过文心一言来和文心大模型互动，那么对于 `提示词` 应该比较熟悉。我们来看一下如下两个 `提示词` 的结果：
46 | 
47 | ::: {#fig-wx_pe layout-ncol=2}
48 | 
49 | ![请给出描述高兴的成语。](./images/pe_wx_1.jpg){#fig-pe_wx_1}
50 | 
51 | ![请给出5个描述高兴的成语，并给出其解释。](./images/pe_wx_2.jpg){#fig-pe_wx_2}
52 | 
53 | 不同提示词的结果差异
54 | :::
55 | 
56 | :::{.callout-note title="扩展资料"}
57 | 更多关于提示词工程的具体实践方法可以参考：Learn Prompting [^1] 和 Stable Diffusion 提示词手册 [^2]。
58 | :::
59 | 
60 | [^1]: [Learn Prompting](https://learnprompting.org/zh-Hans/docs/intro)
61 | [^2]: [Stable Diffusion 提示词手册](https://pan.baidu.com/s/1dciwgbhO-lfKyRo8lOqW9Q?pwd=cm9q)
62 | 


--------------------------------------------------------------------------------
/rag_intro.qmd:
--------------------------------------------------------------------------------
 1 | # RAG {#sec-RAG}
 2 | :::{.callout-tip}
 3 | Retrieval-Augmented Generation: An LLM that uses an external datastore at test time (not at pre-training time).
 4 | 
 5 | > 在运行时（而非预训练时），使用外部数据的大语言模型称之为基于检索增强的生成式。
 6 | :::
 7 | 
 8 | ## RAG 基本概念
 9 | 根据 *A Survey on Retrieval-Augmented Text Generation* [@RATGSurvey] 所述：RAG 是深度学习和传统检索技术（Retrieval Technology）的有机结合，在生成式大模型时代，有着以下优势：
10 | 
11 | * 知识库和模型分离，知识不以参数的形式存储在模型中，而是明文存储在数据库中，灵活性更高；
12 | * 文本生成转变为文本总结，生成结果的可信度更高，同时还降低了文本生成的难度；
13 | 
14 | ![RATG 综述研究概览](./images/RATG_overview.jpg){#fig-ratg_overview}
15 | 
16 | 根据 @fig-ratg_overview，RAG 范式有三个重要的组成部分：Retrieval Source，Retrieval Metric，Integration Method。
17 | 
18 | ### RAG 的表示方法
19 | 传统的文本生成方法可以用如下公式表示：
20 | 
21 | $$\boldsymbol{y}=f(\boldsymbol{x})$$ {#eq-RATG_1}
22 | 
23 | 其中，$\boldsymbol{x}$ 代表输入的文本（字符串序列），$f$ 表示模型，$\boldsymbol{y}$ 表示模型输出的文本。
24 | 
25 | RAG 则可以用如下公式表示：
26 | 
27 | $$\boldsymbol{y}=f(\boldsymbol{x}, \boldsymbol{z}), \boldsymbol{z} = \{(\boldsymbol{x}^\gamma, \boldsymbol{y}^\gamma)\}$$ {#eq-RATG_2}
28 | 
29 | 其中，$\boldsymbol{x}$ 代表输入的文本（字符串序列），$\boldsymbol{z}$ 代表知识库，$f$ 表示模型，$\boldsymbol{x}^\gamma$ 表示作为输入文本 $\boldsymbol{x}$ 的检索 *key*，$\boldsymbol{y}^\gamma$ 是与模型输出相关的知识。
30 | 
31 | ### Retrieval Source 类型
32 | * **Training Corpus**：有标注的训练数据直接作为外部知识。
33 | * **External Data**：支持提供训练数据之外的外部知识作为检索来源，比如与任务相关的领域数据，实现模型的快速适应。
34 | * **Unsupervised Data**：前两种知识源都需要一定的人工标注来完善“检索依据-输出”的对齐工作，无监督知识源可以直接支持无标注/对齐的知识作为检索来源。
35 | 
36 | ### Retrieval Metrics 类型
37 | * **Sparse-vector Retrieval**（浅层语义）：针对稀疏向量场景的度量方法，比如TF-IDF, BM25等。
38 | * **Dense-vector Retrieval**（深层语义）：针对稠密向量的度量方法，比如文本相似度。
39 | * **Task-specific Retrieval**：在通用的度量场景下，度量得分高并不能代表召回知识准确，因此有学者提出基于特定任务优化的召回度量方法，提高度量的准确率。
40 | 
41 | ### Integration Method 类型
42 | * **Data Augmentation**：直接拼接用户输入文本和知识文本，然后输入文本生成模型。
43 | * **Attention Mechanisms**：引入额外的Encoder，对用户输入文本和知识文本进行注意力编码后输入文本生成模型。
44 | * **Skeleton Extraction**：前两种方法都是通过文本向量化的隐式方法完成知识重点片段的抽取，Skeleton Extraction方法可以显式地完成类似工作。
45 | 
46 | 在 RAG 模式下，AI 应用发生了新的范式变化，从传统的 `Pre-training` + `Fine-tune` 的模式转换为了 `Pre-training` + `Prompt` 模式。这种模式的转变简化了对于不同任务而言模型训练的工作量，降低了 AI 的开发和使用门槛，同时也使得 `Retrieval` + `Generation` 成为可能。
47 | 
48 | ![RAG 基本架构](./images/RAG_arch.png){#fig-RAG_arch}
49 | 
50 | ## 为什么要使用 RAG
51 | 仅依靠大模型已经可以完成很多任务，`Fine-tune` 也可以起到补充领域知识的作用，为什么 RAG 仍然如此重要呢？
52 | 
53 | * **幻觉问题**：尽管大模型的参数量很大，但和人类的所有知识相比，仍然有非常大的差距。所以，大模型在生成内容时，很有可能会捏造事实，导致如 @sec-hallucination 所述的“幻觉”。因此，对于 LLMs 而言，通过搜索召回相关领域知识来作为特定领域的知识补充是非常必要的。
54 | 
55 | * **语料更新时效性问题**：大模型的训练数据存在时间截止的问题。尽管可以通过 `Fine-tune` 来为大模型加入新的知识，但大模型的训练成本和时间依然是需要面对的严峻难题：通常需要大量的计算资源，时间也难做到天级别更新。在 RAG 模式下，向量数据库和搜索引擎数据的更新都更加容易，这有助于业务数据的实时性。
56 | 
57 | * **数据泄露问题**：尽管，可以利用 `Fine-tune` 的方式增强 LLM 在特定领域的处理能力。但是，用于 `Fine-tune` 的这些领域知识很可能包含个人或者公司的机密信息，且这些数据很可能通过模型而不经意间泄露出去[^1]。RAG 可以通过增加私有数据存储的方式使得用户的数据更加安全。
58 | 
59 | ## 更多内容
60 | 更详细、深入的内容可以参考如下几篇文章：[-@RATGSurvey]，[-@AugmentedLM]。
61 | 
62 | [^1]: [ChatGPT致三星半导体机密泄漏](https://zhuanlan.zhihu.com/p/619432239)
63 | 


--------------------------------------------------------------------------------
/references.bib:
--------------------------------------------------------------------------------
  1 | @misc{zhao2023survey,
  2 |   title={A Survey of Large Language Models}, 
  3 |   author={Wayne Xin Zhao and Kun Zhou and Junyi Li and Tianyi Tang and Xiaolei Wang and Yupeng Hou and Yingqian Min and Beichen Zhang and Junjie Zhang and Zican Dong and Yifan Du and Chen Yang and Yushuo Chen and Zhipeng Chen and Jinhao Jiang and Ruiyang Ren and Yifan Li and Xinyu Tang and Zikang Liu and Peiyu Liu and Jian-Yun Nie and Ji-Rong Wen},
  4 |   year={2023},
  5 |   eprint={2303.18223},
  6 |   archivePrefix={arXiv},
  7 |   primaryClass={cs.CL}
  8 | }
  9 | 
 10 | @misc{vaswani2023attention,
 11 |   title={Attention Is All You Need}, 
 12 |   author={Ashish Vaswani and Noam Shazeer and Niki Parmar and Jakob Uszkoreit and Llion Jones and Aidan N. Gomez and Lukasz Kaiser and Illia Polosukhin},
 13 |   year={2017},
 14 |   eprint={1706.03762},
 15 |   archivePrefix={arXiv},
 16 |   primaryClass={cs.CL}
 17 | }
 18 | 
 19 | @misc{yang2023harnessing,
 20 |   title={Harnessing the Power of LLMs in Practice: A Survey on ChatGPT and Beyond}, 
 21 |   author={Jingfeng Yang and Hongye Jin and Ruixiang Tang and Xiaotian Han and Qizhang Feng and Haoming Jiang and Bing Yin and Xia Hu},
 22 |   year={2023},
 23 |   eprint={2304.13712},
 24 |   archivePrefix={arXiv},
 25 |   primaryClass={cs.CL}
 26 | }
 27 | 
 28 | @article{radford2018improving,
 29 |   added-at = {2020-07-14T16:37:42.000+0200},
 30 |   author = {Radford, Alec and Narasimhan, Karthik and Salimans, Tim and Sutskever, Ilya},
 31 |   biburl = {https://www.bibsonomy.org/bibtex/273ced32c0d4588eb95b6986dc2c8147c/jonaskaiser},
 32 |   interhash = {5c343ed9a31ac52fd17a898f72af228f},
 33 |   intrahash = {73ced32c0d4588eb95b6986dc2c8147c},
 34 |   keywords = {final thema:transformer},
 35 |   timestamp = {2020-07-14T16:49:42.000+0200},
 36 |   title = {Improving language understanding by generative pre-training},
 37 |   year = 2018
 38 | }
 39 | 
 40 | @inproceedings{Radford2019LanguageMA,
 41 |   title={Language Models are Unsupervised Multitask Learners},
 42 |   author={Alec Radford and Jeff Wu and Rewon Child and David Luan and Dario Amodei and Ilya Sutskever},
 43 |   year={2019},
 44 |   url={https://api.semanticscholar.org/CorpusID:160025533}
 45 | }
 46 | 
 47 | @inproceedings{NEURIPS2020_1457c0d6,
 48 |   author = {Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and Agarwal, Sandhini and Herbert-Voss, Ariel and Krueger, Gretchen and Henighan, Tom and Child, Rewon and Ramesh, Aditya and Ziegler, Daniel and Wu, Jeffrey and Winter, Clemens and Hesse, Chris and Chen, Mark and Sigler, Eric and Litwin, Mateusz and Gray, Scott and Chess, Benjamin and Clark, Jack and Berner, Christopher and McCandlish, Sam and Radford, Alec and Sutskever, Ilya and Amodei, Dario},
 49 |   booktitle = {Advances in Neural Information Processing Systems},
 50 |   editor = {H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin},
 51 |   pages = {1877--1901},
 52 |   publisher = {Curran Associates, Inc.},
 53 |   title = {Language Models are Few-Shot Learners},
 54 |   url = {https://proceedings.neurips.cc/paper_files/paper/2020/file/1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf},
 55 |   volume = {33},
 56 |   year = {2020}
 57 | }
 58 | 
 59 | @misc{chen2021evaluating,
 60 |   title={Evaluating Large Language Models Trained on Code}, 
 61 |   author={Mark Chen and Jerry Tworek and Heewoo Jun and Qiming Yuan and Henrique Ponde de Oliveira Pinto and Jared Kaplan and Harri Edwards and Yuri Burda and Nicholas Joseph and Greg Brockman and Alex Ray and Raul Puri and Gretchen Krueger and Michael Petrov and Heidy Khlaaf and Girish Sastry and Pamela Mishkin and Brooke Chan and Scott Gray and Nick Ryder and Mikhail Pavlov and Alethea Power and Lukasz Kaiser and Mohammad Bavarian and Clemens Winter and Philippe Tillet and Felipe Petroski Such and Dave Cummings and Matthias Plappert and Fotios Chantzis and Elizabeth Barnes and Ariel Herbert-Voss and William Hebgen Guss and Alex Nichol and Alex Paino and Nikolas Tezak and Jie Tang and Igor Babuschkin and Suchir Balaji and Shantanu Jain and William Saunders and Christopher Hesse and Andrew N. Carr and Jan Leike and Josh Achiam and Vedant Misra and Evan Morikawa and Alec Radford and Matthew Knight and Miles Brundage and Mira Murati and Katie Mayer and Peter Welinder and Bob McGrew and Dario Amodei and Sam McCandlish and Ilya Sutskever and Wojciech Zaremba},
 62 |   year={2021},
 63 |   eprint={2107.03374},
 64 |   archivePrefix={arXiv},
 65 |   primaryClass={cs.LG}
 66 | }
 67 | 
 68 | @misc{ouyang2022training,
 69 |   title={Training language models to follow instructions with human feedback}, 
 70 |   author={Long Ouyang and Jeff Wu and Xu Jiang and Diogo Almeida and Carroll L. Wainwright and Pamela Mishkin and Chong Zhang and Sandhini Agarwal and Katarina Slama and Alex Ray and John Schulman and Jacob Hilton and Fraser Kelton and Luke Miller and Maddie Simens and Amanda Askell and Peter Welinder and Paul Christiano and Jan Leike and Ryan Lowe},
 71 |   year={2022},
 72 |   eprint={2203.02155},
 73 |   archivePrefix={arXiv},
 74 |   primaryClass={cs.CL}
 75 | }
 76 | 
 77 | @article{LLMEvaluationSurvey,
 78 |   title={A Survey on Evaluation of Large Language Models}, 
 79 |   author={Chang, Yupeng and Wang, Xu and Wang, Jindong and Wu, Yuan and Zhu, Kaijie and Chen, Hao and Yang, Linyi and Yi, Xiaoyuan and Wang, Cunxiang and Wang, Yidong and Ye, Wei and Zhang, Yue and Chang, Yi and Yu, Philip S. and Yang, Qiang and Xie, Xing},
 80 |   year={2023},
 81 |   journal={arXiv preprint arXiv:2307.03109},
 82 |   url={https://arxiv.org/abs/2307.03109},
 83 | }
 84 | 
 85 | 
 86 | 
 87 | 
 88 | 
 89 | 
 90 | 
 91 | @article{LLMInstructionTuningSurvey,
 92 |   title={Instruction Tuning for Large Language Models: A Survey}, 
 93 |   author={Zhang, Shengyu and Dong, Linfeng and Li, Xiaoya and Zhang, Sen and Sun, Xiaofei and Wang, Shuhe and Li, Jiwei and Hu, Runyi and Zhang, Tianwei and Wu, Fei and others},
 94 |   year={2023},
 95 |   journal={arXiv preprint arXiv:2308.10792},
 96 |   url={https://arxiv.org/abs/2308.10792},
 97 | }
 98 | 
 99 | @article{LLMAgentSurvey,
100 |   title   = {The Rise and Potential of Large Language Model Based Agents: A Survey}, 
101 |   author  = {Zhiheng Xi and Wenxiang Chen and Xin Guo and Wei He and Yiwen Ding and Boyang Hong and Ming Zhang and Junzhe Wang and Senjie Jin and Enyu Zhou and Rui Zheng and Xiaoran Fan and Xiao Wang and Limao Xiong and Yuhao Zhou and Weiran Wang and Changhao Jiang and Yicheng Zou and Xiangyang Liu and Zhangyue Yin and Shihan Dou and Rongxiang Weng and Wensen Cheng and Qi Zhang and Wenjuan Qin and Yongyan Zheng and Xipeng Qiu and Xuanjing Huang and Tao Gui},
102 |   year    = {2023},
103 |   journal = {arXiv preprint arXiv:2309.07864},
104 |   url     = {https://arxiv.org/abs/2309.07864},
105 | }
106 | 
107 | @article{RATGSurvey,
108 |   title   = {A Survey on Retrieval-Augmented Text Generation}, 
109 |   author  = {Huayang Li and Yixuan Su and Deng Cai and Yan Wang and Lemao Liu},
110 |   year    = {2022},
111 |   journal = {arXiv preprint arXiv:2202.01110},
112 |   url     = {https://arxiv.org/abs/2202.01110},
113 | }
114 | 
115 | @book{zhang2023dive,
116 |   title     = {Dive into Deep Learning},
117 |   author    = {Zhang, Aston and Lipton, Zachary C. and Li, Mu and Smola, Alexander J.},
118 |   publisher = {Cambridge University Press},
119 |   url       = {https://zh.d2l.ai/index.html},
120 |   note      = {\url{https://D2L.ai}},
121 |   year      = {2023}
122 | }
123 | 
124 | @article{LLMHallucination,
125 |   title   = {Siren's Song in the AI Ocean: A Survey on Hallucination in Large Language Models}, 
126 |   author  = {Zhang, Yue and Li, Yafu and Cui, Leyang and Cai, Deng and Liu, Lemao and Fu, Tingchen and Huang, Xinting and Zhao, Enbo and Zhang, Yu and Chen, Yulong and Wang, Longyue and Luu, Anh Tuan and Bi, Wei and Shi, Freda and Shi, Shuming},
127 |   journal = {arXiv preprint arXiv:2309.01219},
128 |   year    = {2023},
129 |   url     = {https://arxiv.org/abs/2309.01219}
130 | }
131 | 
132 | @article{NLPHallucination,
133 |   doi     = {10.1145/3571730},
134 |   url     = {https://doi.org/10.1145/3571730},
135 |   year    = {2023},
136 |   volume  = {55},
137 |   number  = {12},
138 |   pages   = {1--38},
139 |   author  = {Ziwei Ji and Nayeon Lee and Rita Frieske and Tiezheng Yu and Dan Su and Yan Xu and Etsuko Ishii and Ye Jin Bang and Andrea Madotto and Pascale Fung},
140 |   title   = {Survey of Hallucination in Natural Language Generation},
141 |   journal = {{ACM} Computing Surveys}
142 | }
143 | 
144 | @article{AugmentedLM,
145 |   title         = {Augmented Language Models: a Survey}, 
146 |   author        = {Grégoire Mialon and Roberto Dessì and Maria Lomeli and Christoforos Nalmpantis and Ram Pasunuru and Roberta Raileanu and Baptiste Rozière and Timo Schick and Jane Dwivedi-Yu and Asli Celikyilmaz and Edouard Grave and Yann LeCun and Thomas Scialom},
147 |   year          = {2023},
148 |   eprint        = {2302.07842},
149 |   archivePrefix = {arXiv},
150 |   primaryClass  = {cs.CL},
151 |   url           = {https://arxiv.org/abs/2302.07842},
152 | }
153 | 
154 | @article{liu2023lost,
155 |   title         = {Lost in the Middle: How Language Models Use Long Contexts}, 
156 |   author        = {Nelson F. Liu and Kevin Lin and John Hewitt and Ashwin Paranjape and Michele Bevilacqua and Fabio Petroni and Percy Liang},
157 |   year          = {2023},
158 |   eprint        = {2307.03172},
159 |   archivePrefix = {arXiv},
160 |   primaryClass  = {cs.CL},
161 |   url           = {https://arxiv.org/abs/2307.03172}
162 | }
163 | 
164 | @article{li2023multimodal,
165 |   title={Multimodal Foundation Models: From Specialists to General-Purpose Assistants}, 
166 |   author={Chunyuan Li and Zhe Gan and Zhengyuan Yang and Jianwei Yang and Linjie Li and Lijuan Wang and Jianfeng Gao},
167 |   year={2023},
168 |   eprint={2309.10020},
169 |   archivePrefix={arXiv},
170 |   primaryClass={cs.CV},
171 |   url={https://arxiv.org/abs/2309.10020},
172 | }
173 | 
174 | @article{yang2023dawn,
175 |   title={The Dawn of LMMs: Preliminary Explorations with GPT-4V(ision)}, 
176 |   author={Zhengyuan Yang and Linjie Li and Kevin Lin and Jianfeng Wang and Chung-Ching Lin and Zicheng Liu and Lijuan Wang},
177 |   year={2023},
178 |   eprint={2309.17421},
179 |   archivePrefix={arXiv},
180 |   primaryClass={cs.CV},
181 |   url={https://arxiv.org/abs/2309.17421}
182 | }
183 | 
184 | @article{yao2022react,
185 |   title={ReAct: Synergizing Reasoning and Acting in Language Models},
186 |   author={Yao, Shunyu and Zhao, Jeffrey and Yu, Dian and Du, Nan and Shafran, Izhak and Narasimhan, Karthik and Cao, Yuan},
187 |   journal={arXiv preprint arXiv:2210.03629},
188 |   year={2022},
189 |   url={https://arxiv.org/abs/2210.03629}
190 | }
191 | 
192 | @article{karpas2022mrkl,
193 |   title={MRKL Systems: A modular, neuro-symbolic architecture that combines large language models, external knowledge sources and discrete reasoning}, 
194 |   author={Ehud Karpas and Omri Abend and Yonatan Belinkov and Barak Lenz and Opher Lieber and Nir Ratner and Yoav Shoham and Hofit Bata and Yoav Levine and Kevin Leyton-Brown and Dor Muhlgay and Noam Rozen and Erez Schwartz and Gal Shachaf and Shai Shalev-Shwartz and Amnon Shashua and Moshe Tenenholtz},
195 |   year={2022},
196 |   eprint={2205.00445},
197 |   archivePrefix={arXiv},
198 |   primaryClass={cs.CL}
199 | }
200 | 
201 | @misc{wang2023planandsolve,
202 |   title={Plan-and-Solve Prompting: Improving Zero-Shot Chain-of-Thought Reasoning by Large Language Models}, 
203 |   author={Lei Wang and Wanyu Xu and Yihuai Lan and Zhiqiang Hu and Yunshi Lan and Roy Ka-Wei Lee and Ee-Peng Lim},
204 |   year={2023},
205 |   eprint={2305.04091},
206 |   archivePrefix={arXiv},
207 |   primaryClass={cs.CL}
208 | }
209 | 
210 | @Inbook{multiagentintro,
211 |   author="Balaji, P. G.
212 |   and Srinivasan, D.",
213 |   editor="Srinivasan, Dipti
214 |   and Jain, Lakhmi C.",
215 |   title="An Introduction to Multi-Agent Systems",
216 |   bookTitle="Innovations in Multi-Agent Systems and Applications - 1",
217 |   year="2010",
218 |   publisher="Springer Berlin Heidelberg",
219 |   address="Berlin, Heidelberg",
220 |   pages="1--27",
221 |   abstract="Multi-agent systems is a subfield of Distributed Artificial Intelligence that has experienced rapid growth because of the flexibility and the intelligence available solve distributed problems. In this chapter, a brief survey of multi-agent systems has been presented. These encompass different attributes such as architecture, communication, coordination strategies, decision making and learning abilities. The goal of this chapter is to provide a quick reference to assist in the design of multi-agent systems and to highlight the merit and demerits of the existing methods.",
222 |   isbn="978-3-642-14435-6",
223 |   doi="10.1007/978-3-642-14435-6_1",
224 |   url="https://doi.org/10.1007/978-3-642-14435-6_1"
225 | }
226 | 
227 | @misc{talebirad2023multiagent,
228 |   title={Multi-Agent Collaboration: Harnessing the Power of Intelligent LLM Agents}, 
229 |   author={Yashar Talebirad and Amirhossein Nadiri},
230 |   year={2023},
231 |   eprint={2306.03314},
232 |   archivePrefix={arXiv},
233 |   primaryClass={cs.AI}
234 | }
235 | 
236 | @misc{sennrich2016neural,
237 |   title={Neural Machine Translation of Rare Words with Subword Units}, 
238 |   author={Rico Sennrich and Barry Haddow and Alexandra Birch},
239 |   year={2016},
240 |   eprint={1508.07909},
241 |   archivePrefix={arXiv},
242 |   primaryClass={cs.CL}
243 | }
244 | 
245 | @misc{wang2019neural,
246 |   title={Neural Machine Translation with Byte-Level Subwords}, 
247 |   author={Changhan Wang and Kyunghyun Cho and Jiatao Gu},
248 |   year={2019},
249 |   eprint={1909.03341},
250 |   archivePrefix={arXiv},
251 |   primaryClass={cs.CL}
252 | }
253 | 
254 | @misc{pub37842,
255 |   title = {Japanese and Korean Voice Search},
256 |   author = {Mike Schuster and Kaisuke Nakajima},
257 |   year = {2012},
258 |   booktitle = {International Conference on Acoustics, Speech and Signal Processing},
259 |   pages = {5149--5152},
260 |   url = {https://research.google/pubs/pub37842/},
261 | }
262 | 
263 | @misc{kudo2018subword,
264 |   title={Subword Regularization: Improving Neural Network Translation Models with Multiple Subword Candidates}, 
265 |   author={Taku Kudo},
266 |   year={2018},
267 |   eprint={1804.10959},
268 |   archivePrefix={arXiv},
269 |   primaryClass={cs.CL}
270 | }
271 | @online{LLMTutorial,
272 |   title={A Tutorial on LLM},
273 |   author={Haifeng Li},
274 |   year={2023},
275 |   url={https://medium.com/@haifengl/a-tutorial-to-llm-f78dd4e82efc},
276 | }
277 | 
278 | @online{yao2022react_online,
279 |   title={ReAct: Synergizing Reasoning and Acting in Language Models},
280 |   author={Yao, Shunyu and Zhao, Jeffrey and Yu, Dian and Du, Nan and Shafran, Izhak and Narasimhan, Karthik and Cao, Yuan},
281 |   year={2022},
282 |   url={https://react-lm.github.io/},
283 | }
284 | 
285 | @online{tsne_online,
286 |   title={Introduction to t-SNE},
287 |   author={Abid Ali Awan},
288 |   year={2023},
289 |   url={https://www.datacamp.com/tutorial/introduction-t-sne},
290 | }
291 | 
292 | @online{ms_prompt_engineer,
293 |   title={What are Prompts?},
294 |   year={2023},
295 |   url={https://learn.microsoft.com/en-us/semantic-kernel/prompt-engineering/},
296 | }
297 | 


--------------------------------------------------------------------------------
/references.qmd:
--------------------------------------------------------------------------------
1 | # References {.unnumbered}
2 | 
3 | ::: {#refs}
4 | :::
5 | 


--------------------------------------------------------------------------------
/semantickernel_intro.qmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | filters:
  3 |    - include-code-files
  4 | code-annotations: below
  5 | ---
  6 | 
  7 | # Semantic Kernel 简介 {#sec-SK}
  8 | 
  9 | 和 @sec-LC_intro 中介绍的 LangChain 一样，Semantic Kernel 也是一种便于使用 LLM 开发应用的框架，如果没有特殊说明，当提到 SK 的时候，我们一般说的就是 Semantic Kernel 框架。使用 SK，我们可以更加轻松地将传统的编程语言和 LLM 技术结合在一起，并使用 SK 中提供的开箱即用的各种组件，更加便利地开发我们的 AI 原生应用。
 10 | 
 11 | 自从 2023 年 3 月在 GitHub 开源以来，SK 已经获得了 [15.9K](https://github.com/microsoft/semantic-kernel) 的 Starred，虽然比 [LangChain](https://github.com/langchain-ai/langchain) 的 Starred 少，但是也足以看出 SK 在社区的流行度。从 @fig-code_freq_sdk 也可以看出，SK 的代码改动频率持续维持在相对较高的水平，这也表明 SK 的能力也在不断完善。
 12 | 
 13 | ::: {#fig-code_freq_sdk layout-ncol=2}
 14 | 
 15 | ![LangChain 的代码提交频率](./images/code_freq_lc.jpg){#fig-code_freq_lc}
 16 | 
 17 | ![Semantic Kernel 的代码提交频率](./images/code_freq_sk.jpg){#fig-code_freq_sk}
 18 | 
 19 | LangChain 和 Semantic Kernel 的代码提交频率
 20 | :::
 21 | 
 22 | 在 2023 年 5 月 举办的 [Microsoft Build 2023](https://news.microsoft.com/build-2023/) 大会上，微软 CTO——Kevin Scott 做了题为 [The era of the AI Copilot](https://build.microsoft.com/en-US/sessions/bb8f9d99-0c47-404f-8212-a85fffd3a59d?source=/speakers/ef864919-5fd1-4215-b611-61035a19db6b) 的分享，在这次分享中，Kevin Scott 介绍了微软如何通过大模型技术堆栈（@fig-copilot_stack）来实现 AI Copilot 产品，并介绍了处于该技术堆栈中心的 AI 编排层，AI 编排层可以将 大模型和插件集成在一起，为用户创造全新的体验。
 23 | 
 24 | ::: {#fig-copilot_stack layout-nrow=2}
 25 | 
 26 | ![Microsoft's Copilot Stack](./images/copilot_stack_1.png){#fig-copilot_stack_1}
 27 | 
 28 | ![Copilot Stack 简图](./images/copilot_stack_2.png){#fig-copilot_stack_2}
 29 | 
 30 | Kevin Scott 分享中提到的 Microsoft Copilot 技术堆栈
 31 | 
 32 | :::
 33 | 
 34 | 而在微软的内部，Kevin Scott 提到的 AI 编排层指的就是 Semantic Kernel。因此，虽然 SK 还比较年轻，但是它已经在微软的明星产品（Bing、Microsoft Copilot……）中发挥着重要的作用。
 35 | 
 36 | 如 @fig-sk_application_process，我们也可以利用 SK，非常方便的让我们的应用也具备大模型 AI 的能力。
 37 | 
 38 | ![利用 SK 把大模型能力加入到应用的过程](./images/sk_application_process.jpg){#fig-sk_application_process}
 39 | 
 40 | 当然，LangChain 的 联合创始人 & CEO 也受邀参加了此次 Microsoft Build 大会，Kevin Scott 也在分享中提到（分享的第 28:20 处）：在 AI 编排层，LangChain 也是非常优秀的开源框架之一。
 41 | 
 42 | :::{.callout-tip}
 43 | 在我看来，Semantic Kernel 和 LangChain 就好像是 [Vim](https://www.vim.org/) 和 [Emacs](https://www.gnu.org/software/emacs/) 一样，两者都是年轻又优秀的 AI 编排框架。但是个人认为，从应用开发的角度上讲，SK 对于应用开发更为友好，比如：提示词和代码的分离，基于 Planner 概念的 Agent 设计等。
 44 | 
 45 | 关于 Semantic Kernel 和 LangChain 的区别，我们后续会再详细介绍。
 46 | :::
 47 | 
 48 | ## SK 中的五大概念
 49 | LangChain 框架把整个编排的过程抽象出了 Chain 的概念，而为了更好的对 AI 能力进行编排，SK 抽象出了五大概念，因此从这个角度讲，了解 SK 的成本还是比 LangChain 要大一点的。但是，我认为，SK 从另外的视角给出了不同的看法，深入的了解不同框架的不同思想，对于我们后期的应用设计而言，也是有非常大的帮助的。因此，花一些时间来了解 SK 的相关概念，是一件非常值得的事情。
 50 | 
 51 | | 概念 | 含义 |
 52 | | --------- | --------- |
 53 | | Kernel | Kernel 会把用户的问题编排为一个待解决的任务 |
 54 | | Plugins | 我们定制的、可用于解决某类任务的提示词、外部工具都会以 Plugin 的形式注册到 Kernel 中，以供 Planner 调用|
 55 | | Planner | Planner 会根据当前 Kernel 可用的资源对任务拆解为多个步骤，并且逐步执行 |
 56 | | Memories | Memory 主要用于保存和大模型交互的历史信息 |
 57 | | Connectors | 可以访问外部数据的工具称之为 Connector |
 58 | 
 59 | : SK 中的五大概念 {#tbl-sk_5_concepts}
 60 | 
 61 | :::{.callout-note title="SK SDK 的语言版本"}
 62 | 为了保持一致，对于 Semantic Kernel 的示例代码，我们会统一采用 Python 版本。当然，SK 目前也支持 [C# 和 Java](https://learn.microsoft.com/en-us/semantic-kernel/get-started/supported-languages)，您可以根据自己的需要来选择语言版本。
 63 | 
 64 | 对于 Python 版本，我们使用 `python -m pip install semantic-kernel` 来安装 SK SDK。
 65 | :::
 66 | 
 67 | ## Kernel
 68 | 我们可以用 @lst-sk_init_kernel 所示的代码来初始化一个 `Kernel` 实例。是的，就是这么简单。
 69 | 
 70 | ```{#lst-sk_init_kernel .python code-line-numbers="true" lst-cap="初始化 Kernel 实例"}
 71 | import semantic_kernel as sk
 72 | kernel = sk.Kernel()
 73 | ```
 74 | 
 75 | 正如它的名字一样，`Kernel` 本身已经暗示着它在整个框架中的重要地位。我能脱口而出的另一个比较重要的 Kernel 就是 Linux Kernel 了。如 @fig-sk_kernel，与 Linux Kernel 类似，SK 中的 `Kernel` 负责管理运行 AI 应用所需的资源，例如：所需要的大模型，服务，插件……
 76 | 
 77 | ![Kernel 在 AI 应用中的位置](./images/sk_kernel.png){#fig-sk_kernel}
 78 | 
 79 | 为了实现 AI 应用的运行，我们会在 `Kernel` 中做某些必要的配置，同时我们还将在 `Kernel` 中注册所有可能用到的 `Connector` 和 `Plugin`，`Kernel` 会根据我们的配置来控制构造提示词、调用大模型、返回应用结果……等各个环节。
 80 | 
 81 | 虽然这里提前讲到了很多其他概念，但是也不用过分担心，本章接下来的部分会一步一步的对 @fig-sk_kernel 中的名词进行解释，并最终给出一个可运行的示例。
 82 | 
 83 | ## Plugins
 84 | 在介绍 `Plugin` 之前，我们有必要先介绍 `Function` 的概念。在 SK 中，`Function`（`SKFunctionBase`） 指代了所有的能力，这真是一个非常巧妙的设计理念。
 85 | 
 86 | 例如，我们可以使用如下的提示词让大模型帮助我们生成藏头诗：
 87 | 
 88 | ```{#lst-sk_prompt_function_poem .bash code-line-numbers="true" lst-cap="藏头诗的提示词"}
 89 | 写一首包含 {topic} 的藏头诗
 90 | ```
 91 | 
 92 | 例如，利用文心大模型，我们可以使用 @lst-sk_prompt_function_poem 生成各种类型的藏头诗。
 93 | 
 94 | ![利用大模型生成藏头诗](./images/waizg.jpg){#fig-waizg}
 95 | 
 96 | 在 SK 中，该 @lst-sk_prompt_function_poem 就是一个 `AcrosticPoetryFunction`，是一种可以生成藏头诗的能力。仔细想想，确实如此，不同的提示词不就是不同的能力吗？
 97 | 
 98 | 在 @lst-sk_init_kernel 的基础上，我们为 `Kernel` 增加编写藏头诗的能力。
 99 | 
100 | ```{#lst-sk_add_function_for_kernel .python include="./code/test_sk_acrosticpoetry.py" code-line-numbers="true" lst-cap="Prompt & Function"}
101 | ```
102 | 
103 | 1. 构造提示词
104 | 2. 初始化 `Kernel`
105 | 3. 为 `Kernel` 选择大模型
106 | 4. 根据提示词生成 `AcrosticPoetryFunction`
107 | 
108 | 无论是由提示词和大模型驱动的能力（Semantic Function），还是由类似 `def add(a:int, b:int) -> int` 这种原生函数（Native Function）[^1]驱动的能力，在 SK 中，统称为 `Function`。当然，为了能够让 Native Function 具备 `Function` 的作用，需要使用 `@sk_function` 装饰器为其增加函数功能相关的语义描述。
109 | 
110 | * **Semantic Function**：使用自然语言来和用户交互，并将用户的请求以提示词的形式提交给大模型，并将大模型返回的自然语言形式的结果返回给用户。
111 | * **Native Function**：用 Python 编写的函数，用于处理 LLM 不擅长的事情，例如：数学运算、I/O 处理、访问 REST API、……
112 | 
113 | :::{.callout-tip}
114 | 我认为，`Function` 的这种抽象和设计真的非常经典。
115 | :::
116 | 
117 | 在 SK 中，`Plugin` 就是一系列 `Function` 的集合。可以利用 SK 暴露出来的各种 API 为 `Kernel` 增加能力支持，而大模型可以根据 `Function` 的语义描述来选择合适的能力以解决用户的提问。
118 | 
119 | ## Planner
120 | `Planner` 的概念有点类似 @sec-lc_react 中介绍的基于 ReAct 模式的 LangChain Agent，但是和 LangChain Agent 的概念又不完全一致。
121 | 
122 | `Planner` 可以接受用户请求并返回完成用户请求的步骤，`Planner` 通过 LLMs 来分析 `Kernel` 中注册的插件以及用户请求，然后根据任务目标把插件重新组合成一系列步骤，最终通过执行这一系列步骤来完成用户请求。因此，`Planner` 更类似 @sec-agent_pae 中介绍的 PlanAndExecute 模式的 Agent。
123 | 
124 | 和 LangChain Agent 相比，`Planner` 的优势在于我们可以通过其拆解的步骤来评估大模型的能力，而不需要像 LangChain Agent 那样得等到运行结束才知道大模型是否解决了我们的请求。对于大模型调优以及稳定性而言，`Planner` 的方式会更胜一筹。
125 | 
126 | SK 提供了如下的预定义的 `Planner`：
127 | 
128 | * `SequentialPlanner`：创建具有多个 `Function` 的计划，并将这些 `Function` 通过输入和输出链接起来。
129 | * `ActionPlanner`：创建具有单个 `Function` 的计划。
130 | * `StepwisePlanner`：逐步执行每一步，在执行下一步之前会观察当前步骤的执行结果，类似 ReAct 模式的 Agent。
131 | 
132 | 微软的官方文档 [Automatically orchestrate AI with planners](https://learn.microsoft.com/en-us/semantic-kernel/agents/planners/?tabs=python) 中不建议使用如上三种类型的 `Planner`，更推荐使用最新的 `Planner`：Handlebars calling stepwise 以及 Function calling stepwise。
133 | 
134 | :::{.callout-note}
135 | 从 Python 版本的代码看，Handlebars calling stepwise 以及 Function calling stepwise 这两种类型的 `Planner` 还未实现。
136 | :::
137 | 
138 | 我们可以使用 `Planner` 来解决 @lst-sk_planner_math_question 所示的问题。
139 | 
140 | ```{#lst-sk_planner_math_question .bash lst-cap="一个需要解决的数学问题"}
141 | If my investment of 2130.23 dollars increased by 23%, how much would I have after I spent $5 on a latte?
142 | ```
143 | 
144 | ```{#lst-sk_planner_math_problem .python include="./code/test_sk_planner.py" code-line-numbers="true" lst-cap="使用 Planner 解决复杂的数学问题"}
145 | ```
146 | 
147 | 1. 为 Kernel 增加数学计算能力
148 | 2. 创建 SequentialPlanner 类型的 `Planner`
149 | 3. 为任务生成拆解步骤和计划
150 | 4. 逐步执行第 3 步生成的计划
151 | 
152 | @lst-sk_planner_math_problem 会为 @lst-sk_planner_math_question 生成如下的计算步骤，并且会最终给出正确的结果：2615.1829。
153 | 
154 | ```{#lst-sk_planner_math_problem_step .xml code-line-numbers="true" lst-cap="SK 对复杂问题的拆解计划"}
155 | <plan>
156 |     <!-- Calculate the increased amount after the investment -->
157 |     <function.MathPlugin.Multiply input="2130.23" number2="1.23" setContextVariable="INCREASED_AMOUNT"/>
158 |     <!-- Calculate the remaining amount after spending on a latte -->
159 |     <function.MathPlugin.Subtract input="$INCREASED_AMOUNT" number2="5" appendToResult="RESULT__FINAL_AMOUNT"/>
160 | </plan>
161 | ```
162 | 
163 | ## Memories
164 | 很多时候，应用会和大模型进行多轮的交互以解决用户的问题，例如和日期相关的任务：
165 | * 今天是几号？
166 | * 今天有什么重要的事情发生吗？
167 | * 历史上的今天都有哪些重要的事情发生呢？
168 | 
169 | 对于这种场景，我们希望能够把历史对话结果作为背景信息一并提供给大模型，以便大模型能够给出更好的结果，这种提供历史信息的能力就称之为 `Memories`。在 SK 中，`Memories` 的概念和 LangChain 是一致的，并没有太多的区别。
170 | 
171 | SK 提供了三种 `Memories` 方式：
172 | 
173 | * **传统的 KV 键值对方式**：存储方式和搜索方式与环境变量一致，这意味着待搜索的 `Key` 与用户输入的文本之间必须存在严格匹配的关系。
174 | * **传统的本地存储方式**：当我们有大量的 KV 信息时，最好将其保存在磁盘上，此时就是本地存储的方式。
175 | * **语义检索方式**：采用 @sec-embedding_intro 中的 `embedding` 方式进行信息的存储和检索，和 @sec-LC_RAG_vector 中介绍的基于 LangChain 和 VectorDB 实现的 RAG 类似。
176 | 
177 | ## Connectors
178 | 如 @fig-sk_application_process，在 SK 中，`Connectors` 是不同组件之间的桥梁，使得 SK 中的不同组件可以交换彼此的信息，因此，`Connectors` 有着非常重要的作用。`Connectors` 类似 Linux 中的 `pipe` 的概念，利用 `pipe`，我们可以把各种不同的命令组合起来，以实现更加强大的能力。
179 | 
180 | `Connectors` 可以用于和外部系统交互——例如与 HuggingFace 模型交互，也可以用于和外部数据交互——例如与 SQLite 交互以使其作为 `Memories`。
181 | 
182 | SK 提供了很多预定义的 `Connectors`，这些预定义的 `Connectors` 主要用于两大领域：大模型集成，外部数据集成，具体可以参考 [semantic_kernel/connectors](https://github.com/microsoft/semantic-kernel/tree/main/python/semantic_kernel/connectors) 的代码实现。
183 | 
184 | [^1]: 原生函数主要指的是用对应的编程语言编写的函数，这里主要是用来区别于 SK 中的 `Function` 的抽象概念。
185 | 


--------------------------------------------------------------------------------
/semantickernel_plugins.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | filters:
3 |    - include-code-files
4 | code-annotations: below
5 | ---
6 | 
7 | # SK 中的 Plugin
8 | 
9 | 


--------------------------------------------------------------------------------
/semantickernel_prompt.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | filters:
3 |    - include-code-files
4 | code-annotations: below
5 | ---
6 | 
7 | # SK 中的 Prompt 
8 | 


--------------------------------------------------------------------------------
/semantickernel_promptflow.qmd:
--------------------------------------------------------------------------------
1 | ---
2 | filters:
3 |    - include-code-files
4 | code-annotations: below
5 | ---
6 | 
7 | # Prompt flow 工具 


--------------------------------------------------------------------------------
/sft.qmd:
--------------------------------------------------------------------------------
 1 | # 微调 {#sec-sft}
 2 | 对于 ChatGPT 或者 文心 这些大模型而言，对其进行预训练的成本非常大，整个训练需要数千个 GPU 并行处理才可以解决海量数据的算力问题。而仅 GPU 的成本就可能高达数百万美元。根据 OpenAI 的 GPT-3 技术概述，每次训练至少需要价值 500 万美元的 GPU。在 7 月份的麻省理工学院活动中，当被问及训练基础模型的成本是否在 5000 万至 1 亿美元之间时，OpenAI 的联合创始人 Sam Altman 回答说，这“不止于此”，而且越来越贵。据估计，2023 年 1 月，ChatGPT 使用了近 30000 个 GPU 来处理数亿的日常用户请求。华盛顿大学电气和计算机工程助理教授 Sajjad Moazeni 表示，这些查询每天可能消耗约 1 GWh，相当于约 3.3 万个美国家庭的日常能源消耗。[^1]
 3 | 
 4 | 基于对成本的考虑，因此，预训练一个大模型并非是人人都可以做到的事情。
 5 | 
 6 | 微调（SFT：Supervised Fine Tuning）是一种机器学习技术，涉及对预训练模型进行小的调整，以提高其在特定任务中的性能。因为模型已经对世界有了很好的了解，并且可以利用这些知识更快地学习新任务，因此微调比从头开始训练模型更有效，通常也会产生更好的结果。从成本的角度看，微调的成本也会比预训练大模型的成本要低的多。虽然微调也需要成本，但我们基本可以承担这些成本。
 7 | 
 8 | ## 为什么微调
 9 | 除了成本因素之外，微调在机器学习中具有如此重要意义的原因还包括：
10 | 
11 | * 数据效率：微调允许使用有限的特定任务数据进行有效的模型自适应。可以使用现有的预先训练模型，并根据任务对其进行优化，而不是收集和标注新的数据集。因此，从数据处理效率层面而言，微调会节省更多的时间和资源。
12 | * 时间效率：从头开始训练模型需要很长时间，而因为微调是从模型已经学习的特征开始，因此减少了收敛所需的时间，进而加快了训练的过程，提升了训练的效率。
13 | * 知识迁移：预训练模型在最初的训练中已经从大量数据集中学习到了有价值的特征和模式。微调可以将所获得的知识转移到特定任务中，微调可以使预训练模型在特定任务上有一种增强的效果。
14 | * 专业化：微调可以允许我们自定义一个模型，使其在特定任务中表现更加出色。通过调整模型的设置，可以创建一个在特定情况下非常有效的工具。
15 | 
16 | ## 何时微调
17 | 虽然微调的成本比从头预训练大模型的成本要小的多的多，但是对模型进行微调仍然需要我们投入时间和精力。微调不是没有成本，只是和预训练大模型相比成本小而已。
18 | 
19 | 因此，在准备微调之前，我们最好先尝试通过提示工程（@sec-prompt_engineering）、RAG（@sec-RAG）、或通过类似 Agent （@sec-agent）的函数调用来获得更好的结果。
20 | 
21 | :::{.callout-note}
22 | 并非所有的大模型都支持函数调用，目前 OpenAI 的 GPT 是支持函数调用的，所以如果使用 GPT，则可以直接使用函数调用 API 来实现函数调用。
23 | 
24 | 对于不具备函数调用能力的大模型，可以考虑通过 Agent 的方式（例如使用 @sec-lc_react 中介绍的 LangChain Agent 能力）来调用外部工具或函数。
25 | :::
26 | 
27 | 在准备微调之前，我们需要进行仔细的分析和考虑：
28 | 
29 | * 模型在许多任务上可能最初表现不佳，但使用正确的提示词可以改善结果，此时可能不需要微调。
30 | * 迭代微调需要创建数据集并运行训练任务，因此迭代提示词比迭代微调快得多。
31 | 
32 | ## 如何微调
33 | 不同平台的微调方式各不相同，所以微调的方式需要参考具体使用的平台：
34 | 
35 | * 百度文心大模型微调方式：[SFT 文档](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/qlgujhcpo)
36 | * GPT 大模型微调方式：[Fine-tuning 文档](https://platform.openai.com/docs/guides/fine-tuning)
37 | 
38 | 
39 | [^1]: [What Large Models Cost You – There Is No Free AI Lunch](https://www.forbes.com/sites/craigsmith/2023/09/08/what-large-models-cost-you--there-is-no-free-ai-lunch/)


--------------------------------------------------------------------------------
/theme.scss:
--------------------------------------------------------------------------------
 1 | /*-- scss:defaults --*/
 2 | $link-color: #39729E;
 3 | $text-muted: #6a737b;
 4 | 
 5 | /*-- scss:rules --*/
 6 | 
 7 | .layout-example {
 8 |   background: $gray-500;
 9 |   color: $white;
10 |   text-align: center;
11 |   margin-bottom: 1em;
12 |   font-family: $font-family-monospace;
13 |   font-size: .875em;
14 |   font-weight: 600;
15 |   padding-top: 1em;
16 |   border-radius: 3px;
17 | }
18 | 
19 | .left {
20 |   text-align: left;
21 |   padding-left: 1em;
22 | }
23 | 
24 | .right {
25 |   text-align: right;
26 |   padding-right: 1em;
27 | }
28 | 
29 | .hello-quarto-banner h1 {
30 |   margin-top: 0;
31 |   margin-bottom: 0.5rem;
32 | }
33 | 
34 | 


--------------------------------------------------------------------------------
/tokens.qmd:
--------------------------------------------------------------------------------
  1 | # Tokens
  2 | 
  3 | Tokens（分词） 是 LLM AI 处理文本或代码的基本单位。Tokens 可以是字符（characters）、单词（words）、子单词（subwords）或其他文本段落（segments of text）或代码段落（segments of code），tokens 的具体内容取决于所选的 token 化（tokenization）算法和方法。Tokenization 算法和 tokenizer 是 LLM 的基础组件。
  4 | 
  5 | 在 token 化过程中，我们会给 token 分配一个数值 ID 对该 token 进行标记，模型的处理过程中处理的实际上是这些 token 的数值化的 ID 标记。
  6 | 
  7 | ## Tokenizer 工具
  8 | 我们可以使用 OpenAI 提供的在线 [Tokenizer Tool](https://platform.openai.com/tokenizer) 来加深对 token 的理解和认识。
  9 | 
 10 | ![待 token 化的原始文本](./images/token_openai_demo_1.jpg){#fig-token_openai_1}
 11 | 
 12 | ::: {#fig-elephants layout-ncol=2}
 13 | 
 14 | ![token 化结果](./images/token_openai_demo_2.jpg){#fig-token_openai_2}
 15 | 
 16 | ![各 token 的 ID](./images/token_openai_demo_3.jpg){#fig-token_openai_3}
 17 | 
 18 | OpenAI GPT-4 tokenization 结果
 19 | :::
 20 | 
 21 | 在 OpenAI 中，一个 token 大概约等于 4 个英文字母的长度，换算一下的话，大概约是 $\frac{3}{4}$ 个单词。
 22 | 
 23 | 当然，OpenAI 也支持对中文的 tokenization 处理。 
 24 | 
 25 | ![待 token 化的中文原始文本](./images/token_openai_demo_c_1.jpg){#fig-token_openai_c1}
 26 | 
 27 | ::: {#fig-open_token layout-ncol=2}
 28 | 
 29 | ![中文 token 化结果](./images/token_openai_demo_c_2.jpg){#fig-token_openai_c2}
 30 | 
 31 | ![各中文 token 的 ID](./images/token_openai_demo_c_3.jpg){#fig-token_openai_c3}
 32 | 
 33 | 利用 OpenAI GPT-4 对中文进行 tokenization 的结果
 34 | :::
 35 | 
 36 | :::{.callout-warning}
 37 | 在 @fig-token_openai_c2 中，我们会发现有乱码出现，这主要是因为部分中文会包含一个或多个映射到多个 token 的 unicode 字符。在线化工具会以非标准的方式显示每个 token 中的字节。
 38 | :::
 39 | 
 40 | 从 @fig-token_openai_c2 中，我们也会发现，对中文的 tokenization 有其独特性——并非将每个汉字都处理为一个 token，有时候一个 token 可能是一个词。例如，“北京” 在 tokenization 之后是一个 token，其 ID 为 70090。
 41 | 
 42 | 如果想在代码中使用 OpenAI 的 tokenizer 工具进行 token 化处理，可以使用如下的库：
 43 | 
 44 | * Python：[tiktoken](https://github.com/openai/tiktoken)
 45 | * JavaScript: [dqbd/tiktoken](https://github.com/dqbd/tiktoken)
 46 | 
 47 | 对于文心大模型而言，我们可以使用 [千帆Token计算器](https://console.bce.baidu.com/tools/#/tokenizer) 来计算输入文本的 token 数量。
 48 | 
 49 | ![千帆Token计算器](./images/wenxi_tokenizer.jpg){#fig-wenxin_tokenizer}
 50 | 
 51 | ## Tokenization 方式
 52 | 把输入/输出文本拆分为 LLM AI 模型可以处理的、更小单元的过程，我们称之为：**Token 化**。如前所述，token 可以是单词、字符、子单词或符号。文本 Token 化之后，模型可以在 token 的基础上处理不同的语言、词汇和格式，并降低处理过程的计算成本和资源成本。Token 化还可以通过影响 token 的含义和上下文来影响生成文本的质量和多样性。
 53 | 
 54 | 目前主要有三种主流的 tokenization 算法：BPE，WordPiece，Unigram Language Model。
 55 | 
 56 | ### BPE
 57 | BPE（Byte Pair Encoding）最早是一种数据压缩算法，其思想是将经常一起出现的数据对替换为不在数据串中的其他字符，然后再通过一个 merge 表来恢复原始数据。
 58 | 
 59 | 在 2015 年，[-@sennrich2016neural] 把该算法引入到 NLP 领域。2019 年，[-@wang2019neural] 又提出了 BBPE（Byte-Level BPE）算法，将 BPE 的思想从字符级别扩展到字节级别。
 60 | 
 61 | OpenAI 所采用的 tokenization 算法就是 BPE 算法。BPE 可以帮助模型处理罕见的、或者看不见的单词，并创建更紧凑和一致的文本表示。BPE 还允许模型通过组合现有单词或 token 来生成新单词或 token。词汇量越大，模型生成的文本就越多样化和富有表现力。然而，词汇表越大，模型需要的内存和计算资源就越多。因此，词汇大小的选择取决于模型的质量和效率之间的权衡。
 62 | 
 63 | ### WordPiece
 64 | [-@pub37842] 提出了用于解决日语和韩语语音问题的 WordPiece。与 BPE 类似，WordPiece 也是从一个基础小词表出发，通过不断合并来产生最终的词表。
 65 | 
 66 | WordPiece 与 BPE 的主要的差别在于，BPE 按频率来选择合并的 token 对，而 WordPiece 按 token 间的互信息[^1]来进行合并。WordPiece 可以较好的平衡词表大小和 OOV[^2] 问题，但是可能会产生不太合理的、错误的切分，并且 WordPiece 对拼写错误非常敏感，同时其对前缀的支持也不够好。
 67 | 
 68 | ### Unigram Language Model
 69 | [-@kudo2018subword] 提出了 Unigram Language Model，其核心思想就是先初始化一个大词表，然后通过 unigram 语言模型计算删除不同 subword 造成的损失来代表 subword 的重要性，最后保留 loss 较大或者说重要性较高的 subword。
 70 | 
 71 | ULM 是一种基于语言模型的分词算法，这种语言模型可以给多种分词结果赋予概率，从而可以学到其中的噪声，其使用的训练算法可以利用所有可能的分词结果。但是，ULM 的效果与初始词表息息相关，初始词表的好坏会影响到最终的结果。
 72 | 
 73 | ## Token 与模型成本之间的关系
 74 | Tokenization 会影响 LLM 需要处理的数据量和计算次数。LLM 需要处理的 token 越多，模型消耗的内存和计算资源就越多。
 75 | 
 76 | 因此，运行 LLM 的成本取决于：
 77 | 
 78 | 1. tokenization 采用的算法和模型所使用的词汇表的大小
 79 | 2. 输入/输出文本的长度和复杂性
 80 | 
 81 | 因此，对于不同的模型[^3]，与模型交互时所使用的 token 数量的不同，最终所花费的成本也不同。对于 OpenAI 而言，GPT4 的费用是 GPT3 的 10 倍，对于 GPT4 而言，32K 上下文模型的费用是 4K 上下文模型费用的 2 倍[^4]。百度的文心 4.0 大模型的费用则是之前版本的 15 倍[^5]。
 82 | 
 83 | * 对于 OpenAI 的 gpt-3.5-turbo-16k 模型而言，每 1024 个输入 token 的费用为 0.003\$，每 1024 个输出 token 的费用为 0.004\$。
 84 | * 对于 OpenAI 的 gpt-4 模型而言，每 1024 个输入 token 的费用为 0.03\$，每 1024 个输出 token 的费用为 0.06\$。
 85 | * 对于 OpenAI 的 gpt-4-32k 模型而言，每 1024 个输入 token 的费用为 0.06\$，每 1024 个输出 token 的费用为 0.12\$。
 86 | * 对于百度的 ERNIE-Bot-turbo 模型而言，每 1000 个输入 token 的费用为 0.008￥，每 1000 个输出 token 的费用为 0.008￥。
 87 | * 对于百度的 ERNIE-Bot 4.0 模型而言，每 1000 个输入 token 的费用为 0.12￥，每 1000 个输出 token 的费用为 0.12￥。
 88 | 
 89 | 尤其是对于 LLM Agent，我们更需要特别关注其 Token 的使用量。对于 @lst-la_struct_agent_n 所示的 Agent，即便其只包含 @lst-lc_agent_tools_si 和 @lst-lc_agent_tools_mi 2 个工具，即便我们向 Agent 提问的输入 Token 看起来不多，但是实际上却可能产生非常多的输入 Token。
 90 | 
 91 | ![Agent 与 LLM 多次交互以及其复杂的提示词模版带来的 Token 暴增](./images/agent_tokens_demo.jpg){#fig-agent_tokens_demo}
 92 | 
 93 | [^1]: 在分词领域有时也被称为凝固度、内聚度，可以反映一个词内部的两个部分结合的紧密程度。
 94 | 
 95 | [^2]: OOV（Out-of-Vocabulary）：词粒度分词模型只能使用词表中的词来进行处理，无法处理词表之外的词汇，这就是所谓的 OOV 问题。
 96 | 
 97 | [^3]: [OpenAI 的模型列表](https://platform.openai.com/docs/models)
 98 | 
 99 | [^4]: [OpenAI 计费说明](https://openai.com/pricing)
100 | 
101 | [^5]: [文心大模型计费说明](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Blfmc9dlf)
102 | 


--------------------------------------------------------------------------------