├── phase2_practical
    ├── 13_rag_basics
    │   ├── data
    │   │   ├── test.txt
    │   │   └── langchain_intro.txt
    │   ├── test.py
    │   └── demo.py
    ├── 14_rag_advanced
    │   ├── data
    │   │   ├── test_docs.txt
    │   │   └── langchain_guide.txt
    │   ├── 模块完成总结.md
    │   └── test.py
    ├── 11_structured_output
    │   ├── test.py
    │   └── README.md
    ├── 09_checkpointing
    │   ├── 语法修正说明.md
    │   ├── test.py
    │   ├── view_db.py
    │   ├── demo_context_problem.py
    │   └── README.md
    ├── 07_memory_basics
    │   ├── test.py
    │   ├── README.md
    │   └── main.py
    ├── 10_middleware_basics
    │   ├── test.py
    │   └── README.md
    ├── 08_context_management
    │   └── README.md
    └── 12_validation_retry
    │   ├── test_fix.py
    │   └── test.py
├── phase1_fundamentals
    ├── 04_custom_tools
    │   ├── tools
    │   │   ├── weather.py
    │   │   ├── calculator.py
    │   │   └── web_search.py
    │   ├── README.md
    │   └── main.py
    ├── 05_simple_agent
    │   ├── test_simple.py
    │   ├── README.md
    │   └── main.py
    ├── 02_prompt_templates
    │   ├── examples
    │   │   ├── README.md
    │   │   └── template_library.py
    │   └── 模块完成总结.md
    ├── 06_agent_loop
    │   ├── test.py
    │   └── README.md
    ├── 03_messages
    │   ├── test.py
    │   ├── README.md
    │   └── main.py
    └── README.md
├── README.md
├── docs
    ├── SIMPLIFIED_LEARNING_PATH.md
    ├── temp.md
    └── FREE_API_GUIDE.md
├── .gitignore
├── requirements.txt
└── .env.example


/phase2_practical/13_rag_basics/data/test.txt:
--------------------------------------------------------------------------------
1 | LangChain 是一个强大的 LLM 应用框架。
2 | 
3 | 核心组件包括：
4 | 1. Models - 模型接口
5 | 2. Prompts - 提示词
6 | 3. Chains - 链
7 | 4. Agents - 代理
8 | 
9 | RAG 是 LangChain 的核心应用场景。


--------------------------------------------------------------------------------
/phase2_practical/13_rag_basics/data/langchain_intro.txt:
--------------------------------------------------------------------------------
1 | LangChain 是一个用于构建 LLM 应用的框架。
2 | 
3 | 它提供了以下核心组件：
4 | 1. Models - 语言模型接口
5 | 2. Prompts - 提示词模板
6 | 3. Chains - 链式调用
7 | 4. Agents - 智能代理
8 | 
9 | RAG (Retrieval-Augmented Generation) 是 LangChain 的核心应用场景之一。


--------------------------------------------------------------------------------
/phase2_practical/14_rag_advanced/data/test_docs.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | LangChain 框架核心组件
 3 | 
 4 | Models - 模型接口
 5 | 支持 OpenAI, Anthropic, Groq 等多种模型。
 6 | 版本要求：langchain>=1.0.0
 7 | 
 8 | Prompts - 提示词模板
 9 | 使用 PromptTemplate 和 ChatPromptTemplate。
10 | 
11 | Agents - 智能代理
12 | 使用 create_agent 函数创建代理。
13 | 支持工具调用和 ReAct 模式。
14 | 
15 | RAG 进阶技术
16 | 
17 | 混合检索 (Hybrid Search)
18 | 结合向量搜索和 BM25 关键词搜索。
19 | 
20 | BM25 算法
21 | Best Match 25，基于词频的检索算法。
22 | 是 TF-IDF 的改进版本。
23 | 
24 | EnsembleRetriever
25 | 使用 RRF (Reciprocal Rank Fusion) 算法。
26 | 组合多个检索器的结果。
27 | 
28 | 代码示例
29 | 
30 | @tool
31 | def search_docs(query: str) -> str:
32 |     return "结果"
33 | 
34 | agent = create_agent(model=model, tools=[search_docs])
35 | 


--------------------------------------------------------------------------------
/phase1_fundamentals/04_custom_tools/tools/weather.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 自定义工具：天气查询
 3 | ====================
 4 | 
 5 | 使用 @tool 装饰器创建工具（LangChain 1.0 推荐方式）
 6 | """
 7 | 
 8 | from langchain_core.tools import tool
 9 | 
10 | 
11 | @tool
12 | def get_weather(city: str) -> str:
13 |     """
14 |     获取指定城市的天气信息
15 | 
16 |     参数:
17 |         city: 城市名称，如"北京"、"上海"
18 | 
19 |     返回:
20 |         天气信息字符串
21 |     """
22 |     # 模拟天气数据（实际应用中应调用真实API）
23 |     weather_data = {
24 |         "北京": "晴天，温度 15°C，空气质量良好",
25 |         "上海": "多云，温度 18°C，有轻微雾霾",
26 |         "深圳": "阴天，温度 22°C，可能有小雨",
27 |         "成都": "小雨，温度 12°C，湿度较高"
28 |     }
29 | 
30 |     return weather_data.get(city, f"抱歉，暂时没有{city}的天气数据")
31 | 
32 | 
33 | # 测试工具
34 | if __name__ == "__main__":
35 |     print("测试天气工具：")
36 |     print(f"北京天气: {get_weather.invoke({'city': '北京'})}")
37 |     print(f"上海天气: {get_weather.invoke({'city': '上海'})}")
38 | 


--------------------------------------------------------------------------------
/phase1_fundamentals/04_custom_tools/tools/calculator.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 自定义工具：计算器
 3 | ==================
 4 | 
 5 | 演示带多个参数的工具
 6 | """
 7 | 
 8 | from langchain_core.tools import tool
 9 | 
10 | 
11 | @tool
12 | def calculator(operation: str, a: float, b: float) -> str:
13 |     """
14 |     执行基本的数学计算
15 | 
16 |     参数:
17 |         operation: 运算类型，支持 "add"(加), "subtract"(减), "multiply"(乘), "divide"(除)
18 |         a: 第一个数字
19 |         b: 第二个数字
20 | 
21 |     返回:
22 |         计算结果字符串
23 |     """
24 |     operations = {
25 |         "add": lambda x, y: x + y,
26 |         "subtract": lambda x, y: x - y,
27 |         "multiply": lambda x, y: x * y,
28 |         "divide": lambda x, y: x / y if y != 0 else "错误：除数不能为零"
29 |     }
30 | 
31 |     if operation not in operations:
32 |         return f"不支持的运算类型：{operation}。支持的类型：add, subtract, multiply, divide"
33 | 
34 |     try:
35 |         result = operations[operation](a, b)
36 |         return f"{a} {operation} {b} = {result}"
37 |     except Exception as e:
38 |         return f"计算错误：{e}"
39 | 
40 | 
41 | # 测试工具
42 | if __name__ == "__main__":
43 |     print("测试计算器工具：")
44 |     print(calculator.invoke({"operation": "add", "a": 10, "b": 5}))
45 |     print(calculator.invoke({"operation": "multiply", "a": 7, "b": 8}))
46 |     print(calculator.invoke({"operation": "divide", "a": 20, "b": 4}))
47 | 


--------------------------------------------------------------------------------
/phase2_practical/11_structured_output/test.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 简单测试：验证结构化输出功能
 3 | """
 4 | 
 5 | import os
 6 | from dotenv import load_dotenv
 7 | from langchain.chat_models import init_chat_model
 8 | from pydantic import BaseModel, Field
 9 | 
10 | load_dotenv()
11 | GROQ_API_KEY = os.getenv("GROQ_API_KEY")
12 | # Stop early when the key is unset or still the placeholder string.
13 | if not GROQ_API_KEY or GROQ_API_KEY == "your_groq_api_key_here_replace_this":
14 |     raise ValueError("请先设置 GROQ_API_KEY")
15 | 
16 | model = init_chat_model("groq:llama-3.3-70b-versatile", api_key=GROQ_API_KEY)
17 | 
18 | print("=" * 70)
19 | print("测试：结构化输出 - Pydantic 模型")
20 | print("=" * 70)
21 | 
22 | # NOTE(review): the class docstring and Field descriptions are presumably part of the schema sent to the model; wording left as-is.
23 | class Person(BaseModel):
24 |     """人物信息"""
25 |     name: str = Field(description="姓名")
26 |     age: int = Field(description="年龄")
27 |     occupation: str = Field(description="职业")
28 | 
29 | 
30 | # Build an LLM wrapper whose output is shaped by the Person schema.
31 | structured_llm = model.with_structured_output(Person)
32 | 
33 | print("\n提示: 张三是一名 30 岁的软件工程师")
34 | result = structured_llm.invoke("张三是一名 30 岁的软件工程师")
35 | 
36 | print(f"\n返回类型: {type(result)}")
37 | print(f"姓名: {result.name}")
38 | print(f"年龄: {result.age}")
39 | print(f"职业: {result.occupation}")
40 | # NOTE: the [成功] lines below are printed unconditionally; nothing is asserted.
41 | print("\n" + "=" * 70)
42 | print("测试结果：")
43 | print("  - with_structured_output() 返回 Pydantic 对象 [成功]")
44 | print("  - 自动类型验证 [成功]")
45 | print("  - 无需手动解析 JSON [成功]")
46 | print("=" * 70)
47 | 
48 | print("\n测试完成！")
49 | 


--------------------------------------------------------------------------------
/phase1_fundamentals/05_simple_agent/test_simple.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 简单测试：验证 Agent 可以正常工作
 3 | """
 4 | 
 5 | import os
 6 | import sys
 7 | 
 8 | # 添加工具目录到路径
 9 | parent_dir = os.path.dirname(os.path.dirname(__file__))
10 | sys.path.insert(0, os.path.join(parent_dir, '04_custom_tools', 'tools'))
11 | 
12 | from dotenv import load_dotenv
13 | from langchain.chat_models import init_chat_model
14 | from langchain.agents import create_agent
15 | from weather import get_weather
16 | from calculator import calculator
17 | 
18 | load_dotenv()
19 | GROQ_API_KEY = os.getenv("GROQ_API_KEY")
20 | # Stop early when the key is unset or still the placeholder string.
21 | if not GROQ_API_KEY or GROQ_API_KEY == "your_groq_api_key_here_replace_this":
22 |     raise ValueError("请先设置 GROQ_API_KEY")
23 | 
24 | model = init_chat_model("groq:llama-3.3-70b-versatile", api_key=GROQ_API_KEY)
25 | 
26 | print("创建 Agent...")
27 | agent = create_agent(
28 |     model=model,
29 |     tools=[get_weather, calculator],
30 |     system_prompt="你是一个helpful assistant。"
31 | )
32 | # For each call, the last entry of the returned 'messages' list is printed as the answer.
33 | print("\n测试1：天气查询")
34 | response1 = agent.invoke({
35 |     "messages": [{"role": "user", "content": "北京天气如何？"}]
36 | })
37 | print(f"回答：{response1['messages'][-1].content}\n")
38 | 
39 | print("测试2：计算")
40 | response2 = agent.invoke({
41 |     "messages": [{"role": "user", "content": "10 加 20 等于多少？"}]
42 | })
43 | print(f"回答：{response2['messages'][-1].content}\n")
44 | # NOTE: reaching this line only shows that neither call raised; tool use is not asserted.
45 | print("测试成功！Agent 可以正常调用工具。")
46 | 


--------------------------------------------------------------------------------
/phase2_practical/09_checkpointing/语法修正说明.md:
--------------------------------------------------------------------------------
 1 | # Module 09 - Checkpointing 修正说明
 2 | 
 3 | ## 问题发现
 4 | 
 5 | **错误用法**（会导致 `unable to open database file` 错误）：
 6 | ```python
 7 | SqliteSaver.from_conn_string("sqlite:///checkpoints.sqlite")
 8 | ```
 9 | 
10 | **正确用法**：
11 | ```python
12 | SqliteSaver.from_conn_string("checkpoints.sqlite")  # 直接传文件名
13 | ```
14 | 
15 | ## 正确的语法（2025 最新）
16 | 
17 | ### 1. 相对路径
18 | ```python
19 | with SqliteSaver.from_conn_string("checkpoints.sqlite") as checkpointer:
20 |     agent = create_agent(model=model, checkpointer=checkpointer)
21 | ```
22 | 
23 | ### 2. 绝对路径（Windows）
24 | ```python
25 | with SqliteSaver.from_conn_string("C:/data/checkpoints.sqlite") as checkpointer:
26 |     agent = create_agent(model=model, checkpointer=checkpointer)
27 | ```
28 | 
29 | ### 3. 内存数据库
30 | ```python
31 | with SqliteSaver.from_conn_string(":memory:") as checkpointer:
32 |     agent = create_agent(model=model, checkpointer=checkpointer)
33 | ```
34 | 
35 | ## 关键点
36 | 
37 | 1. ✅ **必须使用 `with` 语句**：`SqliteSaver.from_conn_string()` 返回上下文管理器
38 | 2. ✅ **不要加 `sqlite:///` 前缀**：直接传文件路径
39 | 3. ✅ **相对路径更简单**：在当前目录创建数据库文件
40 | 4. ✅ **离开 `with` 代码块时会自动关闭连接**：无需等到程序退出，`with` 语句确保资源释放
41 | 
42 | ## 已修正的文件
43 | 
44 | - ✅ `test.py` - 验证通过，成功持久化
45 | - ✅ `main.py` - 所有示例已更新
46 | - ⏳ `README.md` - 需要更新文档
47 | 
48 | ## 安装依赖
49 | 
50 | ```bash
51 | pip install "langgraph-checkpoint-sqlite>=3.0.0"
52 | ```
53 | 
54 | 已添加到 `requirements.txt`
55 | 


--------------------------------------------------------------------------------
/phase1_fundamentals/02_prompt_templates/examples/README.md:
--------------------------------------------------------------------------------
 1 | # 提示词模板示例库
 2 | 
 3 | 这个目录包含各种实用的提示词模板示例。
 4 | 
 5 | ## 文件说明
 6 | 
 7 | ### template_library.py
 8 | 
 9 | 包含完整的可复用模板库，分类如下：
10 | 
11 | - **翻译类模板**
12 |   - TRANSLATOR - 专业翻译
13 | 
14 | - **编程类模板**
15 |   - CODE_GENERATOR - 代码生成
16 |   - CODE_REVIEWER - 代码审查
17 |   - CODE_EXPLAINER - 代码解释
18 |   - DEBUG_HELPER - 调试助手
19 | 
20 | - **内容创作类模板**
21 |   - SUMMARIZER - 内容摘要
22 |   - ARTICLE_WRITER - 文章写作
23 |   - EMAIL_WRITER - 邮件撰写
24 | 
25 | - **教育类模板**
26 |   - TUTOR - 教学辅导
27 |   - QUIZ_GENERATOR - 测验生成
28 | 
29 | - **商务类模板**
30 |   - PRODUCT_DESCRIPTION - 产品描述
31 |   - MARKET_ANALYSIS - 市场分析
32 | 
33 | - **客户服务类模板**
34 |   - CUSTOMER_SUPPORT - 客户服务
35 |   - FAQ_RESPONDER - FAQ回答
36 | 
37 | - **数据分析类模板**
38 |   - DATA_ANALYZER - 数据分析
39 |   - REPORT_GENERATOR - 报告生成
40 | 
41 | ## 使用方法
42 | 
43 | ```python
44 | from examples.template_library import TemplateLibrary
45 | 
46 | # 使用翻译模板
47 | messages = TemplateLibrary.TRANSLATOR.format_messages(
48 |     source_lang="英语",
49 |     target_lang="中文",
50 |     text="Hello World"
51 | )
52 | 
53 | response = model.invoke(messages)
54 | print(response.content)
55 | ```
56 | 
57 | ## 测试模板库
58 | 
59 | ```bash
60 | python examples/template_library.py
61 | ```
62 | 
63 | ## 自定义模板
64 | 
65 | 你可以基于这些模板创建自己的变体：
66 | 
67 | ```python
68 | # 创建英译中专用模板
69 | from examples.template_library import TemplateLibrary
70 | 
71 | en_to_zh = TemplateLibrary.TRANSLATOR.partial(
72 |     source_lang="英语",
73 |     target_lang="中文"
74 | )
75 | 
76 | # 使用
77 | messages = en_to_zh.format_messages(text="Hello")
78 | ```
79 | 


--------------------------------------------------------------------------------
/phase2_practical/07_memory_basics/test.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 简单测试：验证内存功能
 3 | """
 4 | 
 5 | import os
 6 | from dotenv import load_dotenv
 7 | from langchain.chat_models import init_chat_model
 8 | from langchain.agents import create_agent
 9 | from langgraph.checkpoint.memory import InMemorySaver
10 | 
11 | load_dotenv()
12 | GROQ_API_KEY = os.getenv("GROQ_API_KEY")
13 | # Stop early when the key is unset or still the placeholder string.
14 | if not GROQ_API_KEY or GROQ_API_KEY == "your_groq_api_key_here_replace_this":
15 |     raise ValueError("请先设置 GROQ_API_KEY")
16 | 
17 | model = init_chat_model("groq:llama-3.3-70b-versatile", api_key=GROQ_API_KEY)
18 | 
19 | print("=" * 70)
20 | print("测试：InMemorySaver 内存功能")
21 | print("=" * 70)
22 | 
23 | # Create an agent backed by an in-memory checkpointer.
24 | agent = create_agent(
25 |     model=model,
26 |     tools=[],
27 |     checkpointer=InMemorySaver()
28 | )
29 | # Both turns below pass this same thread_id config.
30 | config = {"configurable": {"thread_id": "test_session"}}
31 | 
32 | print("\n第一轮对话：")
33 | print("用户: 我叫张三")
34 | response1 = agent.invoke(
35 |     {"messages": [{"role": "user", "content": "我叫张三"}]},
36 |     config=config
37 | )
38 | print(f"Agent: {response1['messages'][-1].content}")
39 | 
40 | print("\n第二轮对话：")
41 | print("用户: 我叫什么？")
42 | response2 = agent.invoke(
43 |     {"messages": [{"role": "user", "content": "我叫什么？"}]},
44 |     config=config
45 | )
46 | print(f"Agent: {response2['messages'][-1].content}")
47 | 
48 | print("\n" + "=" * 70)
49 | print("内存状态：")
50 | print(f"  总消息数: {len(response2['messages'])}")
51 | print(f"  thread_id: {config['configurable']['thread_id']}")
52 | print("=" * 70)
53 | # The check is a substring match on the second reply; a miss only prints a warning.
54 | if "张三" in response2['messages'][-1].content:
55 |     print("\n测试成功！Agent 记住了名字。")
56 | else:
57 |     print("\n警告：Agent 可能没有正确记住")
58 | 
59 | print("\n测试完成！")
60 | 


--------------------------------------------------------------------------------
/phase2_practical/14_rag_advanced/data/langchain_guide.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | # LangChain 框架详解
 3 | 
 4 | ## 核心组件
 5 | 
 6 | LangChain 提供以下核心组件：
 7 | 
 8 | 1. Models (模型接口)
 9 |    - 支持 OpenAI GPT-4, GPT-3.5
10 |    - 支持 Anthropic Claude
11 |    - 支持 Groq Llama 模型
12 |    - 版本号：langchain>=1.0.0
13 | 
14 | 2. Prompts (提示词模板)
15 |    - PromptTemplate 类
16 |    - ChatPromptTemplate 类
17 |    - 支持变量插值
18 | 
19 | 3. Chains (链式调用)
20 |    - 已在 1.0 中废弃
21 |    - 建议使用 LCEL (LangChain Expression Language)
22 | 
23 | 4. Agents (智能代理)
24 |    - create_agent 函数
25 |    - 工具调用机制
26 |    - ReAct 模式
27 | 
28 | 5. Memory (记忆管理)
29 |    - InMemorySaver 类
30 |    - SQLite checkpointer
31 |    - 对话历史管理
32 | 
33 | ## RAG 技术栈
34 | 
35 | ### 基础 RAG
36 | - 文档加载：TextLoader, PyPDFLoader
37 | - 文本分割：RecursiveCharacterTextSplitter
38 | - 向量嵌入：HuggingFaceEmbeddings
39 | - 向量存储：Pinecone, Chroma, FAISS
40 | 
41 | ### 进阶 RAG
42 | - 混合搜索：BM25 + 向量搜索
43 | - EnsembleRetriever：组合多个检索器
44 | - 重排序：Reranking 模型
45 | - 查询优化：Query rewriting
46 | 
47 | ## 代码示例
48 | 
49 | ```python
50 | from langchain.agents import create_agent
51 | from langchain_core.tools import tool
52 | 
53 | @tool
54 | def search_docs(query: str) -> str:
55 |     """搜索文档"""
56 |     return "结果"
57 | 
58 | agent = create_agent(
59 |     model=model,
60 |     tools=[search_docs],
61 |     system_prompt="你是助手"
62 | )
63 | ```
64 | 
65 | ## 性能优化
66 | 
67 | 1. Chunk 大小：建议 500-1000 字符
68 | 2. Chunk 重叠：10-20%
69 | 3. 检索数量：k=3-5
70 | 4. 混合搜索权重：向量 0.6, BM25 0.4
71 | 
72 | ## 常见问题
73 | 
74 | Q: LangChain 1.0 有什么新特性？
75 | A: 更简洁的 API，内置 LangGraph，改进的中间件系统
76 | 
77 | Q: 如何选择向量数据库？
78 | A: Pinecone 适合生产，Chroma 适合开发，FAISS 适合离线
79 | 
80 | Q: BM25 是什么？
81 | A: Best Match 25，一种基于词频的检索算法，适合精确匹配
82 | 


--------------------------------------------------------------------------------
/phase1_fundamentals/04_custom_tools/tools/web_search.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 自定义工具：网页搜索（模拟）
 3 | ============================
 4 | 
 5 | 演示可选参数的工具
 6 | """
 7 | 
 8 | from langchain_core.tools import tool
 9 | from typing import Optional
10 | 
11 | 
12 | @tool
13 | def web_search(query: str, num_results: Optional[int] = 3) -> str:
14 |     """
15 |     在网上搜索信息（模拟）
16 | 
17 |     参数:
18 |         query: 搜索关键词
19 |         num_results: 返回结果数量，默认3条
20 | 
21 |     返回:
22 |         搜索结果字符串
23 |     """
24 |     # 模拟搜索结果
25 |     mock_results = {
26 |         "Python": [
27 |             "Python官方网站 - https://www.python.org",
28 |             "Python教程 - 菜鸟教程",
29 |             "Python最佳实践 - Real Python"
30 |         ],
31 |         "机器学习": [
32 |             "机器学习入门 - Coursera",
33 |             "Scikit-learn文档",
34 |             "机器学习实战 - GitHub"
35 |         ],
36 |         "LangChain": [
37 |             "LangChain官方文档",
38 |             "LangChain GitHub仓库",
39 |             "LangChain教程 - YouTube"
40 |         ]
41 |     }
42 | 
43 |     # 查找结果
44 |     results = []
45 |     for key in mock_results:
46 |         if key.lower() in query.lower():
47 |             results = mock_results[key][:num_results]
48 |             break
49 | 
50 |     if not results:
51 |         return f"未找到关于'{query}'的结果"
52 | 
53 |     # 格式化输出
54 |     output = f"搜索 '{query}' 找到 {len(results)} 条结果：\n"
55 |     for i, result in enumerate(results, 1):
56 |         output += f"{i}. {result}\n"
57 | 
58 |     return output.strip()
59 | 
60 | 
61 | # 测试工具
62 | if __name__ == "__main__":
63 |     print("测试搜索工具：")
64 |     print(web_search.invoke({"query": "Python"}))
65 |     print("\n" + web_search.invoke({"query": "LangChain", "num_results": 2}))
66 | 


--------------------------------------------------------------------------------
/phase2_practical/10_middleware_basics/test.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 简单测试：验证中间件功能
 3 | """
 4 | 
 5 | import os
 6 | from dotenv import load_dotenv
 7 | from langchain.chat_models import init_chat_model
 8 | from langchain.agents import create_agent
 9 | from langchain.agents.middleware import AgentMiddleware
10 | 
11 | load_dotenv()
12 | GROQ_API_KEY = os.getenv("GROQ_API_KEY")
13 | # Stop early when the key is unset or still the placeholder string.
14 | if not GROQ_API_KEY or GROQ_API_KEY == "your_groq_api_key_here_replace_this":
15 |     raise ValueError("请先设置 GROQ_API_KEY")
16 | 
17 | model = init_chat_model("groq:llama-3.3-70b-versatile", api_key=GROQ_API_KEY)
18 | 
19 | print("=" * 70)
20 | print("测试：中间件 before_model 和 after_model")
21 | print("=" * 70)
22 | 
23 | 
24 | class TestMiddleware(AgentMiddleware):
25 |     """Middleware that prints a trace line from each hook."""
26 | 
27 |     def before_model(self, state, runtime):
28 |         print("\n[测试] before_model 执行")
29 |         print(f"[测试] 当前消息数: {len(state.get('messages', []))}")
30 |         return None
31 | 
32 |     def after_model(self, state, runtime):
33 |         print("[测试] after_model 执行")
34 |         last_msg = state.get('messages', [])[-1]  # assumes a non-empty list; [-1] raises IndexError otherwise
35 |         print(f"[测试] 响应类型: {last_msg.__class__.__name__}")
36 |         return None
37 | 
38 | 
39 | # Create an agent that is given the middleware instance.
40 | agent = create_agent(
41 |     model=model,
42 |     tools=[],
43 |     middleware=[TestMiddleware()]
44 | )
45 | 
46 | print("\n执行测试调用...")
47 | print("用户: 你好")
48 | 
49 | response = agent.invoke({"messages": [{"role": "user", "content": "你好"}]})
50 | 
51 | print(f"\nAgent: {response['messages'][-1].content}")
52 | # NOTE: the [成功] lines below are printed unconditionally; hook execution is not asserted.
53 | print("\n" + "=" * 70)
54 | print("测试结果：")
55 | print("  - before_model 在模型调用前执行 [成功]")
56 | print("  - after_model 在模型响应后执行 [成功]")
57 | print("=" * 70)
58 | 
59 | print("\n测试完成！")
60 | 


--------------------------------------------------------------------------------
/phase1_fundamentals/06_agent_loop/test.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 简单测试：验证 Agent 执行循环
 3 | """
 4 | 
 5 | import os
 6 | import sys
 7 | 
 8 | # 添加工具目录到路径
 9 | parent_dir = os.path.dirname(os.path.dirname(__file__))
10 | sys.path.insert(0, os.path.join(parent_dir, '04_custom_tools', 'tools'))
11 | 
12 | from dotenv import load_dotenv
13 | from langchain.chat_models import init_chat_model
14 | from langchain.agents import create_agent
15 | from calculator import calculator
16 | 
17 | load_dotenv()
18 | GROQ_API_KEY = os.getenv("GROQ_API_KEY")
19 | 
20 | if not GROQ_API_KEY or GROQ_API_KEY == "your_groq_api_key_here_replace_this":
21 |     raise ValueError("请先设置 GROQ_API_KEY")
22 | 
23 | model = init_chat_model("groq:llama-3.3-70b-versatile", api_key=GROQ_API_KEY)
24 | 
25 | print("=" * 70)
26 | print("测试：Agent 执行循环")
27 | print("=" * 70)
28 | 
29 | agent = create_agent(model=model, tools=[calculator])
30 | 
31 | print("\n问题：10 加 20 等于多少？")
32 | response = agent.invoke({
33 |     "messages": [{"role": "user", "content": "10 加 20 等于多��？"}]
34 | })
35 | 
36 | print("\n完整消息历史：")
37 | for i, msg in enumerate(response['messages'], 1):
38 |     msg_type = msg.__class__.__name__
39 |     print(f"\n消息 {i}: {msg_type}")
40 | 
41 |     if hasattr(msg, 'content') and msg.content:
42 |         print(f"  内容: {msg.content}")
43 | 
44 |     if hasattr(msg, 'tool_calls') and msg.tool_calls:
45 |         print(f"  工具调用: {msg.tool_calls[0]['name']}")
46 | 
47 | print("\n" + "=" * 70)
48 | print("最终答案:", response['messages'][-1].content)
49 | print("=" * 70)
50 | 
51 | # 测试流式输出
52 | print("\n测试流式输出：")
53 | print("问题：5 乘以 6")
54 | print("-" * 70)
55 | 
56 | for chunk in agent.stream({
57 |     "messages": [{"role": "user", "content": "5 乘以 6"}]
58 | }):
59 |     if 'messages' in chunk:
60 |         latest = chunk['messages'][-1]
61 |         if hasattr(latest, 'content') and latest.content:
62 |             if not hasattr(latest, 'tool_calls') or not latest.tool_calls:
63 |                 print(f"最终答案: {latest.content}")
64 | 
65 | print("\n测试成功！")
66 | 


--------------------------------------------------------------------------------
/phase2_practical/09_checkpointing/test.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 简单测试：验证 SQLite 持久化功能
 3 | """
 4 | 
 5 | import os
 6 | from dotenv import load_dotenv
 7 | from langchain.chat_models import init_chat_model
 8 | from langchain.agents import create_agent
 9 | from langgraph.checkpoint.sqlite import SqliteSaver
10 | 
11 | load_dotenv()
12 | GROQ_API_KEY = os.getenv("GROQ_API_KEY")
13 | # Stop early when the key is unset or still the placeholder string.
14 | if not GROQ_API_KEY or GROQ_API_KEY == "your_groq_api_key_here_replace_this":
15 |     raise ValueError("请先设置 GROQ_API_KEY")
16 | 
17 | model = init_chat_model("groq:llama-3.3-70b-versatile", api_key=GROQ_API_KEY)
18 | 
19 | print("=" * 70)
20 | print("测试：SqliteSaver 持久化功能")
21 | print("=" * 70)
22 | 
23 | # Persistent checkpointer: pass the bare file name, without a sqlite:/// prefix.
24 | db_path = "test_checkpoints.sqlite"
25 | 
26 | # Manage the SqliteSaver with a with-statement.
27 | with SqliteSaver.from_conn_string(db_path) as checkpointer:  # bare file name
28 |     # Build the agent on top of the checkpointer.
29 |     agent = create_agent(
30 |         model=model,
31 |         tools=[],
32 |         checkpointer=checkpointer
33 |     )
34 | 
35 |     config = {"configurable": {"thread_id": "test_persistence"}}
36 | 
37 |     print("\n第一轮对话：")
38 |     print("用户: 我叫王五")
39 |     response1 = agent.invoke(
40 |         {"messages": [{"role": "user", "content": "我叫王五"}]},
41 |         config=config
42 |     )
43 |     print(f"Agent: {response1['messages'][-1].content}")
44 | 
45 | print("\n第二轮对话（模拟重启）：")
46 | print("[创建新的 agent 实例...]")
47 | 
48 | # Simulated restart: a new checkpointer and a new agent over the same database file.
49 | with SqliteSaver.from_conn_string(db_path) as checkpointer_new:  # bare file name
50 |     agent_new = create_agent(
51 |         model=model,
52 |         tools=[],
53 |         checkpointer=checkpointer_new
54 |     )
55 | 
56 |     print("用户: 我叫什么？")
57 |     response2 = agent_new.invoke(
58 |         {"messages": [{"role": "user", "content": "我叫什么？"}]},
59 |         config=config
60 |     )
61 |     print(f"Agent: {response2['messages'][-1].content}")
62 | 
63 |     print("\n" + "=" * 70)
64 |     print("持久化状态：")
65 |     print(f"  数据库文件: {db_path}")
66 |     print(f"  thread_id: {config['configurable']['thread_id']}")
67 |     print(f"  总消息数: {len(response2['messages'])}")
68 |     print("=" * 70)
69 |     # The check is a substring match on the second reply; a miss only prints a warning.
70 |     if "王五" in response2['messages'][-1].content:
71 |         print("\n[成功] 测试成功！Agent 记住了名字（持久化有效）。")
72 |     else:
73 |         print("\n[警告] Agent 可能没有正确记住")
74 | 
75 | print("\n测试完成！")
76 | 


--------------------------------------------------------------------------------
/phase2_practical/09_checkpointing/view_db.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 查看 SQLite 数据库内容的简单脚本
 3 | """
 4 | 
 5 | import sqlite3
 6 | import os
 7 | 
 8 | def view_database(db_path):
 9 |     """查看 SQLite 数据库的表和数据"""
10 |     if not os.path.exists(db_path):
11 |         print(f"❌ 数据库文件不存在：{db_path}")
12 |         return
13 | 
14 |     print(f"\n{'='*70}")
15 |     print(f"查看数据库：{os.path.basename(db_path)}")
16 |     print(f"{'='*70}")
17 | 
18 |     conn = sqlite3.connect(db_path)
19 |     cursor = conn.cursor()
20 | 
21 |     # 查看所有表
22 |     cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
23 |     tables = cursor.fetchall()
24 | 
25 |     print(f"\n📋 数据库中的表：")
26 |     for table in tables:
27 |         print(f"  - {table[0]}")
28 | 
29 |     # 查看每个表的数据
30 |     for table in tables:
31 |         table_name = table[0]
32 |         print(f"\n📊 表 '{table_name}' 的内容：")
33 | 
34 |         try:
35 |             cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
36 |             count = cursor.fetchone()[0]
37 |             print(f"  记录数：{count}")
38 | 
39 |             # 显示前5条记录
40 |             cursor.execute(f"SELECT * FROM {table_name} LIMIT 5")
41 |             rows = cursor.fetchall()
42 | 
43 |             if rows:
44 |                 # 获取列名
45 |                 cursor.execute(f"PRAGMA table_info({table_name})")
46 |                 columns = [col[1] for col in cursor.fetchall()]
47 |                 print(f"  列：{', '.join(columns)}")
48 | 
49 |                 print("\n  前5条记录：")
50 |                 for i, row in enumerate(rows, 1):
51 |                     print(f"    [{i}] {row[:3]}...")  # 只显示前3个字段
52 |             else:
53 |                 print("  （空表）")
54 | 
55 |         except sqlite3.Error as e:
56 |             print(f"  ❌ 错误：{e}")
57 | 
58 |     conn.close()
59 | 
60 | 
61 | def main():
62 |     """主函数"""
63 |     base_dir = "C:/Users/wangy/Desktop/temp/langchain_v1_study/phase2_practical/09_checkpointing"
64 | 
65 |     db_files = [
66 |         "checkpoints.sqlite",
67 |         "multi_user.sqlite",
68 |         "tools.sqlite",
69 |         "customer_service.sqlite"
70 |     ]
71 | 
72 |     print("\n" + "="*70)
73 |     print(" SQLite 数据库查看工具")
74 |     print("="*70)
75 | 
76 |     for db_file in db_files:
77 |         db_path = os.path.join(base_dir, db_file)
78 |         view_database(db_path)
79 | 
80 |     print("\n" + "="*70)
81 |     print(" 完成！")
82 |     print("="*70)
83 |     print("\n💡 提示：")
84 |     print("  - 如果显示'数据库文件不存在'，请先运行 main.py")
85 |     print("  - 可以使用在线工具查看完整内容：https://sqliteviewer.app/")
86 | 
87 | 
88 | if __name__ == "__main__":
89 |     main()
90 | 


--------------------------------------------------------------------------------
/phase1_fundamentals/03_messages/test.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 测试脚本 - 验证对话历史管理
  3 | ==============================
  4 | """
  5 | 
  6 | import os
  7 | from dotenv import load_dotenv
  8 | from langchain.chat_models import init_chat_model
  9 | 
 10 | load_dotenv()
 11 | GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 12 | 
 13 | if not GROQ_API_KEY or GROQ_API_KEY == "your_groq_api_key_here_replace_this":
 14 |     print("请先设置 GROQ_API_KEY")
 15 |     exit(1)
 16 | 
 17 | model = init_chat_model("groq:llama-3.3-70b-versatile", api_key=GROQ_API_KEY)
 18 | 
 19 | 
 20 | def test_conversation_memory():
 21 |     """测试 AI 是否记住对话"""
 22 |     print("\n测试：AI 对话记忆")
 23 |     print("="*50)
 24 | 
 25 |     conversation = [
 26 |         {"role": "system", "content": "你是助手"}
 27 |     ]
 28 | 
 29 |     # 告诉 AI 用户名字
 30 |     conversation.append({"role": "user", "content": "我叫李明"})
 31 |     r1 = model.invoke(conversation)
 32 |     conversation.append({"role": "assistant", "content": r1.content})
 33 |     print(f"用户: 我叫李明")
 34 |     print(f"AI: {r1.content[:50]}...")
 35 | 
 36 |     # 测试记忆
 37 |     conversation.append({"role": "user", "content": "我叫什么名字？"})
 38 |     r2 = model.invoke(conversation)
 39 |     print(f"\n用户: 我叫什么名字？")
 40 |     print(f"AI: {r2.content}")
 41 | 
 42 |     # 验证
 43 |     if "李明" in r2.content:
 44 |         print("\n✅ 测试通过：AI 记住了用户名字")
 45 |         return True
 46 |     else:
 47 |         print("\n❌ 测试失败：AI 忘记了用户名字")
 48 |         return False
 49 | 
 50 | 
 51 | def test_optimize_history():
 52 |     """测试历史优化函数"""
 53 |     print("\n\n测试：历史优化")
 54 |     print("="*50)
 55 | 
 56 |     def keep_recent_messages(messages, max_pairs=3):
 57 |         system_msgs = [m for m in messages if m.get("role") == "system"]
 58 |         conversation = [m for m in messages if m.get("role") != "system"]
 59 |         recent = conversation[-(max_pairs * 2):]
 60 |         return system_msgs + recent
 61 | 
 62 |     # 创建长历史
 63 |     long_conversation = [
 64 |         {"role": "system", "content": "你是助手"},
 65 |         {"role": "user", "content": "问题1"},
 66 |         {"role": "assistant", "content": "回答1"},
 67 |         {"role": "user", "content": "问题2"},
 68 |         {"role": "assistant", "content": "回答2"},
 69 |         {"role": "user", "content": "问题3"},
 70 |         {"role": "assistant", "content": "回答3"},
 71 |         {"role": "user", "content": "问题4"},
 72 |         {"role": "assistant", "content": "回答4"},
 73 |     ]
 74 | 
 75 |     print(f"原始消息数: {len(long_conversation)}")
 76 | 
 77 |     # 优化
 78 |     optimized = keep_recent_messages(long_conversation, max_pairs=2)
 79 |     print(f"优化后消息数: {len(optimized)}")
 80 | 
 81 |     # 验证
 82 |     expected = 1 + (2 * 2)  # system + 2轮对话
 83 |     if len(optimized) == expected:
 84 |         print(f"✅ 测试通过：保留了 system + 最近2轮")
 85 |         return True
 86 |     else:
 87 |         print(f"❌ 测试失败：期望 {expected} 条，实际 {len(optimized)} 条")
 88 |         return False
 89 | 
 90 | 
 91 | if __name__ == "__main__":
 92 |     print("\n" + "="*50)
 93 |     print(" 运行测试")
 94 |     print("="*50)
 95 | 
 96 |     results = []
 97 |     results.append(test_conversation_memory())
 98 |     results.append(test_optimize_history())
 99 | 
100 |     print("\n" + "="*50)
101 |     print(" 测试结果")
102 |     print("="*50)
103 |     print(f"通过: {sum(results)}/{len(results)}")
104 | 
105 |     if all(results):
106 |         print("✅ 所有测试通过！")
107 |     else:
108 |         print("❌ 部分测试失败")
109 | 


--------------------------------------------------------------------------------
/phase2_practical/09_checkpointing/demo_context_problem.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 演示：对话历史过长的问题
  3 | """
  4 | 
  5 | import os
  6 | from dotenv import load_dotenv
  7 | from langchain.chat_models import init_chat_model
  8 | from langchain.agents import create_agent
  9 | from langgraph.checkpoint.sqlite import SqliteSaver
 10 | 
 11 | load_dotenv()
 12 | GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 13 | # Stop early when the key is unset or still the placeholder string.
 14 | if not GROQ_API_KEY or GROQ_API_KEY == "your_groq_api_key_here_replace_this":
 15 |     raise ValueError("请先设置 GROQ_API_KEY")
 16 | 
 17 | model = init_chat_model("groq:llama-3.3-70b-versatile", api_key=GROQ_API_KEY)
 18 | 
 19 | 
 20 | def demo_long_conversation():
 21 |     """
 22 |     演示：对话历史过长的问题
 23 |     """
 24 |     print("\n" + "="*70)
 25 |     print(" 演示：对话历史过长的性能问题")
 26 |     print("="*70)
 27 | 
 28 |     db_path = "long_conversation.sqlite"
 29 | 
 30 |     with SqliteSaver.from_conn_string(f"sqlite:///{db_path}") as checkpointer:
 31 |         agent = create_agent(
 32 |             model=model,
 33 |             tools=[],
 34 |             checkpointer=checkpointer
 35 |         )
 36 | 
 37 |         config = {"configurable": {"thread_id": "test_user"}}
 38 | 
 39 |         # 模拟 50 轮对话
 40 |         print("\n[模拟 50 轮对话...]")
 41 |         for i in range(1, 51):
 42 |             agent.invoke(
 43 |                 {"messages": [{"role": "user", "content": f"这是第 {i} 条消息"}]},
 44 |                 config=config
 45 |             )
 46 |             if i % 10 == 0:
 47 |                 print(f"  已完成 {i} 轮...")
 48 | 
 49 |         print("\n[尝试获取状态，查看加载的消息数量...]")
 50 | 
 51 |         # 获取当前状态
 52 |         state = checkpointer.get(config)
 53 |         if state and state.values:
 54 |             messages = state.values.get("messages", [])
 55 |             print(f"\n⚠️ 当前加载的消息数量：{len(messages)}")
 56 |             print(f"⚠️ 这意味着每次 invoke 都会加载这么多消息！")
 57 | 
 58 |             # 计算大致的 Token 数（简化估算）
 59 |             total_chars = sum(len(str(msg)) for msg in messages)
 60 |             estimated_tokens = total_chars // 4  # 粗略估算
 61 |             print(f"⚠️ 估算 Token 数：~{estimated_tokens}")
 62 | 
 63 |             print("\n问题：")
 64 |             print("  1. 随着对话增长，每次加载的数据越来越多")
 65 |             print("  2. 超过模型上下文窗口限制会报错")
 66 |             print("  3. 性能下降，响应变慢")
 67 |             print("  4. Token 费用增加")
 68 | 
 69 |     # 清理
 70 |     if os.path.exists(db_path):
 71 |         os.remove(db_path)
 72 |         print(f"\n[已清理测试数据库]")
 73 | 
 74 | 
 75 | def show_solutions():
 76 |     """
 77 |     展示解决方案
 78 |     """
 79 |     print("\n" + "="*70)
 80 |     print(" 解决方案")
 81 |     print("="*70)
 82 | 
 83 |     print("""
 84 | LangChain 提供了多种策略来管理上下文：
 85 | 
 86 | 1. 消息修剪（Message Trimming）⭐ 推荐
 87 |    - 只保留最近 N 条消息
 88 |    - 保留系统消息 + 最近对话
 89 | 
 90 | 2. 消息摘要（Summarization）
 91 |    - 定期总结旧消息
 92 |    - 用摘要替换历史
 93 | 
 94 | 3. 滑动窗口（Sliding Window）
 95 |    - 固定窗口大小
 96 |    - 自动丢弃旧消息
 97 | 
 98 | 4. Token 限制
 99 |    - 根据 Token 数量裁剪
100 |    - 适配不同模型的上下文窗口
101 | 
102 | 这些策略在 phase2_practical/08_context_management 模块中详细讲解！
103 |     """)
104 | 
105 | 
106 | if __name__ == "__main__":
107 |     try:
108 |         demo_long_conversation()
109 |         show_solutions()
110 | 
111 |         print("\n" + "="*70)
112 |         print(" 下一步")
113 |         print("="*70)
114 |         print("\n查看详细解决方案：")
115 |         print("  cd phase2_practical/08_context_management")
116 |         print("  python main.py")
117 | 
118 |     except Exception as e:
119 |         print(f"\n错误: {e}")
120 |         import traceback
121 |         traceback.print_exc()
122 | 


--------------------------------------------------------------------------------
/phase2_practical/08_context_management/README.md:
--------------------------------------------------------------------------------
  1 | # 08 - Context Management (上下文管理)
  2 | 
  3 | ## 核心概念
  4 | 
  5 | **问题**：对话历史会无限增长 → 超 token、成本高、响应慢
  6 | 
  7 | **解决**：使用中间件自动管理上下文长度
  8 | 
  9 | ## SummarizationMiddleware（推荐）
 10 | 
 11 | ### 基本用法
 12 | 
 13 | ```python
 14 | from langchain.agents import create_agent
 15 | from langchain.agents.middleware import SummarizationMiddleware
 16 | from langgraph.checkpoint.memory import InMemorySaver
 17 | 
 18 | agent = create_agent(
 19 |     model=model,
 20 |     tools=[],
 21 |     checkpointer=InMemorySaver(),
 22 |     middleware=[
 23 |         SummarizationMiddleware(
 24 |             model="groq:llama-3.3-70b-versatile",
 25 |             max_tokens_before_summary=500  # 超过 500 tokens 触发摘要
 26 |         )
 27 |     ]
 28 | )
 29 | ```
 30 | 
 31 | ### 工作原理
 32 | 
 33 | ```
 34 | 对话历史: [消息1, 消息2, ..., 消息20]  (超过 500 tokens)
 35 |     ↓
 36 | SummarizationMiddleware 自动触发
 37 |     ↓
 38 | 摘要旧消息: "用户是张三，在北京工作，喜欢编程..."
 39 |     ↓
 40 | 新历史: [摘要, 最近消息]  (减少到 300 tokens)
 41 | ```
 42 | 
 43 | ### 参数说明
 44 | 
 45 | | 参数 | 说明 | 默认值 |
 46 | |-----|------|--------|
 47 | | `model` | 生成摘要的模型（可用便宜模型） | 必需 |
 48 | | `max_tokens_before_summary` | 触发摘要的 token 阈值 | 1000 |
 49 | 
 50 | ## trim_messages（手动修剪）
 51 | 
 52 | ### 基本用法
 53 | 
 54 | ```python
 55 | from langchain_core.messages import trim_messages
 56 | 
 57 | # 只保留最近 N 条消息（token_counter=len 表示按消息条数计数，所以这里 max_tokens=100 即最多保留 100 条）
 58 | trimmed = trim_messages(
 59 |     messages,
 60 |     max_tokens=100,
 61 |     strategy="last",  # 保留最后的
 62 |     token_counter=len
 63 | )
 64 | ```
 65 | 
 66 | ### 适用场景
 67 | 
 68 | - 只需要最近几轮对话
 69 | - 不需要保留旧信息
 70 | - 简单直接
 71 | 
 72 | ## 策略对比
 73 | 
 74 | | 策略 | 优点 | 缺点 | 适用 |
 75 | |-----|------|------|------|
 76 | | **不处理** | 完整历史 | 超 token | 短对话 |
 77 | | **SummarizationMiddleware** | 自动化、保留信息 | 摘要成本 | 长对话（推荐）|
 78 | | **trim_messages** | 简单、精确 | 丢失旧信息 | 只要最近 N 轮 |
 79 | 
 80 | ## 实际应用
 81 | 
 82 | ### 客服机器人
 83 | 
 84 | ```python
 85 | agent = create_agent(
 86 |     model=model,
 87 |     tools=[查询订单, 查询物流],
 88 |     system_prompt="客服助手",
 89 |     checkpointer=InMemorySaver(),
 90 |     middleware=[
 91 |         SummarizationMiddleware(
 92 |             model="groq:llama-3.3-70b-versatile",
 93 |             max_tokens_before_summary=800  # 适合客服
 94 |         )
 95 |     ]
 96 | )
 97 | ```
 98 | 
 99 | ### 长期对话助手
100 | 
101 | ```python
102 | agent = create_agent(
103 |     model=model,
104 |     tools=[],
105 |     middleware=[
106 |         SummarizationMiddleware(
107 |             model="groq:llama-3.3-70b-versatile",
108 |             max_tokens_before_summary=1000
109 |         )
110 |     ],
111 |     checkpointer=InMemorySaver()
112 | )
113 | ```
114 | 
115 | ## 常见问题
116 | 
117 | ### 1. 摘要会丢失信息吗？
118 | 
119 | 会有一些细节丢失，但：
120 | - 重要信息会保留（姓名、关键事实）
121 | - 最近的消息完整保留
122 | - 对于大部分场景足够
123 | 
124 | ### 2. 如何选择 max_tokens_before_summary？
125 | 
126 | ```python
127 | # 模型上下文窗口 4k → 设置 3000
128 | # 模型上下文窗口 8k → 设置 6000
129 | # 模型上下文窗口 16k → 设置 12000
130 | 
131 | # 留一些余量给工具调用和系统提示
132 | ```
133 | 
134 | ### 3. 摘要成本高吗？
135 | 
136 | - 摘要只在超过阈值时触发
137 | - 可以使用便宜的模型（如 gpt-3.5）
138 | - 相比传输全部历史，通常更便宜
139 | 
140 | ### 4. 能自定义摘要提示词吗？
141 | 
142 | 可以。`SummarizationMiddleware` 默认使用内置提示词，也可以通过 `summary_prompt` 参数传入自定义提示词。
143 | 如需更复杂的摘要逻辑，可以实现自己的中间件（Module 10 会学）。
144 | 
145 | ## 最佳实践
146 | 
147 | ```python
148 | # 1. 根据场景选择阈值
149 | agent = create_agent(
150 |     model=model,
151 |     tools=[],
152 |     middleware=[
153 |         SummarizationMiddleware(
154 |             model="groq:llama-3.3-70b-versatile",
155 |             max_tokens_before_summary=500  # 短对话
156 |             # max_tokens_before_summary=2000  # 长对话
157 |         )
158 |     ],
159 |     checkpointer=InMemorySaver()
160 | )
161 | 
162 | # 2. 使用便宜模型摘要（降低成本）
163 | SummarizationMiddleware(
164 |     model="groq:llama-3.3-70b-versatile",  # 摘要模型可与主模型不同：实际使用时换成更小、更便宜的模型
165 |     max_tokens_before_summary=1000
166 | )
167 | 
168 | # 3. 监控摘要触发频率
169 | # 如果频繁触发 → 提高阈值
170 | # 如果从不触发 → 降低阈值
171 | ```
172 | 
173 | ## 核心要点
174 | 
175 | 1. **默认问题**：对话历史无限增长
176 | 2. **推荐方案**：`SummarizationMiddleware`
177 | 3. **配置位置**：`middleware=[]` 参数
178 | 4. **触发条件**：`max_tokens_before_summary`
179 | 5. **自动化**：无需手动管理
180 | 
181 | ## 下一步
182 | 
183 | **09_checkpointing** - 学习如何持久化对话状态（SQLite）
184 | 


--------------------------------------------------------------------------------
/phase2_practical/12_validation_retry/test_fix.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 简单测试：验证修复后的代码（不需要 API 调用）
  3 | """
  4 | 
  5 | from pydantic import BaseModel, Field, field_validator, ValidationError
  6 | 
  7 | print("=" * 70)
  8 | print("测试：验证修复后的 Pydantic 模型")
  9 | print("=" * 70)
 10 | 
 11 | 
 12 | # ============================================================================
 13 | # 测试 Product 模型
 14 | # ============================================================================
 15 | print("\n--- 测试 1: Product 模型 ---")
 16 | 
 17 | class Product(BaseModel):
 18 |     """Product record with strict validation."""
 19 |     name: str = Field(description="产品名称（字符串类型）", min_length=2)  # at least 2 characters
 20 |     price: float = Field(description="价格，数字类型（必须 > 0）", gt=0)  # strictly greater than 0
 21 |     stock: int = Field(description="库存，整数类型（必须 >= 0）", ge=0)  # zero or more
 22 | 
 23 |     @field_validator('name')
 24 |     @classmethod
 25 |     def validate_name(cls, v: str) -> str:
 26 |         if v.lower() == "unknown":  # reject the placeholder name, case-insensitively
 27 |             raise ValueError('产品名称不能是 unknown')
 28 |         return v
 29 | 
 30 | print("\n有效产品:")
 31 | try:
 32 |     product = Product(name="iPhone 15", price=5999.0, stock=50)
 33 |     print(f"[OK] {product.name}, {product.price}, {product.stock}")
 34 | except ValidationError as e:
 35 |     print(f"[FAIL] {e}")
 36 | 
 37 | print("\n无效产品（price 为负数）:")
 38 | try:
 39 |     product = Product(name="Product", price=-100.0, stock=50)
 40 |     print(f"[FAIL] {product}")
 41 | except ValidationError as e:
 42 |     print(f"[OK] 验证失败（符合预期）: {e.errors()[0]['msg']}")
 43 | 
 44 | 
 45 | # ============================================================================
 46 | # 测试 ExtractedData 模型
 47 | # ============================================================================
 48 | print("\n--- 测试 2: ExtractedData 模型 ---")
 49 | 
 50 | class ExtractedData(BaseModel):
 51 |     """Extracted data with full validation."""
 52 |     name: str = Field(description="名称（字符串类型）", min_length=1)  # at least 1 character
 53 |     value: float = Field(description="数值（数字类型，必须 > 0）", gt=0)  # strictly greater than 0
 54 | 
 55 |     @field_validator('name')
 56 |     @classmethod
 57 |     def validate_name(cls, v: str) -> str:
 58 |         if v.strip() == "":  # whitespace-only names count as empty
 59 |             raise ValueError('名称不能为空')
 60 |         return v.strip()  # the returned name has surrounding whitespace removed
 61 | 
 62 | print("\n有效数据:")
 63 | try:
 64 |     data = ExtractedData(name="产品 A", value=999.99)
 65 |     print(f"[OK] {data.name}, {data.value}")
 66 | except ValidationError as e:
 67 |     print(f"[FAIL] {e}")
 68 | 
 69 | print("\n无效数据（value 为负数）:")
 70 | try:
 71 |     data = ExtractedData(name="产品 B", value=-50.0)
 72 |     print(f"[FAIL] {data}")
 73 | except ValidationError as e:
 74 |     print(f"[OK] 验证失败（符合预期）: {e.errors()[0]['msg']}")
 75 | 
 76 | print("\n无效数据（value 为 0）:")
 77 | try:
 78 |     data = ExtractedData(name="产品 C", value=0.0)
 79 |     print(f"[FAIL] {data}")
 80 | except ValidationError as e:
 81 |     print(f"[OK] 验证失败（符合预期）: {e.errors()[0]['msg']}")
 82 | 
 83 | 
 84 | # ============================================================================
 85 | # 测试类型验证
 86 | # ============================================================================
 87 | print("\n--- 测试 3: 类型验证 ---")
 88 | 
 89 | print("\n正确类型（数字）:")
 90 | try:
 91 |     data = ExtractedData(name="Test", value=123.45)
 92 |     print(f"[OK] value={data.value} (类型: {type(data.value).__name__})")
 93 | except ValidationError as e:
 94 |     print(f"[FAIL] {e}")
 95 | 
 96 | print("\n错误类型（字符串） - Pydantic 会自动转换:")
 97 | try:
 98 |     # Pydantic v2 会尝试转换字符串到数字
 99 |     data = ExtractedData(name="Test", value="123.45")
100 |     print(f"[OK] Pydantic 自动转换: value={data.value} (类型: {type(data.value).__name__})")
101 | except ValidationError as e:
102 |     print(f"[FAIL] {e}")
103 | 
104 | 
105 | # ============================================================================
106 | # 总结
107 | # ============================================================================
108 | print("\n" + "=" * 70)
109 | print("修复验证测试通过！")
110 | print("=" * 70)
111 | 
112 | print("\n修复内容:")
113 | print("  1. 在 Field 描述中强调类型（字符串/数字/整数）")
114 | print("  2. 改用正常价格的测试用例（避免负数验证错误）")
115 | print("  3. 添加了 Exception 捕获（处理 API 端验证失败）")
116 | print("  4. 在提示词中强调 price/value 必须是数字类型")
117 | 
118 | print("\n注意:")
119 | print("  - Pydantic 会尝试自动转换类型（字符串 → 数字）")
120 | print("  - 但 LLM API 的 tool schema 验证可能更严格")
121 | print("  - 在提示词中明确类型要求可以提高成功率")
122 | 


--------------------------------------------------------------------------------
/phase2_practical/13_rag_basics/test.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 简单测试：验证 RAG 基础组件（不需要 Pinecone API）
  3 | """
  4 | 
  5 | import os
  6 | from pathlib import Path
  7 | 
  8 | # 获取脚本所在目录
  9 | SCRIPT_DIR = Path(__file__).parent
 10 | DATA_DIR = SCRIPT_DIR / "data"
 11 | 
 12 | # 确保 data 目录存在
 13 | DATA_DIR.mkdir(exist_ok=True)
 14 | 
 15 | print("=" * 70)
 16 | print("测试：RAG 基础组件")
 17 | print("=" * 70)
 18 | 
 19 | 
 20 | # ============================================================================
 21 | # 测试 1：文档加载
 22 | # ============================================================================
 23 | print("\n--- 测试 1: 文档加载 ---")
 24 | 
 25 | from langchain_community.document_loaders import TextLoader
 26 | import os
 27 | 
 28 | # 创建测试文档
 29 | test_content = """LangChain 是一个强大的 LLM 应用框架。
 30 | 
 31 | 核心组件包括：
 32 | 1. Models - 模型接口
 33 | 2. Prompts - 提示词
 34 | 3. Chains - 链
 35 | 4. Agents - 代理
 36 | 
 37 | RAG 是 LangChain 的核心应用场景。"""
 38 | 
 39 | test_file = DATA_DIR / "test.txt"
 40 | 
 41 | with open(test_file, "w", encoding="utf-8") as f:
 42 |     f.write(test_content)
 43 | 
 44 | # 加载文档
 45 | loader = TextLoader(test_file, encoding="utf-8")
 46 | documents = loader.load()
 47 | 
 48 | print(f"\n[OK] 文档加载成功")
 49 | print(f"  文档数: {len(documents)}")
 50 | print(f"  内容长度: {len(documents[0].page_content)} 字符")
 51 | print(f"  元数据: {documents[0].metadata}")
 52 | 
 53 | 
 54 | # ============================================================================
 55 | # 测试 2：文本分割
 56 | # ============================================================================
 57 | print("\n--- 测试 2: 文本分割 ---")
 58 | 
 59 | from langchain_text_splitters import RecursiveCharacterTextSplitter
 60 | 
 61 | splitter = RecursiveCharacterTextSplitter(
 62 |     chunk_size=100,
 63 |     chunk_overlap=20,
 64 |     separators=["\n\n", "\n", "。", " ", ""]
 65 | )
 66 | 
 67 | chunks = splitter.split_documents(documents)
 68 | 
 69 | print(f"\n[OK] 文本分割成功")
 70 | print(f"  原文档数: {len(documents)}")
 71 | print(f"  分割后: {len(chunks)} 块")
 72 | print(f"\n  前 2 块:")
 73 | for i, chunk in enumerate(chunks[:2], 1):
 74 |     print(f"    块 {i}: {chunk.page_content[:50]}...")
 75 | 
 76 | 
 77 | # ============================================================================
 78 | # 测试 3：向量嵌入（首次运行会下载模型，需要等待）
 79 | # ============================================================================
 80 | print("\n--- 测试 3: 向量嵌入 ---")
 81 | print("提示：首次运行会下载 HuggingFace 模型，请稍候...")
 82 | 
 83 | try:
 84 |     from langchain_huggingface import HuggingFaceEmbeddings
 85 | 
 86 |     embeddings = HuggingFaceEmbeddings(
 87 |         model_name="sentence-transformers/all-MiniLM-L6-v2"
 88 |     )
 89 | 
 90 |     # 嵌入测试文本
 91 |     text = "LangChain 是什么"
 92 |     vector = embeddings.embed_query(text)
 93 | 
 94 |     print(f"\n[OK] 向量嵌入成功")
 95 |     print(f"  文本: {text}")
 96 |     print(f"  向量维度: {len(vector)}")
 97 |     print(f"  向量前 5 个值: {[round(v, 4) for v in vector[:5]]}")
 98 | 
 99 |     # 计算相似度
100 |     import numpy as np
101 | 
102 |     texts = [
103 |         "LangChain 是一个框架",
104 |         "Python 是编程语言",
105 |         "LangChain 用于 LLM"
106 |     ]
107 | 
108 |     vectors = embeddings.embed_documents(texts)
109 | 
110 |     def cosine_sim(v1, v2):
111 |         return np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
112 | 
113 |     print(f"\n  相似度测试:")
114 |     print(f"    '{texts[0]}' vs '{texts[1]}': {cosine_sim(vectors[0], vectors[1]):.4f}")
115 |     print(f"    '{texts[0]}' vs '{texts[2]}': {cosine_sim(vectors[0], vectors[2]):.4f}")
116 |     print(f"    → 相关文本相似度更高 ✓")
117 | 
118 | except Exception as e:
119 |     print(f"\n[SKIP] 向量嵌入跳过（可能网络问题）: {e}")
120 | 
121 | 
122 | # ============================================================================
123 | # 总结
124 | # ============================================================================
125 | print("\n" + "=" * 70)
126 | print("RAG 基础组件测试完成！")
127 | print("=" * 70)
128 | 
129 | print("\n已验证:")
130 | print("  [OK] 文档加载 (TextLoader)")
131 | print("  [OK] 文本分割 (RecursiveCharacterTextSplitter)")
132 | print("  [OK] 向量嵌入 (HuggingFaceEmbeddings)")
133 | 
134 | print("\nPinecone 向量存储:")
135 | print("  需要设置 PINECONE_API_KEY 才能测试")
136 | print("  免费注册: https://www.pinecone.io/")
137 | 
138 | print("\n运行完整示例:")
139 | print("  python main.py  # 查看所有示例")
140 | 


--------------------------------------------------------------------------------
/phase1_fundamentals/05_simple_agent/README.md:
--------------------------------------------------------------------------------
  1 | # 05 - Simple Agent (简单 Agent)
  2 | 
  3 | ## 核心概念
  4 | 
  5 | **Agent = 模型 + 工具 + 自动决策**
  6 | 
  7 | Agent 的关键能力：
  8 | - 理解用户问题
  9 | - 自动判断是否需要工具
 10 | - 选择合适的工具
 11 | - 基于工具结果生成回答
 12 | 
 13 | ## create_agent 基本用法
 14 | 
 15 | ```python
 16 | from langchain.agents import create_agent
 17 | from langchain.chat_models import init_chat_model
 18 | 
 19 | agent = create_agent(
 20 |     model=init_chat_model("groq:llama-3.3-70b-versatile"),
 21 |     tools=[tool1, tool2],
 22 |     system_prompt="Agent 的行为指令"  # 可选
 23 | )
 24 | 
 25 | response = agent.invoke({
 26 |     "messages": [{"role": "user", "content": "问题"}]
 27 | })
 28 | ```
 29 | 
 30 | ### 参数说明
 31 | 
 32 | | 参数 | 说明 | 必需 |
 33 | |-----|------|------|
 34 | | `model` | 语言模型 | ✅ |
 35 | | `tools` | 工具列表 | ✅ |
 36 | | `system_prompt` | 系统提示，定义 Agent 行为 | ❌ |
 37 | 
 38 | ## Agent 执行循环
 39 | 
 40 | ```
 41 | 用户问题
 42 |    ↓
 43 | [Agent 分析]
 44 |    ↓
 45 | 需要工具？ ─── 否 ──→ 直接回答
 46 |    ↓ 是
 47 | 调用工具
 48 |    ↓
 49 | 获取结果
 50 |    ↓
 51 | 生成回答
 52 | ```
 53 | 
 54 | ### 完整流程示例
 55 | 
 56 | ```python
 57 | # 问题：北京天气如何？
 58 | 
 59 | # 步骤1：用户提问
 60 | messages = [HumanMessage("北京天气如何？")]
 61 | 
 62 | # 步骤2：AI 决定调用工具
 63 | # AIMessage(tool_calls=[{
 64 | #     "name": "get_weather",
 65 | #     "args": {"city": "北京"}
 66 | # }])
 67 | 
 68 | # 步骤3：执行工具
 69 | # ToolMessage("晴天，温度 15°C")
 70 | 
 71 | # 步骤4：AI 生成最终答案
 72 | # AIMessage("北京今天是晴天，温度 15°C")
 73 | ```
 74 | 
 75 | ## 多工具选择
 76 | 
 77 | Agent 如何选择工具？
 78 | 
 79 | **依据：工具的 docstring**
 80 | 
 81 | ```python
 82 | @tool
 83 | def get_weather(city: str) -> str:
 84 |     """获取指定城市的天气信息"""  # ← AI 读这个！
 85 |     ...
 86 | 
 87 | @tool
 88 | def calculator(operation: str, a: float, b: float) -> str:
 89 |     """执行基本的数学计算"""  # ← AI 也读这个！
 90 |     ...
 91 | ```
 92 | 
 93 | AI 的选择过程：
 94 | 1. 理解问题内容
 95 | 2. 对照每个工具的描述
 96 | 3. 自动选择最匹配的工具
 97 | 
 98 | ## 多轮对话
 99 | 
100 | **关键：传入历史消息**
101 | 
102 | ```python
103 | # 第一轮
104 | response1 = agent.invoke({
105 |     "messages": [{"role": "user", "content": "10 + 5"}]
106 | })
107 | 
108 | # 第二轮（带历史）
109 | response2 = agent.invoke({
110 |     "messages": response1['messages'] + [
111 |         {"role": "user", "content": "再乘以 3"}
112 |     ]
113 | })
114 | ```
115 | 
116 | ## 常见问题
117 | 
118 | ### 1. Agent 不调用工具？
119 | 
120 | **原因：**
121 | - 工具的 docstring 不清晰
122 | - 问题表述不明确
123 | - 模型认为不需要工具
124 | 
125 | **解决：**
126 | ```python
127 | # ❌ 不好
128 | @tool
129 | def tool1(x: str) -> str:
130 |     """做一些事情"""  # 太模糊
131 | 
132 | # ✅ 好
133 | @tool
134 | def get_weather(city: str) -> str:
135 |     """
136 |     获取指定城市的实时天气信息
137 | 
138 |     参数:
139 |         city: 城市名称，如"北京"、"上海"
140 |     """
141 | ```
142 | 
143 | ### 2. Agent 选错工具？
144 | 
145 | **原因：**
146 | - 多个工具的功能描述相似
147 | - 工具太多导致混淆
148 | 
149 | **解决：**
150 | - 只给必要的工具
151 | - 工具描述要有明确区分
152 | - 在 system_prompt 中说明工具使用场景
153 | 
154 | ### 3. Agent 返回什么？
155 | 
156 | ```python
157 | response = agent.invoke({"messages": [...]})
158 | 
159 | # response 是字典
160 | {
161 |     "messages": [
162 |         HumanMessage(...),      # 用户问题
163 |         AIMessage(...),          # AI 工具调用
164 |         ToolMessage(...),        # 工具结果
165 |         AIMessage(...)           # 最终回答 ← 通常取这个
166 |     ]
167 | }
168 | 
169 | # 获取最终回答
170 | final_answer = response['messages'][-1].content
171 | ```
172 | 
173 | ## 最佳实践
174 | 
175 | ### 1. 工具配置
176 | ```python
177 | # ✅ 好：只给需要的工具
178 | agent = create_agent(
179 |     model=model,
180 |     tools=[get_weather, calculator]  # 2-5 个工具最佳
181 | )
182 | 
183 | # ❌ 不好：工具太多
184 | agent = create_agent(
185 |     model=model,
186 |     tools=[tool1, tool2, ..., tool20]  # 会混淆
187 | )
188 | ```
189 | 
190 | ### 2. System Prompt
191 | ```python
192 | agent = create_agent(
193 |     model=model,
194 |     tools=[get_weather],
195 |     system_prompt="""你是天气助手。
196 | 
197 | 工作流程：
198 | 1. 理解用户的城市查询
199 | 2. 使用 get_weather 工具获取数据
200 | 3. 简洁清晰地回答
201 | 
202 | 输出格式：
203 | - 天气状况
204 | - 温度
205 | - 注意事项（如有）
206 | """
207 | )
208 | ```
209 | 
210 | ### 3. 错误处理
211 | ```python
212 | try:
213 |     response = agent.invoke({
214 |         "messages": [{"role": "user", "content": question}]
215 |     })
216 |     answer = response['messages'][-1].content
217 | except Exception as e:
218 |     print(f"Agent 错误：{e}")
219 | ```
220 | 
221 | ## 运行示例
222 | 
223 | ```bash
224 | # 确保已安装依赖
225 | pip install langchain langchain-groq python-dotenv
226 | 
227 | # 设置 API Key（.env 文件）
228 | GROQ_API_KEY=your_key_here
229 | 
230 | # 运行
231 | python main.py
232 | ```
233 | 
234 | ## 下一步
235 | 
236 | **06_agent_loop** - 深入理解 Agent 执行循环的底层机制
237 | 


--------------------------------------------------------------------------------
/phase1_fundamentals/03_messages/README.md:
--------------------------------------------------------------------------------
  1 | # 03 - Messages: 消息类型与对话管理
  2 | 
  3 | ## 核心要点（只讲难点）
  4 | 
  5 | ### 1. 三种消息类型
  6 | 
  7 | | 角色 | 字典格式 | 对象格式 | 用途 |
  8 | |------|---------|---------|------|
  9 | | System | `{"role": "system", ...}` | `SystemMessage(...)` | 系统提示 |
 10 | | User | `{"role": "user", ...}` | `HumanMessage(...)` | 用户输入 |
 11 | | Assistant | `{"role": "assistant", ...}` | `AIMessage(...)` | AI 回复 |
 12 | 
 13 | **推荐：直接用字典，简洁！**
 14 | 
 15 | ```python
 16 | # ✅ 推荐
 17 | messages = [
 18 |     {"role": "system", "content": "你是助手"},
 19 |     {"role": "user", "content": "你好"}
 20 | ]
 21 | 
 22 | # ❌ 不推荐（太啰嗦）
 23 | from langchain_core.messages import SystemMessage, HumanMessage
 24 | messages = [
 25 |     SystemMessage(content="你是助手"),
 26 |     HumanMessage(content="你好")
 27 | ]
 28 | ```
 29 | 
 30 | ---
 31 | 
 32 | ### 2. 对话历史管理（核心难点）
 33 | 
 34 | #### 🔴 关键规则
 35 | 
 36 | > **每次调用必须传递完整的对话历史！**
 37 | 
 38 | #### ❌ 错误做法
 39 | 
 40 | ```python
 41 | # 第一次
 42 | r1 = model.invoke("我叫张三")
 43 | 
 44 | # 第二次（没传历史）
 45 | r2 = model.invoke("我叫什么？")  # AI 不记得！
 46 | ```
 47 | 
 48 | #### ✅ 正确做法
 49 | 
 50 | ```python
 51 | conversation = []
 52 | 
 53 | # 第一次
 54 | conversation.append({"role": "user", "content": "我叫张三"})
 55 | r1 = model.invoke(conversation)
 56 | 
 57 | # 关键：保存 AI 回复
 58 | conversation.append({"role": "assistant", "content": r1.content})
 59 | 
 60 | # 第二次（传递完整历史）
 61 | conversation.append({"role": "user", "content": "我叫什么？"})
 62 | r2 = model.invoke(conversation)  # AI 记得！
 63 | ```
 64 | 
 65 | #### 💡 对话流程
 66 | 
 67 | ```
 68 | 第 1 轮：
 69 |   [system, user] → AI回复 → 保存回复
 70 | 
 71 | 第 2 轮：
 72 |   [system, user, assistant, user] → AI回复 → 保存回复
 73 | 
 74 | 第 3 轮：
 75 |   [system, user, assistant, user, assistant, user] → AI回复
 76 | 
 77 | 每次都传递所有历史！
 78 | ```
 79 | 
 80 | ---
 81 | 
 82 | ### 3. 对话历史优化（避免太长）
 83 | 
 84 | #### 🔴 问题
 85 | 
 86 | 对话历史会越来越长，消耗大量 tokens 和成本。
 87 | 
 88 | #### ✅ 解决方案
 89 | 
 90 | 只保留最近 N 轮对话：
 91 | 
 92 | ```python
 93 | def keep_recent_messages(messages, max_pairs=3):
 94 |     """
 95 |     保留最近的 N 轮对话
 96 | 
 97 |     max_pairs: 保留的对话轮数（每轮 = user + assistant）
 98 |     """
 99 |     # 分离 system 和对话
100 |     system_msgs = [m for m in messages if m.get("role") == "system"]
101 |     conversation = [m for m in messages if m.get("role") != "system"]
102 | 
103 |     # 只保留最近的
104 |     recent = conversation[-(max_pairs * 2):]
105 | 
106 |     # 返回：system + 最近对话
107 |     return system_msgs + recent
108 | 
109 | # 使用
110 | optimized = keep_recent_messages(conversation, max_pairs=5)
111 | response = model.invoke(optimized)
112 | ```
113 | 
114 | **原理：**
115 | - 总是保留 system 消息（定义角色）
116 | - 只保留最近 5 轮对话（10 条消息）
117 | - 丢弃更早的历史
118 | 
119 | ---
120 | 
121 | ## 完整示例
122 | 
123 | ### 正确的对话管理
124 | 
125 | ```python
126 | # 初始化
127 | conversation = [
128 |     {"role": "system", "content": "你是 Python 导师"}
129 | ]
130 | 
131 | # 第 1 轮
132 | conversation.append({"role": "user", "content": "什么是列表？"})
133 | r1 = model.invoke(conversation)
134 | conversation.append({"role": "assistant", "content": r1.content})
135 | 
136 | # 第 2 轮
137 | conversation.append({"role": "user", "content": "它和元组有什么区别？"})
138 | r2 = model.invoke(conversation)
139 | conversation.append({"role": "assistant", "content": r2.content})
140 | 
141 | # 第 3 轮（测试记忆）
142 | conversation.append({"role": "user", "content": "我第一个问题问的是什么？"})
143 | r3 = model.invoke(conversation)
144 | # AI 会回答："你问的是什么是列表"
145 | 
146 | # 优化：只保留最近 3 轮
147 | optimized = keep_recent_messages(conversation, max_pairs=3)
148 | ```
149 | 
150 | ---
151 | 
152 | ## 运行示例
153 | 
154 | ```bash
155 | cd phase1_fundamentals/03_messages
156 | python main.py
157 | ```
158 | 
159 | ---
160 | 
161 | ## 常见错误
162 | 
163 | ### 错误 1：忘记保存 AI 回复
164 | 
165 | ```python
166 | # ❌ 错误
167 | conversation.append({"role": "user", "content": "问题1"})
168 | r1 = model.invoke(conversation)
169 | # 忘记保存 r1.content！
170 | 
171 | conversation.append({"role": "user", "content": "问题2"})
172 | r2 = model.invoke(conversation)  # AI 不知道之前的回答
173 | ```
174 | 
175 | ### 错误 2：每次重新创建列表
176 | 
177 | ```python
178 | # ❌ 错误
179 | conversation = [{"role": "user", "content": "问题1"}]
180 | r1 = model.invoke(conversation)
181 | 
182 | conversation = [{"role": "user", "content": "问题2"}]  # 重新创建！
183 | r2 = model.invoke(conversation)  # 丢失了历史
184 | ```
185 | 
186 | ---
187 | 
188 | ## 核心总结
189 | 
190 | | 要点 | 说明 |
191 | |------|------|
192 | | **格式** | 用字典，不用消息对象 |
193 | | **历史** | 每次必须传递完整历史 |
194 | | **保存** | 必须保存 AI 的回复 |
195 | | **优化** | 只保留最近 N 轮 |
196 | | **System** | 总是保留 system 消息 |
197 | 
198 | ---
199 | 
200 | ## 下一步
201 | 
202 | - **04_custom_tools** - 创建自定义工具
203 | - **05_simple_agent** - 构建第一个 Agent
204 | 


--------------------------------------------------------------------------------
/phase1_fundamentals/02_prompt_templates/模块完成总结.md:
--------------------------------------------------------------------------------
  1 | # ✅ 模块 02: Prompt Templates 创建完成！
  2 | 
  3 | ## 📁 文件结构
  4 | 
  5 | ```
  6 | phase1_fundamentals/02_prompt_templates/
  7 | ├── main.py                      # ✅ 主教程代码（9个示例）
  8 | ├── README.md                    # ✅ 详细学习文档
  9 | └── examples/                    # ✅ 示例模板库
 10 |     ├── template_library.py      # ✅ 可复用模板库（15个模板）
 11 |     └── README.md                # ✅ 使用说明
 12 | ```
 13 | 
 14 | ---
 15 | 
 16 | ## 📚 学习内容总结
 17 | 
 18 | ### 核心概念
 19 | 
 20 | 1. **为什么需要提示词模板**
 21 |    - ❌ 字符串拼接的问题
 22 |    - ✅ 模板的优势（可复用、可维护、类型安全）
 23 | 
 24 | 2. **PromptTemplate**
 25 |    - 简单文本模板
 26 |    - 3种创建方法
 27 |    - format() 和 invoke() 的区别
 28 | 
 29 | 3. **ChatPromptTemplate**
 30 |    - 聊天消息模板
 31 |    - 支持 system/user/assistant 角色
 32 |    - 适合对话场景
 33 | 
 34 | 4. **高级特性**
 35 |    - 部分变量（partial）
 36 |    - 模板组合
 37 |    - 可复用模板库
 38 | 
 39 | 5. **LCEL 链式调用**
 40 |    - 使用 `|` 运算符
 41 |    - 模板 + 模型的组合
 42 | 
 43 | ---
 44 | 
 45 | ## 🎯 9个示例详解
 46 | 
 47 | | 示例 | 名称 | 学习重点 |
 48 | |------|------|----------|
 49 | | 1 | 为什么需要模板 | 对比字符串拼接 vs 模板 |
 50 | | 2 | PromptTemplate 基础 | 3种创建方法 |
 51 | | 3 | ChatPromptTemplate | 聊天消息模板 |
 52 | | 4 | 多轮对话模板 | 包含对话历史 |
 53 | | 5 | MessagePromptTemplate | 高级用法 |
 54 | | 6 | 部分变量 | 预填充固定变量 |
 55 | | 7 | 模板组合 | 组合多个模板片段 |
 56 | | 8 | 可复用模板库 | 实战最佳实践 |
 57 | | 9 | LCEL 链式调用 | 预览后续内容 |
 58 | 
 59 | ---
 60 | 
 61 | ## 🔧 模板库包含的15个模板
 62 | 
 63 | ### 翻译类
 64 | - ✅ TRANSLATOR - 专业翻译
 65 | 
 66 | ### 编程类
 67 | - ✅ CODE_GENERATOR - 代码生成
 68 | - ✅ CODE_REVIEWER - 代码审查
 69 | - ✅ CODE_EXPLAINER - 代码解释
 70 | - ✅ DEBUG_HELPER - 调试助手
 71 | 
 72 | ### 内容创作类
 73 | - ✅ SUMMARIZER - 内容摘要
 74 | - ✅ ARTICLE_WRITER - 文章写作
 75 | - ✅ EMAIL_WRITER - 邮件撰写
 76 | 
 77 | ### 教育类
 78 | - ✅ TUTOR - 教学辅导
 79 | - ✅ QUIZ_GENERATOR - 测验生成
 80 | 
 81 | ### 商务类
 82 | - ✅ PRODUCT_DESCRIPTION - 产品描述
 83 | - ✅ MARKET_ANALYSIS - 市场分析
 84 | 
 85 | ### 客户服务类
 86 | - ✅ CUSTOMER_SUPPORT - 客户服务
 87 | - ✅ FAQ_RESPONDER - FAQ回答
 88 | 
 89 | ### 数据分析类
 90 | - ✅ DATA_ANALYZER - 数据分析
 91 | - ✅ REPORT_GENERATOR - 报告生成
 92 | 
 93 | ---
 94 | 
 95 | ## 🚀 快速开始
 96 | 
 97 | ### 运行主教程
 98 | 
 99 | ```bash
100 | cd phase1_fundamentals/02_prompt_templates
101 | python main.py
102 | ```
103 | 
104 | ### 运行模板库示例
105 | 
106 | ```bash
107 | python examples/template_library.py
108 | ```
109 | 
110 | ### 在项目中使用模板库
111 | 
112 | ```python
113 | from examples.template_library import TemplateLibrary
114 | 
115 | # 使用翻译模板
116 | messages = TemplateLibrary.TRANSLATOR.format_messages(
117 |     source_lang="英语",
118 |     target_lang="中文",
119 |     text="Hello World"
120 | )
121 | 
122 | response = model.invoke(messages)
123 | ```
124 | 
125 | ---
126 | 
127 | ## 💡 关键知识点
128 | 
129 | ### PromptTemplate vs ChatPromptTemplate
130 | 
131 | | 特性 | PromptTemplate | ChatPromptTemplate |
132 | |------|----------------|-------------------|
133 | | 输出格式 | 字符串 | 消息列表 |
134 | | 角色支持 | ❌ 无 | ✅ system/user/assistant |
135 | | 对话历史 | ❌ 不支持 | ✅ 支持 |
136 | | 适用场景 | 简单提示 | **聊天、对话（推荐）** |
137 | 
138 | ### 模板创建的三种方法
139 | 
140 | **方法 1：from_template（推荐）**
141 | ```python
142 | template = PromptTemplate.from_template("你好 {name}")
143 | ```
144 | 
145 | **方法 2：显式指定变量**
146 | ```python
147 | template = PromptTemplate(
148 |     input_variables=["name"],
149 |     template="你好 {name}"
150 | )
151 | ```
152 | 
153 | **方法 3：部分变量**
154 | ```python
155 | template = PromptTemplate.from_template("你是{role}，请{task}")
156 | partial = template.partial(role="助手")
157 | ```
158 | 
159 | ### LCEL 链式调用
160 | 
161 | ```python
162 | # 传统方式
163 | template = ChatPromptTemplate.from_messages([...])
164 | messages = template.format_messages(...)
165 | response = model.invoke(messages)
166 | 
167 | # LCEL 方式（更简洁）
168 | chain = template | model
169 | response = chain.invoke({"variable": "value"})
170 | ```
171 | 
172 | ---
173 | 
174 | ## 📖 最佳实践
175 | 
176 | 1. **优先使用 ChatPromptTemplate**
177 |    - 更灵活
178 |    - 支持多角色
179 |    - 便于维护对话历史
180 | 
181 | 2. **创建模板库**
182 |    - 将常用模板集中管理
183 |    - 提高代码复用性
184 |    - 便于团队协作
185 | 
186 | 3. **使用部分变量**
187 |    - 预填充固定不变的值
188 |    - 创建模板变体
189 |    - 减少重复代码
190 | 
191 | 4. **文档化模板**
192 |    - 说明变量含义
193 |    - 提供使用示例
194 |    - 便于维护
195 | 
196 | ---
197 | 
198 | ## 🎓 学习进度
199 | 
200 | - ✅ **01_hello_langchain** - LLM 基础调用
201 | - ✅ **02_prompt_templates** - 提示词模板（当前）
202 | - ⏭️ **03_messages** - 消息类型（下一步）
203 | - ⏭️ **04_custom_tools** - 自定义工具
204 | - ⏭️ **05_simple_agent** - 简单 Agent
205 | 
206 | ---
207 | 
208 | ## 📝 下一步
209 | 
210 | 继续学习：
211 | 
212 | ```bash
213 | # 查看下一个模块
214 | cd ../03_messages
215 | ```
216 | 
217 | 或者我可以帮你创建下一个模块！
218 | 
219 | ---
220 | 
221 | **创建时间：** 2025-11-05
222 | **模块状态：** ✅ 完成
223 | **测试状态：** ✅ 通过（导入测试成功）
224 | 


--------------------------------------------------------------------------------
/phase2_practical/12_validation_retry/test.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 简单测试：验证 Pydantic 验证功能（不需要网络）
  3 | """
  4 | 
  5 | from pydantic import BaseModel, Field, field_validator, ValidationError
  6 | 
  7 | print("=" * 70)
  8 | print("测试：Pydantic 验证和错误处理")
  9 | print("=" * 70)
 10 | 
 11 | 
 12 | # ============================================================================
 13 | # 测试 1：Field 约束
 14 | # ============================================================================
 15 | print("\n--- 测试 1: Field 约束 ---")
 16 | 
 17 | class User(BaseModel):  # sample model used to exercise Field constraints
 18 |     name: str = Field(min_length=2, max_length=20)  # 2 to 20 characters
 19 |     age: int = Field(ge=0, le=150)  # inclusive range 0..150
 20 |     email: str  # plain string; no format constraint is declared here
 21 | 
 22 | print("\n有效数据:")
 23 | try:
 24 |     user = User(name="张三", age=30, email="zhang@example.com")
 25 |     print(f"[OK] {user.name}, {user.age}, {user.email}")
 26 | except ValidationError as e:
 27 |     print(f"[FAIL] {e}")
 28 | 
 29 | print("\n无效数据（年龄超出范围）:")
 30 | try:
 31 |     user = User(name="李四", age=200, email="li@example.com")
 32 |     print(f"[OK] {user}")
 33 | except ValidationError as e:
 34 |     print(f"[OK] 验证失败（符合预期）: {e.errors()[0]['msg']}")
 35 | 
 36 | 
 37 | # ============================================================================
 38 | # 测试 2：自定义验证器
 39 | # ============================================================================
 40 | print("\n--- 测试 2: 自定义验证器 ---")
 41 | 
 42 | class Product(BaseModel):
 43 |     name: str = Field(min_length=2)
 44 |     price: float = Field(gt=0)
 45 | 
 46 |     @field_validator('name')
 47 |     @classmethod
 48 |     def validate_name(cls, v):
 49 |         if v.lower() == "unknown":
 50 |             raise ValueError('产品名称不能是 unknown')
 51 |         return v
 52 | 
 53 | print("\n有效产品:")
 54 | try:
 55 |     product = Product(name="iPhone", price=999.0)
 56 |     print(f"[OK] {product.name}, {product.price}")
 57 | except ValidationError as e:
 58 |     print(f"[FAIL] {e}")
 59 | 
 60 | print("\n无效产品（名称是 unknown）:")
 61 | try:
 62 |     product = Product(name="unknown", price=100.0)
 63 |     print(f"[FAIL] {product}")
 64 | except ValidationError as e:
 65 |     print(f"[OK] 验证失败（符合预期）: {e.errors()[0]['msg']}")
 66 | 
 67 | print("\n无效产品（价格 <= 0）:")
 68 | try:
 69 |     product = Product(name="Product", price=-100.0)
 70 |     print(f"[FAIL] {product}")
 71 | except ValidationError as e:
 72 |     print(f"[OK] 验证失败（符合预期）: {e.errors()[0]['msg']}")
 73 | 
 74 | 
 75 | # ============================================================================
 76 | # 测试 3：ValidationError 处理
 77 | # ============================================================================
 78 | print("\n--- 测试 3: ValidationError 处理 ---")
 79 | 
 80 | class Data(BaseModel):
 81 |     value: int = Field(ge=0, le=100)
 82 | 
 83 | test_values = [50, -10, 150]
 84 | 
 85 | for val in test_values:
 86 |     try:
 87 |         data = Data(value=val)
 88 |         print(f"value={val:3d} [OK] 验证通过")
 89 |     except ValidationError as e:
 90 |         error_msg = e.errors()[0]['msg']
 91 |         print(f"value={val:3d} [FAIL] 验证失败: {error_msg}")
 92 | 
 93 | 
 94 | # ============================================================================
 95 | # 测试 4：重试循环模拟
 96 | # ============================================================================
 97 | print("\n--- 测试 4: 重试循环模拟 ---")
 98 | 
 99 | def simulate_extraction_with_retry(attempts: int):
100 |     """模拟验证失败的重试逻辑"""
101 |     max_retries = 3
102 | 
103 |     for attempt in range(1, max_retries + 1):
104 |         print(f"  尝试 {attempt}/{max_retries}...", end=" ")
105 | 
106 |         # 模拟：前 attempts-1 次失败，最后一次成功
107 |         if attempt < attempts:
108 |             print("验证失败")
109 |         else:
110 |             print("验证通过 [OK]")
111 |             return True
112 | 
113 |     print("  已达到最大重试次数 [FAIL]")
114 |     return False
115 | 
116 | print("\n场景 1: 第 2 次尝试成功")
117 | simulate_extraction_with_retry(2)
118 | 
119 | print("\n场景 2: 第 3 次尝试成功")
120 | simulate_extraction_with_retry(3)
121 | 
122 | print("\n场景 3: 所有尝试都失败")
123 | simulate_extraction_with_retry(4)
124 | 
125 | 
126 | # ============================================================================
127 | # 总结
128 | # ============================================================================
129 | print("\n" + "=" * 70)
130 | print("Pydantic 验证测试通过！")
131 | print("=" * 70)
132 | 
133 | print("\n关键点:")
134 | print("  1. Field 约束（ge, le, min_length, max_length）正常工作")
135 | print("  2. @field_validator 自定义验证正常工作")
136 | print("  3. ValidationError 可以正确捕获和处理")
137 | print("  4. 重试循环逻辑正确")
138 | 
139 | print("\n注意:")
140 | print("  要测试 with_retry() 和 with_fallbacks() 需要:")
141 | print("  1. 确保 GROQ_API_KEY 正确")
142 | print("  2. 网络连接正常")
143 | print("  3. 运行 main.py 查看完整示例")
144 | 


--------------------------------------------------------------------------------
/phase2_practical/13_rag_basics/demo.py:
--------------------------------------------------------------------------------
  1 | """
  2 | LangChain 1.0 - RAG Basics 演示（非交互式）
  3 | ===========================================
  4 | 
  5 | 快速演示所有 RAG 组件，无需按 Enter 确认
  6 | """
  7 | 
  8 | import os
  9 | from pathlib import Path
 10 | from dotenv import load_dotenv
 11 | from langchain.chat_models import init_chat_model
 12 | from langchain_community.document_loaders import TextLoader
 13 | from langchain_text_splitters import RecursiveCharacterTextSplitter
 14 | from langchain_huggingface import HuggingFaceEmbeddings
 15 | from langchain_pinecone import PineconeVectorStore
 16 | from langchain_core.tools import tool
 17 | from pinecone import Pinecone, ServerlessSpec
 18 | import time
 19 | 
 20 | # 获取脚本所在目录
 21 | SCRIPT_DIR = Path(__file__).parent
 22 | DATA_DIR = SCRIPT_DIR / "data"
 23 | 
 24 | # 确保 data 目录存在
 25 | DATA_DIR.mkdir(exist_ok=True)
 26 | 
 27 | load_dotenv()
 28 | GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 29 | PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
 30 | 
 31 | if not GROQ_API_KEY or GROQ_API_KEY == "your_groq_api_key_here_replace_this":
 32 |     raise ValueError("请先设置 GROQ_API_KEY")
 33 | 
 34 | if not PINECONE_API_KEY or PINECONE_API_KEY == "your_pinecone_api_key_here":
 35 |     print("\n[警告] 未设置 PINECONE_API_KEY")
 36 |     print("Pinecone 相关示例将被跳过\n")
 37 |     PINECONE_API_KEY = None
 38 | 
 39 | model = init_chat_model("groq:llama-3.3-70b-versatile", api_key=GROQ_API_KEY)
 40 | 
 41 | 
 42 | def main():
 43 |     print("\n" + "=" * 70)
 44 |     print(" LangChain 1.0 - RAG Basics 快速演示")
 45 |     print("=" * 70)
 46 | 
 47 |     # 示例 1: 文档加载
 48 |     print("\n[1/6] 文档加载...")
 49 |     sample_text = """LangChain 是一个用于构建 LLM 应用的框架。
 50 | 
 51 | 它提供了以下核心组件：
 52 | 1. Models - 语言模型接口
 53 | 2. Prompts - 提示词模板
 54 | 3. Chains - 链式调用
 55 | 4. Agents - 智能代理
 56 | 
 57 | RAG (Retrieval-Augmented Generation) 是 LangChain 的核心应用场景之一。"""
 58 | 
 59 |     doc_path = DATA_DIR / "langchain_intro.txt"
 60 |     with open(doc_path, "w", encoding="utf-8") as f:
 61 |         f.write(sample_text)
 62 | 
 63 |     loader = TextLoader(doc_path, encoding="utf-8")
 64 |     documents = loader.load()
 65 |     print(f"  [OK] 加载了 {len(documents)} 个文档")
 66 | 
 67 |     # 示例 2: 文本分割
 68 |     print("\n[2/6] 文本分割...")
 69 |     splitter = RecursiveCharacterTextSplitter(
 70 |         chunk_size=200,
 71 |         chunk_overlap=50,
 72 |         separators=["\n\n", "\n", "。", "！", "？", " ", ""]
 73 |     )
 74 |     chunks = splitter.split_documents(documents)
 75 |     print(f"  [OK] 分割为 {len(chunks)} 个块")
 76 | 
 77 |     # 示例 3: 向量嵌入
 78 |     print("\n[3/6] 向量嵌入 (首次运行会下载模型)...")
 79 |     embeddings = HuggingFaceEmbeddings(
 80 |         model_name="sentence-transformers/all-MiniLM-L6-v2"
 81 |     )
 82 |     vector = embeddings.embed_query("LangChain 是什么")
 83 |     print(f"  [OK] 向量维度: {len(vector)}")
 84 | 
 85 |     # 示例 4-6: Pinecone 相关
 86 |     if PINECONE_API_KEY:
 87 |         print("\n[4/6] Pinecone 设置...")
 88 |         try:
 89 |             pc = Pinecone(api_key=PINECONE_API_KEY)
 90 |             index_name = "langchain-rag-demo"
 91 |             dimension = 384
 92 | 
 93 |             existing_indexes = [idx.name for idx in pc.list_indexes()]
 94 |             if index_name in existing_indexes:
 95 |                 print(f"  [OK] 索引已存在")
 96 |                 index = pc.Index(index_name)
 97 |             else:
 98 |                 print(f"  创建新索引...")
 99 |                 pc.create_index(
100 |                     name=index_name,
101 |                     dimension=dimension,
102 |                     metric="cosine",
103 |                     spec=ServerlessSpec(cloud="aws", region="us-east-1")
104 |                 )
105 |                 time.sleep(10)
106 |                 index = pc.Index(index_name)
107 |                 print(f"  [OK] 索引创建完成")
108 | 
109 |             print("\n[5/6] 文档索引...")
110 |             vectorstore = PineconeVectorStore.from_documents(
111 |                 documents=chunks,
112 |                 embedding=embeddings,
113 |                 index_name=index_name
114 |             )
115 |             print(f"  [OK] {len(chunks)} 个文档块已索引")
116 | 
117 |             print("\n[6/6] RAG 问答...")
118 |             @tool
119 |             def search_knowledge_base(query: str) -> str:
120 |                 """在知识库中搜索相关信息"""
121 |                 docs = vectorstore.similarity_search(query, k=2)
122 |                 return "\n\n".join([doc.page_content for doc in docs])
123 | 
124 |             from langchain.agents import create_agent
125 |             agent = create_agent(
126 |                 model=model,
127 |                 tools=[search_knowledge_base],
128 |                 system_prompt="你是一个助手，可以访问知识库。使用 search_knowledge_base 工具搜索相关信息，然后回答问题。"
129 |             )
130 | 
131 |             question = "LangChain 有哪些核心组件？"
132 |             print(f"\n  问题: {question}")
133 |             try:
134 |                 response = agent.invoke({"messages": [{"role": "user", "content": question}]})
135 |                 print(f"  回答: {response['messages'][-1].content}")
136 |                 print(f"\n  [OK] RAG 问答完成")
137 |             except Exception as e:
138 |                 print(f"  [错误] RAG 问答失败（Groq 工具调用问题）")
139 |                 print(f"  提示: 这是 Groq 处理中文工具调用的偶发问题，不影响其他功能")
140 | 
141 |         except Exception as e:
142 |             print(f"  [错误] Pinecone 操作失败: {e}")
143 |     else:
144 |         print("\n[4-6] 跳过 Pinecone 相关示例（未设置 API key）")
145 | 
146 |     print("\n" + "=" * 70)
147 |     print(" 演示完成！")
148 |     print("=" * 70)
149 |     print("\n完整功能请运行: python main.py")
150 | 
151 | 
152 | if __name__ == "__main__":
153 |     try:
154 |         main()
155 |     except Exception as e:
156 |         print(f"\n错误: {e}")
157 |         import traceback
158 |         traceback.print_exc()
159 | 


--------------------------------------------------------------------------------
/phase1_fundamentals/04_custom_tools/README.md:
--------------------------------------------------------------------------------
  1 | # 04 - Custom Tools (自定义工具)
  2 | 
  3 | ## 核心概念
  4 | 
  5 | **工具 (Tool) = 给 AI 的函数**
  6 | 
  7 | 使用 `@tool` 装饰器，让 AI 能调用你的 Python 函数。
  8 | 
  9 | ## @tool 基本用法
 10 | 
 11 | ```python
 12 | from langchain_core.tools import tool
 13 | 
 14 | @tool
 15 | def get_weather(city: str) -> str:
 16 |     """
 17 |     获取指定城市的天气信息
 18 | 
 19 |     参数:
 20 |         city: 城市名称，如"北京"、"上海"
 21 | 
 22 |     返回:
 23 |         天气信息字符串
 24 |     """
 25 |     # 你的实现
 26 |     return "晴天，温度 15°C"
 27 | ```
 28 | 
 29 | ### 关键要点
 30 | 
 31 | | 必需项 | 说明 |
 32 | |-------|------|
 33 | | `@tool` 装饰器 | 声明这是一个工具 |
 34 | | **docstring** | AI 读这个来理解工具用途 ⚠️ 非常重要！ |
 35 | | 类型注解 | 参数和返回值的类型 |
 36 | | 返回 `str` | 工具应该返回字符串（AI 最容易理解） |
 37 | 
 38 | ## 工具的 docstring
 39 | 
 40 | **AI 依赖 docstring 来理解工具！**
 41 | 
 42 | ```python
 43 | @tool
 44 | def my_tool(param: str) -> str:
 45 |     """
 46 |     工具的简短描述（AI 读这个！）
 47 | 
 48 |     参数:
 49 |         param: 参数说明
 50 | 
 51 |     返回:
 52 |         返回值说明
 53 |     """
 54 |     ...
 55 | ```
 56 | 
 57 | ### 好的 vs 不好的 docstring
 58 | 
 59 | ```python
 60 | # ❌ 不好：太模糊
 61 | @tool
 62 | def tool1(x: str) -> str:
 63 |     """做一些事情"""
 64 |     ...
 65 | 
 66 | # ✅ 好：清晰明确
 67 | @tool
 68 | def search_products(query: str) -> str:
 69 |     """
 70 |     在产品数据库中搜索产品
 71 | 
 72 |     参数:
 73 |         query: 搜索关键词，如"笔记本电脑"、"手机"
 74 | 
 75 |     返回:
 76 |         产品列表的 JSON 字符串
 77 |     """
 78 |     ...
 79 | ```
 80 | 
 81 | ## 参数类型
 82 | 
 83 | ### 1. 单参数
 84 | ```python
 85 | @tool
 86 | def get_weather(city: str) -> str:
 87 |     """获取指定城市的天气"""
 88 |     ...
 89 | ```
 90 | 
 91 | ### 2. 多参数
 92 | ```python
 93 | @tool
 94 | def calculator(operation: str, a: float, b: float) -> str:
 95 |     """
 96 |     执行数学计算
 97 | 
 98 |     参数:
 99 |         operation: "add", "subtract", "multiply", "divide"
100 |         a: 第一个数字
101 |         b: 第二个数字
102 |     """
103 |     ...
104 | ```
105 | 
106 | ### 3. 可选参数
107 | ```python
108 | from typing import Optional
109 | 
110 | @tool
111 | def web_search(query: str, num_results: Optional[int] = 3) -> str:
112 |     """
113 |     搜索网页
114 | 
115 |     参数:
116 |         query: 搜索关键词
117 |         num_results: 返回结果数量，默认 3
118 |     """
119 |     ...
120 | ```
121 | 
122 | ## 调用工具
123 | 
124 | 工具有两种调用方式：
125 | 
126 | ### 1. 直接调用（测试用）
127 | ```python
128 | # 使用 .invoke() 方法
129 | result = get_weather.invoke({"city": "北京"})
130 | print(result)  # "晴天，温度 15°C"
131 | ```
132 | 
133 | ### 2. 绑定到模型（让 AI 调用）
134 | ```python
135 | from langchain.chat_models import init_chat_model
136 | 
137 | model = init_chat_model("groq:llama-3.3-70b-versatile")
138 | 
139 | # 绑定工具
140 | model_with_tools = model.bind_tools([get_weather])
141 | 
142 | # AI 可以决定是否调用工具
143 | response = model_with_tools.invoke("北京天气如何？")
144 | 
145 | # 检查 AI 是否要调用工具
146 | if response.tool_calls:
147 |     print("AI 想调用工具：", response.tool_calls)
148 | else:
149 |     print("AI 直接回答：", response.content)
150 | ```
151 | 
152 | ## 工具属性
153 | 
154 | 创建工具后，可以查看其属性：
155 | 
156 | ```python
157 | @tool
158 | def my_tool(param: str) -> str:
159 |     """工具描述"""
160 |     ...
161 | 
162 | print(my_tool.name)         # "my_tool"
163 | print(my_tool.description)  # "工具描述"
164 | print(my_tool.args)         # 参数模式
165 | ```
166 | 
167 | ## 最佳实践
168 | 
169 | ### 1. 清晰的描述
170 | ```python
171 | # ✅ 好
172 | @tool
173 | def search_flights(origin: str, destination: str, date: str) -> str:
174 |     """
175 |     搜索航班信息
176 | 
177 |     参数:
178 |         origin: 出发城市，如"北京"
179 |         destination: 目的地城市，如"上海"
180 |         date: 出发日期，格式 YYYY-MM-DD
181 | 
182 |     返回:
183 |         可用航班的 JSON 列表
184 |     """
185 | ```
186 | 
187 | ### 2. 功能单一
188 | ```python
189 | # ❌ 不好：一个工具做太多事
190 | @tool
191 | def do_everything(action: str, data: str) -> str:
192 |     """做各种事情"""
193 |     if action == "weather": ...
194 |     elif action == "calculate": ...
195 |     elif action == "search": ...
196 | 
197 | # ✅ 好：每个工具做一件事
198 | @tool
199 | def get_weather(city: str) -> str:
200 |     """获取天气"""
201 |     ...
202 | 
203 | @tool
204 | def calculator(operation: str, a: float, b: float) -> str:
205 |     """计算"""
206 |     ...
207 | ```
208 | 
209 | ### 3. 错误处理
210 | ```python
211 | @tool
212 | def divide(a: float, b: float) -> str:
213 |     """
214 |     除法计算
215 | 
216 |     参数:
217 |         a: 被除数
218 |         b: 除数
219 |     """
220 |     try:
221 |         if b == 0:
222 |             return "错误：除数不能为零"
223 |         result = a / b
224 |         return f"{a} / {b} = {result}"
225 |     except Exception as e:
226 |         return f"计算错误：{e}"
227 | ```
228 | 
229 | ### 4. 返回字符串
230 | ```python
231 | # ✅ 好：返回字符串
232 | @tool
233 | def get_user_info(user_id: str) -> str:
234 |     """获取用户信息"""
235 |     user = {"id": user_id, "name": "张三"}
236 |     return json.dumps(user, ensure_ascii=False)  # 转成 JSON 字符串
237 | 
238 | # ❌ 不好：返回字典（某些情况可能有问题）
239 | @tool
240 | def get_user_info(user_id: str) -> dict:
241 |     """获取用户信息"""
242 |     return {"id": user_id, "name": "张三"}
243 | ```
244 | 
245 | ## 测试工具
246 | 
247 | 每个工具文件都可以直接运行测试：
248 | 
249 | ```python
250 | # 在文件末尾添加
251 | if __name__ == "__main__":
252 |     print("测试工具：")
253 |     print(my_tool.invoke({"param": "test"}))
254 | ```
255 | 
256 | 运行测试：
257 | ```bash
258 | python tools/weather.py
259 | ```
260 | 
261 | ## 项目结构
262 | 
263 | ```
264 | 04_custom_tools/
265 | ├── main.py                # 6 个示例
266 | ├── README.md              # 本文件
267 | └── tools/                 # 工具目录
268 |     ├── weather.py         # 天气工具
269 |     ├── calculator.py      # 计算器工具
270 |     └── web_search.py      # 搜索工具
271 | ```
272 | 
273 | ## 运行示例
274 | 
275 | ```bash
276 | # 测试单个工具
277 | python tools/weather.py
278 | 
279 | # 运行所有示例
280 | python main.py
281 | ```
282 | 
283 | ## 下一步
284 | 
285 | **05_simple_agent** - 使用 `create_agent` 让 AI 自动调用这些工具
286 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # LangChain 1.0 学习仓库
  2 | 
  3 | 这是一个系统学习 **LangChain 1.0** 的实践仓库，涵盖从基础概念到实战项目的完整学习路径。
  4 | 
  5 | ## 📚 关于 LangChain 1.0
  6 | 
  7 | LangChain 1.0 是用于构建 LLM 驱动应用程序的框架的最新主要版本（2025年10月发布）。主要特性：
  8 | 
  9 | - ✅ **构建在 LangGraph 运行时之上** - 提供持久化、流式处理、人在回路等能力
 10 | - ✅ **新的 `create_agent` API** - 简化 Agent 创建流程
 11 | - ✅ **中间件架构** - 提供细粒度的执行控制（before_model、after_model、wrap_model_call 等）
 12 | - ✅ **多模态支持** - 处理文本、图像、视频、文件
 13 | - ✅ **结构化输出** - 使用 Pydantic 模型定义输出格式
 14 | - ✅ **语义化版本控制** - 1.x 系列保证 API 稳定
 15 | 
 16 | ## 🚀 快速开始
 17 | 
 18 | ### 环境要求
 19 | 
 20 | - Python 3.10 或更高版本（不支持 Python 3.9）
 21 | - pip 或 uv 包管理器
 22 | 
 23 | ### 安装步骤
 24 | 
 25 | 1. **克隆仓库**
 26 | ```bash
 27 | git clone <your-repo-url>
 28 | cd langchain_v1_study
 29 | ```
 30 | 
 31 | 2. **创建虚拟环境**
 32 | ```bash
 33 | # 创建虚拟环境
 34 | python -m venv venv
 35 | 
 36 | # 激活虚拟环境
 37 | # Windows:
 38 | venv\Scripts\activate
 39 | # Unix/macOS:
 40 | source venv/bin/activate
 41 | ```
 42 | 
 43 | 3. **安装依赖**
 44 | ```bash
 45 | pip install -r requirements.txt
 46 | ```
 47 | 
 48 | 4. **配置环境变量**
 49 | ```bash
 50 | # 复制环境变量模板
 51 | cp .env.example .env
 52 | 
 53 | # 编辑 .env 文件，填入你的 API Keys
 54 | ```
 55 | 
 56 | 需要的 API Keys：
 57 | - `OPENAI_API_KEY` - OpenAI API 密钥（https://platform.openai.com/api-keys）
 58 | - `ANTHROPIC_API_KEY` - Anthropic API 密钥（https://console.anthropic.com/）
 59 | - `LANGSMITH_API_KEY` - LangSmith API 密钥（可选，用于可观测性）
 60 | 
 61 | ### 验证安装
 62 | 
 63 | 运行第一个示例：
 64 | ```bash
 65 | python phase1_fundamentals/01_hello_langchain/main.py
 66 | ```
 67 | 
 68 | ## 📖 学习路径
 69 | 
 70 | 本仓库采用**四阶段渐进式学习**，共 24 个模块 + 3 个综合项目：
 71 | 
 72 | ### 阶段一：基础知识（第1-2周）
 73 | 📂 `phase1_fundamentals/`
 74 | 
 75 | | 模块 | 主题 | 学习内容 |
 76 | |------|------|----------|
 77 | | 01 | Hello LangChain | 第一次 LLM 调用，使用 `init_chat_model` |
 78 | | 02 | Prompt Templates | 创建和使用提示词模板 |
 79 | | 03 | Messages | 消息类型（System、Human、AI） |
 80 | | 04 | Custom Tools | 自定义工具（天气、计算器、搜索） |
 81 | | 05 | Simple Agent | 使用 `create_agent` 创建 Agent |
 82 | | 06 | Agent Loop | 理解 ReAct 模式执行循环 |
 83 | 
 84 | ### 阶段二：中级特性（第3-4周）
 85 | 📂 `phase2_practical/`
 86 | 
 87 | | 模块 | 主题 | 学习内容 |
 88 | |------|------|----------|
 89 | | 07 | Memory Basics | 使用 InMemorySaver 实现短期内存 |
 90 | | 08 | Context Management | 消息修剪和摘要 |
 91 | | 09 | Checkpointing | 使用 SQLite/Postgres 持久化状态 |
 92 | | 10 | Middleware Basics | before_model 和 after_model 钩子 |
 93 | | 11 | Middleware Monitoring | 可观测性中间件 |
 94 | | 12 | Middleware Guardrails | PII 脱敏和输入验证 |
 95 | | 13 | Structured Output | 使用 Pydantic 定义输出模式 |
 96 | | 14 | Validation Retry | 优雅地处理验证失败 |
 97 | | 15 | Multi-Tool Structured | 结合工具和结构化输出 |
 98 | 
 99 | ### 阶段三：高级主题（第5-6周）
100 | 📂 `phase3_advanced/`
101 | 
102 | | 模块 | 主题 | 学习内容 |
103 | |------|------|----------|
104 | | 16 | LangGraph Basics | 创建带节点和边的状态图 |
105 | | 17 | Multi-Agent | 协调多个专业化 Agent |
106 | | 18 | Conditional Routing | 实现动态工作流路由 |
107 | | 19 | Image Input | 使用视觉模型处理图像 |
108 | | 20 | File Handling | 处理文档上传和分析 |
109 | | 21 | Mixed Modality | 结合文本、图像和结构化数据 |
110 | | 22 | LangSmith Integration | 设置追踪和监控 |
111 | | 23 | Error Handling | 实现健壮的错误恢复 |
112 | | 24 | Cost Optimization | 追踪 token 使用并优化 |
113 | 
114 | ### 阶段四：实际应用（第7-8周）
115 | 📂 `phase4_projects/`
116 | 
117 | | 项目 | 描述 | 核心技术 |
118 | |------|------|----------|
119 | | RAG 文档问答系统 | 基于向量数据库的文档问答 | 文档加载、向量存储、检索增强生成 |
120 | | 多 Agent 客户支持 | 智能客服系统 | 多 Agent 协作、HITL、对话内存 |
121 | | 研究助手 | 带工具的研究助手 | 网页搜索、MCP 集成、引用格式化 |
122 | 
123 | ## 📁 项目结构
124 | 
125 | ```
126 | langchain_v1_study/
127 | ├── phase1_fundamentals/     # 阶段一：基础知识
128 | ├── phase2_practical/        # 阶段二：中级特性
129 | ├── phase3_advanced/         # 阶段三：高级主题
130 | ├── phase4_projects/         # 阶段四：综合项目
131 | ├── shared/                  # 共享资源（工具、提示词、中间件）
132 | ├── notebooks/               # Jupyter 笔记本实验
133 | ├── docs/                    # 学习笔记和文档
134 | └── tests/                   # 全局测试
135 | ```
136 | 
137 | 详细结构请查看 [CLAUDE.md](./CLAUDE.md)
138 | 
139 | ## 🎯 使用指南
140 | 
141 | ### 运行单个模块
142 | 
143 | ```bash
144 | # 进入模块目录
145 | cd phase1_fundamentals/01_hello_langchain
146 | 
147 | # 运行主程序
148 | python main.py
149 | 
150 | # 运行测试（如果有）
151 | python test.py
152 | ```
153 | 
154 | ### 运行综合项目
155 | 
156 | ```bash
157 | # 进入项目目录
158 | cd phase4_projects/01_rag_system
159 | 
160 | # 安装项目特定依赖
161 | pip install -r requirements.txt
162 | 
163 | # 运行项目
164 | python main.py
165 | ```
166 | 
167 | ### 使用 Jupyter Notebook
168 | 
169 | ```bash
170 | # 安装 Jupyter
171 | pip install jupyter
172 | 
173 | # 启动 Notebook
174 | jupyter notebook notebooks/
175 | ```
176 | 
177 | ## 📝 学习建议
178 | 
179 | 1. **按顺序学习** - 从阶段一开始，每个模块都基于前面的知识
180 | 2. **动手实践** - 每个模块都有可运行的代码，修改参数观察效果
181 | 3. **记录笔记** - 在 `docs/learning_notes/` 中记录你的学习心得
182 | 4. **查看 README** - 每个模块都有独立的 README.md 说明核心概念
183 | 5. **完成测试** - 运行测试文件验证你的理解
184 | 6. **做综合项目** - 前三个阶段完成后，通过项目巩固所学
185 | 
186 | ## 🔧 常用命令
187 | 
188 | ```bash
189 | # 查看已安装的包
190 | pip list
191 | 
192 | # 更新某个包
193 | pip install --upgrade langchain
194 | 
195 | # 激活 LangSmith 追踪（可选）
196 | export LANGSMITH_TRACING=true  # Unix/macOS
197 | set LANGSMITH_TRACING=true     # Windows
198 | 
199 | # 运行全局测试
200 | pytest tests/
201 | ```
202 | 
203 | ## 📚 重要资源
204 | 
205 | - **官方文档**: https://docs.langchain.com/oss/python/langchain/quickstart
206 | - **迁移指南**: https://docs.langchain.com/oss/python/migrate/langchain-v1
207 | - **LangGraph 文档**: https://docs.langchain.com/oss/python/langgraph
208 | - **LangSmith 平台**: https://smith.langchain.com
209 | - **GitHub 仓库**: https://github.com/langchain-ai/langchain
210 | 
211 | ## 🆘 常见问题
212 | 
213 | ### 1. 导入错误：ModuleNotFoundError
214 | 
215 | 确保虚拟环境已激活并且安装了所有依赖：
216 | ```bash
217 | pip install -r requirements.txt
218 | ```
219 | 
220 | ### 2. API Key 错误
221 | 
222 | 检查 `.env` 文件是否正确配置，确保 API Keys 有效。
223 | 
224 | ### 3. Python 版本不兼容
225 | 
226 | LangChain 1.0 需要 Python 3.10+：
227 | ```bash
228 | python --version  # 检查版本
229 | ```
230 | 
231 | 
232 | ## 🤝 贡献
233 | 
234 | 这是个人学习仓库，欢迎提交问题和改进建议！
235 | 
236 | ## 📄 许可证
237 | 
238 | MIT License
239 | 
240 | ## 🎓 关于作者
241 | 
242 | 正在学习 LangChain 1.0 的开发者，记录学习过程供参考。
243 | 
244 | ---
245 | 
246 | **开始学习之旅** 👉 [01_hello_langchain](./phase1_fundamentals/01_hello_langchain/)
247 | 


--------------------------------------------------------------------------------
/docs/SIMPLIFIED_LEARNING_PATH.md:
--------------------------------------------------------------------------------
  1 | # LangChain 1.0 精简学习路径
  2 | 
  3 | **原则**: 专注日常 80% 使用场景，去掉过于高级的内容
  4 | 
  5 | ---
  6 | 
  7 | ## 📊 模块精简对比
  8 | 
  9 | | 原路径 | 模块数 | 精简后 | 模块数 | 变化 |
 10 | |--------|--------|--------|--------|------|
 11 | | Phase 1 | 6 | Phase 1 基础 | 6 | 保持 ✅ |
 12 | | Phase 2 | 9 | Phase 2 实用进阶 | 7 | 精简 2 个 |
 13 | | Phase 3 | 9 | Phase 3 高级（可选） | 4 | 精简 5 个 |
 14 | | Phase 4 | 3 项目 | 整合到各阶段 | - | 提前实战 |
 15 | | **总计** | **27** | **总计** | **17** | **减少 10 个** |
 16 | 
 17 | ---
 18 | 
 19 | ## 🎯 精简后的学习路径
 20 | 
 21 | ### Phase 1: 基础核心 (第 1 周) - 保持不变
 22 | 
 23 | ✅ **01_hello_langchain** - 第一次 LLM 调用
 24 | ✅ **02_prompt_templates** - 提示词模板
 25 | ✅ **03_messages** - 消息类型
 26 | ✅ **04_custom_tools** - 自定义工具
 27 | ✅ **05_simple_agent** - 使用 create_agent
 28 | ✅ **06_agent_loop** - Agent 执行循环
 29 | 
 30 | **目标**: 能创建基本的 Agent
 31 | 
 32 | ---
 33 | 
 34 | ### Phase 2: 实用进阶 (第 2-3 周) - 精简 + RAG 提前
 35 | 
 36 | #### 内存和状态 (必学)
 37 | ✅ **07_memory_basics** - InMemorySaver
 38 | ✅ **08_context_management** - 消息修剪/摘要
 39 | ✅ **09_checkpointing** - 持久化（SQLite）
 40 | 
 41 | #### 中间件 (必学)
 42 | ✅ **10_middleware_basics** - before/after hooks
 43 | - ~~11_middleware_monitoring~~ ❌ 删除（日常不常用）
 44 | - ~~12_middleware_guardrails~~ ❌ 删除（可选场景）
 45 | 
 46 | #### 结构化输出 (很实用)
 47 | ✅ **11_structured_output** - Pydantic 模型
 48 | ✅ **12_validation_retry** - 验证和重试
 49 | - ~~15_multi_tool_structured~~ ❌ 合并到 11
 50 | 
 51 | #### RAG 系统 (提前！)
 52 | ✅ **13_rag_basics** - 文档加载、向量存储、检索
 53 | ✅ **14_rag_advanced** - 改进检索、混合搜索
 54 | 
 55 | **目标**: 能构建生产级 Agent + RAG 系统
 56 | 
 57 | ---
 58 | 
 59 | ### Phase 3: 高级主题 (第 4 周，可选) - 大幅精简
 60 | 
 61 | ✅ **15_langgraph_low_level** - 复杂控制流（需要时才学）
 62 | ✅ **16_multi_agent** - 多 Agent 协作（需要时才学）
 63 | ✅ **17_langsmith_monitoring** - 生产监控（上线时才学）
 64 | ✅ **18_final_project** - 综合项目（客服系统或研究助手）
 65 | 
 66 | **删除的模块**:
 67 | - ~~18_conditional_routing~~ ❌ (合并到 15)
 68 | - ~~19_image_input~~ ❌ (非核心)
 69 | - ~~20_file_handling~~ ❌ (合并到 RAG)
 70 | - ~~21_mixed_modality~~ ❌ (非核心)
 71 | - ~~22_langsmith~~ → 保留简化版
 72 | - ~~23_error_handling~~ ❌ (融入各模块)
 73 | - ~~24_cost_optimization~~ ❌ (非初学重点)
 74 | 
 75 | **目标**: 掌握复杂场景的解决方案
 76 | 
 77 | ---
 78 | 
 79 | ## 🗂️ 精简后的目录结构
 80 | 
 81 | ```
 82 | langchain_v1_study/
 83 | ├── phase1_fundamentals/          # 第 1 周
 84 | │   ├── 01_hello_langchain/
 85 | │   ├── 02_prompt_templates/
 86 | │   ├── 03_messages/
 87 | │   ├── 04_custom_tools/
 88 | │   ├── 05_simple_agent/
 89 | │   └── 06_agent_loop/
 90 | │
 91 | ├── phase2_practical/             # 第 2-3 周 (重命名)
 92 | │   ├── 07_memory_basics/
 93 | │   ├── 08_context_management/
 94 | │   ├── 09_checkpointing/
 95 | │   ├── 10_middleware_basics/
 96 | │   ├── 11_structured_output/
 97 | │   ├── 12_validation_retry/
 98 | │   ├── 13_rag_basics/           # RAG 提前！
 99 | │   └── 14_rag_advanced/         # RAG 进阶
100 | │
101 | └── phase3_advanced/              # 第 4 周（可选）
102 |     ├── 15_langgraph_low_level/
103 |     ├── 16_multi_agent/
104 |     ├── 17_langsmith_monitoring/
105 |     └── 18_final_project/        # 综合项目
106 | ```
107 | 
108 | ---
109 | 
110 | ## 📚 为什么这样精简？
111 | 
112 | ### 删除理由
113 | 
114 | | 删除的模块 | 理由 | 替代方案 |
115 | |-----------|------|----------|
116 | | middleware_monitoring | 日常不常用 | 在 langsmith 中学监控 |
117 | | middleware_guardrails | 特定场景 | PII 等可作为 middleware_basics 示例 |
118 | | multi_tool_structured | 重复 | 合并到 structured_output |
119 | | conditional_routing | 重复 | 合并到 langgraph_low_level |
120 | | image_input | 非核心 | 需要时查官方文档 |
121 | | file_handling | 重复 | 合并到 RAG 模块 |
122 | | mixed_modality | 非核心 | 进阶内容，不是日常 |
123 | | error_handling | 分散 | 每个模块中都涉及错误处理 |
124 | | cost_optimization | 非初学 | 生产环境才考虑 |
125 | 
126 | ### RAG 提前的理由
127 | 
128 | 1. **实用性强**: 大部分 LLM 应用都需要 RAG
129 | 2. **理解 Agent**: RAG 是 Agent + 检索的典型应用
130 | 3. **学习动力**: 早期就能做出实用系统
131 | 4. **知识整合**: 综合前面学的工具、Agent、内存
132 | 
133 | ---
134 | 
135 | ## 🎓 新的学习时间线
136 | 
137 | ### 第 1 周: 基础 (Phase 1)
138 | - Day 1-2: 01-03 (LLM + Prompts + Messages)
139 | - Day 3-4: 04 (Tools)
140 | - Day 5-7: 05-06 (Agent + Loop)
141 | 
142 | ### 第 2 周: 内存和结构化 (Phase 2 前半)
143 | - Day 1-3: 07-09 (Memory + Context + Checkpoint)
144 | - Day 4-5: 10 (Middleware)
145 | - Day 6-7: 11-12 (Structured Output + Validation)
146 | 
147 | ### 第 3 周: RAG 系统 (Phase 2 后半)
148 | - Day 1-3: 13 (RAG Basics - 文档加载、向量存储、检索)
149 | - Day 4-5: 14 (RAG Advanced - 改进检索)
150 | - Day 6-7: 构建自己的 RAG 应用
151 | 
152 | ### 第 4 周: 高级和项目 (Phase 3，可选)
153 | - Day 1-2: 15 (LangGraph 低层 API)
154 | - Day 3-4: 16 (Multi-Agent)
155 | - Day 5: 17 (LangSmith 监控)
156 | - Day 6-7: 18 (综合项目)
157 | 
158 | ---
159 | 
160 | ## 💡 学习建议
161 | 
162 | ### 快速路径（2 周）
163 | 如果时间紧，重点学：
164 | 1. Phase 1 全部 (01-06)
165 | 2. Phase 2 的 07-09, 11, 13（内存 + 结构化 + RAG 基础）
166 | 
167 | **结果**: 能构建基本的 RAG 应用
168 | 
169 | ### 标准路径（3 周）
170 | 1. Phase 1 全部 (01-06)
171 | 2. Phase 2 全部 (07-14)
172 | 
173 | **结果**: 能构建生产级 RAG 应用
174 | 
175 | ### 完整路径（4 周）
176 | 1. Phase 1 全部
177 | 2. Phase 2 全部
178 | 3. Phase 3 选学（根据需求）
179 | 
180 | **结果**: 能处理复杂场景
181 | 
182 | ---
183 | 
184 | ## ✅ 核心能力对比
185 | 
186 | ### 精简前（24 模块）
187 | - ✅ 基础 Agent
188 | - ✅ 中间件（3个模块）
189 | - ✅ 结构化输出（3个模块）
190 | - ✅ LangGraph
191 | - ✅ 多模态
192 | - ✅ 监控
193 | - ✅ RAG
194 | - ⚠️ 内容多，容易迷失重点
195 | 
196 | ### 精简后（17 模块）
197 | - ✅ 基础 Agent
198 | - ✅ 中间件（1个模块，涵盖核心）
199 | - ✅ 结构化输出（2个模块，更聚焦）
200 | - ✅ **RAG（提前，2个模块）**
201 | - ✅ LangGraph（可选）
202 | - ✅ Multi-Agent（可选）
203 | - ✅ 监控（可选）
204 | - ✅ 专注实用，路径清晰
205 | 
206 | ---
207 | 
208 | ## 📋 实施步骤
209 | 
210 | 1. **重命名目录**:
211 |    ```bash
212 |    mv phase2_intermediate phase2_practical
213 |    ```
214 | 
215 | 2. **调整模块编号**:
216 |    - 11_structured_output (原 13)
217 |    - 12_validation_retry (原 14)
218 |    - 13_rag_basics (新增，基于原项目1)
219 |    - 14_rag_advanced (新增，基于原项目1)
220 | 
221 | 3. **精简 Phase 3**:
222 |    - 15_langgraph_low_level (原 16)
223 |    - 16_multi_agent (原 17)
224 |    - 17_langsmith_monitoring (原 22)
225 |    - 18_final_project (原 phase4 项目2或3)
226 | 
227 | ---
228 | 
229 | ## 🎯 最终结论
230 | 
231 | **从 24 个模块 → 17 个模块**
232 | 
233 | - ✅ 保留日常 80% 使用场景
234 | - ✅ RAG 提前到第 3 周
235 | - ✅ 高级内容标记为可选
236 | - ✅ 学习路径更清晰
237 | - ✅ 更容易坚持完成
238 | 
239 | ---
240 | 
241 | **是否采用这个精简方案？** 我可以立即更新 CLAUDE.md 和目录结构。
242 | 


--------------------------------------------------------------------------------
/phase2_practical/14_rag_advanced/模块完成总结.md:
--------------------------------------------------------------------------------
  1 | # Module 14 - RAG Advanced 完成总结
  2 | 
  3 | ## ✅ 已完成的内容
  4 | 
  5 | ### 1. 核心文件
  6 | - ✅ `main.py` - 完整的混合检索教程（6个渐进式示例）
  7 | - ✅ `test.py` - 组件测试脚本（无需API key）
  8 | - ✅ `README.md` - 完整文档（包含理论、实践和最佳实践）
  9 | - ✅ `data/` - 测试数据目录（自动生成）
 10 | - ✅ `chroma_db/` - 向量数据库目录（自动生成）
 11 | 
 12 | ### 2. 核心功能实现
 13 | 
 14 | #### 示例 1：准备测试数据
 15 | - 创建包含技术术语、概念、代码的测试文档
 16 | - 文档加载和分割
 17 | - 展示 chunk 预览
 18 | 
 19 | #### 示例 2：向量检索器（语义搜索）
 20 | - HuggingFaceEmbeddings (all-MiniLM-L6-v2)
 21 | - Chroma 向量存储
 22 | - 语义相似度搜索
 23 | - 测试不同类型的查询
 24 | 
 25 | #### 示例 3：BM25 检索器（关键词搜索）
 26 | - BM25 算法介绍
 27 | - 关键词精确匹配
 28 | - 对专有名词、代码、版本号的准确检索
 29 | 
 30 | #### 示例 4：混合检索器（EnsembleRetriever）
 31 | - 组合向量搜索和 BM25
 32 | - RRF (Reciprocal Rank Fusion) 算法
 33 | - 权重配置 [0.4, 0.6]
 34 | - 对比测试（BM25 vs Vector vs Hybrid）
 35 | 
 36 | #### 示例 5：权重优化实验
 37 | - 测试不同权重配置：
 38 |   - [0.0, 1.0] - 纯向量
 39 |   - [0.3, 0.7] - 偏向向量
 40 |   - [0.5, 0.5] - 平衡
 41 |   - [0.7, 0.3] - 偏向 BM25
 42 |   - [1.0, 0.0] - 纯 BM25
 43 | - 展示不同权重对检索结果的影响
 44 | 
 45 | #### 示例 6：RAG Agent with Hybrid Search
 46 | - 将混合检索集成到 Agent
 47 | - 创建 `search_knowledge_base` 工具
 48 | - 使用 Groq Llama 3.3 70B 模型
 49 | - 实现问答功能
 50 | 
 51 | ### 3. 文档完善
 52 | 
 53 | #### README.md 包含：
 54 | 1. **快速开始指南**
 55 |    - 安装命令
 56 |    - LangChain 1.0 导入提示
 57 | 
 58 | 2. **核心概念讲解**
 59 |    - 为什么需要进阶 RAG
 60 |    - 向量检索 vs BM25 检索
 61 |    - 对比表格（5种查询类型）
 62 | 
 63 | 3. **EnsembleRetriever 详解**
 64 |    - RRF 算法原理
 65 |    - 权重配置策略
 66 |    - 完整代码示例
 67 | 
 68 | 4. **完整实现流程**
 69 |    - 离线阶段：建立索引
 70 |    - 在线阶段：RAG 问答
 71 | 
 72 | 5. **性能优化**
 73 |    - 权重调整策略
 74 |    - k 值选择建议
 75 |    - 监控和评估方法
 76 | 
 77 | 6. **常见问题解答**
 78 |    - Q1-Q6 覆盖常见疑问
 79 |    - 混合检索使用场景
 80 |    - 向量数据库选择对比
 81 | 
 82 | 7. **最佳实践**
 83 |    - 生产环境检查清单
 84 |    - 代码模板（HybridRAGSystem 类）
 85 |    - 测试套件示例
 86 | 
 87 | 8. **进一步学习**
 88 |    - 下一步主题（重排序、查询优化等）
 89 |    - 相关资源链接
 90 | 
 91 | ## 🔧 技术亮点
 92 | 
 93 | ### 1. LangChain 1.0 适配
 94 | - ✅ 正确导入：`from langchain_classic.retrievers import EnsembleRetriever`
 95 | - ✅ 使用 `create_agent` 创建 Agent
 96 | - ✅ 使用 `init_chat_model` 初始化模型
 97 | - ✅ 符合 LangChain 1.0 最新规范
 98 | 
 99 | ### 2. 混合检索实现
100 | - ✅ BM25 + 向量搜索组合
101 | - ✅ RRF 算法自动融合排名
102 | - ✅ 灵活的权重配置
103 | - ✅ 对比实验展示效果
104 | 
105 | ### 3. 生产级代码
106 | - ✅ 完整的错误处理
107 | - ✅ 清晰的日志输出
108 | - ✅ 模块化设计
109 | - ✅ 详细的注释说明
110 | 
111 | ### 4. 本地化测试
112 | - ✅ 使用 HuggingFace 免费模型（无需 API key）
113 | - ✅ Chroma 本地向量数据库
114 | - ✅ test.py 可独立运行
115 | - ✅ 自动下载和缓存模型
116 | 
117 | ## 📦 依赖更新
118 | 
119 | 已更新 `requirements.txt`：
120 | ```python
121 | # LangChain Classic - 包含部分移至经典包的组件（如 EnsembleRetriever）
122 | langchain-classic>=1.0.0
123 | 
124 | # BM25 检索算法（用于混合搜索）
125 | rank_bm25>=0.2.2
126 | 
127 | # Chroma - 轻量级向量数据库（推荐用于学习）
128 | chromadb>=0.5.0
129 | ```
130 | 
131 | ## ✅ 测试结果
132 | 
133 | ### test.py 测试通过（5个测试）：
134 | 1. ✅ 文档加载和分割
135 | 2. ✅ 向量检索 (HuggingFaceEmbeddings + Chroma)
136 | 3. ✅ BM25 检索 (rank_bm25)
137 | 4. ✅ 混合检索 (EnsembleRetriever + RRF)
138 | 5. ✅ 权重对比实验
139 | 
140 | ### 测试输出示例：
141 | ```
142 | [OK] 文档加载和分割成功
143 |   原文档: 1 个
144 |   分割后: 4 块
145 | 
146 | [OK] 向量检索成功
147 |   查询: LangChain 核心组件
148 |   结果数: 2
149 | 
150 | [OK] BM25 检索成功
151 |   查询: BM25 算法
152 |   结果数: 2
153 | 
154 | [OK] 混合检索器创建成功
155 |   组合: BM25 + 向量搜索
156 |   权重: [0.5, 0.5]
157 |   算法: RRF (Reciprocal Rank Fusion)
158 | 
159 | [OK] 权重对比完成
160 | ```
161 | 
162 | ## 🎯 学习目标达成
163 | 
164 | 学习者完成本模块后将掌握：
165 | 
166 | ### 理论层面
167 | - ✅ 理解向量搜索和 BM25 搜索的区别
168 | - ✅ 理解混合检索的优势
169 | - ✅ 理解 RRF 算法原理
170 | - ✅ 理解权重配置的影响
171 | 
172 | ### 实践层面
173 | - ✅ 能创建向量检索器
174 | - ✅ 能创建 BM25 检索器
175 | - ✅ 能组合多个检索器
176 | - ✅ 能调整权重优化检索质量
177 | - ✅ 能将混合检索集成到 RAG Agent
178 | 
179 | ### 生产应用
180 | - ✅ 知道何时使用混合检索
181 | - ✅ 知道如何选择权重配置
182 | - ✅ 知道如何评估检索质量
183 | - ✅ 知道如何优化性能
184 | 
185 | ## 🔍 重要发现：LangChain 1.0 导入变更
186 | 
187 | 在开发过程中发现并解决了重要问题：
188 | 
189 | ### 问题
190 | ```python
191 | # ❌ 这个导入在 LangChain 1.0 中不再有效
192 | from langchain.retrievers import EnsembleRetriever
193 | # ModuleNotFoundError: No module named 'langchain.retrievers'
194 | ```
195 | 
196 | ### 解决方案
197 | ```python
198 | # ✅ LangChain 1.0 正确导入
199 | from langchain_classic.retrievers import EnsembleRetriever
200 | ```
201 | 
202 | ### 原因
203 | 根据 LangChain v1 迁移指南：
204 | - `EnsembleRetriever` 和其他检索器已移至 `langchain-classic` 包
205 | - 这是 LangChain 1.0 架构重组的一部分
206 | - 需要安装 `langchain-classic` 包
207 | 
208 | ### 已更新
209 | - ✅ main.py 导入已修正
210 | - ✅ test.py 导入已修正
211 | - ✅ README.md 已添加重要提示
212 | - ✅ requirements.txt 已添加 langchain-classic
213 | 
214 | ## 📊 对比：基础 RAG vs 进阶 RAG
215 | 
216 | | 方面 | Module 13 (基础) | Module 14 (进阶) |
217 | |-----|-----------------|-----------------|
218 | | 检索方法 | 纯向量搜索 | 混合搜索 (BM25 + 向量) |
219 | | 语义查询 | ⭐⭐⭐ | ⭐⭐⭐ |
220 | | 精确匹配 | ⭐ | ⭐⭐⭐ |
221 | | 代码搜索 | ⭐ | ⭐⭐⭐ |
222 | | 鲁棒性 | 中 | 高 |
223 | | 复杂度 | 低 | 中 |
224 | | 生产适用 | 原型 | 生产级 |
225 | 
226 | ## 🎓 核心要点总结
227 | 
228 | ### 1. 混合检索 = 向量 + BM25
229 | - 结合两者优势
230 | - 语义理解 + 精确匹配
231 | - 适用于大多数场景
232 | 
233 | ### 2. EnsembleRetriever
234 | - LangChain 的标准组合器
235 | - 使用 RRF 算法融合结果
236 | - 灵活的权重配置
237 | 
238 | ### 3. 权重调整策略
239 | - 技术文档：[0.4, 0.6] - 稍偏向语义
240 | - 代码搜索：[0.6, 0.4] - 稍偏向精确
241 | - 通用场景：[0.5, 0.5] - 平衡
242 | 
243 | ### 4. 生产就绪
244 | - 监控检索质量
245 | - 缓存热门查询
246 | - 设置合适的 k 值
247 | - 实现容错机制
248 | 
249 | ## 🚀 下一步学习
250 | 
251 | 完成 Module 14 后，学习者可以：
252 | 
253 | 1. **继续 Phase 2**：
254 |    - Module 15: LangGraph 低层 API（可选）
255 |    - 或跳到 Phase 3 高级主题
256 | 
257 | 2. **实战项目**：
258 |    - 构建知识库问答系统
259 |    - 集成到现有应用
260 |    - 优化检索质量
261 | 
262 | 3. **进阶主题**（可选）：
263 |    - 重排序 (Reranking)
264 |    - 查询优化 (Query rewriting)
265 |    - 元数据过滤
266 |    - 多查询策略
267 |    - 上下文压缩
268 | 
269 | ## 📝 文件清单
270 | 
271 | ```
272 | phase2_practical/14_rag_advanced/
273 | ├── main.py                    # 完整教程（6个示例）
274 | ├── test.py                    # 组件测试（无需 API）
275 | ├── README.md                  # 完整文档（7000+ 字）
276 | ├── 模块完成总结.md            # 本文件
277 | ├── data/                      # 测试数据（自动生成）
278 | │   ├── test_docs.txt         # test.py 使用
279 | │   └── langchain_guide.txt   # main.py 使用
280 | └── chroma_db/                # 向量数据库（自动生成）
281 | ```
282 | 
283 | ## ✨ 总结
284 | 
285 | Module 14 成功实现了 RAG 进阶功能，重点是混合检索技术。通过结合向量搜索和 BM25 关键词搜索，显著提升了检索质量和鲁棒性。
286 | 
287 | **核心成就**：
288 | 1. ✅ 完整实现混合检索系统
289 | 2. ✅ 适配 LangChain 1.0 最新语法
290 | 3. ✅ 提供详细文档和最佳实践
291 | 4. ✅ 包含完整测试和对比实验
292 | 5. ✅ 解决 EnsembleRetriever 导入问题
293 | 
294 | **实用价值**：
295 | - 生产级代码可直接使用
296 | - 清晰的权重配置指导
297 | - 完整的问题排查方案
298 | - 详细的性能优化建议
299 | 
300 | 学习者现在具备了构建生产级 RAG 系统的能力！🎉
301 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # ============================================================================
  2 | # Python .gitignore for LangChain 1.0 Study Repository
  3 | # ============================================================================
  4 | 
  5 | # ----------------------------------------------------------------------------
  6 | # 环境变量和敏感信息
  7 | # ----------------------------------------------------------------------------
  8 | .env
  9 | .env.local
 10 | .env.*.local
 11 | *.env
 12 | *.sqlite
 13 | *.db
 14 | CLAUDE.md
 15 | # API Keys 和配置文件
 16 | *_api_key*
 17 | *_secret*
 18 | credentials.json
 19 | secrets.yaml
 20 | 13_rag_basics/data
 21 | # ----------------------------------------------------------------------------
 22 | # Python 相关
 23 | # ----------------------------------------------------------------------------
 24 | 
 25 | # Byte-compiled / optimized / DLL files
 26 | __pycache__/
 27 | *.py[cod]
 28 | *$py.class
 29 | 
 30 | # C extensions
 31 | *.so
 32 | 
 33 | # Distribution / packaging
 34 | .Python
 35 | build/
 36 | develop-eggs/
 37 | dist/
 38 | downloads/
 39 | eggs/
 40 | .eggs/
 41 | lib/
 42 | lib64/
 43 | parts/
 44 | sdist/
 45 | var/
 46 | wheels/
 47 | share/python-wheels/
 48 | *.egg-info/
 49 | .installed.cfg
 50 | *.egg
 51 | MANIFEST
 52 | 
 53 | # PyInstaller
 54 | *.manifest
 55 | *.spec
 56 | 
 57 | # Installer logs
 58 | pip-log.txt
 59 | pip-delete-this-directory.txt
 60 | 
 61 | # Unit test / coverage reports
 62 | htmlcov/
 63 | .tox/
 64 | .nox/
 65 | .coverage
 66 | .coverage.*
 67 | .cache
 68 | nosetests.xml
 69 | coverage.xml
 70 | *.cover
 71 | *.py,cover
 72 | .hypothesis/
 73 | .pytest_cache/
 74 | cover/
 75 | 
 76 | # Translations
 77 | *.mo
 78 | *.pot
 79 | 
 80 | # Django stuff:
 81 | *.log
 82 | local_settings.py
 83 | db.sqlite3
 84 | db.sqlite3-journal
 85 | 
 86 | # Flask stuff:
 87 | instance/
 88 | .webassets-cache
 89 | 
 90 | # Scrapy stuff:
 91 | .scrapy
 92 | 
 93 | # Sphinx documentation
 94 | docs/_build/
 95 | 
 96 | # PyBuilder
 97 | .pybuilder/
 98 | target/
 99 | 
100 | # Jupyter Notebook
101 | .ipynb_checkpoints
102 | *.ipynb_checkpoints/
103 | 
104 | # IPython
105 | profile_default/
106 | ipython_config.py
107 | 
108 | # pyenv
109 | .python-version
110 | 
111 | # pipenv
112 | Pipfile.lock
113 | 
114 | # poetry
115 | poetry.lock
116 | 
117 | # pdm
118 | .pdm.toml
119 | 
120 | # PEP 582
121 | __pypackages__/
122 | 
123 | # Celery stuff
124 | celerybeat-schedule
125 | celerybeat.pid
126 | 
127 | # SageMath parsed files
128 | *.sage.py
129 | 
130 | # Environments
131 | .venv
132 | env/
133 | venv/
134 | ENV/
135 | env.bak/
136 | venv.bak/
137 | 
138 | # Spyder project settings
139 | .spyderproject
140 | .spyproject
141 | 
142 | # Rope project settings
143 | .ropeproject
144 | 
145 | # mkdocs documentation
146 | /site
147 | 
148 | # mypy
149 | .mypy_cache/
150 | .dmypy.json
151 | dmypy.json
152 | 
153 | # Pyre type checker
154 | .pyre/
155 | 
156 | # pytype static type analyzer
157 | .pytype/
158 | 
159 | # Cython debug symbols
160 | cython_debug/
161 | 
162 | # ----------------------------------------------------------------------------
163 | # IDE 和编辑器
164 | # ----------------------------------------------------------------------------
165 | 
166 | # VS Code
167 | .vscode/
168 | *.code-workspace
169 | 
170 | # PyCharm
171 | .idea/
172 | *.iml
173 | *.iws
174 | *.ipr
175 | 
176 | # Sublime Text
177 | *.sublime-project
178 | *.sublime-workspace
179 | 
180 | # Vim
181 | [._]*.s[a-v][a-z]
182 | [._]*.sw[a-p]
183 | [._]s[a-rt-v][a-z]
184 | [._]ss[a-gi-z]
185 | [._]sw[a-p]
186 | 
187 | # Emacs
188 | *~
189 | \#*\#
190 | .\#*
191 | 
192 | # ----------------------------------------------------------------------------
193 | # 操作系统
194 | # ----------------------------------------------------------------------------
195 | 
196 | # macOS
197 | .DS_Store
198 | .AppleDouble
199 | .LSOverride
200 | ._*
201 | .Spotlight-V100
202 | .Trashes
203 | 
204 | # Windows
205 | Thumbs.db
206 | Thumbs.db:encryptable
207 | ehthumbs.db
208 | ehthumbs_vista.db
209 | *.stackdump
210 | [Dd]esktop.ini
211 | $RECYCLE.BIN/
212 | *.lnk
213 | 
214 | # Linux
215 | .directory
216 | .Trash-*
217 | 
218 | # ----------------------------------------------------------------------------
219 | # LangChain / LangSmith 相关
220 | # ----------------------------------------------------------------------------
221 | 
222 | # LangSmith 本地缓存
223 | .langsmith/
224 | 
225 | # LangGraph 检查点数据库
226 | *.db
227 | *.sqlite
228 | *.sqlite3
229 | checkpoints/
230 | 
231 | # 向量数据库存储
232 | chroma_db/
233 | faiss_index/
234 | *.index
235 | *.faiss
236 | 
237 | # ----------------------------------------------------------------------------
238 | # 项目特定
239 | # ----------------------------------------------------------------------------
240 | 
241 | # 数据文件
242 | data/raw/
243 | data/processed/
244 | *.csv
245 | *.xlsx
246 | *.json
247 | *.jsonl
248 | *.parquet
249 | 
250 | # 模型文件
251 | models/
252 | *.pkl
253 | *.pickle
254 | *.h5
255 | *.pt
256 | *.pth
257 | *.onnx
258 | *.ckpt
259 | 
260 | # 日志文件
261 | logs/
262 | *.log
263 | 
264 | # 临时文件
265 | tmp/
266 | temp/
267 | *.tmp
268 | 
269 | # 输出文件
270 | outputs/
271 | results/
272 | *.output
273 | 
274 | # 上传文件
275 | uploads/
276 | 
277 | # 下载文件
278 | downloads/
279 | 
280 | # 缓存
281 | cache/
282 | .cache/
283 | 
284 | # ----------------------------------------------------------------------------
285 | # 文档和笔记（如果不想提交个人笔记）
286 | # ----------------------------------------------------------------------------
287 | 
288 | # 取消注释以下行来忽略个人学习笔记
289 | # docs/learning_notes/
290 | # notebooks/exploration/
291 | 
292 | # ----------------------------------------------------------------------------
293 | # 测试相关
294 | # ----------------------------------------------------------------------------
295 | 
296 | # 测试生成的文件
297 | test_outputs/
298 | test_results/
299 | 
300 | # ----------------------------------------------------------------------------
301 | # 依赖和包管理
302 | # ----------------------------------------------------------------------------
303 | 
304 | # 取消注释以下行来忽略锁文件（如果团队使用不同的包管理器）
305 | # poetry.lock
306 | # Pipfile.lock
307 | # package-lock.json
308 | # yarn.lock
309 | 
310 | # ----------------------------------------------------------------------------
311 | # 其他
312 | # ----------------------------------------------------------------------------
313 | 
314 | # 备份文件
315 | *.bak
316 | *.backup
317 | *.old
318 | 
319 | # 压缩文件
320 | *.zip
321 | *.tar.gz
322 | *.rar
323 | *.7z
324 | 
325 | # 大文件（如果使用 Git LFS）
326 | *.bin
327 | *.dat
328 | 
329 | # Node modules（如果项目中有 JavaScript 工具）
330 | node_modules/
331 | 
332 | # ----------------------------------------------------------------------------
333 | # 保留的示例文件（取消 ignore）
334 | # ----------------------------------------------------------------------------
335 | 
336 | # 确保示例文件不被忽略
337 | !.env.example
338 | !**/samples/*.png
339 | !**/samples/*.jpg
340 | !**/samples/*.pdf
341 | !**/examples/*.py
342 | 


--------------------------------------------------------------------------------
/docs/temp.md:
--------------------------------------------------------------------------------
  1 | 示例 6：RAG 问答 - 使用检索工具
  2 | ======================================================================
  3 | 
  4 | 问题: LangChain 有哪些核心组件？
  5 | 回答: LangChain 的核心组件包括 Models、Prompts、Chains、Agents 和 Memory。这些组件为构建和应用大型语言模型（LLM）提供了基础，能够支持诸如 RAG（检索增强生成）等高级功能。通过这些组件，开发者可以更好地利用 LLM 的能力，实现更智能和更高效的自然语言处理应用。
  6 | ----------------------------------------------------------------------
  7 | 
  8 | 问题: RAG 是什么？
  9 | 回答: RAG 是 Retrieval-Augmented Generation 的缩写，它是一种结合了检索和生成的技术，让语言模型能够访问外部知识库。它是 LangChain 框架中的一个核心应用场景，用于构建大型语言模型（LLM）应用。
 10 | ----------------------------------------------------------------------
 11 | 
 12 | 问题: LangChain 1.0 有什么改进？
 13 | 
 14 | 错误: Error code: 400 - {'error': {'message': "Failed to call a function. Please adjust your prompt. See 'failed_generation' for more details.", 'type': 'invalid_request_error', 'code': 'tool_use_failed', 'failed_generation': '<function=search_knowledge_base {"query": "LangChain 1.0 \\u6539\\u9769"} </function>'}} 
 15 | Traceback (most recent call last):
 16 |   File "c:\Users\wangy\Desktop\temp\langchain_v1_study\phase2_practical\13_rag_basics\main.py", line 439, in main
 17 |     example_6_rag_qa(vectorstore)
 18 |     ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^
 19 |   File "c:\Users\wangy\Desktop\temp\langchain_v1_study\phase2_practical\13_rag_basics\main.py", line 398, in example_6_rag_qa
 20 |     response = agent.invoke({"messages": [{"role": "user", "content": question}]})
 21 |   File "C:\Users\wangy\Desktop\temp\langchain_v1_study\venv\Lib\site-packages\langgraph\pregel\main.py", line 3094, in invoke
 22 |     for chunk in self.stream(
 23 |                  ~~~~~~~~~~~^
 24 |         input,
 25 |         ^^^^^^
 26 |     ...<10 lines>...
 27 |         **kwargs,
 28 |         ^^^^^^^^^
 29 |     ):
 30 |     ^
 31 |   File "C:\Users\wangy\Desktop\temp\langchain_v1_study\venv\Lib\site-packages\langgraph\pregel\main.py", line 2679, in stream
 32 |     for _ in runner.tick(
 33 |              ~~~~~~~~~~~^
 34 |         [t for t in loop.tasks.values() if not t.writes],
 35 |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 36 |     ...<2 lines>...
 37 |         schedule_task=loop.accept_push,
 38 |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 39 |     ):
 40 |     ^
 41 |   File "C:\Users\wangy\Desktop\temp\langchain_v1_study\venv\Lib\site-packages\langgraph\pregel\_runner.py", line 167, in tick
 42 |     run_with_retry(
 43 |     ~~~~~~~~~~~~~~^
 44 |         t,
 45 |         ^^
 46 |     ...<10 lines>...
 47 |         },
 48 |         ^^
 49 |     )
 50 |     ^
 51 |   File "C:\Users\wangy\Desktop\temp\langchain_v1_study\venv\Lib\site-packages\langgraph\pregel\_retry.py", line 42, in run_with_retry
 52 |     return task.proc.invoke(task.input, config)
 53 |            ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^
 54 |   File "C:\Users\wangy\Desktop\temp\langchain_v1_study\venv\Lib\site-packages\langgraph\_internal\_runnable.py", line 656, in invoke
 55 |     input = context.run(step.invoke, input, config, **kwargs)
 56 |   File "C:\Users\wangy\Desktop\temp\langchain_v1_study\venv\Lib\site-packages\langgraph\_internal\_runnable.py", line 400, in invoke
 57 |     ret = self.func(*args, **kwargs)
 58 |   File "C:\Users\wangy\Desktop\temp\langchain_v1_study\venv\Lib\site-packages\langchain\agents\factory.py", line 1065, in model_node
 59 |     response = _execute_model_sync(request)
 60 |   File "C:\Users\wangy\Desktop\temp\langchain_v1_study\venv\Lib\site-packages\langchain\agents\factory.py", line 1038, in _execute_model_sync
 61 |     output = model_.invoke(messages)
 62 |   File "C:\Users\wangy\Desktop\temp\langchain_v1_study\venv\Lib\site-packages\langchain_core\runnables\base.py", line 5489, in invoke
 63 |     return self.bound.invoke(
 64 |            ~~~~~~~~~~~~~~~~~^
 65 |         input,
 66 |         ^^^^^^
 67 |         self._merge_configs(config),
 68 |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 69 |         **{**self.kwargs, **kwargs},
 70 |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 71 |     )
 72 |     ^
 73 |   File "C:\Users\wangy\Desktop\temp\langchain_v1_study\venv\Lib\site-packages\langchain_core\language_models\chat_models.py", line 382, in invoke
 74 |     self.generate_prompt(
 75 |     ~~~~~~~~~~~~~~~~~~~~^
 76 |         [self._convert_input(input)],
 77 |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 78 |     ...<6 lines>...
 79 |         **kwargs,
 80 |         ^^^^^^^^^
 81 |     ).generations[0][0],
 82 |     ^
 83 |   File "C:\Users\wangy\Desktop\temp\langchain_v1_study\venv\Lib\site-packages\langchain_core\language_models\chat_models.py", line 1091, in generate_prompt   
 84 |     return self.generate(prompt_messages, stop=stop, callbacks=callbacks, **kwargs)
 85 |            ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 86 |   File "C:\Users\wangy\Desktop\temp\langchain_v1_study\venv\Lib\site-packages\langchain_core\language_models\chat_models.py", line 906, in generate
 87 |     self._generate_with_cache(
 88 |     ~~~~~~~~~~~~~~~~~~~~~~~~~^
 89 |         m,
 90 |         ^^
 91 |     ...<2 lines>...
 92 |         **kwargs,
 93 |         ^^^^^^^^^
 94 |     )
 95 |     ^
 96 |   File "C:\Users\wangy\Desktop\temp\langchain_v1_study\venv\Lib\site-packages\langchain_core\language_models\chat_models.py", line 1195, in _generate_with_cache
 97 |     result = self._generate(
 98 |         messages, stop=stop, run_manager=run_manager, **kwargs
 99 |     )
100 |   File "C:\Users\wangy\Desktop\temp\langchain_v1_study\venv\Lib\site-packages\langchain_groq\chat_models.py", line 544, in _generate
101 |     response = self.client.create(messages=message_dicts, **params)
102 |   File "C:\Users\wangy\Desktop\temp\langchain_v1_study\venv\Lib\site-packages\groq\resources\chat\completions.py", line 464, in create
103 |     return self._post(
104 |            ~~~~~~~~~~^
105 |         "/openai/v1/chat/completions",
106 |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
107 |     ...<45 lines>...
108 |         stream_cls=Stream[ChatCompletionChunk],
109 |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
110 |     )
111 |     ^
112 |   File "C:\Users\wangy\Desktop\temp\langchain_v1_study\venv\Lib\site-packages\groq\_base_client.py", line 1242, in post
113 |     return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
114 |                            ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
115 |   File "C:\Users\wangy\Desktop\temp\langchain_v1_study\venv\Lib\site-packages\groq\_base_client.py", line 1044, in request
116 |     raise self._make_status_error_from_response(err.response) from None        
117 | groq.BadRequestError: Error code: 400 - {'error': {'message': "Failed to call a function. Please adjust your prompt. See 'failed_generation' for more details.", 'type': 'invalid_request_error', 'code': 'tool_use_failed', 'failed_generation': '<function=search_knowledge_base {"query": "LangChain 1.0 \\u6539\\u9769"} </function>'}}
118 | During task with name 'model' and id '54b7191c-5f00-8dbe-3594-6c5c2c884b06'  


--------------------------------------------------------------------------------
/phase2_practical/09_checkpointing/README.md:
--------------------------------------------------------------------------------
  1 | # 09 - Checkpointing (检查点持久化)
  2 | 
  3 | ## 核心概念
  4 | 
  5 | **Checkpointing = 将对话状态持久化到数据库**
  6 | 
  7 | - `InMemorySaver` → 内存中（程序退出即丢失）
  8 | - `SqliteSaver` → SQLite 数据库（持久化存储）
  9 | 
 10 | ## 基本用法
 11 | 
 12 | ### InMemorySaver 的限制
 13 | 
 14 | ```python
 15 | from langgraph.checkpoint.memory import InMemorySaver
 16 | 
 17 | agent = create_agent(
 18 |     model=model,
 19 |     tools=[],
 20 |     checkpointer=InMemorySaver()
 21 | )
 22 | 
 23 | # 限制：
 24 | # ❌ 程序重启后丢失
 25 | # ❌ 无法跨进程共享
 26 | # ❌ 不适合生产环境
 27 | ```
 28 | 
 29 | ### SqliteSaver（推荐生产使用）
 30 | 
 31 | ```python
 32 | from langgraph.checkpoint.sqlite import SqliteSaver
 33 | 
 34 | # 创建持久化 checkpointer（使用 with 语句）
 35 | with SqliteSaver.from_conn_string("checkpoints.sqlite") as checkpointer:
 36 |     agent = create_agent(
 37 |         model=model,
 38 |         tools=[],
 39 |         checkpointer=checkpointer  # 使用 SQLite
 40 |     )
 41 | 
 42 |     config = {"configurable": {"thread_id": "user_123"}}
 43 | 
 44 |     # 第一次运行
 45 |     agent.invoke({"messages": [...]}, config)
 46 | 
 47 | # 程序重启后，对话仍然保留！
 48 | with SqliteSaver.from_conn_string("checkpoints.sqlite") as checkpointer:
 49 |     agent = create_agent(model=model, checkpointer=checkpointer)
 50 |     agent.invoke({"messages": [...]}, config)
 51 | ```
 52 | 
 53 | **重要：** `SqliteSaver.from_conn_string()` 返回上下文管理器，必须使用 `with` 语句！
 54 | 
 55 | ## 工作原理
 56 | 
 57 | ### 数据保存位置
 58 | 
 59 | ```
 60 | InMemorySaver:
 61 |     对话历史 → 内存（变量）→ 程序退出即丢失
 62 | 
 63 | SqliteSaver:
 64 |     对话历史 → SQLite 文件 → 持久化存储
 65 |         ↓
 66 |     checkpoints.sqlite
 67 |     ├── thread_id: user_123
 68 |     │   ├── checkpoint_1
 69 |     │   ├── checkpoint_2
 70 |     │   └── checkpoint_3
 71 |     └── thread_id: user_456
 72 |         ├── checkpoint_1
 73 |         └── checkpoint_2
 74 | ```
 75 | 
 76 | ### 跨进程访问
 77 | 
 78 | ```python
 79 | # 进程 A（Web 服务器）
 80 | with SqliteSaver.from_conn_string("shared.sqlite") as checkpointer:
 81 |     agent_a = create_agent(model=model, checkpointer=checkpointer)
 82 |     agent_a.invoke({...}, config={"configurable": {"thread_id": "user_1"}})
 83 | 
 84 | # 进程 B（后台任务）
 85 | with SqliteSaver.from_conn_string("shared.sqlite") as checkpointer:
 86 |     agent_b = create_agent(model=model, checkpointer=checkpointer)
 87 |     # 可以访问进程 A 创建的对话
 88 |     agent_b.invoke({...}, config={"configurable": {"thread_id": "user_1"}})
 89 | ```
 90 | 
 91 | ## 参数说明
 92 | 
 93 | ### SqliteSaver.from_conn_string()
 94 | 
 95 | | 参数 | 说明 | 示例 |
 96 | |-----|------|------|
 97 | | `conn_string` | 数据库文件路径（不要加 `sqlite:///` 前缀） | `"checkpoints.sqlite"` |
 98 | 
 99 | ### 路径格式
100 | 
101 | ```python
102 | # 相对路径（当前目录） - 推荐
103 | with SqliteSaver.from_conn_string("checkpoints.sqlite") as checkpointer:
104 |     agent = create_agent(model=model, checkpointer=checkpointer)
105 | 
106 | # 绝对路径（生产环境）
107 | with SqliteSaver.from_conn_string("C:/data/checkpoints.sqlite") as checkpointer:
108 |     agent = create_agent(model=model, checkpointer=checkpointer)
109 | 
110 | # 内存数据库（测试用）
111 | with SqliteSaver.from_conn_string(":memory:") as checkpointer:
112 |     agent = create_agent(model=model, checkpointer=checkpointer)
113 | ```
114 | 
115 | **重要：**
116 | - ✅ 直接传文件路径，不要加 `sqlite:///` 前缀
117 | - ✅ 相对路径会在当前目录创建数据库
118 | - ✅ Windows 路径使用正斜杠 `/` 或双反斜杠 `\\`
119 | 
120 | ## 对比 InMemorySaver
121 | 
122 | | 特性 | InMemorySaver | SqliteSaver |
123 | |-----|--------------|-------------|
124 | | **持久化** | ❌ 程序退出即丢失 | ✅ 持久化到文件 |
125 | | **跨进程** | ❌ 无法共享 | ✅ 可以共享 |
126 | | **性能** | ⚡ 快（内存） | 🐢 慢一点（磁盘 I/O）|
127 | | **适用** | 开发、测试 | 生产环境 |
128 | 
129 | ## 实际应用
130 | 
131 | ### 客服系统
132 | 
133 | ```python
134 | # 客户今天上午咨询
135 | with SqliteSaver.from_conn_string("customer_service.sqlite") as checkpointer:
136 |     agent = create_agent(model=model, tools=[查询订单], checkpointer=checkpointer)
137 | 
138 |     config = {"configurable": {"thread_id": "customer_zhang"}}
139 |     agent.invoke({"messages": [{"role": "user", "content": "订单 12345 在哪？"}]}, config)
140 | 
141 | # 下午客户再次咨询（即使服务重启）
142 | with SqliteSaver.from_conn_string("customer_service.sqlite") as checkpointer:
143 |     agent = create_agent(model=model, tools=[查询订单], checkpointer=checkpointer)
144 |     agent.invoke({"messages": [{"role": "user", "content": "到了吗？"}]}, config)
145 |     # Agent 记得上午查询的订单号！
146 | ```
147 | 
148 | ### 多用户聊天
149 | 
150 | ```python
151 | with SqliteSaver.from_conn_string("chat.sqlite") as checkpointer:
152 |     agent = create_agent(model=model, checkpointer=checkpointer)
153 | 
154 |     # 用户 A
155 |     agent.invoke({...}, config={"configurable": {"thread_id": "user_alice"}})
156 | 
157 |     # 用户 B
158 |     agent.invoke({...}, config={"configurable": {"thread_id": "user_bob"}})
159 | 
160 |     # 所有用户的对话都持久化在 chat.sqlite 中
161 | ```
162 | 
163 | ## 常见问题
164 | 
165 | ### 1. 数据库文件在哪？
166 | 
167 | ```python
168 | # 相对路径 → 当前工作目录
169 | SqliteSaver.from_conn_string("checkpoints.sqlite")
170 | # 文件位置：当前目录/checkpoints.sqlite
171 | 
172 | # 绝对路径 → 指定位置
173 | SqliteSaver.from_conn_string("C:/data/checkpoints.sqlite")
174 | # 文件位置：C:/data/checkpoints.sqlite
175 | ```
176 | 
177 | ### 2. 如何清空某个用户的历史？
178 | 
179 | 目前需要手动操作数据库：
180 | 
181 | ```python
182 | import sqlite3
183 | 
184 | conn = sqlite3.connect("checkpoints.sqlite")
185 | cursor = conn.cursor()
186 | 
187 | # 删除特定 thread_id 的记录
188 | cursor.execute("DELETE FROM checkpoints WHERE thread_id = ?", ("user_123",))
189 | conn.commit()
190 | conn.close()
191 | ```
192 | 
193 | ### 3. 数据库会无限增长吗？
194 | 
195 | 会！需要定期清理：
196 | 
197 | **策略：**
198 | - 定期删除旧对话（如 30 天前）
199 | - 限制每个 thread 的 checkpoint 数量
200 | - 定期备份和归档
201 | 
202 | ### 4. 性能影响？
203 | 
204 | - SQLite 比内存慢，但影响不大
205 | - 适合中小型应用（< 10000 并发用户）
206 | - 大规模应用考虑 PostgreSQL（LangGraph 也支持）
207 | 
208 | ## 最佳实践
209 | 
210 | ```python
211 | # 1. 生产环境使用绝对路径 + with 语句
212 | with SqliteSaver.from_conn_string("C:/production/data/checkpoints.sqlite") as checkpointer:
213 |     agent = create_agent(model=model, checkpointer=checkpointer)
214 | 
215 | # 2. 开发环境使用相对路径
216 | with SqliteSaver.from_conn_string("dev_checkpoints.sqlite") as checkpointer:
217 |     agent = create_agent(model=model, checkpointer=checkpointer)
218 | 
219 | # 3. 测试环境使用内存数据库
220 | with SqliteSaver.from_conn_string(":memory:") as checkpointer:
221 |     agent = create_agent(model=model, checkpointer=checkpointer)
222 | 
223 | # 4. 定期备份数据库文件
224 | # 使用系统任务定期复制 checkpoints.sqlite
225 | 
226 | # 5. 监控数据库大小
227 | import os
228 | db_size = os.path.getsize("checkpoints.sqlite")
229 | print(f"数据库大小: {db_size / 1024 / 1024:.2f} MB")
230 | ```
231 | 
232 | ## 核心要点
233 | 
234 | 1. **InMemorySaver**：内存存储，程序退出即丢失
235 | 2. **SqliteSaver**：持久化到 SQLite 文件
236 | 3. **创建方式**：`with SqliteSaver.from_conn_string("checkpoints.sqlite") as checkpointer:`
237 | 4. **路径格式**：直接传文件路径，不要加 `sqlite:///` 前缀
238 | 5. **跨进程**：多个进程可访问同一数据库
239 | 6. **生产推荐**：使用 SqliteSaver + with 语句
240 | 
241 | ## 下一步
242 | 
243 | **10_middleware_basics** - 学习如何创建自定义中间件
244 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
  1 | # ============================================================================
  2 | # LangChain 1.0 学习仓库 - Python 依赖文件
  3 | # ============================================================================
  4 | # 使用说明：
  5 | #   pip install -r requirements.txt
  6 | # 或使用 uv（更快）：
  7 | #   uv pip install -r requirements.txt
  8 | # ============================================================================
  9 | 
 10 | # ----------------------------------------------------------------------------
 11 | # LangChain 核心包（1.0 版本）
 12 | # ----------------------------------------------------------------------------
 13 | 
 14 | # LangChain 核心库
 15 | langchain>=1.0.0,<2.0.0
 16 | langchain-core>=1.0.0,<2.0.0
 17 | 
 18 | # LangChain Classic - 包含部分移至经典包的组件（如 EnsembleRetriever）
 19 | langchain-classic>=1.0.0
 20 | 
 21 | # LangGraph - Agent 运行时
 22 | langgraph>=1.0.0,<2.0.0
 23 | 
 24 | # LangSmith - 可观测性平台（可选但推荐）
 25 | langsmith>=0.2.0
 26 | 
 27 | # ----------------------------------------------------------------------------
 28 | # 模型集成包（按需选择）
 29 | # ----------------------------------------------------------------------------
 30 | 
 31 | # ⭐⭐⭐ Groq 集成（推荐！免费且超快）
 32 | langchain-groq>=0.2.0
 33 | 
 34 | # OpenAI 集成
 35 | langchain-openai>=0.2.0
 36 | 
 37 | # Anthropic 集成（Claude）
 38 | langchain-anthropic>=0.3.0
 39 | 
 40 | # Google Gemini 集成（免费额度慷慨）
 41 | langchain-google-genai>=2.0.0
 42 | 
 43 | # Cohere 集成（可选）
 44 | # langchain-cohere>=0.3.0
 45 | 
 46 | # HuggingFace 集成（用于 Embeddings - Module 13, 14）
 47 | langchain-huggingface>=0.1.0
 48 | 
 49 | # HuggingFace Transformers 和 Embeddings
 50 | sentence-transformers>=3.0.0
 51 | transformers>=4.40.0
 52 | torch>=2.0.0  # PyTorch（用于 HuggingFace 模型）
 53 | 
 54 | # ----------------------------------------------------------------------------
 55 | # 社区集成和工具
 56 | # ----------------------------------------------------------------------------
 57 | 
 58 | # 社区贡献的集成
 59 | langchain-community>=0.3.0
 60 | 
 61 | # 文本分割器和文档加载器
 62 | langchain-text-splitters>=0.3.0
 63 | 
 64 | # ----------------------------------------------------------------------------
 65 | # 向量数据库（用于 RAG）
 66 | # ----------------------------------------------------------------------------
 67 | 
 68 | # Chroma - 轻量级向量数据库（推荐用于学习）
 69 | chromadb>=0.5.0
 70 | 
 71 | # FAISS - Facebook AI 相似度搜索
 72 | faiss-cpu>=1.8.0
 73 | # 如果有 GPU，使用：faiss-gpu>=1.8.0
 74 | 
 75 | # Pinecone - 云向量数据库（需要注册）
 76 | langchain-pinecone>=0.2.0
 77 | pinecone-client>=6.0.0
 78 | 
 79 | # Weaviate（可选）
 80 | # weaviate-client>=4.0.0
 81 | 
 82 | # ----------------------------------------------------------------------------
 83 | # 检索增强（用于 RAG Advanced - Module 14）
 84 | # ----------------------------------------------------------------------------
 85 | 
 86 | # BM25 检索算法（用于混合搜索）
 87 | rank_bm25>=0.2.2
 88 | 
 89 | # ----------------------------------------------------------------------------
 90 | # 文档处理
 91 | # ----------------------------------------------------------------------------
 92 | 
 93 | # PDF 处理
 94 | pypdf>=5.0.0
 95 | # 或使用 PyMuPDF（更快）
 96 | # pymupdf>=1.24.0
 97 | 
 98 | # Word 文档处理
 99 | python-docx>=1.1.0
100 | 
101 | # Excel 处理
102 | openpyxl>=3.1.0
103 | 
104 | # Markdown 处理
105 | markdown>=3.6
106 | 
107 | # HTML 处理
108 | beautifulsoup4>=4.12.0
109 | lxml>=5.0.0
110 | 
111 | # ----------------------------------------------------------------------------
112 | # 数据验证和模型
113 | # ----------------------------------------------------------------------------
114 | 
115 | # Pydantic - 数据验证（LangChain 依赖）
116 | pydantic>=2.0.0,<3.0.0
117 | pydantic-settings>=2.0.0
118 | 
119 | # ----------------------------------------------------------------------------
120 | # 环境变量管理
121 | # ----------------------------------------------------------------------------
122 | 
123 | # 环境变量加载
124 | python-dotenv>=1.0.0
125 | 
126 | # ----------------------------------------------------------------------------
127 | # 网络请求和 API
128 | # ----------------------------------------------------------------------------
129 | 
130 | # HTTP 客户端
131 | httpx>=0.27.0
132 | requests>=2.32.0
133 | 
134 | # 异步支持
135 | aiohttp>=3.10.0
136 | 
137 | # ----------------------------------------------------------------------------
138 | # 数据库（用于 Checkpointing）
139 | # ----------------------------------------------------------------------------
140 | 
141 | # SQLite Checkpointer - LangGraph 持久化（必需）
142 | langgraph-checkpoint-sqlite>=3.0.0
143 | 
144 | # PostgreSQL（可选）
145 | # psycopg2-binary>=2.9.0
146 | # 或使用异步版本
147 | # asyncpg>=0.29.0
148 | 
149 | # Redis（可选，用于缓存）
150 | # redis>=5.0.0
151 | 
152 | # ----------------------------------------------------------------------------
153 | # 开发和测试工具
154 | # ----------------------------------------------------------------------------
155 | 
156 | # Jupyter 笔记本
157 | jupyter>=1.1.0
158 | ipykernel>=6.29.0
159 | notebook>=7.2.0
160 | 
161 | # 测试框架
162 | pytest>=8.0.0
163 | pytest-asyncio>=0.23.0
164 | pytest-cov>=5.0.0
165 | 
166 | # 代码格式化
167 | black>=24.0.0
168 | isort>=5.13.0
169 | 
170 | # 代码检查
171 | flake8>=7.0.0
172 | pylint>=3.0.0
173 | 
174 | # 类型检查
175 | mypy>=1.10.0
176 | 
177 | # ----------------------------------------------------------------------------
178 | # 实用工具
179 | # ----------------------------------------------------------------------------
180 | 
181 | # 进度条
182 | tqdm>=4.66.0
183 | 
184 | # 日期时间处理
185 | python-dateutil>=2.9.0
186 | 
187 | # JSON 处理增强
188 | orjson>=3.10.0
189 | 
190 | # YAML 处理
191 | pyyaml>=6.0.0
192 | 
193 | # 正则表达式增强
194 | regex>=2024.0.0
195 | 
196 | # ----------------------------------------------------------------------------
197 | # 可视化（可选）
198 | # ----------------------------------------------------------------------------
199 | 
200 | # 图表绘制
201 | # matplotlib>=3.9.0
202 | # seaborn>=0.13.0
203 | 
204 | # LangGraph 可视化
205 | # graphviz>=0.20.0
206 | 
207 | # ----------------------------------------------------------------------------
208 | # 搜索和检索工具（可选）
209 | # ----------------------------------------------------------------------------
210 | 
211 | # Tavily 搜索 API
212 | # tavily-python>=0.5.0
213 | 
214 | # Google 搜索
215 | # google-search-results>=2.4.0
216 | 
217 | # DuckDuckGo 搜索
218 | # duckduckgo-search>=6.0.0
219 | 
220 | # Wikipedia
221 | # wikipedia>=1.4.0
222 | 
223 | # arXiv 论文搜索
224 | # arxiv>=2.1.0
225 | 
226 | # ----------------------------------------------------------------------------
227 | # 性能优化（可选）
228 | # ----------------------------------------------------------------------------
229 | 
230 | # 更快的 JSON 解析
231 | # ujson>=5.10.0
232 | 
233 | # 更快的 YAML 解析
234 | # ruamel.yaml>=0.18.0
235 | 
236 | # ----------------------------------------------------------------------------
237 | # 注意事项
238 | # ----------------------------------------------------------------------------
239 | # 1. 版本号使用语义化版本控制
240 | # 2. 核心包固定主版本号，避免破坏性更新
241 | # 3. 可选包已注释，按需取消注释
242 | # 4. 定期更新依赖以获取安全补丁和新功能
243 | # 5. 在生产环境中建议锁定具体版本号
244 | # ============================================================================
245 | 
246 | # 如果需要锁定所有依赖的具体版本，运行：
247 | # pip freeze > requirements.lock
248 | 


--------------------------------------------------------------------------------
/docs/FREE_API_GUIDE.md:
--------------------------------------------------------------------------------
  1 | # 免费 LLM API 使用指南（2025）
  2 | 
  3 | 本指南帮助您获取和使用**完全免费**或**低成本**的 LLM API，用于 LangChain 学习。
  4 | 
  5 | ## 🌟 推荐排行（按免费程度）
  6 | 
  7 | ### ⭐⭐⭐ 1. Groq API（强烈推荐！）
  8 | 
  9 | **为什么选择 Groq？**
 10 | - ✅ **完全免费**，无需信用卡
 11 | - ✅ **速度极快**（使用专用 LPU 硬件，比 GPU 快 10倍+）
 12 | - ✅ **LangChain 原生支持**（官方 `langchain-groq` 包）
 13 | - ✅ 支持最新开源模型（Llama 3.3 70B, Mixtral 8x7B 等）
 14 | 
 15 | **获取步骤：**
 16 | 1. 访问：https://console.groq.com/keys
 17 | 2. 使用 Google/GitHub 账号登录（无需信用卡）
 18 | 3. 点击 "Create API Key"
 19 | 4. 复制 API Key 到 `.env` 文件
 20 | 
 21 | **在 LangChain 中使用：**
 22 | ```bash
 23 | pip install langchain-groq
 24 | ```
 25 | 
 26 | ```python
 27 | from langchain_groq import ChatGroq
 28 | 
 29 | model = ChatGroq(
 30 |     model="llama-3.3-70b-versatile",  # 或 mixtral-8x7b-32768
 31 |     temperature=0.7,
 32 |     groq_api_key="your_groq_api_key"
 33 | )
 34 | 
 35 | response = model.invoke("Hello, how are you?")
 36 | print(response.content)
 37 | ```
 38 | 
 39 | **限制：**
 40 | - 每分钟约 30 次请求（对学习足够）
 41 | - 每天约 14,400 次请求
 42 | 
 43 | ---
 44 | 
 45 | ### ⭐⭐⭐ 2. Google Gemini API（免费额度慷慨）
 46 | 
 47 | **为什么选择 Gemini？**
 48 | - ✅ 免费额度非常慷慨
 49 | - ✅ 性能优秀（Gemini 1.5 Flash 很快）
 50 | - ✅ 支持多模态（文本、图像、视频）
 51 | - ✅ Google 官方支持
 52 | 
 53 | **免费额度：**
 54 | - 每分钟 15 次请求
 55 | - 每天 1,500 次请求
 56 | - Gemini 1.5 Flash 和 Pro 都免费
 57 | 
 58 | **获取步骤：**
 59 | 1. 访问：https://aistudio.google.com/apikey
 60 | 2. 使用 Google 账号登录
 61 | 3. 点击 "Get API Key" → "Create API Key"
 62 | 4. 复制 API Key
 63 | 
 64 | **在 LangChain 中使用：**
 65 | ```bash
 66 | pip install langchain-google-genai
 67 | ```
 68 | 
 69 | ```python
 70 | from langchain_google_genai import ChatGoogleGenerativeAI
 71 | 
 72 | model = ChatGoogleGenerativeAI(
 73 |     model="gemini-1.5-flash",  # 或 gemini-1.5-pro
 74 |     google_api_key="your_google_api_key"
 75 | )
 76 | 
 77 | response = model.invoke("Explain quantum computing")
 78 | print(response.content)
 79 | ```
 80 | 
 81 | ---
 82 | 
 83 | ### ⭐⭐ 3. DeepSeek API（成本极低）
 84 | 
 85 | **为什么选择 DeepSeek？**
 86 | - ✅ 成本仅为 OpenAI 的 **2%**
 87 | - ✅ 性能接近 GPT-4
 88 | - ✅ 128K 上下文窗口
 89 | - ✅ 中国团队开发，支持中文
 90 | 
 91 | **价格：**
 92 | - 输入：$0.28 / 1M tokens
 93 | - 输出：$0.42 / 1M tokens
 94 | - （输入和输出各 100 万 tokens 的总成本约 $0.70，而 GPT-4 同等用量需要 $30+）
 95 | 
 96 | **获取步骤：**
 97 | 1. 访问：https://platform.deepseek.com/
 98 | 2. 注册账号（需要手机号）
 99 | 3. 新用户通常有免费额度
100 | 4. 获取 API Key
101 | 
102 | **在 LangChain 中使用：**
103 | ```python
104 | from langchain_openai import ChatOpenAI
105 | 
106 | model = ChatOpenAI(
107 |     model="deepseek-chat",
108 |     openai_api_key="your_deepseek_api_key",
109 |     openai_api_base="https://api.deepseek.com"
110 | )
111 | ```
112 | 
113 | ---
114 | 
115 | ### ⭐⭐ 4. Claude API（学生/研究者免费）
116 | 
117 | **免费途径：**
118 | 
119 | #### 方法 1：学生计划（推荐）
120 | - 网址：https://anthropic.com/students
121 | - 条件：在校学生（需要 .edu 邮箱）
122 | - 额度：**$500 免费额度**
123 | - 申请：填写简单表格即可
124 | 
125 | #### 方法 2：研究者计划
126 | - 网址：https://anthropic.com/research
127 | - 条件：从事 AI 安全/对齐研究
128 | - 额度：根据研究需求提供
129 | 
130 | #### 方法 3：云平台赠金
131 | - **AWS Activate**：通过 AWS Bedrock 使用 Claude，可获得 $300-$300,000 额度
132 | - **Google Cloud**：通过 Vertex AI 使用 Claude，新用户 $300 额度
133 | 
134 | **在 LangChain 中使用：**
135 | ```bash
136 | pip install langchain-anthropic
137 | ```
138 | 
139 | ```python
140 | from langchain_anthropic import ChatAnthropic
141 | 
142 | model = ChatAnthropic(
143 |     model="claude-sonnet-4-5-20250929",
144 |     anthropic_api_key="your_anthropic_api_key"
145 | )
146 | ```
147 | 
148 | ---
149 | 
150 | ### ⭐ 5. Together AI（有免费层）
151 | 
152 | **特点：**
153 | - 免费层可用于测试
154 | - 支持多种开源模型
155 | - 价格比 OpenAI 便宜
156 | 
157 | **获取步骤：**
158 | 1. 访问：https://api.together.xyz/
159 | 2. 注册账号
160 | 3. 获取免费额度
161 | 
162 | ---
163 | 
164 | ### ⭐ 6. Hugging Face Inference API（免费但有限制）
165 | 
166 | **特点：**
167 | - 完全免费
168 | - 限制：速度较慢，有请求限制
169 | - 适合学习和实验
170 | 
171 | **获取步骤：**
172 | 1. 访问：https://huggingface.co/settings/tokens
173 | 2. 创建 Access Token
174 | 3. 使用 Inference API
175 | 
176 | ```bash
177 | pip install langchain-huggingface
178 | ```
179 | 
180 | ---
181 | 
182 | ## 💡 推荐组合策略
183 | 
184 | ### 学习阶段建议
185 | 
186 | **阶段一（基础学习）：**
187 | ```
188 | 主要：Groq API（免费 + 快速）
189 | 备用：Google Gemini（免费额度大）
190 | ```
191 | 
192 | **阶段二-三（进阶学习）：**
193 | ```
194 | 主要：Groq API / Gemini
195 | 多模态：Google Gemini（支持图像）
196 | 备用：DeepSeek（低成本）
197 | ```
198 | 
199 | **阶段四（项目实战）：**
200 | ```
201 | 主要：DeepSeek（低成本高性能）
202 | 高质量任务：Claude（如果有学生额度）
203 | 快速任务：Groq
204 | ```
205 | 
206 | ## 📊 成本对比表
207 | 
208 | | API | 1M Input Tokens | 1M Output Tokens | 特点 |
209 | |-----|-----------------|------------------|------|
210 | | **Groq** | **免费** | **免费** | ⚡ 速度极快 |
211 | | **Gemini Flash** | **免费** | **免费** | 🎯 额度大 |
212 | | DeepSeek | $0.28 | $0.42 | 💰 极低成本 |
213 | | Claude Haiku | $0.25 | $1.25 | 🚀 快速便宜 |
214 | | Claude Sonnet | $3.00 | $15.00 | 🧠 高质量 |
215 | | GPT-4o | $2.50 | $10.00 | 🏆 OpenAI |
216 | | GPT-4o mini | $0.15 | $0.60 | 📦 小模型 |
217 | 
218 | ## ⚠️ 注意事项
219 | 
220 | ### 1. API Key 安全
221 | - ❌ 永远不要提交 `.env` 文件到 Git
222 | - ✅ 使用 `.gitignore` 忽略敏感文件
223 | - ✅ 定期轮换 API Keys
224 | - ✅ 设置使用限额避免超支
225 | 
226 | ### 2. 免费额度管理
227 | - 📊 定期检查使用情况
228 | - 🔄 轮换使用不同的免费 API
229 | - 💾 本地缓存响应减少重复请求
230 | - 🎯 开发时使用免费 API，生产时考虑付费
231 | 
232 | ### 3. 速率限制
233 | 大多数免费 API 都有速率限制，注意：
234 | - 添加重试逻辑
235 | - 使用指数退避策略
236 | - 避免并发请求过多
237 | 
238 | ### 4. 学生身份验证
239 | 对于 Claude 学生计划：
240 | - 需要有效的 .edu 邮箱
241 | - 通常需要学生证明
242 | - 额度可能有使用期限
243 | 
244 | ## 🚀 快速开始
245 | 
246 | ### 1. 最简单方案（Groq）
247 | 
248 | ```bash
249 | # 1. 获取 Groq API Key
250 | # 访问：https://console.groq.com/keys
251 | 
252 | # 2. 安装依赖
253 | pip install langchain langchain-groq
254 | 
255 | # 3. 创建 .env 文件
256 | echo "GROQ_API_KEY=your_key_here" > .env
257 | 
258 | # 4. 测试
259 | python phase1_fundamentals/01_hello_langchain/main.py
260 | ```
261 | 
262 | ### 2. 多模型切换
263 | 
264 | 在 `config.py` 中配置：
265 | ```python
266 | import os
267 | from dotenv import load_dotenv
268 | 
269 | load_dotenv()
270 | 
271 | # 根据环境变量自动选择可用的模型
272 | def get_default_model():
273 |     if os.getenv("GROQ_API_KEY"):
274 |         return "groq:llama-3.3-70b-versatile"
275 |     elif os.getenv("GOOGLE_API_KEY"):
276 |         return "google:gemini-1.5-flash"
277 |     elif os.getenv("ANTHROPIC_API_KEY"):
278 |         return "anthropic:claude-sonnet-4-5"
279 |     else:
280 |         raise ValueError("No API key found!")
281 | ```
282 | 
283 | ## 📚 相关资源
284 | 
285 | - Groq 文档：https://console.groq.com/docs
286 | - Gemini API 文档：https://ai.google.dev/docs
287 | - DeepSeek 文档：https://platform.deepseek.com/docs
288 | - Claude API 文档：https://docs.anthropic.com/
289 | 
290 | ## ❓ 常见问题
291 | 
292 | **Q: 我是学生，最推荐哪个？**
293 | A: 1) Groq（完全免费） 2) 申请 Claude 学生计划（$500 额度） 3) Google Gemini
294 | 
295 | **Q: Groq 和 Gemini 哪个更好？**
296 | A: Groq 速度极快但模型选择少；Gemini 支持多模态且额度大。建议两个都用。
297 | 
298 | **Q: DeepSeek 需要付费吗？**
299 | A: 需要，但成本极低。$5 可以用很久（相当于 OpenAI 的 $250）
300 | 
301 | **Q: 免费 API 有哪些限制？**
302 | A: 主要是速率限制（每分钟请求数）和每日配额。对学习来说完全够用。
303 | 
304 | **Q: 可以同时使用多个 API 吗？**
305 | A: 可以！建议配置多个 API Key，轮换使用。
306 | 
307 | ---
308 | 
309 | 💡 **开始使用建议：先从 Groq 开始（完全免费），然后逐步尝试其他 API！**
310 | 


--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
  1 | # ============================================================================
  2 | # LangChain 1.0 学习仓库 - 环境变量配置模板
  3 | # ============================================================================
  4 | # 使用说明：
  5 | # 1. 复制此文件并重命名为 .env
  6 | # 2. 填入你的实际 API Keys
  7 | # 3. .env 文件已在 .gitignore 中，不会被提交到 Git
  8 | # ============================================================================
  9 | #
 10 | # 💰 免费 API 推荐（2025年）：
 11 | # ⭐⭐⭐ Groq API - 完全免费，超快速度，LangChain 原生支持
 12 | # ⭐⭐⭐ Google Gemini - 免费额度慷慨（每分钟15次请求）
 13 | # ⭐⭐ DeepSeek - 成本极低（仅为 OpenAI 的 2%）
 14 | # ⭐⭐ Claude - 学生/研究者免费 $500 额度
 15 | # ⭐ Together AI、Hugging Face - 免费层可用
 16 | # ============================================================================
 17 | 
 18 | # ----------------------------------------------------------------------------
 19 | # ⭐⭐⭐ Groq API 配置（强烈推荐！完全免费 + 超快速度）
 20 | # ----------------------------------------------------------------------------
 21 | # 获取地址: https://console.groq.com/keys
 22 | # 特点:
 23 | #   - 完全免费，无需信用卡
 24 | #   - 速度极快（使用 LPU 硬件加速）
 25 | #   - 原生支持 LangChain（langchain-groq 包）
 26 | #   - 支持 Llama 3.3 70B, Mixtral 等开源模型
 27 | # 安装: pip install langchain-groq
 28 | GROQ_API_KEY=your_groq_api_key_here
 29 | 
 30 | # ----------------------------------------------------------------------------
 31 | # OpenAI API 配置
 32 | # ----------------------------------------------------------------------------
 33 | # 获取地址: https://platform.openai.com/api-keys
 34 | # 用于: GPT-4, GPT-3.5 等模型
 35 | OPENAI_API_KEY=your_openai_api_key_here
 36 | 
 37 | # 可选：OpenAI API Base URL（如果使用代理或自定义端点）
 38 | # OPENAI_API_BASE=https://api.openai.com/v1
 39 | 
 40 | # 可选：OpenAI 组织 ID
 41 | # OPENAI_ORGANIZATION=your_org_id_here
 42 | 
 43 | # ----------------------------------------------------------------------------
 44 | # Anthropic API 配置
 45 | # ----------------------------------------------------------------------------
 46 | # 获取地址: https://console.anthropic.com/
 47 | # 用于: Claude 系列模型
 48 | # 💰 免费额度途径：
 49 | #   1. 学生计划：https://anthropic.com/students - 免费 $500 额度
 50 | #   2. 研究者计划：https://anthropic.com/research - AI 安全研究免费额度
 51 | #   3. 云平台赠金：通过 AWS Bedrock 或 Google Vertex AI 获取 $300+ 额度
 52 | ANTHROPIC_API_KEY=your_anthropic_api_key_here
 53 | 
 54 | # ----------------------------------------------------------------------------
 55 | # LangSmith 配置（可观测性平台）
 56 | # ----------------------------------------------------------------------------
 57 | # 获取地址: https://smith.langchain.com/
 58 | # 用于: Agent 追踪、调试、评估
 59 | LANGSMITH_API_KEY=your_langsmith_api_key_here
 60 | 
 61 | # 启用 LangSmith 追踪（true/false）
 62 | LANGSMITH_TRACING=false
 63 | 
 64 | # LangSmith 项目名称
 65 | LANGSMITH_PROJECT=langchain-v1-study
 66 | 
 67 | # LangSmith 端点（通常不需要修改）
 68 | # LANGSMITH_ENDPOINT=https://api.smith.langchain.com
 69 | 
 70 | # ----------------------------------------------------------------------------
 71 | # ⭐⭐⭐ Google Gemini API 配置（推荐！免费额度慷慨）
 72 | # ----------------------------------------------------------------------------
 73 | # 获取地址: https://aistudio.google.com/apikey
 74 | # 免费额度:
 75 | #   - 每分钟 15 次请求
 76 | #   - 每天 1500 次请求
 77 | #   - Gemini 1.5 Flash 和 Pro 都免费
 78 | # 用于: Gemini Pro, Gemini Flash 等模型
 79 | # 安装: pip install langchain-google-genai
 80 | GOOGLE_API_KEY=your_google_api_key_here
 81 | 
 82 | # ----------------------------------------------------------------------------
 83 | # ⭐⭐ DeepSeek API 配置（成本极低）
 84 | # ----------------------------------------------------------------------------
 85 | # 获取地址: https://platform.deepseek.com/
 86 | # 特点:
 87 | #   - 成本仅为 OpenAI 的 2%
 88 | #   - 性能接近 GPT-4
 89 | #   - 128K 上下文窗口
 90 | # 价格: $0.28/1M input tokens, $0.42/1M output tokens
 91 | # DEEPSEEK_API_KEY=your_deepseek_api_key_here
 92 | 
 93 | # ----------------------------------------------------------------------------
 94 | # Cohere API 配置（可选）
 95 | # ----------------------------------------------------------------------------
 96 | # 获取地址: https://dashboard.cohere.com/api-keys
 97 | # 用于: Cohere 模型
 98 | # COHERE_API_KEY=your_cohere_api_key_here
 99 | 
100 | # ----------------------------------------------------------------------------
101 | # HuggingFace API 配置（可选）
102 | # ----------------------------------------------------------------------------
103 | # 获取地址: https://huggingface.co/settings/tokens
104 | # 用于: HuggingFace 模型和 Embeddings
105 | # HUGGINGFACE_API_KEY=your_huggingface_api_key_here
106 | 
107 | # ----------------------------------------------------------------------------
108 | # 向量数据库配置（用于 RAG 项目 - Module 13, 14）
109 | # ----------------------------------------------------------------------------
110 | 
111 | # ⭐⭐⭐ Pinecone（推荐！免费 serverless 层级）
112 | # 获取地址: https://www.pinecone.io/
113 | # 免费层级:
114 | #   - 1 个 serverless 索引
115 | #   - 10 GB 存储
116 | #   - 无限请求（有速率限制）
117 | #   - 无需信用卡
118 | # 使用区域: us-east-1 (AWS) - 免费
119 | # 安装: pip install langchain-pinecone pinecone
120 | PINECONE_API_KEY=your_pinecone_api_key_here
121 | 
122 | # Weaviate（可选）
123 | # WEAVIATE_URL=your_weaviate_url_here
124 | # WEAVIATE_API_KEY=your_weaviate_api_key_here
125 | 
126 | # ----------------------------------------------------------------------------
127 | # 搜索引擎 API 配置（用于 Web 搜索工具）
128 | # ----------------------------------------------------------------------------
129 | 
130 | # Tavily Search API（推荐用于 AI 搜索）
131 | # 获取地址: https://tavily.com/
132 | # TAVILY_API_KEY=your_tavily_api_key_here
133 | 
134 | # Serper API（Google 搜索）
135 | # 获取地址: https://serper.dev/
136 | # SERPER_API_KEY=your_serper_api_key_here
137 | 
138 | # SerpAPI（Google 搜索）
139 | # 获取地址: https://serpapi.com/
140 | # SERPAPI_API_KEY=your_serpapi_api_key_here
141 | 
142 | # ----------------------------------------------------------------------------
143 | # 数据库配置（用于 Checkpointing）
144 | # ----------------------------------------------------------------------------
145 | 
146 | # PostgreSQL（可选）
147 | # POSTGRES_HOST=localhost
148 | # POSTGRES_PORT=5432
149 | # POSTGRES_DB=langchain_checkpoints
150 | # POSTGRES_USER=your_username
151 | # POSTGRES_PASSWORD=your_password
152 | 
153 | # ----------------------------------------------------------------------------
154 | # 应用配置
155 | # ----------------------------------------------------------------------------
156 | 
157 | # 日志级别（DEBUG, INFO, WARNING, ERROR, CRITICAL）
158 | LOG_LEVEL=INFO
159 | 
160 | # 开发模式（true/false）
161 | DEV_MODE=true
162 | 
163 | # 最大重试次数
164 | MAX_RETRIES=3
165 | 
166 | # 请求超时时间（秒）
167 | REQUEST_TIMEOUT=30
168 | 
169 | # ----------------------------------------------------------------------------
170 | # 模型默认配置
171 | # ----------------------------------------------------------------------------
172 | 
173 | # 默认使用的模型
174 | DEFAULT_MODEL=openai:gpt-4
175 | 
176 | # 默认温度参数（0.0 - 2.0）
177 | DEFAULT_TEMPERATURE=0.7
178 | 
179 | # 默认最大 Token 数
180 | DEFAULT_MAX_TOKENS=2000
181 | 
182 | # ----------------------------------------------------------------------------
183 | # 注意事项
184 | # ----------------------------------------------------------------------------
185 | # 1. 永远不要提交包含真实 API Keys 的 .env 文件到 Git
186 | # 2. 定期轮换你的 API Keys 以保证安全
187 | # 3. 使用最小权限原则，只授予必要的权限
188 | # 4. 监控 API 使用情况，避免意外高额费用
189 | # 5. 在生产环境中使用环境变量或密钥管理服务
190 | # ============================================================================
191 | 


--------------------------------------------------------------------------------
/phase1_fundamentals/README.md:
--------------------------------------------------------------------------------
  1 | # Phase 1: Fundamentals (基础知识)
  2 | 
  3 | LangChain 1.0 基础教程 - 第一阶段
  4 | 
  5 | ## 学习目标
  6 | 
  7 | 掌握 LangChain 1.0 的核心概念和基础用法：
  8 | - 模型调用和消息系统
  9 | - 提示词模板
 10 | - 自定义工具
 11 | - Agent 创建和执行
 12 | 
 13 | ## 模块列表
 14 | 
 15 | ### 01 - Hello LangChain
 16 | **学习内容：**
 17 | - `init_chat_model` - 统一的模型初始化
 18 | - `invoke` 方法 - 三种输入格式
 19 | - 环境配置和 API 密钥管理
 20 | 
 21 | **关键文件：**
 22 | - `main.py` - 7 个基础示例
 23 | - `invoke_practice.py` - 实践练习
 24 | - `README.md` - 详细教程
 25 | 
 26 | ### 02 - Prompt Templates
 27 | **学习内容：**
 28 | - `PromptTemplate` - 文本模板
 29 | - `ChatPromptTemplate` - 对话模板
 30 | - 变量替换和部分变量
 31 | - LCEL 链式调用
 32 | 
 33 | **关键文件：**
 34 | - `main.py` - 9 个模板示例
 35 | - `examples/template_library.py` - 15 个可复用模板
 36 | - `README.md` - 模板使用指南
 37 | 
 38 | ### 03 - Messages
 39 | **学习内容：**
 40 | - 消息类型：HumanMessage、AIMessage、SystemMessage
 41 | - 对话历史管理
 42 | - 多轮对话的关键规则
 43 | 
 44 | **关键文件：**
 45 | - `main.py` - 5 个核心示例
 46 | - `test.py` - 对话测试
 47 | - `README.md` - 重点讲解
 48 | 
 49 | **核心难点：**
 50 | 每次调用必须传入完整历史！
 51 | 
 52 | ```python
 53 | conversation = []
 54 | conversation.append({"role": "user", "content": "我叫张三"})
 55 | r1 = model.invoke(conversation)
 56 | conversation.append({"role": "assistant", "content": r1.content})
 57 | conversation.append({"role": "user", "content": "我叫什么？"})
 58 | r2 = model.invoke(conversation)  # AI 能记住
 59 | ```
 60 | 
 61 | ### 04 - Custom Tools
 62 | **学习内容：**
 63 | - `@tool` 装饰器 - LangChain 1.0 推荐方式
 64 | - docstring 的重要性（AI 依赖它理解工具）
 65 | - 参数类型注解
 66 | - 可选参数使用 `Optional[type]`
 67 | 
 68 | **关键文件：**
 69 | - `main.py` - 6 个工具示例
 70 | - `tools/weather.py` - 天气工具
 71 | - `tools/calculator.py` - 计算器（多参数）
 72 | - `tools/web_search.py` - 搜索（可选参数）
 73 | - `README.md` - 工具开发指南
 74 | 
 75 | **最佳实践：**
 76 | ```python
 77 | from langchain_core.tools import tool
 78 | 
 79 | @tool
 80 | def my_tool(param: str) -> str:
 81 |     """
 82 |     清晰的工具描述（AI 读这个！）
 83 | 
 84 |     参数:
 85 |         param: 参数说明
 86 | 
 87 |     返回:
 88 |         返回值说明
 89 |     """
 90 |     # 实现
 91 |     return "结果字符串"
 92 | ```
 93 | 
 94 | ### 05 - Simple Agent
 95 | **学习内容：**
 96 | - `create_agent` - LangChain 1.0 统一 API
 97 | - Agent = 模型 + 工具 + 自动决策
 98 | - Agent 如何选择工具
 99 | - 多轮对话处理
100 | 
101 | **关键文件：**
102 | - `main.py` - 6 个 Agent 示例
103 | - `test_simple.py` - 简单测试
104 | - `README.md` - Agent 使用指南
105 | 
106 | **关键语法：**
107 | ```python
108 | from langchain.agents import create_agent
109 | 
110 | agent = create_agent(
111 |     model=init_chat_model("groq:llama-3.3-70b-versatile"),
112 |     tools=[tool1, tool2],
113 |     system_prompt="Agent 的行为指令"
114 | )
115 | 
116 | response = agent.invoke({
117 |     "messages": [{"role": "user", "content": "问题"}]
118 | })
119 | 
120 | final_answer = response['messages'][-1].content
121 | ```
122 | 
123 | ### 06 - Agent Loop
124 | **学习内容：**
125 | - Agent 执行循环详解
126 | - 消息历史分析
127 | - 流式输出 `.stream()`
128 | - 调试和监控技巧
129 | 
130 | **关键文件：**
131 | - `main.py` - 6 个执行循环示例
132 | - `test.py` - 测试脚本
133 | - `README.md` - 循环详解
134 | 
135 | **执行流程：**
136 | ```
137 | 用户问题 (HumanMessage)
138 |     ↓
139 | AI 决定 (AIMessage with tool_calls)
140 |     ↓
141 | 执行工具 (ToolMessage)
142 |     ↓
143 | 最终答案 (AIMessage)
144 | ```
145 | 
146 | ## 快速开始
147 | 
148 | ### 1. 环境搭建
149 | 
150 | ```bash
151 | # 创建虚拟环境
152 | python -m venv venv
153 | 
154 | # 激活（Windows）
155 | venv\Scripts\activate
156 | 
157 | # 安装依赖
158 | pip install langchain langchain-groq python-dotenv
159 | ```
160 | 
161 | ### 2. 配置 API 密钥
162 | 
163 | 创建 `.env` 文件：
164 | ```
165 | GROQ_API_KEY=your_key_here
166 | ```
167 | 
168 | ### 3. 运行示例
169 | 
170 | ```bash
171 | # 运行特定模块
172 | cd phase1_fundamentals/01_hello_langchain
173 | python main.py
174 | 
175 | # 或者
176 | python phase1_fundamentals/02_prompt_templates/main.py
177 | ```
178 | 
179 | ## 核心知识点总结
180 | 
181 | ### 1. LangChain 1.0 架构
182 | - 构建在 LangGraph 运行时之上
183 | - 统一的 `init_chat_model` 和 `create_agent` API
184 | - 中间件架构（后续学习）
185 | 
186 | ### 2. 模型调用
187 | ```python
188 | from langchain.chat_models import init_chat_model
189 | 
190 | model = init_chat_model("groq:llama-3.3-70b-versatile")
191 | 
192 | # 三种输入格式
193 | model.invoke("简单文本")
194 | model.invoke([{"role": "user", "content": "字典格式"}])
195 | model.invoke([HumanMessage("消息对象")])
196 | ```
197 | 
198 | ### 3. 提示词模板
199 | ```python
200 | from langchain_core.prompts import ChatPromptTemplate
201 | 
202 | template = ChatPromptTemplate.from_messages([
203 |     ("system", "你是{role}"),
204 |     ("user", "{input}")
205 | ])
206 | 
207 | chain = template | model
208 | result = chain.invoke({"role": "助手", "input": "问题"})
209 | ```
210 | 
211 | ### 4. 对话历史
212 | ```python
213 | # 关键：每次调用传完整历史
214 | conversation = []
215 | conversation.append(user_msg)
216 | response = model.invoke(conversation)
217 | conversation.append({"role": "assistant", "content": response.content})
218 | ```
219 | 
220 | ### 5. 创建工具
221 | ```python
222 | from langchain_core.tools import tool
223 | 
224 | @tool
225 | def my_tool(param: str) -> str:
226 |     """工具描述 - AI 读这个！"""
227 |     return "result"
228 | ```
229 | 
230 | ### 6. 创建 Agent
231 | ```python
232 | from langchain.agents import create_agent
233 | 
234 | agent = create_agent(
235 |     model=model,
236 |     tools=[tool1, tool2],
237 |     system_prompt="指令"
238 | )
239 | 
240 | response = agent.invoke({"messages": [...]})
241 | ```
242 | 
243 | ### 7. Agent 执行循环
244 | ```python
245 | # 查看完整历史
246 | for msg in response['messages']:
247 |     print(msg)
248 | 
249 | # 获取最终答案
250 | final = response['messages'][-1].content
251 | 
252 | # 流式输出
253 | for chunk in agent.stream(input, stream_mode="values"):
254 |     print(chunk["messages"][-1])  # 实时处理：打印每一步的最新消息
255 | ```
256 | 
257 | ## 重要概念
258 | 
259 | ### LCEL (LangChain Expression Language)
260 | 使用 `|` 操作符链接组件：
261 | ```python
262 | chain = prompt | model | output_parser
263 | result = chain.invoke(input)
264 | ```
265 | 
266 | ### 消息类型
267 | - **HumanMessage** - 用户输入
268 | - **AIMessage** - AI 输出
269 | - **SystemMessage** - 系统指令
270 | - **ToolMessage** - 工具结果
271 | 
272 | ### Agent 工作原理
273 | 1. 接收用户问题
274 | 2. 分析是否需要工具
275 | 3. 如果需要，调用工具
276 | 4. 基于工具结果生成答案
277 | 5. 返回最终答案
278 | 
279 | ## 常见问题
280 | 
281 | ### 1. API 密钥问题
282 | 确保 `.env` 文件中的 API 密钥正确：
283 | ```bash
284 | GROQ_API_KEY=gsk_...
285 | ```
286 | 
287 | ### 2. 导入错误
288 | LangChain 1.0 导入路径：
289 | ```python
290 | from langchain.chat_models import init_chat_model
291 | from langchain.agents import create_agent
292 | from langchain_core.tools import tool
293 | ```
294 | 
295 | ### 3. Agent 不调用工具
296 | - 检查工具的 docstring 是否清晰
297 | - 确保问题明确需要该工具
298 | - 工具参数类型注解完整
299 | 
300 | ### 4. 对话不记忆
301 | 必须传入完整历史：
302 | ```python
303 | # ❌ 错误
304 | model.invoke("你记得我的名字吗？")  # AI 不记得
305 | 
306 | # ✅ 正确
307 | conversation = previous_messages + [new_message]  # previous_messages 为之前的全部消息列表
308 | model.invoke(conversation)
309 | ```
310 | 
311 | ## 学习建议
312 | 
313 | 1. **按顺序学习**
314 |    - 01 → 02 → 03 → 04 → 05 → 06
315 |    - 每个模块都有实践练习
316 | 
317 | 2. **动手实践**
318 |    - 运行每个示例
319 |    - 修改参数观察结果
320 |    - 完成练习题
321 | 
322 | 3. **理解核心**
323 |    - invoke 方法的三种输入
324 |    - 对话历史管理
325 |    - 工具的 docstring
326 |    - Agent 执行循环
327 | 
328 | 4. **查看源码**
329 |    - 理解每个示例的实现
330 |    - 对比不同方法的差异
331 | 
332 | ## 下一步
333 | 
334 | ### Phase 2: Intermediate (中级特性，对应目录 `phase2_practical/`)
335 | 
336 | **即将学习：**
337 | - **Module 07-09**: 内存和状态管理
338 |   - InMemorySaver
339 |   - 上下文管理
340 |   - Checkpointing 持久化
341 | 
342 | - **Module 10**: 中间件架构
343 |   - 自定义中间件
344 |   - 可观测性
345 |   - 防护栏（Guardrails）
346 | 
347 | - **Module 11-12**: 结构化输出
348 |   - Pydantic 模型
349 |   - 验证和重试
350 |   - 工具与结构化输出结合
351 | 
352 | ## 资源链接
353 | 
354 | - **官方文档**: https://docs.langchain.com/oss/python/langchain/
355 | - **GitHub**: https://github.com/langchain-ai/langchain
356 | - **迁移指南**: https://docs.langchain.com/oss/python/migrate/langchain-v1
357 | 
358 | ## 贡献
359 | 
360 | 如有问题或建议，请提 Issue 或 PR。
361 | 
362 | ---
363 | 
364 | **恭喜完成阶段一！** 🎉
365 | 
366 | 你已经掌握了 LangChain 1.0 的核心基础，可以开始构建实际的 AI 应用了！
367 | 


--------------------------------------------------------------------------------
/phase1_fundamentals/06_agent_loop/README.md:
--------------------------------------------------------------------------------
  1 | # 06 - Agent Loop (Agent 执行循环)
  2 | 
  3 | ## 核心概念
  4 | 
  5 | **Agent 执行循环 = 自动化的"思考-行动-观察"过程**
  6 | 
  7 | Agent 不是一次性调用，而是一个循环：
  8 | ```
  9 | 用户问题 → AI 思考 → 调用工具 → 观察结果 → 继续思考 → 最终答案
 10 | ```
 11 | 
 12 | ## 执行循环详解
 13 | 
 14 | ### 完整流程
 15 | 
 16 | ```
 17 | ┌─────────────┐
 18 | │ 用户提问    │
 19 | │ HumanMessage│
 20 | └──────┬──────┘
 21 |        ↓
 22 | ┌─────────────┐
 23 | │ AI 分析问题 │
 24 | │ 需要工具？  │
 25 | └──────┬──────┘
 26 |        ↓ 是
 27 | ┌─────────────┐
 28 | │ AI 决定调用 │
 29 | │ AIMessage   │
 30 | │ (tool_calls)│
 31 | └──────┬──────┘
 32 |        ↓
 33 | ┌─────────────┐
 34 | │ 执行工具    │
 35 | │ ToolMessage │
 36 | └──────┬──────┘
 37 |        ↓
 38 | ┌─────────────┐
 39 | │ AI 看结果   │
 40 | │ 生成答案    │
 41 | │ AIMessage   │
 42 | └─────────────┘
 43 | ```
 44 | 
 45 | ### 消息历史示例
 46 | 
 47 | ```python
 48 | response = agent.invoke({
 49 |     "messages": [{"role": "user", "content": "25 乘以 8"}]
 50 | })
 51 | 
 52 | # response['messages'] 包含：
 53 | [
 54 |     HumanMessage(content="25 乘以 8"),
 55 |     AIMessage(tool_calls=[{
 56 |         'name': 'calculator',
 57 |         'args': {'operation': 'multiply', 'a': 25, 'b': 8}
 58 |     }]),
 59 |     ToolMessage(content="25.0 multiply 8.0 = 200.0"),
 60 |     AIMessage(content="25 乘以 8 等于 200")
 61 | ]
 62 | ```
 63 | 
 64 | ## 查看执行过程
 65 | 
 66 | ### 1. 查看完整历史
 67 | 
 68 | ```python
 69 | response = agent.invoke({"messages": [...]})
 70 | 
 71 | for msg in response['messages']:
 72 |     print(f"{msg.__class__.__name__}: {msg.content}")
 73 | ```
 74 | 
 75 | ### 2. 获取最终答案
 76 | 
 77 | ```python
 78 | # 最后一条消息就是最终答案
 79 | final_answer = response['messages'][-1].content
 80 | ```
 81 | 
 82 | ### 3. 查看使用的工具
 83 | 
 84 | ```python
 85 | used_tools = []
 86 | for msg in response['messages']:
 87 |     if hasattr(msg, 'tool_calls') and msg.tool_calls:
 88 |         for tc in msg.tool_calls:
 89 |             used_tools.append(tc['name'])
 90 | 
 91 | print(f"使用的工具: {used_tools}")
 92 | ```
 93 | 
 94 | ## 流式输出（Streaming）
 95 | 
 96 | **用于实时显示 Agent 的进度**
 97 | 
 98 | ### 基本用法
 99 | 
100 | ```python
101 | agent = create_agent(model=model, tools=tools)
102 | 
103 | # 使用 .stream() 方法
104 | for chunk in agent.stream({"messages": [...]}, stream_mode="values"):
105 |     # stream_mode="values" 时 chunk 是完整状态（含 'messages'）；默认的 "updates" 模式按节点名返回增量
106 |     if 'messages' in chunk:
107 |         latest_msg = chunk['messages'][-1]
108 |         # 处理最新消息
109 |         print(latest_msg.content)
110 | ```
111 | 
112 | ### 实时显示最终答案
113 | 
114 | ```python
115 | for chunk in agent.stream(input, stream_mode="values"):
116 |     if 'messages' in chunk:
117 |         latest = chunk['messages'][-1]
118 | 
119 |         # 只显示最终答案（不包含 tool_calls）
120 |         if hasattr(latest, 'content') and latest.content:
121 |             if not hasattr(latest, 'tool_calls') or not latest.tool_calls:
122 |                 print(latest.content)
123 | ```
124 | 
125 | ### stream vs invoke
126 | 
127 | | 方法 | 返回 | 用途 |
128 | |-----|------|------|
129 | | `invoke()` | 完整结果 | 等待完成后一次性获取 |
130 | | `stream()` | 生成器 | 实时获取中间步骤 |
131 | 
132 | ## 消息类型
133 | 
134 | ### HumanMessage
135 | 用户的输入
136 | 
137 | ```python
138 | HumanMessage(content="北京天气如何？")
139 | ```
140 | 
141 | ### AIMessage（两种情况）
142 | 
143 | **情况1：调用工具**
144 | ```python
145 | AIMessage(
146 |     content="",
147 |     tool_calls=[{
148 |         'name': 'get_weather',
149 |         'args': {'city': '北京'},
150 |         'id': 'call_xxx'
151 |     }]
152 | )
153 | ```
154 | 
155 | **情况2：最终答案**
156 | ```python
157 | AIMessage(content="北京今天晴天，温度 15°C")
158 | ```
159 | 
160 | ### ToolMessage
161 | 工具执行的结果
162 | 
163 | ```python
164 | ToolMessage(
165 |     content="晴天，温度 15°C",
166 |     name="get_weather"
167 | )
168 | ```
169 | 
170 | ### SystemMessage
171 | 系统指令（通过 `system_prompt` 设置）
172 | 
173 | ```python
174 | agent = create_agent(
175 |     model=model,
176 |     tools=tools,
177 |     system_prompt="你是一个helpful assistant"
178 | )
179 | ```
180 | 
181 | ## 多步骤执行
182 | 
183 | Agent 可以多次调用工具：
184 | 
185 | ```python
186 | # 问题：先算 10 + 20，然后乘以 3
187 | response = agent.invoke({
188 |     "messages": [{"role": "user", "content": "先算 10 + 20，然后乘以 3"}]
189 | })
190 | 
191 | # Agent 可能会：
192 | # 1. 调用 calculator(add, 10, 20) → 30
193 | # 2. 调用 calculator(multiply, 30, 3) → 90
194 | # 3. 返回最终答案
195 | ```
196 | 
197 | 统计工具调用次数：
198 | ```python
199 | tool_calls_count = sum(
200 |     len(msg.tool_calls) if hasattr(msg, 'tool_calls') and msg.tool_calls else 0
201 |     for msg in response['messages']
202 | )
203 | ```
204 | 
205 | ## 调试技巧
206 | 
207 | ### 1. 打印所有消息
208 | 
209 | ```python
210 | for i, msg in enumerate(response['messages'], 1):
211 |     print(f"\n--- 消息 {i}: {msg.__class__.__name__} ---")
212 | 
213 |     if hasattr(msg, 'content'):
214 |         print(f"内容: {msg.content}")
215 | 
216 |     if hasattr(msg, 'tool_calls') and msg.tool_calls:
217 |         for tc in msg.tool_calls:
218 |             print(f"工具: {tc['name']}, 参数: {tc['args']}")
219 | ```
220 | 
221 | ### 2. 使用 stream 查看步骤
222 | 
223 | ```python
224 | step = 0
225 | for chunk in agent.stream(input, stream_mode="values"):
226 |     step += 1
227 |     print(f"步骤 {step}:")
228 |     if 'messages' in chunk:
229 |         latest = chunk['messages'][-1]
230 |         print(f"  类型: {latest.__class__.__name__}")
231 | ```
232 | 
233 | ### 3. 检查是否使用工具
234 | 
235 | ```python
236 | has_tool_calls = any(
237 |     hasattr(msg, 'tool_calls') and msg.tool_calls
238 |     for msg in response['messages']
239 | )
240 | 
241 | if has_tool_calls:
242 |     print("Agent 使用了工具")
243 | else:
244 |     print("Agent 直接回答")
245 | ```
246 | 
247 | ## 常见问题
248 | 
249 | ### 1. 如何知道 Agent 何时完成？
250 | 
251 | **答：当 AIMessage 不包含 tool_calls 时**
252 | 
253 | ```python
254 | for msg in response['messages']:
255 |     if isinstance(msg, AIMessage):
256 |         if hasattr(msg, 'tool_calls') and msg.tool_calls:
257 |             print("还在调用工具...")
258 |         else:
259 |             print("完成！最终答案：", msg.content)
260 | ```
261 | 
262 | ### 2. Agent 可以调用多少次工具？
263 | 
264 | **答：默认没有限制，直到得到最终答案**
265 | 
266 | 但可能会：
267 | - 超时
268 | - 达到 token 限制
269 | - 模型决定停止
270 | 
271 | ### 3. 如何限制工具调用次数？
272 | 
273 | LangChain 1.0 的 `create_agent` 默认使用 LangGraph，可以通过配置限制：
274 | 
275 | ```python
276 | # 注意：这是高级用法，后续会详细学习
277 | config = {
278 |     "recursion_limit": 5  # 最多 5 步
279 | }
280 | 
281 | response = agent.invoke(input, config=config)
282 | ```
283 | 
284 | ## 最佳实践
285 | 
286 | ### 1. 生产环境获取答案
287 | 
288 | ```python
289 | try:
290 |     response = agent.invoke(input)
291 |     final_answer = response['messages'][-1].content
292 |     return final_answer
293 | except Exception as e:
294 |     logger.error(f"Agent 错误: {e}")
295 |     return "抱歉，出现错误"
296 | ```
297 | 
298 | ### 2. 用户体验优化
299 | 
300 | ```python
301 | # 使用流式输出
302 | print("正在思考...")
303 | for chunk in agent.stream(input, stream_mode="values"):
304 |     if 'messages' in chunk:
305 |         latest = chunk['messages'][-1]
306 |         # 显示进度
307 | ```
308 | 
309 | ### 3. 调试和监控
310 | 
311 | ```python
312 | response = agent.invoke(input)
313 | 
314 | # 记录使用的工具
315 | tools_used = [
316 |     tc['name']
317 |     for msg in response['messages']
318 |     if hasattr(msg, 'tool_calls') and msg.tool_calls
319 |     for tc in msg.tool_calls
320 | ]
321 | 
322 | logger.info(f"工具使用: {tools_used}")
323 | ```
324 | 
325 | ### 4. 错误处理
326 | 
327 | ```python
328 | try:
329 |     response = agent.invoke(input)
330 | 
331 |     # 检查是否成功
332 |     if not response['messages']:
333 |         raise ValueError("没有收到响应")
334 | 
335 |     final = response['messages'][-1]
336 |     if not hasattr(final, 'content') or not final.content:
337 |         raise ValueError("没有最终答案")
338 | 
339 |     return final.content
340 | 
341 | except Exception as e:
342 |     # 记录详细错误
343 |     logger.error(f"Agent 执行失败: {e}", exc_info=True)
344 |     return None
345 | ```
346 | 
347 | ## 运行示例
348 | 
349 | ```bash
350 | # 运行所有示例
351 | python main.py
352 | 
353 | # 测试
354 | python test.py
355 | ```
356 | 
357 | ## 核心要点总结
358 | 
359 | 1. **执行循环**：问题 → 工具调用 → 结果 → 答案
360 | 2. **messages 历史**：记录完整对话过程
361 | 3. **流式输出**：`stream()` 实时显示进度
362 | 4. **消息类型**：HumanMessage、AIMessage、ToolMessage
363 | 5. **最终答案**：`response['messages'][-1].content`
364 | 
365 | ## 下一步
366 | 
367 | **阶段一（基础）完成！**
368 | 
369 | 已学习：
370 | - 01: 环境搭建和模型调用
371 | - 02: 提示词模板
372 | - 03: 消息类型和对话
373 | - 04: 自定义工具
374 | - 05: Simple Agent
375 | - 06: Agent 执行循环
376 | 
377 | **下一阶段：phase2_practical**
378 | - 内存和状态管理
379 | - 中间件架构
380 | - 结构化输出
381 | 


--------------------------------------------------------------------------------
/phase2_practical/14_rag_advanced/test.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 简单测试：验证混合检索组件（不需要 Groq API）
  3 | """
  4 | 
  5 | import os
  6 | from pathlib import Path
  7 | 
  8 | # 获取脚本所在目录
  9 | SCRIPT_DIR = Path(__file__).parent
 10 | DATA_DIR = SCRIPT_DIR / "data"
 11 | CHROMA_DIR = SCRIPT_DIR / "chroma_db"
 12 | 
 13 | # 确保目录存在
 14 | DATA_DIR.mkdir(exist_ok=True)
 15 | CHROMA_DIR.mkdir(exist_ok=True)
 16 | 
 17 | print("=" * 70)
 18 | print("测试：RAG Advanced - 混合检索")
 19 | print("=" * 70)
 20 | 
 21 | 
 22 | # ============================================================================
 23 | # 测试 1：准备测试数据
 24 | # ============================================================================
 25 | print("\n--- 测试 1: 准备测试数据 ---")
 26 | 
 27 | from langchain_community.document_loaders import TextLoader
 28 | from langchain_text_splitters import RecursiveCharacterTextSplitter
 29 | 
 30 | # 创建测试文档
 31 | test_content = """
 32 | LangChain 框架核心组件
 33 | 
 34 | Models - 模型接口
 35 | 支持 OpenAI, Anthropic, Groq 等多种模型。
 36 | 版本要求：langchain>=1.0.0
 37 | 
 38 | Prompts - 提示词模板
 39 | 使用 PromptTemplate 和 ChatPromptTemplate。
 40 | 
 41 | Agents - 智能代理
 42 | 使用 create_agent 函数创建代理。
 43 | 支持工具调用和 ReAct 模式。
 44 | 
 45 | RAG 进阶技术
 46 | 
 47 | 混合检索 (Hybrid Search)
 48 | 结合向量搜索和 BM25 关键词搜索。
 49 | 
 50 | BM25 算法
 51 | Best Match 25，基于词频的检索算法。
 52 | 是 TF-IDF 的改进版本。
 53 | 
 54 | EnsembleRetriever
 55 | 使用 RRF (Reciprocal Rank Fusion) 算法。
 56 | 组合多个检索器的结果。
 57 | 
 58 | 代码示例
 59 | 
 60 | @tool
 61 | def search_docs(query: str) -> str:
 62 |     return "结果"
 63 | 
 64 | agent = create_agent(model=model, tools=[search_docs])
 65 | """
 66 | 
 67 | test_file = DATA_DIR / "test_docs.txt"
 68 | 
 69 | with open(test_file, "w", encoding="utf-8") as f:
 70 |     f.write(test_content)
 71 | 
 72 | # 加载和分割
 73 | loader = TextLoader(test_file, encoding="utf-8")
 74 | documents = loader.load()
 75 | 
 76 | splitter = RecursiveCharacterTextSplitter(
 77 |     chunk_size=150,
 78 |     chunk_overlap=30,
 79 |     separators=["\n\n", "\n", " ", ""]
 80 | )
 81 | 
 82 | chunks = splitter.split_documents(documents)
 83 | 
 84 | print(f"\n[OK] 文档加载和分割成功")
 85 | print(f"  原文档: {len(documents)} 个")
 86 | print(f"  分割后: {len(chunks)} 块")
 87 | 
 88 | 
 89 | # ============================================================================
 90 | # 测试 2：向量检索器
 91 | # ============================================================================
 92 | print("\n--- 测试 2: 向量检索器 ---")
 93 | 
 94 | try:
 95 |     from langchain_huggingface import HuggingFaceEmbeddings
 96 |     from langchain_community.vectorstores import Chroma
 97 | 
 98 |     print("创建向量存储（首次运行会下载模型）...")
 99 | 
100 |     embeddings = HuggingFaceEmbeddings(
101 |         model_name="sentence-transformers/all-MiniLM-L6-v2"
102 |     )
103 | 
104 |     vectorstore = Chroma.from_documents(
105 |         documents=chunks,
106 |         embedding=embeddings,
107 |         persist_directory=str(CHROMA_DIR)
108 |     )
109 | 
110 |     vector_retriever = vectorstore.as_retriever(search_kwargs={"k": 2})
111 | 
112 |     # 测试查询
113 |     query = "LangChain 核心组件"
114 |     results = vector_retriever.invoke(query)
115 | 
116 |     print(f"\n[OK] 向量检索成功")
117 |     print(f"  查询: {query}")
118 |     print(f"  结果数: {len(results)}")
119 |     if results:
120 |         preview = results[0].page_content[:50].replace("\n", " ")
121 |         print(f"  最相关: {preview}...")
122 | 
123 | except Exception as e:
124 |     print(f"\n[SKIP] 向量检索跳过: {e}")
125 |     vectorstore = None
126 |     vector_retriever = None
127 | 
128 | 
129 | # ============================================================================
130 | # 测试 3：BM25 检索器
131 | # ============================================================================
132 | print("\n--- 测试 3: BM25 检索器 ---")
133 | 
# Build a BM25 retriever over `chunks` and run one keyword query.
# On any exception the retriever is set to None so later sections skip.
try:
    from langchain_community.retrievers import BM25Retriever

    bm25_retriever = BM25Retriever.from_documents(chunks)
    bm25_retriever.k = 2  # return the two best matches

    # Smoke-test query
    query = "BM25 算法"
    results = bm25_retriever.invoke(query)

    for line in (
        "\n[OK] BM25 检索成功",
        f"  查询: {query}",
        f"  结果数: {len(results)}",
    ):
        print(line)

    if results:
        top_text = results[0].page_content
        preview = top_text[:50].replace("\n", " ")
        print(f"  最相关: {preview}...")

except Exception as e:
    print(f"\n[ERROR] BM25 检索失败: {e}")
    print("  请安装: pip install rank_bm25")
    bm25_retriever = None
155 | 
156 | 
157 | # ============================================================================
158 | # 测试 4：混合检索器
159 | # ============================================================================
160 | print("\n--- 测试 4: 混合检索器 (EnsembleRetriever) ---")
161 | 
# Combine the BM25 and vector retrievers and compare their top hit per query.
if not (vector_retriever and bm25_retriever):
    # At least one component retriever was not built earlier in the script.
    print("\n[SKIP] 混合检索器跳过（缺少组件）")
else:
    try:
        from langchain_classic.retrievers import EnsembleRetriever

        ensemble_retriever = EnsembleRetriever(
            retrievers=[bm25_retriever, vector_retriever],
            weights=[0.5, 0.5]
        )

        for line in (
            "\n[OK] 混合检索器创建成功",
            "  组合: BM25 + 向量搜索",
            "  权重: [0.5, 0.5]",
            "  算法: RRF (Reciprocal Rank Fusion)",
        ):
            print(line)

        def _first_hit_preview(retriever, text):
            """Return a 40-character single-line preview of the top hit, or "无"."""
            hits = retriever.invoke(text)
            if not hits:
                return "无"
            return hits[0].page_content[:40].replace("\n", " ")

        # (label, query) pairs used for the side-by-side comparison
        test_queries = [
            ("语义查询", "LangChain 的功能"),
            ("精确查询", "langchain>=1.0.0"),
            ("混合查询", "BM25 算法原理"),
        ]

        print("\n对比测试:")
        for query_type, query in test_queries:
            print(f"\n  [{query_type}] {query}")

            # Query order: BM25, vector, hybrid; all three run before printing.
            bm25_preview, vector_preview, ensemble_preview = [
                _first_hit_preview(retriever, query)
                for retriever in (bm25_retriever, vector_retriever, ensemble_retriever)
            ]

            print(f"    BM25:   {bm25_preview}...")
            print(f"    Vector: {vector_preview}...")
            print(f"    Hybrid: {ensemble_preview}...")

    except Exception as e:
        print(f"\n[ERROR] 混合检索器创建失败: {e}")
207 | 
208 | 
209 | # ============================================================================
210 | # 测试 5：权重对比
211 | # ============================================================================
212 | print("\n--- 测试 5: 权重对比 ---")
213 | 
# Run the same query through ensembles with different BM25/vector weights
# and print the top hit for each configuration.
if vector_retriever and bm25_retriever:
    try:
        # Import here as well: this section must not depend on the import
        # performed inside the previous section's try block. If that import
        # failed, using the bare name would raise a misleading NameError.
        from langchain_classic.retrievers import EnsembleRetriever

        query = "LangChain 核心组件"
        print(f"\n测试查询: {query}\n")

        # Weights are ordered like the retrievers list: [BM25, vector].
        weight_configs = [
            ([0.0, 1.0], "纯向量"),
            ([0.5, 0.5], "平衡"),
            ([1.0, 0.0], "纯 BM25"),
        ]

        for weights, desc in weight_configs:
            ensemble = EnsembleRetriever(
                retrievers=[bm25_retriever, vector_retriever],
                weights=weights
            )

            results = ensemble.invoke(query)
            if results:
                preview = results[0].page_content[:40].replace("\n", " ")
            else:
                # Keep one output line per configuration even with no hits.
                preview = "无"
            print(f"  {desc} {weights}: {preview}...")

        print(f"\n[OK] 权重对比完成")

    except Exception as e:
        print(f"\n[ERROR] 权重对比失败: {e}")
else:
    print(f"\n[SKIP] 权重对比跳过")
242 | 
243 | 
244 | # ============================================================================
245 | # 总结
246 | # ============================================================================
# ----------------------------------------------------------------------------
# Final summary.
#
# The status tags are derived from the retriever variables set by the earlier
# sections (each is None when its section failed) instead of printing "[OK]"
# unconditionally.
# NOTE(review): the hybrid and weight lines only reflect whether both
# component retrievers were available; an [ERROR] raised inside those
# sections is reported above but is not tracked here.
# ----------------------------------------------------------------------------
print("\n" + "=" * 70)
print("RAG Advanced 组件测试完成！")
print("=" * 70)

_vector_ok = bool(vector_retriever)
_bm25_ok = bool(bm25_retriever)
_hybrid_ok = _vector_ok and _bm25_ok

_vector_tag = "[OK]" if _vector_ok else "[SKIP]"
_bm25_tag = "[OK]" if _bm25_ok else "[ERROR]"
_hybrid_tag = "[OK]" if _hybrid_ok else "[SKIP]"

print("\n验证结果:")
print("  [OK] 文档加载和分割")
print(f"  {_vector_tag} 向量检索 (HuggingFaceEmbeddings + Chroma)")
print(f"  {_bm25_tag} BM25 检索 (需要 rank_bm25)")
print(f"  {_hybrid_tag} 混合检索 (EnsembleRetriever)")
print(f"  {_hybrid_tag} 权重调整")

print("\n核心要点:")
print("  1. 向量搜索 - 语义理解")
print("  2. BM25 搜索 - 精确匹配")
print("  3. 混合检索 - 结合两者优势")
print("  4. RRF 算法 - 融合多个排名")

print("\n运行完整示例:")
print("  python main.py  # 需要 GROQ_API_KEY")
266 | 


--------------------------------------------------------------------------------
/phase1_fundamentals/04_custom_tools/main.py:
--------------------------------------------------------------------------------
  1 | """
  2 | LangChain 1.0 - 自定义工具 (@tool 装饰器)
  3 | =========================================
  4 | 
  5 | 本模块重点讲解：
  6 | 1. 使用 @tool 装饰器创建工具（LangChain 1.0 推荐方式）
  7 | 2. 工具的参数和文档字符串（docstring）的重要性
  8 | 3. 测试工具
  9 | """
 10 | 
 11 | import os
 12 | import sys
 13 | 
 14 | # Windows终端编码支持
 15 | if sys.platform == 'win32':
 16 |     import io
 17 |     sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
 18 |     sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
 19 | 
 20 | # 添加tools目录到路径
 21 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'tools'))
 22 | 
 23 | from dotenv import load_dotenv
 24 | from langchain.chat_models import init_chat_model
 25 | from langchain_core.tools import tool
 26 | 
 27 | # 导入自定义工具
 28 | from weather import get_weather
 29 | from calculator import calculator
 30 | from web_search import web_search
 31 | 
 32 | load_dotenv()
 33 | GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 34 | 
 35 | if not GROQ_API_KEY or GROQ_API_KEY == "your_groq_api_key_here_replace_this":
 36 |     raise ValueError("请先设置 GROQ_API_KEY")
 37 | 
 38 | model = init_chat_model("groq:llama-3.3-70b-versatile", api_key=GROQ_API_KEY)
 39 | 
 40 | 
 41 | # ============================================================================
 42 | # 示例 1：创建第一个工具
 43 | # ============================================================================
 44 | def example_1_simple_tool():
 45 |     """
 46 |     示例1：使用 @tool 装饰器创建工具
 47 | 
 48 |     关键：
 49 |     1. 使用 @tool 装饰器
 50 |     2. 必须有 docstring（文档字符串）
 51 |     3. 参数要有类型注解
 52 |     """
 53 |     print("\n" + "="*70)
 54 |     print("示例 1：创建第一个工具")
 55 |     print("="*70)
 56 | 
 57 |     @tool
 58 |     def get_current_time() -> str:
 59 |         """获取当前时间"""
 60 |         from datetime import datetime
 61 |         return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
 62 | 
 63 |     print("\n工具名称:", get_current_time.name)
 64 |     print("工具描述:", get_current_time.description)
 65 |     print("工具参数:", get_current_time.args)
 66 | 
 67 |     # 调用工具
 68 |     result = get_current_time.invoke({})
 69 |     # 被 @tool 装饰器装饰的函数会被转换为 LangChain 的 Tool
 70 |     # 对象，这个对象有 .invoke() 方法。
 71 |     print(f"\n调用结果: {result}")
 72 | 
 73 |     print("\n💡 关键点：")
 74 |     print("  1. @tool 装饰器会自动提取函数名、docstring、参数")
 75 |     print("  2. docstring 很重要！AI 用它理解工具的功能")
 76 |     print("  3. 类型注解帮助 AI 理解参数类型")
 77 | 
 78 | 
 79 | # ============================================================================
 80 | # 示例 2：带参数的工具
 81 | # ============================================================================
 82 | def example_2_tool_with_params():
 83 |     """
 84 |     示例2：带参数的工具
 85 | 
 86 |     重点：参数的文档说明
 87 |     """
 88 |     print("\n" + "="*70)
 89 |     print("示例 2：带参数的工具")
 90 |     print("="*70)
 91 | 
 92 |     print("\n查看天气工具的信息：")
 93 |     print(f"名称: {get_weather.name}")
 94 |     print(f"描述: {get_weather.description}")
 95 |     print(f"参数: {get_weather.args}")
 96 | 
 97 |     # 测试工具
 98 |     print("\n测试工具：")
 99 |     result1 = get_weather.invoke({"city": "北京"})
100 |     print(f"北京天气: {result1}")
101 | 
102 |     result2 = get_weather.invoke({"city": "上海"})
103 |     print(f"上海天气: {result2}")
104 | 
105 |     print("\n💡 docstring 格式：")
106 |     print('''
107 |     @tool
108 |     def my_tool(param1: str) -> str:
109 |         """
110 |         工具的简短描述
111 | 
112 |         参数:
113 |             param1: 参数说明
114 | 
115 |         返回:
116 |             返回值说明
117 |         """
118 |     ''')
119 | 
120 | 
121 | # ============================================================================
122 | # 示例 3：多参数工具
123 | # ============================================================================
def example_3_multiple_params():
    """
    Example 3: a tool with several parameters (the calculator).
    """
    rule = "=" * 70
    print("\n" + rule)
    print("示例 3：多参数工具 - 计算器")
    print(rule)

    print("\n计算器工具信息：")
    print(f"名称: {calculator.name}")
    print(f"描述: {calculator.description}")

    # Exercise a few operations; each tuple is (operation, a, b).
    print("\n测试计算：")
    cases = (
        ("add", 10, 5),
        ("multiply", 7, 8),
        ("divide", 20, 4),
    )
    for operation, left, right in cases:
        payload = {"operation": operation, "a": left, "b": right}
        print(f"  {calculator.invoke(payload)}")
147 | 
148 | 
149 | # ============================================================================
150 | # 示例 4：可选参数工具
151 | # ============================================================================
def example_4_optional_params():
    """
    Example 4: optional parameters.

    Calls the search tool once with only the required ``query`` argument and
    once with ``num_results`` supplied explicitly.
    """
    print("\n" + "="*70)
    print("示例 4：可选参数 - 搜索工具")
    print("="*70)

    # Rely on the tool's default for num_results
    print("\n使用默认参数（返回3条结果）：")
    result1 = web_search.invoke({"query": "Python"})
    print(result1)

    # Pass num_results explicitly
    # (the heading below previously contained mis-encoded bytes in place of "条")
    print("\n指定返回2条结果：")
    result2 = web_search.invoke({"query": "LangChain", "num_results": 2})
    print(result2)
171 | 
172 | 
173 | # ============================================================================
174 | # 示例 5：工具绑定到模型（预览）
175 | # ============================================================================
def example_5_bind_tools():
    """
    Example 5: bind tools to the model.

    This is the first step towards letting the model use tools.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("示例 5：工具绑定到模型（预览）")
    print(rule)

    # Attach the two tools to the chat model.
    model_with_tools = model.bind_tools([get_weather, calculator])

    for line in ("模型已绑定工具：", "  - get_weather", "  - calculator"):
        print(line)

    # Ask a question; the model may respond with a tool call or plain text.
    print("\n测试：AI 是否会调用天气工具？")
    reply = model_with_tools.invoke("北京今天天气怎么样？")

    # A non-empty tool_calls means the model requested a tool invocation.
    if not reply.tool_calls:
        print("\nℹ️ AI 直接回答（未使用工具）")
        print(f"回复: {reply.content}")
    else:
        print("\n✅ AI 决定使用工具！")
        print(f"工具调用: {reply.tool_calls}")

    print("\n💡 下一步：")
    print("  在 05_simple_agent 中，我们将学习如何让 AI 自动执行工具")
207 | 
208 | 
209 | # ============================================================================
210 | # 示例 6：工具的最佳实践
211 | # ============================================================================
def example_6_best_practices():
    """
    Example 6: print a checklist of tool-development best practices.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("示例 6：工具开发最佳实践")
    print(rule)

    # The checklist is static text, printed verbatim.
    checklist = """
1. 清晰的 docstring
   @tool
   def search_products(query: str, max_results: int = 10) -> str:
       '''
       在产品数据库中搜索产品

       参数:
           query: 搜索关键词
           max_results: 最大返回数量，默认10

       返回:
           产品列表的JSON字符串
       '''

2. 明确的参数类型
   - 使用类型注解：str, int, float, bool
   - 可选参数用 Optional[类型]

3. 返回字符串
   - 工具应该返回 str（AI 最容易理解）
   - 复杂数据可以返回 JSON 字符串

4. 错误处理
   - 在工具内部捕获异常
   - 返回友好的错误消息

5. 功能单一
   - 一个工具做一件事
   - 不要把多个功能塞进一个工具
    """

    print("\n✅ 好的工具设计：")
    print(checklist)
252 | 
253 | 
254 | # ============================================================================
255 | # 主程序
256 | # ============================================================================
def main():
    """Run the six examples in order, pausing for Enter between them."""
    rule = "=" * 70
    print("\n" + rule)
    print(" LangChain 1.0 - 自定义工具")
    print(rule)

    try:
        examples = (
            example_1_simple_tool,
            example_2_tool_with_params,
            example_3_multiple_params,
            example_4_optional_params,
            example_5_bind_tools,
            example_6_best_practices,
        )
        for position, run_example in enumerate(examples):
            # Pause before every example except the first one.
            if position > 0:
                input("\n按 Enter 继续...")
            run_example()

        print("\n" + rule)
        print(" 完成！")
        print(rule)
        for line in (
            "\n核心要点：",
            "  ✅ 使用 @tool 装饰器创建工具",
            "  ✅ 必须有清晰的 docstring",
            "  ✅ 参数要有类型注解",
            "  ✅ 工具返回字符串",
            "\n下一步：",
            "  05_simple_agent - 学习如何让 AI 自动使用工具",
        ):
            print(line)

    except KeyboardInterrupt:
        # Ctrl+C during a pause or an example ends the run quietly.
        print("\n\n程序中断")
    except Exception as e:
        # Any other failure is reported together with its traceback.
        import traceback
        print(f"\n错误: {e}")
        traceback.print_exc()


if __name__ == "__main__":
    main()
301 | 


--------------------------------------------------------------------------------
/phase2_practical/07_memory_basics/README.md:
--------------------------------------------------------------------------------
  1 | # 07 - Memory Basics (内存管理基础)
  2 | 
  3 | ## 核心概念
  4 | 
  5 | **内存 = Agent 记住对话历史的能力**
  6 | 
  7 | 默认情况下，每次调用 `agent.invoke()` 都是全新的开始，不记得之前的对话。使用 `InMemorySaver` 可以让 Agent 记住历史。
  8 | 
  9 | ## 基本用法
 10 | 
 11 | ### 没有内存（默认）
 12 | 
 13 | ```python
 14 | from langchain.agents import create_agent
 15 | 
 16 | agent = create_agent(model=model, tools=[])
 17 | 
 18 | # 第一轮
 19 | agent.invoke({"messages": [{"role": "user", "content": "我叫张三"}]})
 20 | 
 21 | # 第二轮 - 不记得第一轮！
 22 | response = agent.invoke({"messages": [{"role": "user", "content": "我叫什么？"}]})
 23 | # AI 会说"不知道"
 24 | ```
 25 | 
 26 | ### 添加内存
 27 | 
 28 | ```python
 29 | from langchain.agents import create_agent
 30 | from langgraph.checkpoint.memory import InMemorySaver
 31 | 
 32 | # 1. 创建 Agent 时添加 checkpointer
 33 | agent = create_agent(
 34 |     model=model,
 35 |     tools=[],
 36 |     checkpointer=InMemorySaver()  # 添加内存
 37 | )
 38 | 
 39 | # 2. 调用时指定 thread_id
 40 | config = {"configurable": {"thread_id": "conversation_1"}}
 41 | 
 42 | # 第一轮
 43 | agent.invoke(
 44 |     {"messages": [{"role": "user", "content": "我叫张三"}]},
 45 |     config=config
 46 | )
 47 | 
 48 | # 第二轮 - 记得第一轮！
 49 | response = agent.invoke(
 50 |     {"messages": [{"role": "user", "content": "我叫什么？"}]},
 51 |     config=config
 52 | )
 53 | # AI 会说"你叫张三"
 54 | ```
 55 | 
 56 | ## 关键参数
 57 | 
 58 | ### checkpointer
 59 | 
 60 | **作用**：为 Agent 添加内存管理能力
 61 | 
 62 | ```python
 63 | from langgraph.checkpoint.memory import InMemorySaver
 64 | 
 65 | agent = create_agent(
 66 |     model=model,
 67 |     tools=tools,
 68 |     checkpointer=InMemorySaver()  # InMemorySaver = 短期内存
 69 | )
 70 | ```
 71 | 
 72 | **注意**：
 73 | - `InMemorySaver` - 内存中保存（进程结束就丢失）
 74 | - 后续会学习持久化（SQLite、Postgres）
 75 | 
 76 | ### thread_id
 77 | 
 78 | **作用**：区分不同的会话
 79 | 
 80 | ```python
 81 | # 会话 1
 82 | config1 = {"configurable": {"thread_id": "user_alice"}}
 83 | agent.invoke({...}, config=config1)
 84 | 
 85 | # 会话 2（完全独立）
 86 | config2 = {"configurable": {"thread_id": "user_bob"}}
 87 | agent.invoke({...}, config=config2)
 88 | ```
 89 | 
 90 | **thread_id 的选择：**
 91 | - 聊天应用：使用用户 ID 或会话 ID
 92 | - 多轮任务：使用任务 ID
 93 | - 测试：使用描述性字符串（如 "test_1"）
 94 | 
 95 | ## 工作原理
 96 | 
 97 | ### 内存保存了什么？
 98 | 
 99 | ```python
100 | agent.invoke({"messages": [{"role": "user", "content": "你好"}]}, config)
101 | # InMemorySaver 保存：
102 | # {
103 | #     "thread_id": "xxx",
104 | #     "messages": [
105 | #         HumanMessage("你好"),
106 | #         AIMessage("你好！有什么可以帮助你的吗？")
107 | #     ]
108 | # }
109 | 
110 | agent.invoke({"messages": [{"role": "user", "content": "天气"}]}, config)
111 | # InMemorySaver 更新：
112 | # {
113 | #     "thread_id": "xxx",
114 | #     "messages": [
115 | #         HumanMessage("你好"),
116 | #         AIMessage("你好！有什么可以帮助你的吗？"),
117 | #         HumanMessage("天气"),
118 | #         AIMessage("...")
119 | #     ]
120 | # }
121 | ```
122 | 
123 | ### 自动追加历史
124 | 
125 | ```python
126 | # 你只需要传新消息
127 | agent.invoke(
128 |     {"messages": [{"role": "user", "content": "新问题"}]},
129 |     config
130 | )
131 | 
132 | # checkpointer 自动：
133 | # 1. 读取之前的历史
134 | # 2. 追加新消息
135 | # 3. 调用模型（传入完整历史）
136 | # 4. 保存新的历史
137 | ```
138 | 
139 | ## 多会话管理
140 | 
141 | ### 场景：多用户聊天
142 | 
143 | ```python
144 | agent = create_agent(
145 |     model=model,
146 |     tools=[],
147 |     checkpointer=InMemorySaver()
148 | )
149 | 
150 | # 用户 Alice
151 | config_alice = {"configurable": {"thread_id": "user_alice"}}
152 | agent.invoke({"messages": [...]}, config_alice)
153 | 
154 | # 用户 Bob
155 | config_bob = {"configurable": {"thread_id": "user_bob"}}
156 | agent.invoke({"messages": [...]}, config_bob)
157 | 
158 | # 两个会话完全独立
159 | ```
160 | 
161 | ### 场景：同一用户的不同任务
162 | 
163 | ```python
164 | # 任务 1：写代码
165 | config_task1 = {"configurable": {"thread_id": "task_coding"}}
166 | agent.invoke({"messages": [...]}, config_task1)
167 | 
168 | # 任务 2：写文档
169 | config_task2 = {"configurable": {"thread_id": "task_docs"}}
170 | agent.invoke({"messages": [...]}, config_task2)
171 | ```
172 | 
173 | ## 内存 + 工具
174 | 
175 | Agent 会记住工具调用的结果：
176 | 
177 | ```python
178 | @tool
179 | def search(query: str) -> str:
180 |     """搜索工具"""
181 |     return f"关于 {query} 的结果..."
182 | 
183 | agent = create_agent(
184 |     model=model,
185 |     tools=[search],
186 |     checkpointer=InMemorySaver()
187 | )
188 | 
189 | config = {"configurable": {"thread_id": "session_1"}}
190 | 
191 | # 第一轮：使用工具
192 | agent.invoke({"messages": [{"role": "user", "content": "搜索 Python"}]}, config)
193 | # Agent 调用 search("Python")
194 | 
195 | # 第二轮：引用之前的结果
196 | response = agent.invoke(
197 |     {"messages": [{"role": "user", "content": "刚才搜索的结果是什么？"}]},
198 |     config
199 | )
200 | # Agent 记得工具返回的结果，无需重新调用
201 | ```
202 | 
203 | ## 查看内存状态
204 | 
205 | ```
206 |   # 用户输入
207 |   {"role": "user", "content": "你好"}
208 |       ↓ 转换为
209 |   HumanMessage(content="你好")
210 | 
211 |   # AI 回复
212 |   {"role": "assistant", "content": "你好！"}
213 |       ↓ 转换为
214 |   AIMessage(content="你好！")
215 | 
216 |   # 系统指令
217 |   {"role": "system", "content": "你是助手"}
218 |       ↓ 转换为
219 |   SystemMessage(content="你是助手")
220 | 
221 | 
222 | ```
223 | 
224 | ```python
225 | response = agent.invoke({"messages": [...]}, config)
226 | 
227 | # 查看完整的对话历史
228 | print("消息数量:", len(response['messages']))
229 | 
230 | # 查看最近的消息
231 | for msg in response['messages'][-5:]:
232 |     print(f"{msg.__class__.__name__}: {msg.content}")
233 | ```
234 | 
235 | ## 常见问题
236 | 
237 | ### 1. 为什么 Agent 不记得？
238 | 
239 | **检查：**
240 | - ✅ 是否添加了 `checkpointer=InMemorySaver()`？
241 | - ✅ 是否传入了 `config` 参数？
242 | - ✅ 两次调用的 `thread_id` 是否相同？
243 | 
244 | ```python
245 | # ❌ 错误：没有 checkpointer
246 | agent = create_agent(model=model, tools=[])
247 | agent.invoke({...})  # 不会记住
248 | 
249 | # ❌ 错误：没有 config
250 | agent = create_agent(model=model, tools=[], checkpointer=InMemorySaver())
251 | agent.invoke({...})  # 会报错：配置了 checkpointer 但没有提供 thread_id
252 | 
253 | # ❌ 错误：thread_id 不同
254 | agent.invoke({...}, {"configurable": {"thread_id": "1"}})
255 | agent.invoke({...}, {"configurable": {"thread_id": "2"}})  # 不同会话
256 | 
257 | # ✅ 正确
258 | agent = create_agent(model=model, tools=[], checkpointer=InMemorySaver())
259 | config = {"configurable": {"thread_id": "1"}}
260 | agent.invoke({...}, config)
261 | agent.invoke({...}, config)  # 记得！
262 | ```
263 | 
264 | ### 2. InMemorySaver 会丢失数据吗？
265 | 
266 | **会！** InMemorySaver 只保存在内存中：
267 | - ✅ 同一进程内有效
268 | - ❌ 程序重启后丢失
269 | - ❌ 不同进程无法共享
270 | 
271 | **解决方案**：Module 09 会学习持久化（SQLite）
272 | 
273 | ### 3. 内存会无限增长吗？
274 | 
275 | **会！** 默认情况下，InMemorySaver 会保存所有消息。
276 | 
277 | **问题**：
278 | - 消息越来越多
279 | - 超过模型的 token 限制
280 | - 响应变慢、成本增加
281 | 
282 | **解决方案**：Module 08 会学习上下文管理（修剪、摘要）
283 | 
284 | ### 4. 如何清空某个会话的历史？
285 | 
286 | 较早版本的 `InMemorySaver` 没有提供删除 API；较新版本的 LangGraph checkpointer 提供了 `delete_thread(thread_id)`，可以直接清空指定会话（请以所安装版本的文档为准）。
287 | 
288 | **临时方案**：
289 | - 使用新的 `thread_id`
290 | - 或重新创建 Agent
291 | 
292 | ## 实际应用场景
293 | 
294 | ### 1. 聊天机器人
295 | 
296 | ```python
297 | def handle_user_message(user_id: str, message: str):
298 |     config = {"configurable": {"thread_id": f"user_{user_id}"}}
299 | 
300 |     response = agent.invoke(
301 |         {"messages": [{"role": "user", "content": message}]},
302 |         config
303 |     )
304 | 
305 |     return response['messages'][-1].content
306 | ```
307 | 
308 | ### 2. 多轮任务助手
309 | 
310 | ```python
311 | def process_task(task_id: str, user_input: str):
312 |     config = {"configurable": {"thread_id": f"task_{task_id}"}}
313 | 
314 |     response = agent.invoke(
315 |         {"messages": [{"role": "user", "content": user_input}]},
316 |         config
317 |     )
318 | 
319 |     return response['messages'][-1].content
320 | ```
321 | 
322 | ### 3. 客服系统
323 | 
324 | ```python
325 | agent = create_agent(
326 |     model=model,
327 |     tools=[查询订单, 查询物流],
328 |     system_prompt="你是客服助手，记住用户的订单号",
329 |     checkpointer=InMemorySaver()
330 | )
331 | 
332 | def customer_service(session_id: str, message: str):
333 |     config = {"configurable": {"thread_id": session_id}}
334 |     response = agent.invoke(
335 |         {"messages": [{"role": "user", "content": message}]},
336 |         config
337 |     )
338 |     return response['messages'][-1].content
339 | ```
340 | 
341 | ## 运行示例
342 | 
343 | ```bash
344 | # 运行主程序
345 | python main.py
346 | 
347 | # 测试
348 | python test.py
349 | ```
350 | 
351 | ## 核心要点
352 | 
353 | 1. **默认无内存**：每次 `invoke` 是全新开始
354 | 2. **添加内存**：`checkpointer=InMemorySaver()`
355 | 3. **会话管理**：`config={"configurable": {"thread_id": "xxx"}}`
356 | 4. **自动保存**：checkpointer 自动管理历史
357 | 5. **多会话**：不同 thread_id = 不同会话
358 | 6. **记住工具**：也会记住工具调用结果
359 | 
360 | ## 限制
361 | 
362 | - ❌ 进程重启后丢失
363 | - ❌ 无限增长（需要管理上下文）
364 | - ❌ 不支持跨进程共享
365 | 
366 | ## 下一步
367 | 
368 | **08_context_management** - 学习如何管理上下文长度（修剪、摘要）
369 | 


--------------------------------------------------------------------------------
/phase1_fundamentals/03_messages/main.py:
--------------------------------------------------------------------------------
  1 | """
  2 | LangChain 1.0 - 消息类型与对话管理
  3 | ====================================
  4 | 
  5 | 本模块重点讲解：
  6 | 1. 三种消息类型的实际使用
  7 | 2. 对话历史管理（核心难点）
  8 | 3. 消息的修剪和优化
  9 | """
 10 | 
 11 | import os
 12 | from dotenv import load_dotenv
 13 | from langchain.chat_models import init_chat_model
 14 | from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
 15 | 
 16 | load_dotenv()
 17 | GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 18 | 
 19 | if not GROQ_API_KEY or GROQ_API_KEY == "your_groq_api_key_here_replace_this":
 20 |     raise ValueError("请先设置 GROQ_API_KEY")
 21 | 
 22 | model = init_chat_model("groq:llama-3.3-70b-versatile", api_key=GROQ_API_KEY)
 23 | 
 24 | 
 25 | # ============================================================================
 26 | # 示例 1：三种消息类型
 27 | # ============================================================================
 28 | def example_1_message_types():
 29 |     """
 30 |     三种消息类型：SystemMessage, HumanMessage, AIMessage
 31 | 
 32 |     重点：字典格式 vs 消息对象（推荐用字典）
 33 |     """
 34 |     print("\n" + "="*70)
 35 |     print("示例 1：三种消息类型对比")
 36 |     print("="*70)
 37 | 
 38 |     # 方式 1：消息对象（啰嗦）
 39 |     print("\n【方式 1：消息对象】")
 40 |     messages_obj = [
 41 |         SystemMessage(content="你是 Python 导师"),
 42 |         HumanMessage(content="什么是列表？")
 43 |     ]
 44 |     response = model.invoke(messages_obj)
 45 |     print(f"回复: {response.content[:100]}...")
 46 | 
 47 |     # 方式 2：字典格式（推荐，简洁）
 48 |     print("\n【方式 2：字典格式（推荐）】")
 49 |     messages_dict = [
 50 |         {"role": "system", "content": "你是 Python 导师"},
 51 |         {"role": "user", "content": "什么是列表？"}
 52 |     ]
 53 |     response = model.invoke(messages_dict)
 54 |     print(f"回复: {response.content[:100]}...")
 55 | 
 56 |     print("\n💡 推荐：直接用字典，更简洁！")
 57 | 
 58 | 
 59 | # ============================================================================
 60 | # 示例 2：对话历史管理（核心难点）
 61 | # ============================================================================
 62 | def example_2_conversation_history():
 63 |     """
 64 |     难点：如何正确管理对话历史
 65 | 
 66 |     关键：每次调用都要传递完整历史！
 67 |     """
 68 |     print("\n" + "="*70)
 69 |     print("示例 2：对话历史管理（重点）")
 70 |     print("="*70)
 71 | 
 72 |     # 初始化对话历史
 73 |     conversation = [
 74 |         {"role": "system", "content": "你是一个简洁的助手，回答限制在50字内"}
 75 |     ]
 76 | 
 77 |     # 第一轮
 78 |     print("\n【第 1 轮】")
 79 |     conversation.append({"role": "user", "content": "什么是 Python？"})
 80 |     print(f"用户: {conversation[-1]['content']}")
 81 | 
 82 |     r1 = model.invoke(conversation)
 83 |     print(f"AI: {r1.content}")
 84 | 
 85 |     # 关键：保存 AI 回复到历史
 86 |     conversation.append({"role": "assistant", "content": r1.content})
 87 | 
 88 |     # 第二轮（测试记忆）
 89 |     print("\n【第 2 轮】")
 90 |     conversation.append({"role": "user", "content": "它有什么特点？"})
 91 |     print(f"用户: {conversation[-1]['content']}")
 92 | 
 93 |     r2 = model.invoke(conversation)
 94 |     print(f"AI: {r2.content}")
 95 | 
 96 |     conversation.append({"role": "assistant", "content": r2.content})
 97 | 
 98 |     # 第三轮（测试上下文）
 99 |     print("\n【第 3 轮】")
100 |     conversation.append({"role": "user", "content": "我第一个问题问的是什么？"})
101 |     print(f"用户: {conversation[-1]['content']}")
102 | 
103 |     r3 = model.invoke(conversation)
104 |     print(f"AI: {r3.content}")
105 | 
106 |     print(f"\n💡 对话历史共 {len(conversation)} 条消息")
107 |     print("   AI 记住了之前的内容，因为每次都传递了完整历史！")
108 | 
109 | 
110 | # ============================================================================
111 | # 示例 3：错误示范 - AI 失忆
112 | # ============================================================================
def example_3_wrong_way():
    """
    Counter-example: calling the model without keeping any history.

    Each call is independent, so the second call has no access to what was
    said in the first.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("示例 3：错误示范 - AI 失忆")
    print(rule)

    print("\n❌ 错误做法：不保存历史")

    # First call: state a name.
    first_reply = model.invoke("我叫张三")
    print("用户: 我叫张三")
    print(f"AI: {first_reply.content[:50]}...")

    # Second call: no history is passed along.
    second_reply = model.invoke("我叫什么名字？")
    print("\n用户: 我叫什么名字？")
    print(f"AI: {second_reply.content[:80]}...")
    print("\n❌ AI 不记得你叫张三！")
135 | 
136 | 
137 | # ============================================================================
138 | # 示例 4：对话历史的优化
139 | # ============================================================================
140 | def example_4_optimize_history():
141 |     """
142 |     难点：对话历史太长怎么办？
143 | 
144 |     解决方案：
145 |     1. 只保留最近 N 条
146 |     2. 总是保留 system 消息
147 |     """
148 |     print("\n" + "="*70)
149 |     print("示例 4：优化对话历史（避免太长）")
150 |     print("="*70)
151 | 
152 |     def keep_recent_messages(messages, max_pairs=3):
153 |         """
154 |         保留最近的 N 轮对话
155 | 
156 |         参数:
157 |             messages: 完整消息列表
158 |             max_pairs: 保留的对话轮数
159 | 
160 |         返回:
161 |             优化后的消息列表
162 |         """
163 |         # 分离 system 消息和对话消息
164 |         system_msgs = [m for m in messages if m.get("role") == "system"]
165 |         conversation_msgs = [m for m in messages if m.get("role") != "system"]
166 | 
167 |         # 只保留最近的消息（每轮 = user + assistant）
168 |         max_messages = max_pairs * 2
169 |         recent_msgs = conversation_msgs[-max_messages:]
170 | 
171 |         # 返回：system + 最近对话
172 |         return system_msgs + recent_msgs
173 | 
174 |     # 模拟长对话
175 |     long_conversation = [
176 |         {"role": "system", "content": "你是助手"},
177 |         {"role": "user", "content": "第1个问题"},
178 |         {"role": "assistant", "content": "第1个回答"},
179 |         {"role": "user", "content": "第2个问题"},
180 |         {"role": "assistant", "content": "第2个回答"},
181 |         {"role": "user", "content": "第3个问题"},
182 |         {"role": "assistant", "content": "第3个回答"},
183 |         {"role": "user", "content": "第4个问题"},
184 |         {"role": "assistant", "content": "第4个回答"},
185 |         {"role": "user", "content": "第5个问题"},
186 |     ]
187 | 
188 |     print(f"原始消息数: {len(long_conversation)}")
189 | 
190 |     # 优化：只保留最近 2 轮
191 |     optimized = keep_recent_messages(long_conversation, max_pairs=2)
192 |     print(f"优化后消息数: {len(optimized)}")
193 |     print(f"保留的内容: system + 最近2轮对话")
194 | 
195 |     # 使用优化后的历史
196 |     response = model.invoke(optimized)
197 |     print(f"\nAI 回复: {response.content[:100]}...")
198 | 
199 |     print("\n💡 技巧：对话太长时，只保留最近的几轮即可")
200 | 
201 | 
202 | # ============================================================================
203 | # 示例 5：实战 - 简单聊天机器人
204 | # ============================================================================
205 | def example_5_simple_chatbot():
206 |     """
207 |     实战：构建一个记住对话的聊天机器人
208 |     """
209 |     print("\n" + "="*70)
210 |     print("示例 5：实战 - 简单聊天机器人")
211 |     print("="*70)
212 | 
213 |     conversation = [
214 |         {"role": "system", "content": "你是一个友好的助手"}
215 |     ]
216 | 
217 |     questions = [
218 |         "我叫李明，今年25岁",
219 |         "我喜欢编程",
220 |         "我叫什么名字？",
221 |         "我今年多大？",
222 |         "我喜欢什么？"
223 |     ]
224 | 
225 |     for i, q in enumerate(questions, 1):
226 |         print(f"\n--- 第 {i} 轮 ---")
227 |         print(f"用户: {q}")
228 | 
229 |         conversation.append({"role": "user", "content": q})
230 |         response = model.invoke(conversation)
231 | 
232 |         print(f"AI: {response.content}")
233 |         conversation.append({"role": "assistant", "content": response.content})
234 | 
235 |     print(f"\n💡 总共 {len(conversation)} 条消息")
236 |     print("   AI 完美记住了所有信息！")
237 | 
238 | 
239 | # ============================================================================
240 | # 主程序
241 | # ============================================================================
242 | def main():
243 |     print("\n" + "="*70)
244 |     print(" LangChain 1.0 - 消息类型与对话管理")
245 |     print("="*70)
246 | 
247 |     try:
248 |         # example_1_message_types()
249 |         # input("\n按 Enter 继续...")
250 | 
251 |         # example_2_conversation_history()
252 |         # input("\n按 Enter 继续...")
253 | 
254 |         # example_3_wrong_way()
255 |         # input("\n按 Enter 继续...")
256 | 
257 |         # example_4_optimize_history()
258 |         # input("\n按 Enter 继续...")
259 | 
260 |         example_5_simple_chatbot()
261 | 
262 |         print("\n" + "="*70)
263 |         print(" 完成！")
264 |         print("="*70)
265 |         print("\n核心要点：")
266 |         print("  ✅ 推荐用字典格式，不用消息对象")
267 |         print("  ✅ 对话历史必须每次都传递完整的")
268 |         print("  ✅ 记得保存 AI 的回复到历史中")
269 |         print("  ✅ 历史太长时只保留最近几轮")
270 | 
271 |     except KeyboardInterrupt:
272 |         print("\n\n程序中断")
273 |     except Exception as e:
274 |         print(f"\n错误: {e}")
275 |         import traceback
276 |         traceback.print_exc()
277 | 
278 | 
279 | if __name__ == "__main__":  # run the demo only when this file is executed as a script
280 |     main()
281 | 


--------------------------------------------------------------------------------
/phase2_practical/11_structured_output/README.md:
--------------------------------------------------------------------------------
  1 | # 11 - Structured Output (结构化输出)
  2 | 
  3 | ## 核心概念
  4 | 
  5 | **Structured Output = 将 LLM 的自然语言输出转为结构化 Python 对象**
  6 | 
  7 | 在 LangChain 1.0 中，使用 `with_structured_output()` 方法结合 Pydantic 模型，可以确保 LLM 返回符合预定义模式的数据。
  8 | 
  9 | ## 基本用法
 10 | 
 11 | ### 定义 Pydantic 模型
 12 | 
 13 | ```python
 14 | from pydantic import BaseModel, Field
 15 | 
 16 | class Person(BaseModel):
 17 |     """人物信息"""
 18 |     name: str = Field(description="姓名")
 19 |     age: int = Field(description="年龄")
 20 |     occupation: str = Field(description="职业")
 21 | ```
 22 | 
 23 | ### 使用 with_structured_output()
 24 | 
 25 | ```python
 26 | from langchain.chat_models import init_chat_model
 27 | 
 28 | model = init_chat_model("groq:llama-3.3-70b-versatile")
 29 | 
 30 | # 创建结构化输出的 LLM
 31 | structured_llm = model.with_structured_output(Person)
 32 | 
 33 | # 调用
 34 | result = structured_llm.invoke("张三是一名 30 岁的软件工程师")
 35 | 
 36 | # result 是 Person 实例
 37 | print(result.name)       # "张三"
 38 | print(result.age)        # 30
 39 | print(result.occupation) # "软件工程师"
 40 | ```
 41 | 
 42 | ## 核心组件
 43 | 
 44 | ### 1. Pydantic BaseModel
 45 | 
 46 | 所有结构化输出的数据模型都必须继承 `BaseModel`：
 47 | 
 48 | ```python
 49 | from pydantic import BaseModel
 50 | 
 51 | class MyModel(BaseModel):
 52 |     field1: str
 53 |     field2: int
 54 | ```
 55 | 
 56 | ### 2. Field 描述
 57 | 
 58 | 使用 `Field()` 添加字段描述，帮助 LLM 理解：
 59 | 
 60 | ```python
 61 | from pydantic import Field
 62 | 
 63 | class Book(BaseModel):
 64 |     title: str = Field(description="书名")
 65 |     author: str = Field(description="作者")
 66 |     year: int = Field(description="出版年份")
 67 | ```
 68 | 
 69 | **重要**：`description` 会传递给 LLM，帮助它正确填充字段。
 70 | 
 71 | ### 3. 类型注解
 72 | 
 73 | Pydantic 支持丰富的类型：
 74 | 
 75 | ```python
 76 | from typing import Optional, List
 77 | 
 78 | class Product(BaseModel):
 79 |     name: str                    # 必填字符串
 80 |     price: float                 # 必填浮点数
 81 |     description: Optional[str] = None   # 可选字符串（需给默认值，否则仍是必填）
 82 |     tags: List[str]              # 字符串列表
 83 | ```
 84 | 
 85 | ## 高级特性
 86 | 
 87 | ### 可选字段
 88 | 
 89 | ```python
 90 | class User(BaseModel):
 91 |     username: str
 92 |     email: Optional[str] = None  # 可以为 None
 93 |     age: Optional[int] = None
 94 | ```
 95 | 
 96 | ### 默认值
 97 | 
 98 | ```python
 99 | class Config(BaseModel):
100 |     timeout: int = 30         # 默认 30
101 |     retry: bool = True        # 默认 True
102 |     max_attempts: int = Field(3, description="最大重试次数")
103 | ```
104 | 
105 | ### 枚举类型
106 | 
107 | ```python
108 | from enum import Enum
109 | 
110 | class Priority(str, Enum):
111 |     LOW = "低"
112 |     MEDIUM = "中"
113 |     HIGH = "高"
114 | 
115 | class Task(BaseModel):
116 |     title: str
117 |     priority: Priority  # 只能是 LOW/MEDIUM/HIGH
118 | ```
119 | 
120 | ### 列表提取
121 | 
122 | ```python
123 | class Person(BaseModel):
124 |     name: str
125 |     age: int
126 | 
127 | class PeopleList(BaseModel):
128 |     people: List[Person]  # 多个 Person 对象
129 | 
130 | structured_llm = model.with_structured_output(PeopleList)
131 | result = structured_llm.invoke("张三 30岁，李四 25岁")
132 | # result.people = [Person(name="张三", age=30), Person(name="李四", age=25)]
133 | ```
134 | 
135 | ### 嵌套模型
136 | 
137 | ```python
138 | class Address(BaseModel):
139 |     city: str
140 |     district: str
141 | 
142 | class Company(BaseModel):
143 |     name: str
144 |     address: Address  # 嵌套模型
145 | 
146 | structured_llm = model.with_structured_output(Company)
147 | result = structured_llm.invoke("阿里巴巴在杭州滨江区")
148 | # result.address.city = "杭州"
149 | # result.address.district = "滨江区"
150 | ```
151 | 
152 | ## 工作原理
153 | 
154 | ### 传统方式 vs 结构化输出
155 | 
156 | **传统方式（繁琐）**：
157 | ```python
158 | # 1. 提示词要求 JSON
159 | prompt = "以JSON格式返回：{name, age, occupation}"
160 | response = model.invoke(prompt)
161 | 
162 | # 2. 手动解析
163 | import json
164 | data = json.loads(response.content)
165 | 
166 | # 3. 手动验证类型
167 | if not isinstance(data['age'], int):
168 |     raise ValueError("age must be int")
169 | 
170 | # 4. 手动创建对象
171 | person = Person(**data)
172 | ```
173 | 
174 | **结构化输出（简洁）**：
175 | ```python
176 | # 一步到位
177 | structured_llm = model.with_structured_output(Person)
178 | person = structured_llm.invoke("张三是一名 30 岁的软件工程师")
179 | # ✅ 自动解析、验证、创建对象
180 | ```
181 | 
182 | ### 幕后流程
183 | 
184 | ```
185 | 1. Pydantic 模型 → JSON Schema
186 |    Person → {
187 |      "type": "object",
188 |      "properties": {
189 |        "name": {"type": "string", "description": "姓名"},
190 |        "age": {"type": "integer", "description": "年龄"}
191 |      }
192 |    }
193 | 
194 | 2. JSON Schema → LLM (函数调用)
195 |    LLM 被强制返回符合 schema 的 JSON
196 | 
197 | 3. JSON → Pydantic 对象
198 |    自动验证类型并创建 Person 实例
199 | ```
200 | 
201 | ## 实际应用
202 | 
203 | ### 1. 客户信息提取
204 | 
205 | ```python
206 | class CustomerInfo(BaseModel):
207 |     name: str = Field(description="客户姓名")
208 |     phone: str = Field(description="电话号码")
209 |     email: Optional[str] = Field(None, description="邮箱")
210 |     issue: str = Field(description="问题描述")
211 | 
212 | structured_llm = model.with_structured_output(CustomerInfo)
213 | 
214 | conversation = """
215 | 客户: 我是李明，电话 138-1234-5678，订单没发货
216 | """
217 | 
218 | info = structured_llm.invoke(f"提取客户信息：{conversation}")
219 | # info.name = "李明"
220 | # info.phone = "138-1234-5678"
221 | # info.issue = "订单没发货"
222 | ```
223 | 
224 | **应用**：
225 | - 自动填充 CRM 系统
226 | - 工单自动分类
227 | - 客服辅助
228 | 
229 | ### 2. 产品评论分析
230 | 
231 | ```python
232 | class Review(BaseModel):
233 |     product: str
234 |     rating: int = Field(description="评分 1-5")
235 |     pros: List[str] = Field(description="优点列表")
236 |     cons: List[str] = Field(description="缺点列表")
237 | 
238 | structured_llm = model.with_structured_output(Review)
239 | 
240 | review = structured_llm.invoke("""
241 | iPhone 15 很棒！摄像头强大，手感好。但是价格贵，没有充电器。4分。
242 | """)
243 | 
244 | # review.product = "iPhone 15"
245 | # review.rating = 4
246 | # review.pros = ["摄像头强大", "手感好"]
247 | # review.cons = ["价格贵", "没有充电器"]
248 | ```
249 | 
250 | **应用**：
251 | - 批量处理用户评论
252 | - 自动生成分析报告
253 | - 发现产品改进点
254 | 
255 | ### 3. 文档信息提取
256 | 
257 | ```python
258 | class Invoice(BaseModel):
259 |     invoice_number: str
260 |     date: str
261 |     total_amount: float
262 |     items: List[str]
263 | 
264 | structured_llm = model.with_structured_output(Invoice)
265 | 
266 | invoice_text = """
267 | 发票号: INV-2024-001
268 | 日期: 2024-01-15
269 | 总金额: 1299.00
270 | 商品: MacBook Pro, AppleCare+
271 | """
272 | 
273 | invoice = structured_llm.invoke(f"提取发票信息：{invoice_text}")
274 | # invoice.invoice_number = "INV-2024-001"
275 | # invoice.total_amount = 1299.00
276 | ```
277 | 
278 | **应用**：
279 | - 自动化财务处理
280 | - OCR 后结构化
281 | - 数据录入
282 | 
283 | ## 常见问题
284 | 
285 | ### 1. LLM 未填充某些字段怎么办？
286 | 
287 | 使用 `Optional` 和默认值：
288 | 
289 | ```python
290 | class Data(BaseModel):
291 |     required_field: str              # 必填
292 |     optional_field: Optional[str] = None  # 可选
293 |     with_default: int = 100          # 有默认值
294 | ```
295 | 
296 | ### 2. 如何限制字段的可选值？
297 | 
298 | 使用枚举：
299 | 
300 | ```python
301 | from enum import Enum
302 | 
303 | class Status(str, Enum):
304 |     ACTIVE = "激活"
305 |     INACTIVE = "未激活"
306 | 
307 | class User(BaseModel):
308 |     status: Status  # 只能是 ACTIVE 或 INACTIVE
309 | ```
310 | 
311 | ### 3. 复杂嵌套结构会出错吗？
312 | 
313 | LLM 能力有限，建议：
314 | - 嵌套层级 ≤ 3 层
315 | - 使用清晰的 `description`
316 | - 必要时拆分成多个调用
317 | 
318 | ### 4. 如何验证提取的准确性？
319 | 
320 | 见下一章 `12_validation_retry` - 验证和重试机制。
321 | 
322 | ### 5. 所有模型都支持吗？
323 | 
324 | 大部分现代模型支持（通过函数调用）：
325 | - ✅ OpenAI (gpt-4, gpt-3.5-turbo)
326 | - ✅ Anthropic (claude-3)
327 | - ✅ Groq (llama-3)
328 | - ❌ 某些旧模型不支持
329 | 
330 | 如果模型不支持函数调用，`with_structured_output()` 通常会直接报错（例如 `NotImplementedError`），不会自动回退；此时需要自己用提示词 + JSON 解析（如输出解析器）来实现。
331 | 
332 | ## 最佳实践
333 | 
334 | ```python
335 | # 1. 使用清晰的字段描述
336 | class Good(BaseModel):
337 |     created_at: str = Field(description="创建时间，格式 YYYY-MM-DD")
338 | 
339 | class Bad(BaseModel):
340 |     created_at: str  # 没有描述，LLM 可能格式错误
341 | 
342 | # 2. 合理使用 Optional
343 | class Good(BaseModel):
344 |     email: Optional[str] = None  # 邮箱可能没有
345 | 
346 | class Bad(BaseModel):
347 |     email: str  # 强制必填，可能导致提取失败
348 | 
349 | # 3. 使用枚举限制值
350 | class Good(BaseModel):
351 |     status: Status  # 枚举
352 | 
353 | class Bad(BaseModel):
354 |     status: str  # 可能返回任意字符串
355 | 
356 | # 4. 列表设置合理的描述
357 | class Good(BaseModel):
358 |     tags: List[str] = Field(description="产品标签，如 '电子产品', '手机'")
359 | 
360 | class Bad(BaseModel):
361 |     tags: List[str]  # LLM 不知道该提取什么
362 | 
363 | # 5. 嵌套模型保持简单
364 | class Good(BaseModel):
365 |     user: User      # 1 层嵌套
366 |     settings: dict  # 复杂数据用 dict
367 | 
368 | class Bad(BaseModel):
369 |     user: User
370 |     # User 内再嵌套 Company
371 |     # Company 内再嵌套 Address
372 |     # Address 内再嵌套 Country —— 4 层嵌套，容易出错
373 | ```
374 | 
375 | ## 核心要点
376 | 
377 | 1. **with_structured_output(Model)** - 将 LLM 输出转为 Pydantic 对象
378 | 2. **Pydantic BaseModel** - 定义数据模式
379 | 3. **Field(description=...)** - 帮助 LLM 理解字段含义
380 | 4. **Optional[T]** - 可选字段
381 | 5. **List[T]** - 列表类型
382 | 6. **Enum** - 限制可选值
383 | 7. **嵌套模型** - 处理复杂结构（≤3 层）
384 | 8. **自动验证** - Pydantic 自动检查类型
385 | 
386 | ## 下一步
387 | 
388 | **12_validation_retry** - 学习如何验证提取结果并处理错误重试
389 | 


--------------------------------------------------------------------------------
/phase2_practical/10_middleware_basics/README.md:
--------------------------------------------------------------------------------
  1 | # 10 - Middleware Basics (中间件基础)
  2 | 
  3 | ## 核心概念
  4 | 
  5 | **Middleware（中间件）= Agent 执行过程中的钩子函数**
  6 | 
  7 | 在 LangChain 1.0 中，中间件是处理 Agent 生命周期的标准方式。
  8 | 
  9 | ## 基本用法
 10 | 
 11 | ### 创建自定义中间件
 12 | 
 13 | ```python
 14 | from langchain.agents.middleware import AgentMiddleware
 15 | 
 16 | class MyMiddleware(AgentMiddleware):
 17 |     def before_model(self, state, runtime):
 18 |         """模型调用前执行"""
 19 |         print("准备调用模型")
 20 |         return None  # 返回 None 表示继续正常流程
 21 | 
 22 |     def after_model(self, state, runtime):
 23 |         """模型响应后执行"""
 24 |         print("模型已响应")
 25 |         return None  # 返回 None 表示不修改状态
 26 | 
 27 | # 使用中间件
 28 | agent = create_agent(
 29 |     model=model,
 30 |     tools=[],
 31 |     middleware=[MyMiddleware()]
 32 | )
 33 | ```
 34 | 
 35 | ## 核心钩子方法
 36 | 
 37 | ### 1. before_model（模型调用前）
 38 | 
 39 | ```python
 40 | def before_model(self, state, runtime):
 41 |     """
 42 |     在模型调用前执行
 43 | 
 44 |     返回值：
 45 |     - None: 继续正常流程
 46 |     - dict: 更新状态（如 {"messages": [...]}）
 47 |     - {"jump_to": "..."}: 跳过正常流程
 48 |     """
 49 |     messages = state.get('messages', [])
 50 |     print(f"当前消息数: {len(messages)}")
 51 |     return None
 52 | ```
 53 | 
 54 | **用途**：
 55 | - 消息修剪（trim messages）
 56 | - PII 脱敏
 57 | - 输入验证
 58 | - 条件路由
 59 | 
 60 | ### 2. after_model（模型响应后）
 61 | 
 62 | ```python
 63 | def after_model(self, state, runtime):
 64 |     """
 65 |     在模型响应后执行
 66 | 
 67 |     返回值：
 68 |     - None: 不修改状态
 69 |     - dict: 更新状态
 70 |     """
 71 |     # 统计调用次数
 72 |     count = state.get("call_count", 0)
 73 |     return {"call_count": count + 1}
 74 | ```
 75 | 
 76 | **用途**：
 77 | - 输出验证
 78 | - 格式化响应
 79 | - 统计信息
 80 | - 状态更新
 81 | 
 82 | ## 返回值的作用
 83 | 
 84 | ### 返回 None
 85 | ```python
 86 | def before_model(self, state, runtime):
 87 |     print("日志记录")
 88 |     return None  # 不做任何修改，继续流程
 89 | ```
 90 | 
 91 | ### 返回字典（更新状态）
 92 | ```python
 93 | def after_model(self, state, runtime):
 94 |     count = state.get("count", 0)
 95 |     return {"count": count + 1}  # 更新状态中的 count
 96 | ```
 97 | 
 98 | ### 返回 jump_to（控制流程）
 99 | ```python
100 | def before_model(self, state, runtime):
101 |     if state.get("count", 0) > 10:
102 |         return {"jump_to": "__end__"}  # 跳过模型，直接结束
103 |     return None
104 | ```
105 | 
106 | **jump_to 目标**：
107 | - `"__end__"` - 结束 Agent
108 | - `"tools"` - 跳到工具节点
109 | - 其他自定义节点
110 | 
111 | ## 执行顺序（重要！）
112 | 
113 | ```python
114 | agent = create_agent(
115 |     model=model,
116 |     middleware=[Middleware1(), Middleware2(), Middleware3()]
117 | )
118 | ```
119 | 
120 | **执行流程**：
121 | ```
122 | 1. Middleware1.before_model   ↓ 正序
123 | 2. Middleware2.before_model   ↓
124 | 3. Middleware3.before_model   ↓
125 | 
126 |    [模型调用]
127 | 
128 | 4. Middleware3.after_model    ↓ 逆序
129 | 5. Middleware2.after_model    ↓
130 | 6. Middleware1.after_model    ↓
131 | ```
132 | 
133 | **类似洋葱模型**：外层先进后出
134 | 
135 | ## 实际应用
136 | 
137 | ### 1. 日志中间件
138 | 
139 | ```python
140 | class LoggingMiddleware(AgentMiddleware):
141 |     def before_model(self, state, runtime):
142 |         print(f"[日志] 消息数: {len(state.get('messages', []))}")
143 |         return None
144 | 
145 |     def after_model(self, state, runtime):
146 |         last_msg = state.get('messages', [])[-1]
147 |         print(f"[日志] 响应类型: {last_msg.__class__.__name__}")
148 |         return None
149 | ```
150 | 
151 | ### 2. 计数中间件
152 | 
153 | ```python
154 | class CallCounterMiddleware(AgentMiddleware):
155 |     def after_model(self, state, runtime):
156 |         count = state.get("model_call_count", 0)
157 |         return {"model_call_count": count + 1}
158 | 
159 | # 需要 checkpointer 来保存自定义状态
160 | agent = create_agent(
161 |     model=model,
162 |     middleware=[CallCounterMiddleware()],
163 |     checkpointer=InMemorySaver()
164 | )
165 | ```
166 | 
167 | ### 3. 消息修剪中间件
168 | 
169 | ```python
170 | class MessageTrimmerMiddleware(AgentMiddleware):
171 |     def __init__(self, max_messages=5):
172 |         super().__init__()
173 |         self.max_messages = max_messages
174 | 
175 |     def before_model(self, state, runtime):
176 |         messages = state.get('messages', [])
177 |         if len(messages) > self.max_messages:
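178 |             # messages 由 add_messages 按 id 合并，只返回子集不会删除旧消息：需先清空再写回最近 N 条（RemoveMessage 来自 langchain.messages，REMOVE_ALL_MESSAGES 来自 langgraph.graph.message）
179 |             return {"messages": [RemoveMessage(id=REMOVE_ALL_MESSAGES), *messages[-self.max_messages:]]}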
180 |         return None
181 | ```
182 | 
183 | ### 4. 输出验证中间件
184 | 
185 | ```python
186 | class OutputValidationMiddleware(AgentMiddleware):
187 |     def after_model(self, state, runtime):
188 |         last_msg = state.get('messages', [])[-1]
189 |         content = getattr(last_msg, 'content', '')
190 | 
191 |         if len(content) > 1000:
192 |             print("[警告] 响应过长")
193 | 
194 |         return None
195 | ```
196 | 
197 | ### 5. 限流中间件
198 | 
199 | ```python
200 | class MaxCallsMiddleware(AgentMiddleware):
201 |     def __init__(self, max_calls=10):
202 |         super().__init__()
203 |         self.max_calls = max_calls
204 | 
205 |     def before_model(self, state, runtime):
206 |         count = state.get("call_count", 0)
207 |         if count >= self.max_calls:
208 |             return {"jump_to": "__end__"}  # 达到限制，直接结束
209 |         return None
210 | 
211 |     def after_model(self, state, runtime):
212 |         count = state.get("call_count", 0)
213 |         return {"call_count": count + 1}
214 | ```
215 | 
216 | ## 内置中间件
217 | 
218 | ### SummarizationMiddleware（自动摘要）
219 | 
220 | ```python
221 | from langchain.agents.middleware import SummarizationMiddleware
222 | 
223 | agent = create_agent(
224 |     model=model,
225 |     middleware=[
226 |         SummarizationMiddleware(
227 |             model="groq:llama-3.1-8b-instant",  # 可用便宜模型
228 |             max_tokens_before_summary=500
229 |         )
230 |     ],
231 |     checkpointer=InMemorySaver()
232 | )
233 | ```
234 | 
235 | **作用**：
236 | - 消息超过 token 限制时自动摘要
237 | - 保留最近消息 + 旧消息摘要
238 | - 详见 08_context_management 章节
239 | 
240 | ### HumanInTheLoopMiddleware（人工审核）
241 | 
242 | ```python
243 | from langchain.agents.middleware import HumanInTheLoopMiddleware
244 | 
245 | agent = create_agent(
246 |     model=model,
247 |     tools=[send_email],
248 |     middleware=[
249 |         HumanInTheLoopMiddleware(
250 |             interrupt_on={"send_email": True}  # 调用此工具前暂停
251 |         )
252 |     ]
253 | )
254 | ```
255 | 
256 | ### PIIMiddleware（敏感信息处理）
257 | 
258 | ```python
259 | from langchain.agents.middleware import PIIMiddleware
260 | 
261 | agent = create_agent(
262 |     model=model,
263 |     middleware=[
264 |         PIIMiddleware("email", strategy="redact"),      # 邮箱脱敏
265 |         PIIMiddleware("phone_number", strategy="block") # 电话拦截
266 |     ]
267 | )
268 | ```
269 | 
270 | ## 常见问题
271 | 
272 | ### 1. 中间件能访问工具调用吗？
273 | 
274 | 不能直接访问。`before_model` 和 `after_model` 只在模型节点执行。
275 | 
276 | 如果需要拦截工具调用，使用 `wrap_tool_call`（高级特性）。
277 | 
278 | ### 2. 多个中间件的顺序重要吗？
279 | 
280 | **非常重要！**
281 | 
282 | ```python
283 | middleware=[
284 |     TrimmerMiddleware(),     # 1. 先修剪消息
285 |     SummarizationMiddleware(), # 2. 再摘要
286 |     LoggingMiddleware()      # 3. 最后记录日志
287 | ]
288 | ```
289 | 
290 | - `before_model` 按列表顺序执行
291 | - `after_model` 按列表逆序执行
292 | 
293 | ### 3. 修改状态需要 checkpointer 吗？
294 | 
295 | **自定义状态需要，messages 不需要**：
296 | 
297 | ```python
298 | # 不需要 checkpointer（messages 自动保存）
299 | def after_model(self, state, runtime):
300 |     return {"messages": [...]}
301 | 
302 | # 需要 checkpointer（自定义字段）
303 | def after_model(self, state, runtime):
304 |     return {"my_custom_field": 123}
305 | ```
306 | 
307 | ### 4. 能在中间件里调用另一个模型吗？
308 | 
309 | 可以，但要小心：
310 | 
311 | ```python
312 | class ValidationMiddleware(AgentMiddleware):
313 |     def __init__(self):
314 |         self.validator_model = init_chat_model(...)
315 | 
316 |     def after_model(self, state, runtime):
317 |         # 用另一个模型验证输出
318 |         last_msg = state['messages'][-1]
319 |         validation_result = self.validator_model.invoke(...)
320 |         return None
321 | ```
322 | 
323 | ## 最佳实践
324 | 
325 | ```python
326 | # 1. 生产环境推荐配置
327 | agent = create_agent(
328 |     model=model,
329 |     tools=[...],
330 |     middleware=[
331 |         MessageTrimmerMiddleware(max_messages=20),  # 限制消息数
332 |         SummarizationMiddleware(model=..., max_tokens_before_summary=2000), # 自动摘要
333 |         LoggingMiddleware(),  # 日志记录
334 |     ],
335 |     checkpointer=SqliteSaver(sqlite3.connect("...", check_same_thread=False))  # from_conn_string() 返回上下文管理器，不能直接传入
336 | )
337 | 
338 | # 2. 开发环境
339 | agent = create_agent(
340 |     model=model,
341 |     tools=[...],
342 |     middleware=[
343 |         LoggingMiddleware(),  # 只要日志
344 |     ]
345 | )
346 | 
347 | # 3. 测试环境
348 | agent = create_agent(
349 |     model=model,
350 |     tools=[...],
351 |     middleware=[
352 |         MaxCallsMiddleware(max_calls=5),  # 防止测试费用爆炸
353 |     ]
354 | )
355 | ```
356 | 
357 | ## 核心要点
358 | 
359 | 1. **中间件** = Agent 生命周期钩子
360 | 2. **before_model** - 模型调用前（正序执行）
361 | 3. **after_model** - 模型响应后（逆序执行）
362 | 4. **返回 None** - 不修改状态
363 | 5. **返回 dict** - 更新状态
364 | 6. **返回 {"jump_to": "..."}** - 控制流程
365 | 7. **顺序重要** - 类似洋葱模型
366 | 8. **内置中间件** - SummarizationMiddleware 最常用
367 | 
368 | ## 下一步
369 | 
370 | **11_structured_output** - 学习如何使用 Pydantic 获取结构化输出
371 | 


--------------------------------------------------------------------------------
/phase1_fundamentals/02_prompt_templates/examples/template_library.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 可复用的提示词模板库
  3 | ====================
  4 | 
  5 | 这个文件包含常用的、经过优化的提示词模板
  6 | 可以直接在项目中导入使用
  7 | 
  8 | 使用方法：
  9 |     from examples.template_library import TemplateLibrary
 10 | 
 11 |     messages = TemplateLibrary.TRANSLATOR.format_messages(
 12 |         source_lang="英语",
 13 |         target_lang="中文",
 14 |         text="Hello World"
 15 |     )
 16 | """
 17 | 
 18 | from langchain_core.prompts import ChatPromptTemplate
 19 | 
 20 | 
 21 | class TemplateLibrary:
 22 |     """Reusable prompt-template library; every attribute is a ChatPromptTemplate with one system and one user message."""
 23 | 
 24 |     # ========================================================================
 25 |     # Translation templates
 26 |     # ========================================================================
 27 | 
 28 |     TRANSLATOR = ChatPromptTemplate.from_messages([  # variables: source_lang, target_lang, text
 29 |         ("system",
 30 |          "你是一个专业的翻译专家，精通{source_lang}和{target_lang}。\n"
 31 |          "翻译要求：\n"
 32 |          "1. 准确传达原文意思\n"
 33 |          "2. 符合目标语言习惯\n"
 34 |          "3. 保持原文风格和语气"),
 35 |         ("user", "请将以下{source_lang}文本翻译成{target_lang}：\n\n{text}")
 36 |     ])
 37 |     """
 38 |     翻译模板
 39 | 
 40 |     变量：
 41 |         source_lang: 源语言（如：英语、中文）
 42 |         target_lang: 目标语言
 43 |         text: 要翻译的文本
 44 | 
 45 |     示例：
 46 |         messages = TRANSLATOR.format_messages(
 47 |             source_lang="英语",
 48 |             target_lang="中文",
 49 |             text="Hello, how are you?"
 50 |         )
 51 |     """
 52 | 
 53 |     # ========================================================================
 54 |     # Programming templates
 55 |     # ========================================================================
 56 | 
 57 |     CODE_GENERATOR = ChatPromptTemplate.from_messages([  # variables: language, description, requirements
 58 |         ("system",
 59 |          "你是一个经验丰富的{language}开发者。\n"
 60 |          "代码要求：\n"
 61 |          "1. 遵循{language}最佳实践\n"
 62 |          "2. 添加必要的注释\n"
 63 |          "3. 代码简洁、可读性强"),
 64 |         ("user",
 65 |          "请用{language}编写代码实现以下功能：\n\n{description}\n\n"
 66 |          "附加要求：{requirements}")
 67 |     ])
 68 |     """代码生成模板"""
 69 | 
 70 |     CODE_REVIEWER = ChatPromptTemplate.from_messages([  # variables: language, focus, code
 71 |         ("system",
 72 |          "你是一个资深的{language}代码审查专家。\n"
 73 |          "审查重点：{focus}\n"
 74 |          "请提供：\n"
 75 |          "1. 代码质量评分（1-10分）\n"
 76 |          "2. 主要问题和改进建议\n"
 77 |          "3. 优化后的代码（如有必要）"),
 78 |         ("user",
 79 |          "请审查以下{language}代码：\n\n"
 80 |          "```{language}\n{code}\n```")
 81 |     ])
 82 |     """代码审查模板"""
 83 | 
 84 |     CODE_EXPLAINER = ChatPromptTemplate.from_messages([  # variables: language, style, code
 85 |         ("system",
 86 |          "你是一个{language}编程导师，擅长用{style}的方式解释代码。"),
 87 |         ("user",
 88 |          "请解释以下{language}代码的功能和实现原理：\n\n"
 89 |          "```{language}\n{code}\n```")
 90 |     ])
 91 |     """代码解释模板"""
 92 | 
 93 |     DEBUG_HELPER = ChatPromptTemplate.from_messages([  # variables: language, error_message, code
 94 |         ("system",
 95 |          "你是一个调试专家，擅长分析和解决{language}代码问题。"),
 96 |         ("user",
 97 |          "我的{language}代码遇到了以下错误：\n\n"
 98 |          "错误信息：\n{error_message}\n\n"
 99 |          "代码：\n```{language}\n{code}\n```\n\n"
100 |          "请帮我：\n1. 分析错误原因\n2. 提供解决方案\n3. 给出修正后的代码")
101 |     ])
102 |     """调试助手模板"""
103 | 
104 |     # ========================================================================
105 |     # Content-creation templates
106 |     # ========================================================================
107 | 
108 |     SUMMARIZER = ChatPromptTemplate.from_messages([  # variables: num_points, content
109 |         ("system",
110 |          "你是一个内容摘要专家，擅长提取关键信息。\n"
111 |          "摘要要求：\n"
112 |          "1. 保留最重要的信息\n"
113 |          "2. 简洁明了\n"
114 |          "3. 条理清晰"),
115 |         ("user",
116 |          "请将以下内容总结为{num_points}个要点：\n\n{content}")
117 |     ])
118 |     """内容摘要模板"""
119 | 
120 |     ARTICLE_WRITER = ChatPromptTemplate.from_messages([  # variables: field, style, audience, topic, word_count, structure, focus
121 |         ("system",
122 |          "你是一个专业的{field}领域作家。\n"
123 |          "写作风格：{style}\n"
124 |          "目标读者：{audience}"),
125 |         ("user",
126 |          "请写一篇关于{topic}的文章。\n"
127 |          "要求：\n"
128 |          "1. 字数：{word_count}字左右\n"
129 |          "2. 结构：{structure}\n"
130 |          "3. 重点：{focus}")
131 |     ])
132 |     """文章写作模板"""
133 | 
134 |     EMAIL_WRITER = ChatPromptTemplate.from_messages([  # variables: tone, recipient_type, purpose, key_points
135 |         ("system",
136 |          "你是一个专业的商务邮件撰写专家。\n"
137 |          "邮件风格：{tone}\n"
138 |          "收件人类型：{recipient_type}"),
139 |         ("user",
140 |          "请帮我写一封邮件：\n"
141 |          "目的：{purpose}\n"
142 |          "关键内容：{key_points}")
143 |     ])
144 |     """邮件撰写模板"""
145 | 
146 |     # ========================================================================
147 |     # Education templates
148 |     # ========================================================================
149 | 
150 |     TUTOR = ChatPromptTemplate.from_messages([  # variables: subject, level, teaching_style, question
151 |         ("system",
152 |          "你是一个{subject}导师。\n"
153 |          "学生水平：{level}\n"
154 |          "教学风格：{teaching_style}"),
155 |         ("user", "{question}")
156 |     ])
157 |     """教学辅导模板"""
158 | 
159 |     QUIZ_GENERATOR = ChatPromptTemplate.from_messages([  # variables: subject, difficulty, question_type, num, topic
160 |         ("system",
161 |          "你是一个{subject}测验题目生成专家。\n"
162 |          "难度级别：{difficulty}\n"
163 |          "题目类型：{question_type}"),
164 |         ("user",
165 |          "请生成{num}道关于{topic}的{question_type}题目。\n"
166 |          "要求：\n"
167 |          "1. 覆盖关键知识点\n"
168 |          "2. 难度适中\n"  # NOTE(review): fixed "moderate difficulty" wording sits next to the {difficulty} variable in the system message — confirm intended
169 |          "3. 提供标准答案")
170 |     ])
171 |     """测验生成模板"""
172 | 
173 |     # ========================================================================
174 |     # Business templates
175 |     # ========================================================================
176 | 
177 |     PRODUCT_DESCRIPTION = ChatPromptTemplate.from_messages([  # variables: style, target_audience, product_name, key_features, word_count
178 |         ("system",
179 |          "你是一个专业的产品文案撰写专家。\n"
180 |          "写作风格：{style}\n"
181 |          "目标客户：{target_audience}"),
182 |         ("user",
183 |          "请为以下产品撰写描述：\n"
184 |          "产品名称：{product_name}\n"
185 |          "核心卖点：{key_features}\n"
186 |          "字数要求：{word_count}字")
187 |     ])
188 |     """产品描述模板"""
189 | 
190 |     MARKET_ANALYSIS = ChatPromptTemplate.from_messages([  # variables: industry, dimensions, topic, focus_areas
191 |         ("system",
192 |          "你是一个{industry}行业的市场分析专家。\n"
193 |          "分析维度：{dimensions}"),
194 |         ("user",
195 |          "请分析{topic}的市场情况，重点关注：\n{focus_areas}")
196 |     ])
197 |     """市场分析模板"""
198 | 
199 |     # ========================================================================
200 |     # Customer-service templates
201 |     # ========================================================================
202 | 
203 |     CUSTOMER_SUPPORT = ChatPromptTemplate.from_messages([  # variables: company, tone, available_actions, customer_message
204 |         ("system",
205 |          "你是{company}的客服专员。\n"
206 |          "服务态度：友好、专业、耐心\n"
207 |          "回复风格：{tone}\n"
208 |          "可用操作：{available_actions}"),
209 |         ("user", "{customer_message}")
210 |     ])
211 |     """客户服务模板"""
212 | 
213 |     FAQ_RESPONDER = ChatPromptTemplate.from_messages([  # variables: knowledge_base, question
214 |         ("system",
215 |          "你是一个FAQ问答专家。\n"
216 |          "知识库：{knowledge_base}\n"
217 |          "如果问题不在知识库中，请礼貌地说明无法回答。"),
218 |         ("user", "{question}")
219 |     ])
220 |     """FAQ回答模板"""
221 | 
222 |     # ========================================================================
223 |     # Data-analysis templates
224 |     # ========================================================================
225 | 
226 |     DATA_ANALYZER = ChatPromptTemplate.from_messages([  # variables: analysis_type, tools, data, requirements
227 |         ("system",
228 |          "你是一个数据分析专家，擅长{analysis_type}分析。\n"
229 |          "分析工具：{tools}"),
230 |         ("user",
231 |          "请分析以下数据：\n{data}\n\n"
232 |          "分析要求：\n{requirements}")
233 |     ])
234 |     """数据分析模板"""
235 | 
236 |     REPORT_GENERATOR = ChatPromptTemplate.from_messages([  # variables: report_type, audience, information, structure
237 |         ("system",
238 |          "你是一个{report_type}报告撰写专家。\n"
239 |          "报告受众：{audience}"),
240 |         ("user",
241 |          "请基于以下信息生成报告：\n{information}\n\n"
242 |          "报告结构：{structure}")
243 |     ])
244 |     """报告生成模板"""
245 | 
246 | 
247 | # ============================================================================
248 | # 使用示例
249 | # ============================================================================
250 | 
if __name__ == "__main__":
    # Demo: render a few templates from the library and print a short preview
    # of every generated message.

    def _preview(text, limit):
        """Return ``text`` cut to ``limit`` characters.

        The "..." marker is appended only when something was actually cut off,
        so short messages are not shown as if they had been truncated.
        """
        return text[:limit] + "..." if len(text) > limit else text

    def _show_example(title, template, variables, limit):
        """Format ``template`` with ``variables`` and print a preview per message.

        Args:
            title: Heading printed between the full-width brackets.
            template: Prompt template exposing ``format_messages``.
            variables: Mapping of template variable names to values.
            limit: Maximum number of characters of each message to display.
        """
        print(f"\n【{title}】")
        messages = template.format_messages(**variables)
        print("生成的消息：")
        for msg in messages:
            print(f"  {msg.type}: {_preview(msg.content, limit)}")

    print("="*70)
    print(" 提示词模板库示例")
    print("="*70)

    # Example 1: translation template
    _show_example(
        "示例 1：翻译模板",
        TemplateLibrary.TRANSLATOR,
        {
            "source_lang": "英语",
            "target_lang": "中文",
            "text": "Hello, how are you today?",
        },
        50,
    )

    # Example 2: code generation template
    _show_example(
        "示例 2：代码生成模板",
        TemplateLibrary.CODE_GENERATOR,
        {
            "language": "Python",
            "description": "计算斐波那契数列的第n项",
            "requirements": "使用递归实现，添加类型注解",
        },
        80,
    )

    # Example 3: summarisation template
    _show_example(
        "示例 3：摘要模板",
        TemplateLibrary.SUMMARIZER,
        {
            "num_points": 3,
            "content": "Python 是一种高级编程语言，以其简洁的语法和强大的功能而闻名...",
        },
        80,
    )

    print("\n" + "="*70)
    print(" 提示：在实际项目中，直接导入使用这些模板")
    print(" from examples.template_library import TemplateLibrary")
    print("="*70)
294 | 


--------------------------------------------------------------------------------
/phase1_fundamentals/05_simple_agent/main.py:
--------------------------------------------------------------------------------
  1 | """
  2 | LangChain 1.0 - Simple Agent (使用 create_agent)
  3 | =============================================
  4 | 
  5 | 本模块重点讲解：
  6 | 1. 使用 create_agent 创建 Agent（LangChain 1.0 统一API）
  7 | 2. Agent 自动决定何时使用工具
  8 | 3. Agent 执行循环的工作原理
  9 | """
 10 | 
 11 | import os
 12 | import sys
 13 | 
# Put the sibling 04_custom_tools/tools directory at the front of sys.path so
# the tool modules imported below (weather, calculator, web_search) can be
# found by their bare module names.
parent_dir = os.path.dirname(os.path.dirname(__file__))
sys.path.insert(0, os.path.join(parent_dir, '04_custom_tools', 'tools'))
 17 | 
 18 | from dotenv import load_dotenv
 19 | from langchain.chat_models import init_chat_model
 20 | from langchain.agents import create_agent  # LangChain 1.0 统一 API
 21 | 
 22 | # 导入自定义工具
 23 | from weather import get_weather
 24 | from calculator import calculator
 25 | from web_search import web_search
 26 | 
 27 | load_dotenv()
 28 | GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 29 | 
 30 | if not GROQ_API_KEY or GROQ_API_KEY == "your_groq_api_key_here_replace_this":
 31 |     raise ValueError("请先设置 GROQ_API_KEY")
 32 | 
 33 | model = init_chat_model("groq:llama-3.3-70b-versatile", api_key=GROQ_API_KEY)
 34 | 
 35 | 
 36 | # ============================================================================
 37 | # 示例 1：创建第一个 Agent
 38 | # ============================================================================
 39 | def example_1_basic_agent():
 40 |     """
 41 |     示例1：创建最简单的 Agent
 42 | 
 43 |     关键：
 44 |     1. 使用 create_agent 函数
 45 |     2. 传入 model 和 tools
 46 |     3. Agent 会自动决定是否使用工具
 47 |     """
 48 |     print("\n" + "="*70)
 49 |     print("示例 1：创建第一个 Agent")
 50 |     print("="*70)
 51 | 
 52 |     # 创建 Agent
 53 |     agent = create_agent(
 54 |         model=model,
 55 |         tools=[get_weather]  # 只给一个工具
 56 |     )
 57 | 
 58 |     print("\nAgent 创建成功！")
 59 |     print("配置的工具：get_weather")
 60 | 
 61 |     # 测试：需要工具的问题
 62 |     print("\n测试1：询问天气（需要工具）")
 63 |     response = agent.invoke({
 64 |         "messages": [{"role": "user", "content": "北京今天天气怎么样？"}]
 65 |     })
 66 | 
 67 |     print(f"\nAgent 回复：{response['messages'][-1].content}")
 68 | 
 69 |     # 测试：不需要工具的问题
 70 |     print("\n测试2：普通问题（不需要工具）")
 71 |     response = agent.invoke({
 72 |         "messages": [{"role": "user", "content": "你好，介绍一下你自己"}]
 73 |     })
 74 | 
 75 |     print(f"\nAgent 回复：{response['messages'][-1].content}")
 76 | 
 77 |     print("\n关键点：")
 78 |     print("  - Agent 自动判断是否需要使用工具")
 79 |     print("  - 需要工具时：调用工具 → 获取结果 → 生成回答")
 80 |     print("  - 不需要时：直接回答")
 81 | 
 82 | 
 83 | # ============================================================================
 84 | # 示例 2：多工具 Agent
 85 | # ============================================================================
 86 | def example_2_multi_tool_agent():
 87 |     """
 88 |     示例2：配置多个工具的 Agent
 89 | 
 90 |     Agent 会根据问题选择合适的工具
 91 |     """
 92 |     print("\n" + "="*70)
 93 |     print("示例 2：多工具 Agent")
 94 |     print("="*70)
 95 | 
 96 |     # 创建配置多个工具的 Agent
 97 |     agent = create_agent(
 98 |         model=model,
 99 |         tools=[get_weather, calculator, web_search]
100 |     )
101 | 
102 |     print("\n配置的工具：")
103 |     print("  - get_weather（天气查询）")
104 |     print("  - calculator（计算器）")
105 |     print("  - web_search（网页搜索）")
106 | 
107 |     # 测试不同类型的问题
108 |     tests = [
109 |         "上海的天气怎么样？",           # 应该用 get_weather
110 |         "15 乘以 23 等于多少？",         # 应该用 calculator
111 |     ]
112 | 
113 |     for i, question in enumerate(tests, 1):
114 |         print(f"\n{'='*70}")
115 |         print(f"测试 {i}：{question}")
116 |         print(f"{'='*70}")
117 | 
118 |         response = agent.invoke({
119 |             "messages": [{"role": "user", "content": question}]
120 |         })
121 | 
122 |         # 显示最终回答
123 |         print(f"\nAgent 回复：{response['messages'][-1].content}")
124 | 
125 |     print("\n关键点：")
126 |     print("  - Agent 从多个工具中选择最合适的")
127 |     print("  - 基于工具的 docstring 理解工具用途")
128 | 
129 | 
130 | # ============================================================================
131 | # 示例 3：带系统提示的 Agent
132 | # ============================================================================
def example_3_agent_with_system_prompt():
    """
    Example 3: customise an agent through the ``system_prompt`` argument.

    Creates an agent with ``get_weather`` and ``calculator`` plus a system
    prompt, sends one combined weather/arithmetic question and prints the
    final message of the response.
    """
    banner = "=" * 70
    print("\n" + banner)
    print("示例 3：自定义 Agent 行为")
    print(banner)

    # Same text as a single multi-line literal, assembled line by line.
    persona = (
        "你是一个友好的助手。\n"
        "特点：\n"
        "- 回答简洁明了\n"
        "- 使用工具前先说明\n"
        "- 结果用表格或列表清晰展示"
    )
    agent = create_agent(
        model=model,
        tools=[get_weather, calculator],
        system_prompt=persona,
    )

    print("\n测试：自定义行为的 Agent")
    user_turn = {"role": "user", "content": "北京天气如何？顺便算一下 100 加 50"}
    result = agent.invoke({"messages": [user_turn]})

    print(f"\nAgent 回复：{result['messages'][-1].content}")

    print("\n关键点：")
    print("  - system_prompt 定义 Agent 的行为风格")
    print("  - 可以指定输出格式、语气、工作流程等")
164 | 
165 | 
166 | # ============================================================================
167 | # 示例 4：Agent 执行过程详解
168 | """ Agent 执行过程：
169 | 
170 | 完整消息历史：
171 | 
172 | --- 消息 1 (HumanMessage) ---
173 | 内容：25 乘以 8 等于多少？
174 | 
175 | --- 消息 2 (AIMessage) ---
176 | 内容：
177 | 工具调用：[{'name': 'calculator', 'args': {'a': 25, 'b': 8, 'operation': 'multiply'}, 'id': '3022d92m1', 'type': 'tool_call'}]
178 | 
179 | --- 消息 3 (ToolMessage) ---
180 | 内容：25.0 multiply 8.0 = 200.0
181 | 
182 | --- 消息 4 (AIMessage) ---
183 | 内容：25 乘以 8 等于 200。
184 | """
185 | # ============================================================================
def example_4_agent_execution_details():
    """
    Example 4: print the full message history of one agent run.

    Invokes a calculator-only agent with a multiplication question and then
    walks through every message in the response, printing its class name, its
    content and, when present and non-empty, its tool calls.
    """
    banner = "=" * 70
    print("\n" + banner)
    print("示例 4：Agent 执行过程详解")
    print(banner)

    agent = create_agent(model=model, tools=[calculator])

    print("\n问题：25 乘以 8 等于多少？")
    print("\nAgent 执行过程：")

    result = agent.invoke({
        "messages": [{"role": "user", "content": "25 乘以 8 等于多少？"}]
    })

    # Dump every message that the run produced, in order.
    print("\n完整消息历史：")
    for position, message in enumerate(result['messages'], start=1):
        print(f"\n--- 消息 {position} ({message.__class__.__name__}) ---")
        if hasattr(message, 'content'):
            print(f"内容：{message.content}")
        # Skip messages that have no tool_calls attribute or an empty one.
        calls = getattr(message, 'tool_calls', None)
        if calls:
            print(f"工具调用：{calls}")

    print("\n执行循环：")
    print("""
    1. 用户提问 → HumanMessage
    2. AI 决定调用工具 → AIMessage (包含 tool_calls)
    3. 执行工具 → ToolMessage (包含结果)
    4. AI 基于结果生成答案 → AIMessage (最终回答)
    """)
224 | 
225 | 
226 | # ============================================================================
227 | # 示例 5：多轮对话 Agent
228 | # ============================================================================
def example_5_multi_turn_agent():
    """
    Example 5: a two-turn conversation with manually passed history.

    The second call sends the first response's full message list followed by
    the new user question, which is how context is carried over here (no
    checkpointer is configured).
    """
    banner = "=" * 70
    print("\n" + banner)
    print("示例 5：多轮对话 Agent")
    print(banner)

    agent = create_agent(model=model, tools=[calculator])

    # Turn 1
    first_question = "10 加 5 等于多少？"
    print(f"\n用户：{first_question}")
    first_reply = agent.invoke({
        "messages": [{"role": "user", "content": first_question}]
    })
    print(f"Agent：{first_reply['messages'][-1].content}")

    # Turn 2: previous messages + the follow-up question
    follow_up = "再乘以 3 呢？"
    print(f"\n用户：{follow_up}")
    history = first_reply['messages'] + [{"role": "user", "content": follow_up}]
    second_reply = agent.invoke({"messages": history})
    print(f"Agent：{second_reply['messages'][-1].content}")

    for line in (
        "\n关键点：",
        "  - 多轮对话：传入之前的 messages",
        "  - Agent 能记住上下文",
        "  - 格式：上一轮的 response['messages'] + 新问题",
    ):
        print(line)
264 | 
265 | 
266 | # ============================================================================
267 | # 示例 6：Agent 最佳实践
268 | # ============================================================================
def example_6_best_practices():
    """
    Example 6: print a best-practice checklist and run a configured agent.

    First prints a fixed block of guidance text, then creates an agent with
    two tools and a workflow-oriented system prompt, asks it one weather
    question and prints the final message of the response.
    """
    banner = "=" * 70
    print("\n" + banner)
    print("示例 6：Agent 最佳实践")
    print(banner)

    guidelines = """
最佳实践：

1. 工具选择
   - 只给 Agent 需要的工具（工具太多会混淆）
   - 工具的 docstring 要清晰
   - 每个工具功能单一

2. System Prompt
   - 明确说明 Agent 的角色
   - 定义输出格式
   - 说明何时使用工具

3. 错误处理
   - 工具内部捕获异常
   - 返回友好的错误信息
   - Agent 可以处理工具失败

4. 性能优化
   - 减少不必要的工具调用
   - 缓存常用查询结果
   - 使用流式输出（后续学习）

5. 测试
   - 测试各种问题类型
   - 测试边界情况
   - 验证工具选择是否正确
    """
    print(guidelines)

    print("\n示例：良好配置的 Agent")

    workflow_prompt = """你是一个专业的助手。
工作流程：
1. 仔细理解用户问题
2. 如果需要工具，先说明将要做什么
3. 调用工具获取准确信息
4. 基于结果给出清晰答案

输出要求：
- 简洁明了
- 数据准确
- 格式清晰"""

    agent = create_agent(
        model=model,
        tools=[get_weather, calculator],
        system_prompt=workflow_prompt,
    )

    print("\n测试：")
    result = agent.invoke({
        "messages": [{"role": "user", "content": "北京天气如何？"}]
    })
    print(f"Agent 回复：{result['messages'][-1].content}")
329 | 
330 | 
331 | # ============================================================================
332 | # 主程序
333 | # ============================================================================
def main():
    """
    Run the six examples in order, pausing for Enter between them.

    A KeyboardInterrupt ends the run with a short notice; any other exception
    is reported with its message and a printed traceback instead of
    propagating.
    """
    banner = "=" * 70
    print("\n" + banner)
    print(" LangChain 1.0 - Simple Agent")
    print(banner)

    try:
        examples = (
            example_1_basic_agent,
            example_2_multi_tool_agent,
            example_3_agent_with_system_prompt,
            example_4_agent_execution_details,
            example_5_multi_turn_agent,
            example_6_best_practices,
        )
        for position, run_example in enumerate(examples):
            # Pause between examples, but not before the first one.
            if position:
                input("\n按 Enter 继续...")
            run_example()

        print("\n" + banner)
        print(" 完成！")
        print(banner)
        for line in (
            "\n核心要点：",
            "  create_agent 创建 Agent",
            "  Agent 自动判断何时使用工具",
            "  执行循环：问题 → 工具调用 → 结果 → 回答",
            "  多轮对话：传入历史 messages",
            "  system_prompt 定义 Agent 行为",
            "\n下一步：",
            "  06_agent_loop - 深入理解 Agent 执行循环",
        ):
            print(line)

    except KeyboardInterrupt:
        print("\n\n程序中断")
    except Exception as e:
        print(f"\n错误: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    main()
379 | 


--------------------------------------------------------------------------------
/phase2_practical/07_memory_basics/main.py:
--------------------------------------------------------------------------------
  1 | """
  2 | LangChain 1.0 - Memory Basics (内存管理基础)
  3 | ==========================================
  4 | 
  5 | 本模块重点讲解：
  6 | 1. InMemorySaver - LangGraph 提供的内存管理
  7 | 2. checkpointer 参数 - 为 Agent 添加内存
  8 | 3. thread_id - 会话管理
  9 | 4. 多轮对话状态保持
 10 | """
 11 | 
 12 | import os
 13 | from dotenv import load_dotenv
 14 | from langchain.chat_models import init_chat_model
 15 | from langchain.agents import create_agent
 16 | from langchain_core.tools import tool
 17 | from langgraph.checkpoint.memory import InMemorySaver
 18 | 
 19 | load_dotenv()
 20 | GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 21 | 
 22 | if not GROQ_API_KEY or GROQ_API_KEY == "your_groq_api_key_here_replace_this":
 23 |     raise ValueError("请先设置 GROQ_API_KEY")
 24 | 
 25 | model = init_chat_model("groq:llama-3.3-70b-versatile", api_key=GROQ_API_KEY)
 26 | 
 27 | # 创建一个简单的工具
 28 | @tool
 29 | def get_user_info(user_id: str) -> str:
 30 |     """获取用户信息"""
 31 |     users = {
 32 |         "123": "张三，25岁，工程师",
 33 |         "456": "李四，30岁，设计师"
 34 |     }
 35 |     return users.get(user_id, "用户不存在")
 36 | 
 37 | 
 38 | # ============================================================================
 39 | # 示例 1：没有内存的 Agent（对比）
 40 | # ============================================================================
 41 | def example_1_no_memory():
 42 |     """
 43 |     示例1：没有内存的 Agent - 不记得之前的对话
 44 | 
 45 |     关键：每次调用都是独立的
 46 |     """
 47 |     print("\n" + "="*70)
 48 |     print("示例 1：没有内存的 Agent")
 49 |     print("="*70)
 50 | 
 51 |     # 创建没有 checkpointer 的 Agent
 52 |     agent = create_agent(
 53 |         model=model,
 54 |         tools=[]
 55 |     )
 56 | 
 57 |     print("\n第一轮对话：")
 58 |     response1 = agent.invoke({
 59 |         "messages": [{"role": "user", "content": "我叫张三"}]
 60 |     })
 61 |     print(f"Agent: {response1['messages'][-1].content}")
 62 | 
 63 |     print("\n第二轮对话：")
 64 |     response2 = agent.invoke({
 65 |         "messages": [{"role": "user", "content": "我叫什么？"}]
 66 |     })
 67 |     print(f"Agent: {response2['messages'][-1].content}")
 68 | 
 69 |     print("\n关键点：")
 70 |     print("  - Agent 不记得第一轮对话")
 71 |     print("  - 每次 invoke 都是全新的开始")
 72 |     print("  - 需要手动传入历史消息才能记住")
 73 | 
 74 | 
 75 | # ============================================================================
 76 | # 示例 2：使用 InMemorySaver 添加内存
 77 | # ============================================================================
 78 | def example_2_with_memory():
 79 |     """
 80 |     示例2：使用 InMemorySaver 添加短期内存
 81 | 
 82 |     关键：
 83 |     1. checkpointer=InMemorySaver()
 84 |     2. config={"configurable": {"thread_id": "xxx"}}
 85 |     """
 86 |     print("\n" + "="*70)
 87 |     print("示例 2：使用 InMemorySaver 添加内存")
 88 |     print("="*70)
 89 | 
 90 |     # 创建带内存的 Agent
 91 |     agent = create_agent(
 92 |         model=model,
 93 |         tools=[],
 94 |         checkpointer=InMemorySaver()  # 添加内存管理
 95 |     )
 96 | 
 97 |     # config 中指定 thread_id
 98 |     config = {"configurable": {"thread_id": "conversation_1"}}
 99 | 
100 |     print("\n第一轮对话：")
101 |     response1 = agent.invoke(
102 |         {"messages": [{"role": "user", "content": "我叫张三"}]},
103 |         config=config  # 传入 config
104 |     )
105 |     print(f"Agent: {response1['messages'][-1].content}")
106 | 
107 |     print("\n第二轮对话（同一个 thread_id）：")
108 |     response2 = agent.invoke(
109 |         {"messages": [{"role": "user", "content": "我叫什么？"}]},
110 |         config=config  # 使用相同的 thread_id
111 |     )
112 |     print(f"Agent: {response2['messages'][-1].content}")
113 | 
114 |     print("\n关键点：")
115 |     print("  - Agent 记住了第一轮对话！")
116 |     print("  - checkpointer 自动保存对话历史")
117 |     print("  - thread_id 用于区分不同的会话")
118 | 
119 | 
120 | # ============================================================================
121 | # 示例 3：多个会话（不同 thread_id）
122 | # ============================================================================
def example_3_multiple_threads():
    """
    Example 3: two independent conversations on one agent.

    Uses a single checkpointed agent with two configs that differ only in
    ``thread_id`` ("user_alice" and "user_bob"). Each thread first receives an
    introduction, then is asked for the name; the replies to the second
    question are printed.
    """
    banner = "=" * 70
    print("\n" + banner)
    print("示例 3：多个独立会话")
    print(banner)

    agent = create_agent(model=model, tools=[], checkpointer=InMemorySaver())

    alice_cfg = {"configurable": {"thread_id": "user_alice"}}
    bob_cfg = {"configurable": {"thread_id": "user_bob"}}

    def send(text, cfg):
        """Invoke the agent with one user message on the given thread."""
        return agent.invoke(
            {"messages": [{"role": "user", "content": text}]},
            config=cfg,
        )

    # Session 1: introduction only, the reply is not printed.
    print("\n[会话 1 - Alice]")
    send("我叫 Alice", alice_cfg)
    print("Alice: 我叫 Alice")

    # Session 2: introduction only, the reply is not printed.
    print("\n[会话 2 - Bob]")
    send("我叫 Bob", bob_cfg)
    print("Bob: 我叫 Bob")

    # Back to session 1
    print("\n[回到会话 1 - Alice]")
    alice_reply = send("我叫什么？", alice_cfg)
    print(f"Agent: {alice_reply['messages'][-1].content}")

    # Back to session 2
    print("\n[回到会话 2 - Bob]")
    bob_reply = send("我叫什么？", bob_cfg)
    print(f"Agent: {bob_reply['messages'][-1].content}")

    print("\n关键点：")
    print("  - 不同 thread_id 的会话完全独立")
    print("  - Agent 能正确记住每个会话的内容")
    print("  - 适合多用户聊天场景")
177 | 
178 | 
179 | # ============================================================================
180 | # 示例 4：带工具的内存 Agent
181 | # ============================================================================
def example_4_memory_with_tools():
    """
    Example 4: checkpointed memory combined with a tool.

    The agent gets the ``get_user_info`` tool and an ``InMemorySaver``. On
    thread "session_1" it is first asked to look up user 123 and then asked a
    follow-up about that user; both final replies are printed.
    """
    banner = "=" * 70
    print("\n" + banner)
    print("示例 4：内存 + 工具调用")
    print(banner)

    agent = create_agent(
        model=model,
        tools=[get_user_info],
        checkpointer=InMemorySaver(),
    )
    session = {"configurable": {"thread_id": "session_1"}}

    rounds = (
        ("\n第一轮：查询用户信息", "查询用户 123 的信息"),
        ("\n第二轮：询问之前的信息", "刚才查询的用户多大？"),
    )
    for heading, text in rounds:
        print(heading)
        result = agent.invoke(
            {"messages": [{"role": "user", "content": text}]},
            config=session,
        )
        print(f"Agent: {result['messages'][-1].content}")

    print("\n关键点：")
    print("  - Agent 记住了工具调用的结果")
    print("  - 不需要重新调用工具")
    print("  - 对话上下文包含工具使用历史")
218 | 
219 | 
220 | # ============================================================================
221 | # 示例 5：查看内存状态
222 | # ============================================================================
def example_5_inspect_memory():
    """
    Example 5: look at what a checkpointed conversation returns.

    Sends three user messages on thread "inspect_thread", then prints the
    number of messages in the last response and a preview of its final three
    messages (content longer than 50 characters is cut and suffixed "...").
    """
    banner = "=" * 70
    print("\n" + banner)
    print("示例 5：查看内存状态")
    print(banner)

    agent = create_agent(model=model, tools=[], checkpointer=InMemorySaver())
    thread = {"configurable": {"thread_id": "inspect_thread"}}

    def send(text):
        """Invoke the agent with one user message on the inspected thread."""
        return agent.invoke(
            {"messages": [{"role": "user", "content": text}]},
            config=thread,
        )

    # Warm-up turns whose responses are not needed.
    print("\n进行对话...")
    for opener in ("你好", "我喜欢编程"):
        send(opener)

    # The response of this call is the one that gets inspected.
    final = send("我喜欢什么？")
    history = final['messages']

    print(f"\n对话历史中的消息数量: {len(history)}")
    print("\n最近的消息：")
    for message in history[-3:]:
        label = message.__class__.__name__
        if len(message.content) > 50:
            shown = message.content[:50] + "..."
        else:
            shown = message.content
        print(f"  {label}: {shown}")

    print("\n关键点：")
    print("  - checkpointer 保存完整的消息历史")
    print("  - response['messages'] 包含所有历史消息")
    print("  - 每次调用都会追加新消息")
270 | 
271 | 
272 | # ============================================================================
273 | # 示例 6：实际应用场景
274 | # ============================================================================
def example_6_practical_use():
    """
    Example 6: a customer-service style conversation.

    Creates an agent with the ``get_user_info`` tool, a support-assistant
    system prompt and an ``InMemorySaver``, then plays four scripted user
    messages on one thread and prints each reply.
    """
    banner = "=" * 70
    print("\n" + banner)
    print("示例 6：实际应用 - 客服机器人")
    print(banner)

    # Same text as a single multi-line literal, assembled line by line.
    support_prompt = (
        "你是一个客服助手。\n"
        "特点：\n"
        "- 记住用户说过的话\n"
        "- 友好、有耐心\n"
        "- 使用 get_user_info 工具查询用户信息时需要用户 ID"
    )
    agent = create_agent(
        model=model,
        tools=[get_user_info],
        system_prompt=support_prompt,
        checkpointer=InMemorySaver(),
    )

    # The session's thread id is derived from a simulated user id.
    user_id = "user_12345"
    session = {"configurable": {"thread_id": user_id}}

    script = (
        "你好，我想咨询一下",
        "我的用户 ID 是 123",
        "帮我查一下我的信息",
        "我多大来着？",  # memory check
    )

    for round_no, utterance in enumerate(script, start=1):
        print(f"\n轮次 {round_no}:")
        print(f"用户: {utterance}")

        result = agent.invoke(
            {"messages": [{"role": "user", "content": utterance}]},
            config=session,
        )

        print(f"客服: {result['messages'][-1].content}")

    print("\n关键点：")
    print("  - Agent 记住了用户的 ID")
    print("  - Agent 记住了查询的结果")
    print("  - 实现了流畅的多轮对话")
322 | 
323 | 
324 | # ============================================================================
325 | # 主程序
326 | # ============================================================================
def main():
    """
    Run the six memory examples in order, pausing for Enter between them.

    A KeyboardInterrupt ends the run with a short notice; any other exception
    is reported with its message and a printed traceback instead of
    propagating.
    """
    banner = "=" * 70
    print("\n" + banner)
    print(" LangChain 1.0 - Memory Basics")
    print(banner)

    try:
        examples = (
            example_1_no_memory,
            example_2_with_memory,
            example_3_multiple_threads,
            example_4_memory_with_tools,
            example_5_inspect_memory,
            example_6_practical_use,
        )
        for position, run_example in enumerate(examples):
            # Pause between examples, but not before the first one.
            if position:
                input("\n按 Enter 继续...")
            run_example()

        print("\n" + banner)
        print(" 完成！")
        print(banner)
        for line in (
            "\n核心要点：",
            "  checkpointer=InMemorySaver() 添加内存",
            "  config={'configurable': {'thread_id': 'xxx'}} 指定会话",
            "  不同 thread_id = 不同会话",
            "  自动保存对话历史",
            "\n下一步：",
            "  08_context_management - 管理上下文长度",
        ):
            print(line)

    except KeyboardInterrupt:
        print("\n\n程序中断")
    except Exception as e:
        print(f"\n错误: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    main()
371 | 


--------------------------------------------------------------------------------