├── utils ├── __init__.py └── call_llm.py ├── requirements.txt ├── assets └── banner.png ├── main.py ├── flow.py ├── nodes.py ├── README.md ├── .gitignore ├── docs └── design.md ├── .clinerules ├── .cursorrules └── .goosehints /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pocketflow>=0.0.1 -------------------------------------------------------------------------------- /assets/banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Pocket/PocketFlow-Template-Python/main/assets/banner.png -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from flow import create_qa_flow 2 | 3 | # Example main function 4 | # Please replace this with your own main function 5 | def main(): 6 | shared = { 7 | "question": "In one sentence, what's the end of universe?", 8 | "answer": None 9 | } 10 | 11 | qa_flow = create_qa_flow() 12 | qa_flow.run(shared) 13 | print("Question:", shared["question"]) 14 | print("Answer:", shared["answer"]) 15 | 16 | if __name__ == "__main__": 17 | main() 18 | -------------------------------------------------------------------------------- /flow.py: -------------------------------------------------------------------------------- 1 | from pocketflow import Flow 2 | from nodes import GetQuestionNode, AnswerNode 3 | 4 | def create_qa_flow(): 5 | """Create and return a question-answering flow.""" 6 | # Create nodes 7 | get_question_node = GetQuestionNode() 8 | answer_node = AnswerNode() 9 | 10 | # Connect nodes in sequence 11 | get_question_node >> answer_node 12 | 13 | # Create flow starting with input node 14 | return Flow(start=get_question_node) 15 | 16 | qa_flow = create_qa_flow() -------------------------------------------------------------------------------- /utils/call_llm.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import os 3 | 4 | # Learn more about calling the LLM: https://the-pocket.github.io/PocketFlow/utility_function/llm.html 5 | def call_llm(prompt): 6 | client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "your-api-key")) 7 | r = client.chat.completions.create( 8 | model="gpt-4o", 9 | messages=[{"role": "user", "content": prompt}] 10 | ) 11 | return r.choices[0].message.content 12 | 13 | if __name__ == "__main__": 14 | prompt = "What is the meaning of life?" 
15 | print(call_llm(prompt)) 16 | -------------------------------------------------------------------------------- /nodes.py: -------------------------------------------------------------------------------- 1 | from pocketflow import Node 2 | from utils.call_llm import call_llm 3 | 4 | class GetQuestionNode(Node): 5 | def exec(self, _): 6 | # Get question directly from user input 7 | user_question = input("Enter your question: ") 8 | return user_question 9 | 10 | def post(self, shared, prep_res, exec_res): 11 | # Store the user's question 12 | shared["question"] = exec_res 13 | return "default" # Go to the next node 14 | 15 | class AnswerNode(Node): 16 | def prep(self, shared): 17 | # Read question from shared 18 | return shared["question"] 19 | 20 | def exec(self, question): 21 | # Call LLM to get the answer 22 | return call_llm(question) 23 | 24 | def post(self, shared, prep_res, exec_res): 25 | # Store the answer in shared 26 | shared["answer"] = exec_res -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

Pocket Flow Project Template: Agentic Coding 2 | 3 | [banner image: assets/banner.png]
10 | 11 | This is a project template for Agentic Coding with [Pocket Flow](https://github.com/The-Pocket/PocketFlow), a 100-line LLM framework, and your editor of choice. 12 | 13 | - We have included rules files for various AI coding assistants to help you build LLM projects: 14 | - [.cursorrules](.cursorrules) for Cursor AI 15 | - [.clinerules](.clinerules) for Cline 16 | - [.windsurfrules](.windsurfrules) for Windsurf 17 | - [.goosehints](.goosehints) for Goose 18 | - Configuration in [.github](.github) for GitHub Copilot 19 | - [CLAUDE.md](CLAUDE.md) for Claude Code 20 | - [GEMINI.md](GEMINI.md) for Gemini 21 | 22 | - Want to learn how to build LLM projects with Agentic Coding? 23 | 24 | - Check out the [Agentic Coding Guidance](https://the-pocket.github.io/PocketFlow/guide.html) 25 | 26 | - Check out the [YouTube Tutorial](https://www.youtube.com/@ZacharyLLM?sub_confirmation=1) 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | node_modules/ 3 | vendor/ 4 | .pnp/ 5 | .pnp.js 6 | 7 | # Build outputs 8 | dist/ 9 | build/ 10 | out/ 11 | *.pyc 12 | __pycache__/ 13 | 14 | # Environment files 15 | .env 16 | .env.local 17 | .env.*.local 18 | .env.development 19 | .env.test 20 | .env.production 21 | 22 | # IDE - VSCode 23 | .vscode/* 24 | !.vscode/settings.json 25 | !.vscode/tasks.json 26 | !.vscode/launch.json 27 | !.vscode/extensions.json 28 | 29 | # IDE - JetBrains 30 | .idea/ 31 | *.iml 32 | *.iws 33 | *.ipr 34 | 35 | # IDE - Eclipse 36 | .project 37 | .classpath 38 | .settings/ 39 | 40 | # Logs 41 | logs/ 42 | *.log 43 | npm-debug.log* 44 | yarn-debug.log* 45 | yarn-error.log* 46 | 47 | # Operating System 48 | .DS_Store 49 | Thumbs.db 50 | *.swp 51 | *.swo 52 | 53 | # Testing 54 | coverage/ 55 | .nyc_output/ 56 | 57 | # Temporary files 58 | *.tmp 59 | *.temp 60 | .cache/ 61 | 62 | # Compiled files 63 | *.com 64 | *.class 65 | *.dll 66 | *.exe 67 | *.o 68 | *.so 69 | 70 | # Package files 71 | *.7z 72 | *.dmg 73 | *.gz 74 | *.iso 75 | *.jar 76 | *.rar 77 | *.tar 78 | *.zip 79 | 80 | # Database 81 | *.sqlite 82 | *.sqlite3 83 | *.db 84 | 85 | # Optional npm cache directory 86 | .npm 87 | 88 | # Optional eslint cache 89 | .eslintcache 90 | 91 | # Optional REPL history 92 | .node_repl_history -------------------------------------------------------------------------------- /docs/design.md: -------------------------------------------------------------------------------- 1 | # Design Doc: Your Project Name 2 | 3 | > Please DON'T remove notes for AI 4 | 5 | ## Requirements 6 | 7 | > Notes for AI: Keep it simple and clear. 8 | > If the requirements are abstract, write concrete user stories 9 | 10 | 11 | ## Flow Design 12 | 13 | > Notes for AI: 14 | > 1. Consider the design patterns of agent, map-reduce, rag, and workflow. Apply them if they fit. 15 | > 2. Present a concise, high-level description of the workflow. 16 | 17 | ### Applicable Design Pattern: 18 | 19 | 1. Map the file summary into chunks, then reduce these chunks into a final summary. 20 | 2. Agentic file finder 21 | - *Context*: The entire summary of the file 22 | - *Action*: Find the file 23 | 24 | ### Flow high-level Design: 25 | 26 | 1. **First Node**: This node is for ... 27 | 2. **Second Node**: This node is for ... 28 | 3. **Third Node**: This node is for ... 
29 | 30 | ```mermaid 31 | flowchart TD 32 | firstNode[First Node] --> secondNode[Second Node] 33 | secondNode --> thirdNode[Third Node] 34 | ``` 35 | ## Utility Functions 36 | 37 | > Notes for AI: 38 | > 1. Understand the utility function definition thoroughly by reviewing the doc. 39 | > 2. Include only the necessary utility functions, based on nodes in the flow. 40 | 41 | 1. **Call LLM** (`utils/call_llm.py`) 42 | - *Input*: prompt (str) 43 | - *Output*: response (str) 44 | - Generally used by most nodes for LLM tasks 45 | 46 | 2. **Embedding** (`utils/get_embedding.py`) 47 | - *Input*: str 48 | - *Output*: a vector of 3072 floats 49 | - Used by the second node to embed text 50 | 51 | ## Node Design 52 | 53 | ### Shared Store 54 | 55 | > Notes for AI: Try to minimize data redundancy 56 | 57 | The shared store structure is organized as follows: 58 | 59 | ```python 60 | shared = { 61 | "key": "value" 62 | } 63 | ``` 64 | 65 | ### Node Steps 66 | 67 | > Notes for AI: Carefully decide whether to use Batch/Async Node/Flow. 68 | 69 | 1. First Node 70 | - *Purpose*: Provide a short explanation of the node’s function 71 | - *Type*: Decide between Regular, Batch, or Async 72 | - *Steps*: 73 | - *prep*: Read "key" from the shared store 74 | - *exec*: Call the utility function 75 | - *post*: Write "key" to the shared store 76 | 77 | 2. Second Node 78 | ... 79 | 80 | -------------------------------------------------------------------------------- /.clinerules: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: "Agentic Coding" 4 | --- 5 | 6 | # Agentic Coding: Humans Design, Agents code! 7 | 8 | > If you are an AI agent involved in building LLM Systems, read this guide **VERY, VERY** carefully! This is the most important chapter in the entire document. Throughout development, you should always (1) start with a small and simple solution, (2) design at a high level (`docs/design.md`) before implementation, and (3) frequently ask humans for feedback and clarification. 9 | {: .warning } 10 | 11 | ## Agentic Coding Steps 12 | 13 | Agentic Coding should be a collaboration between Human System Design and Agent Implementation: 14 | 15 | | Steps | Human | AI | Comment | 16 | |:-----------------------|:----------:|:---------:|:------------------------------------------------------------------------| 17 | | 1. Requirements | ★★★ High | ★☆☆ Low | Humans understand the requirements and context. | 18 | | 2. Flow | ★★☆ Medium | ★★☆ Medium | Humans specify the high-level design, and the AI fills in the details. | 19 | | 3. Utilities | ★★☆ Medium | ★★☆ Medium | Humans provide available external APIs and integrations, and the AI helps with implementation. | 20 | | 4. Data | ★☆☆ Low | ★★★ High | AI designs the data schema, and humans verify. | 21 | | 5. Node | ★☆☆ Low | ★★★ High | The AI helps design the node based on the flow. | 22 | | 6. Implementation | ★☆☆ Low | ★★★ High | The AI implements the flow based on the design. | 23 | | 7. Optimization | ★★☆ Medium | ★★☆ Medium | Humans evaluate the results, and the AI helps optimize. | 24 | | 8. Reliability | ★☆☆ Low | ★★★ High | The AI writes test cases and addresses corner cases. | 25 | 26 | 1. **Requirements**: Clarify the requirements for your project, and evaluate whether an AI system is a good fit. 
27 | - Understand AI systems' strengths and limitations: 28 | - **Good for**: Routine tasks requiring common sense (filling forms, replying to emails) 29 | - **Good for**: Creative tasks with well-defined inputs (building slides, writing SQL) 30 | - **Not good for**: Ambiguous problems requiring complex decision-making (business strategy, startup planning) 31 | - **Keep It User-Centric:** Explain the "problem" from the user's perspective rather than just listing features. 32 | - **Balance complexity vs. impact**: Aim to deliver the highest value features with minimal complexity early. 33 | 34 | 2. **Flow Design**: Outline at a high level, describe how your AI system orchestrates nodes. 35 | - Identify applicable design patterns (e.g., [Map Reduce](./design_pattern/mapreduce.md), [Agent](./design_pattern/agent.md), [RAG](./design_pattern/rag.md)). 36 | - For each node in the flow, start with a high-level one-line description of what it does. 37 | - If using **Map Reduce**, specify how to map (what to split) and how to reduce (how to combine). 38 | - If using **Agent**, specify what are the inputs (context) and what are the possible actions. 39 | - If using **RAG**, specify what to embed, noting that there's usually both offline (indexing) and online (retrieval) workflows. 40 | - Outline the flow and draw it in a mermaid diagram. For example: 41 | ```mermaid 42 | flowchart LR 43 | start[Start] --> batch[Batch] 44 | batch --> check[Check] 45 | check -->|OK| process 46 | check -->|Error| fix[Fix] 47 | fix --> check 48 | 49 | subgraph process[Process] 50 | step1[Step 1] --> step2[Step 2] 51 | end 52 | 53 | process --> endNode[End] 54 | ``` 55 | - > **If Humans can't specify the flow, AI Agents can't automate it!** Before building an LLM system, thoroughly understand the problem and potential solution by manually solving example inputs to develop intuition. 56 | {: .best-practice } 57 | 58 | 3. **Utilities**: Based on the Flow Design, identify and implement necessary utility functions. 59 | - Think of your AI system as the brain. It needs a body—these *external utility functions*—to interact with the real world: 60 |
61 | 62 | - Reading inputs (e.g., retrieving Slack messages, reading emails) 63 | - Writing outputs (e.g., generating reports, sending emails) 64 | - Using external tools (e.g., calling LLMs, searching the web) 65 | - **NOTE**: *LLM-based tasks* (e.g., summarizing text, analyzing sentiment) are **NOT** utility functions; rather, they are *core functions* internal in the AI system. 66 | - For each utility function, implement it and write a simple test. 67 | - Document their input/output, as well as why they are necessary. For example: 68 | - `name`: `get_embedding` (`utils/get_embedding.py`) 69 | - `input`: `str` 70 | - `output`: a vector of 3072 floats 71 | - `necessity`: Used by the second node to embed text 72 | - Example utility implementation: 73 | ```python 74 | # utils/call_llm.py 75 | from openai import OpenAI 76 | 77 | def call_llm(prompt): 78 | client = OpenAI(api_key="YOUR_API_KEY_HERE") 79 | r = client.chat.completions.create( 80 | model="gpt-4o", 81 | messages=[{"role": "user", "content": prompt}] 82 | ) 83 | return r.choices[0].message.content 84 | 85 | if __name__ == "__main__": 86 | prompt = "What is the meaning of life?" 87 | print(call_llm(prompt)) 88 | ``` 89 | - > **Sometimes, design Utilities before Flow:** For example, for an LLM project to automate a legacy system, the bottleneck will likely be the available interface to that system. Start by designing the hardest utilities for interfacing, and then build the flow around them. 90 | {: .best-practice } 91 | - > **Avoid Exception Handling in Utilities**: If a utility function is called from a Node's `exec()` method, avoid using `try...except` blocks within the utility. Let the Node's built-in retry mechanism handle failures. 92 | {: .warning } 93 | 94 | 4. **Data Design**: Design the shared store that nodes will use to communicate. 95 | - One core design principle for PocketFlow is to use a well-designed [shared store](./core_abstraction/communication.md)—a data contract that all nodes agree upon to retrieve and store data. 96 | - For simple systems, use an in-memory dictionary. 97 | - For more complex systems or when persistence is required, use a database. 98 | - **Don't Repeat Yourself**: Use in-memory references or foreign keys. 99 | - Example shared store design: 100 | ```python 101 | shared = { 102 | "user": { 103 | "id": "user123", 104 | "context": { # Another nested dict 105 | "weather": {"temp": 72, "condition": "sunny"}, 106 | "location": "San Francisco" 107 | } 108 | }, 109 | "results": {} # Empty dict to store outputs 110 | } 111 | ``` 112 | 113 | 5. **Node Design**: Plan how each node will read and write data, and use utility functions. 114 | - For each [Node](./core_abstraction/node.md), describe its type, how it reads and writes data, and which utility function it uses. Keep it specific but high-level without codes. For example: 115 | - `type`: Regular (or Batch, or Async) 116 | - `prep`: Read "text" from the shared store 117 | - `exec`: Call the embedding utility function. **Avoid exception handling here**; let the Node's retry mechanism manage failures. 118 | - `post`: Write "embedding" to the shared store 119 | 120 | 6. **Implementation**: Implement the initial nodes and flows based on the design. 121 | - 🎉 If you've reached this step, humans have finished the design. Now *Agentic Coding* begins! 122 | - **"Keep it simple, stupid!"** Avoid complex features and full-scale type checking. 123 | - **FAIL FAST**! 
Leverage the built-in [Node](./core_abstraction/node.md) retry and fallback mechanisms to handle failures gracefully. This helps you quickly identify weak points in the system. 124 | - Add logging throughout the code to facilitate debugging. 125 | 126 | 7. **Optimization**: 127 | - **Use Intuition**: For a quick initial evaluation, human intuition is often a good start. 128 | - **Redesign Flow (Back to Step 3)**: Consider breaking down tasks further, introducing agentic decisions, or better managing input contexts. 129 | - If your flow design is already solid, move on to micro-optimizations: 130 | - **Prompt Engineering**: Use clear, specific instructions with examples to reduce ambiguity. 131 | - **In-Context Learning**: Provide robust examples for tasks that are difficult to specify with instructions alone. 132 | 133 | - > **You'll likely iterate a lot!** Expect to repeat Steps 3–6 hundreds of times. 134 | > 135 | >
136 | {: .best-practice } 137 | 138 | 8. **Reliability** 139 | - **Node Retries**: Add checks in the node `exec` to ensure outputs meet requirements, and consider increasing `max_retries` and `wait` times. 140 | - **Logging and Visualization**: Maintain logs of all attempts and visualize node results for easier debugging. 141 | - **Self-Evaluation**: Add a separate node (powered by an LLM) to review outputs when results are uncertain. 142 | 143 | ## Example LLM Project File Structure 144 | 145 | ``` 146 | my_project/ 147 | ├── main.py 148 | ├── nodes.py 149 | ├── flow.py 150 | ├── utils/ 151 | │ ├── __init__.py 152 | │ ├── call_llm.py 153 | │ └── search_web.py 154 | ├── requirements.txt 155 | └── docs/ 156 | └── design.md 157 | ``` 158 | 159 | - **`requirements.txt`**: Lists the Python dependencies for the project. 160 | ``` 161 | PyYAML 162 | pocketflow 163 | ``` 164 | 165 | - **`docs/design.md`**: Contains project documentation for each step above. This should be *high-level* and *no-code*. 166 | ~~~ 167 | # Design Doc: Your Project Name 168 | 169 | > Please DON'T remove notes for AI 170 | 171 | ## Requirements 172 | 173 | > Notes for AI: Keep it simple and clear. 174 | > If the requirements are abstract, write concrete user stories 175 | 176 | 177 | ## Flow Design 178 | 179 | > Notes for AI: 180 | > 1. Consider the design patterns of agent, map-reduce, rag, and workflow. Apply them if they fit. 181 | > 2. Present a concise, high-level description of the workflow. 182 | 183 | ### Applicable Design Pattern: 184 | 185 | 1. Map the file summary into chunks, then reduce these chunks into a final summary. 186 | 2. Agentic file finder 187 | - *Context*: The entire summary of the file 188 | - *Action*: Find the file 189 | 190 | ### Flow high-level Design: 191 | 192 | 1. **First Node**: This node is for ... 193 | 2. **Second Node**: This node is for ... 194 | 3. **Third Node**: This node is for ... 195 | 196 | ```mermaid 197 | flowchart TD 198 | firstNode[First Node] --> secondNode[Second Node] 199 | secondNode --> thirdNode[Third Node] 200 | ``` 201 | ## Utility Functions 202 | 203 | > Notes for AI: 204 | > 1. Understand the utility function definition thoroughly by reviewing the doc. 205 | > 2. Include only the necessary utility functions, based on nodes in the flow. 206 | 207 | 1. **Call LLM** (`utils/call_llm.py`) 208 | - *Input*: prompt (str) 209 | - *Output*: response (str) 210 | - Generally used by most nodes for LLM tasks 211 | 212 | 2. **Embedding** (`utils/get_embedding.py`) 213 | - *Input*: str 214 | - *Output*: a vector of 3072 floats 215 | - Used by the second node to embed text 216 | 217 | ## Node Design 218 | 219 | ### Shared Store 220 | 221 | > Notes for AI: Try to minimize data redundancy 222 | 223 | The shared store structure is organized as follows: 224 | 225 | ```python 226 | shared = { 227 | "key": "value" 228 | } 229 | ``` 230 | 231 | ### Node Steps 232 | 233 | > Notes for AI: Carefully decide whether to use Batch/Async Node/Flow. 234 | 235 | 1. First Node 236 | - *Purpose*: Provide a short explanation of the node’s function 237 | - *Type*: Decide between Regular, Batch, or Async 238 | - *Steps*: 239 | - *prep*: Read "key" from the shared store 240 | - *exec*: Call the utility function 241 | - *post*: Write "key" to the shared store 242 | 243 | 2. Second Node 244 | ... 245 | ~~~ 246 | 247 | 248 | - **`utils/`**: Contains all utility functions. 249 | - It's recommended to dedicate one Python file to each API call, for example `call_llm.py` or `search_web.py`. 
250 | - Each file should also include a `main()` function to try that API call 251 | ```python 252 | from google import genai 253 | import os 254 | 255 | def call_llm(prompt: str) -> str: 256 | client = genai.Client( 257 | api_key=os.getenv("GEMINI_API_KEY", ""), 258 | ) 259 | model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash") 260 | response = client.models.generate_content(model=model, contents=[prompt]) 261 | return response.text 262 | 263 | if __name__ == "__main__": 264 | test_prompt = "Hello, how are you?" 265 | 266 | # Make a test call 267 | print("Making call...") 268 | response = call_llm(test_prompt) 269 | print(f"Response: {response}") 270 | ``` 271 | 272 | - **`nodes.py`**: Contains all the node definitions. 273 | ```python 274 | # nodes.py 275 | from pocketflow import Node 276 | from utils.call_llm import call_llm 277 | 278 | class GetQuestionNode(Node): 279 | def exec(self, _): 280 | # Get question directly from user input 281 | user_question = input("Enter your question: ") 282 | return user_question 283 | 284 | def post(self, shared, prep_res, exec_res): 285 | # Store the user's question 286 | shared["question"] = exec_res 287 | return "default" # Go to the next node 288 | 289 | class AnswerNode(Node): 290 | def prep(self, shared): 291 | # Read question from shared 292 | return shared["question"] 293 | 294 | def exec(self, question): 295 | # Call LLM to get the answer 296 | return call_llm(question) 297 | 298 | def post(self, shared, prep_res, exec_res): 299 | # Store the answer in shared 300 | shared["answer"] = exec_res 301 | ``` 302 | - **`flow.py`**: Implements functions that create flows by importing node definitions and connecting them. 303 | ```python 304 | # flow.py 305 | from pocketflow import Flow 306 | from nodes import GetQuestionNode, AnswerNode 307 | 308 | def create_qa_flow(): 309 | """Create and return a question-answering flow.""" 310 | # Create nodes 311 | get_question_node = GetQuestionNode() 312 | answer_node = AnswerNode() 313 | 314 | # Connect nodes in sequence 315 | get_question_node >> answer_node 316 | 317 | # Create flow starting with input node 318 | return Flow(start=get_question_node) 319 | ``` 320 | - **`main.py`**: Serves as the project's entry point. 321 | ```python 322 | # main.py 323 | from flow import create_qa_flow 324 | 325 | # Example main function 326 | # Please replace this with your own main function 327 | def main(): 328 | shared = { 329 | "question": None, # Will be populated by GetQuestionNode from user input 330 | "answer": None # Will be populated by AnswerNode 331 | } 332 | 333 | # Create the flow and run it 334 | qa_flow = create_qa_flow() 335 | qa_flow.run(shared) 336 | print(f"Question: {shared['question']}") 337 | print(f"Answer: {shared['answer']}") 338 | 339 | if __name__ == "__main__": 340 | main() 341 | ``` 342 | 343 | ================================================ 344 | File: docs/index.md 345 | ================================================ 346 | --- 347 | layout: default 348 | title: "Home" 349 | nav_order: 1 350 | --- 351 | 352 | # Pocket Flow 353 | 354 | A [100-line](https://github.com/the-pocket/PocketFlow/blob/main/pocketflow/__init__.py) minimalist LLM framework for *Agents, Task Decomposition, RAG, etc*. 355 | 356 | - **Lightweight**: Just the core graph abstraction in 100 lines. ZERO dependencies, and ZERO vendor lock-in.
357 | - **Expressive**: Everything you love from larger frameworks—([Multi-](./design_pattern/multi_agent.html))[Agents](./design_pattern/agent.html), [Workflow](./design_pattern/workflow.html), [RAG](./design_pattern/rag.html), and more. 358 | - **Agentic-Coding**: Intuitive enough for AI agents to help humans build complex LLM applications. 359 | 360 |
363 | 364 | ## Core Abstraction 365 | 366 | We model the LLM workflow as a **Graph + Shared Store**: 367 | 368 | - [Node](./core_abstraction/node.md) handles simple (LLM) tasks. 369 | - [Flow](./core_abstraction/flow.md) connects nodes through **Actions** (labeled edges). 370 | - [Shared Store](./core_abstraction/communication.md) enables communication between nodes within flows. 371 | - [Batch](./core_abstraction/batch.md) nodes/flows allow for data-intensive tasks. 372 | - [Async](./core_abstraction/async.md) nodes/flows allow waiting for asynchronous tasks. 373 | - [(Advanced) Parallel](./core_abstraction/parallel.md) nodes/flows handle I/O-bound tasks. 374 | 375 |
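To see how these pieces fit together, here is a minimal sketch (it assumes a `call_llm(prompt)` utility like the one in `utils/call_llm.py` above): the node reads from the shared store in `prep()`, computes in `exec()`, writes back in `post()`, and returns an Action label that the Flow follows to the next node.

```python
# Minimal sketch of the Graph + Shared Store model.
# Assumes a call_llm(prompt) utility such as the one in utils/call_llm.py.
from pocketflow import Node, Flow
from utils.call_llm import call_llm

class Summarize(Node):
    def prep(self, shared):
        # Read input from the shared store
        return shared["text"]

    def exec(self, text):
        # Compute only; exec() should not touch the shared store
        return call_llm(f"Summarize in one sentence: {text}")

    def post(self, shared, prep_res, exec_res):
        # Write the result back and return the Action (edge label) to follow
        shared["summary"] = exec_res
        return "default"

summarize = Summarize()
flow = Flow(start=summarize)

shared = {"text": "Pocket Flow models LLM workflows as a graph plus a shared store."}
flow.run(shared)
print(shared["summary"])
```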
378 | 379 | ## Design Pattern 380 | 381 | From there, it’s easy to implement popular design patterns: 382 | 383 | - [Agent](./design_pattern/agent.md) autonomously makes decisions. 384 | - [Workflow](./design_pattern/workflow.md) chains multiple tasks into pipelines. 385 | - [RAG](./design_pattern/rag.md) integrates data retrieval with generation. 386 | - [Map Reduce](./design_pattern/mapreduce.md) splits data tasks into Map and Reduce steps. 387 | - [Structured Output](./design_pattern/structure.md) formats outputs consistently. 388 | - [(Advanced) Multi-Agents](./design_pattern/multi_agent.md) coordinate multiple agents. 389 | 390 |
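As a taste of one of these, the Structured Output pattern usually comes down to asking the LLM for a fixed format (YAML here) and validating it inside `exec()`, so a malformed reply simply triggers the Node's retry. A minimal sketch, assuming the `call_llm` utility above and PyYAML (already listed in the example `requirements.txt`):

```python
# Sketch of the Structured Output pattern: request YAML, then parse and validate it.
# Assumes call_llm(prompt) from utils/call_llm.py and PyYAML installed.
import yaml
from pocketflow import Node
from utils.call_llm import call_llm

class ExtractTask(Node):
    def prep(self, shared):
        return shared["email"]

    def exec(self, email):
        prompt = (
            "Extract the task from the email below as YAML with keys "
            "`task` (one line) and `due_date` (YYYY-MM-DD or null), "
            "wrapped in a ```yaml code fence.\n\n" + email
        )
        resp = call_llm(prompt)
        yaml_str = resp.split("```yaml")[1].split("```")[0].strip()
        result = yaml.safe_load(yaml_str)

        # Validate the structure; an assertion error here triggers the Node's retry
        assert isinstance(result, dict)
        assert "task" in result
        return result

    def post(self, shared, prep_res, exec_res):
        shared["task"] = exec_res
        return "default"
```

The Agent example further down uses the same YAML-and-validate approach when choosing its next action.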
393 | 394 | ## Utility Function 395 | 396 | We **do not** provide built-in utilities. Instead, we offer *examples*—please *implement your own*: 397 | 398 | - [LLM Wrapper](./utility_function/llm.md) 399 | - [Viz and Debug](./utility_function/viz.md) 400 | - [Web Search](./utility_function/websearch.md) 401 | - [Chunking](./utility_function/chunking.md) 402 | - [Embedding](./utility_function/embedding.md) 403 | - [Vector Databases](./utility_function/vector.md) 404 | - [Text-to-Speech](./utility_function/text_to_speech.md) 405 | 406 | **Why not built-in?**: I believe it's a *bad practice* for vendor-specific APIs in a general framework: 407 | - *API Volatility*: Frequent changes lead to heavy maintenance for hardcoded APIs. 408 | - *Flexibility*: You may want to switch vendors, use fine-tuned models, or run them locally. 409 | - *Optimizations*: Prompt caching, batching, and streaming are easier without vendor lock-in. 410 | 411 | ## Ready to build your Apps? 412 | 413 | Check out [Agentic Coding Guidance](./guide.md), the fastest way to develop LLM projects with Pocket Flow! 414 | 415 | ================================================ 416 | File: docs/core_abstraction/async.md 417 | ================================================ 418 | --- 419 | layout: default 420 | title: "(Advanced) Async" 421 | parent: "Core Abstraction" 422 | nav_order: 5 423 | --- 424 | 425 | # (Advanced) Async 426 | 427 | **Async** Nodes implement `prep_async()`, `exec_async()`, `exec_fallback_async()`, and/or `post_async()`. This is useful for: 428 | 429 | 1. **prep_async()**: For *fetching/reading data (files, APIs, DB)* in an I/O-friendly way. 430 | 2. **exec_async()**: Typically used for async LLM calls. 431 | 3. **post_async()**: For *awaiting user feedback*, *coordinating across multi-agents* or any additional async steps after `exec_async()`. 432 | 433 | **Note**: `AsyncNode` must be wrapped in `AsyncFlow`. `AsyncFlow` can also include regular (sync) nodes. 434 | 435 | ### Example 436 | 437 | ```python 438 | class SummarizeThenVerify(AsyncNode): 439 | async def prep_async(self, shared): 440 | # Example: read a file asynchronously 441 | doc_text = await read_file_async(shared["doc_path"]) 442 | return doc_text 443 | 444 | async def exec_async(self, prep_res): 445 | # Example: async LLM call 446 | summary = await call_llm_async(f"Summarize: {prep_res}") 447 | return summary 448 | 449 | async def post_async(self, shared, prep_res, exec_res): 450 | # Example: wait for user feedback 451 | decision = await gather_user_feedback(exec_res) 452 | if decision == "approve": 453 | shared["summary"] = exec_res 454 | return "approve" 455 | return "deny" 456 | 457 | summarize_node = SummarizeThenVerify() 458 | final_node = Finalize() 459 | 460 | # Define transitions 461 | summarize_node - "approve" >> final_node 462 | summarize_node - "deny" >> summarize_node # retry 463 | 464 | flow = AsyncFlow(start=summarize_node) 465 | 466 | async def main(): 467 | shared = {"doc_path": "document.txt"} 468 | await flow.run_async(shared) 469 | print("Final Summary:", shared.get("summary")) 470 | 471 | asyncio.run(main()) 472 | ``` 473 | 474 | ================================================ 475 | File: docs/core_abstraction/batch.md 476 | ================================================ 477 | --- 478 | layout: default 479 | title: "Batch" 480 | parent: "Core Abstraction" 481 | nav_order: 4 482 | --- 483 | 484 | # Batch 485 | 486 | **Batch** makes it easier to handle large inputs in one Node or **rerun** a Flow multiple times. 
Example use cases: 487 | - **Chunk-based** processing (e.g., splitting large texts). 488 | - **Iterative** processing over lists of input items (e.g., user queries, files, URLs). 489 | 490 | ## 1. BatchNode 491 | 492 | A **BatchNode** extends `Node` but changes `prep()` and `exec()`: 493 | 494 | - **`prep(shared)`**: returns an **iterable** (e.g., list, generator). 495 | - **`exec(item)`**: called **once** per item in that iterable. 496 | - **`post(shared, prep_res, exec_res_list)`**: after all items are processed, receives a **list** of results (`exec_res_list`) and returns an **Action**. 497 | 498 | 499 | ### Example: Summarize a Large File 500 | 501 | ```python 502 | class MapSummaries(BatchNode): 503 | def prep(self, shared): 504 | # Suppose we have a big file; chunk it 505 | content = shared["data"] 506 | chunk_size = 10000 507 | chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)] 508 | return chunks 509 | 510 | def exec(self, chunk): 511 | prompt = f"Summarize this chunk in 10 words: {chunk}" 512 | summary = call_llm(prompt) 513 | return summary 514 | 515 | def post(self, shared, prep_res, exec_res_list): 516 | combined = "\n".join(exec_res_list) 517 | shared["summary"] = combined 518 | return "default" 519 | 520 | map_summaries = MapSummaries() 521 | flow = Flow(start=map_summaries) 522 | flow.run(shared) 523 | ``` 524 | 525 | --- 526 | 527 | ## 2. BatchFlow 528 | 529 | A **BatchFlow** runs a **Flow** multiple times, each time with different `params`. Think of it as a loop that replays the Flow for each parameter set. 530 | 531 | ### Example: Summarize Many Files 532 | 533 | ```python 534 | class SummarizeAllFiles(BatchFlow): 535 | def prep(self, shared): 536 | # Return a list of param dicts (one per file) 537 | filenames = list(shared["data"].keys()) # e.g., ["file1.txt", "file2.txt", ...] 538 | return [{"filename": fn} for fn in filenames] 539 | 540 | # Suppose we have a per-file Flow (e.g., load_file >> summarize >> reduce): 541 | summarize_file = SummarizeFile(start=load_file) 542 | 543 | # Wrap that flow into a BatchFlow: 544 | summarize_all_files = SummarizeAllFiles(start=summarize_file) 545 | summarize_all_files.run(shared) 546 | ``` 547 | 548 | ### Under the Hood 549 | 1. `prep(shared)` returns a list of param dicts—e.g., `[{filename: "file1.txt"}, {filename: "file2.txt"}, ...]`. 550 | 2. The **BatchFlow** loops through each dict. For each one: 551 | - It merges the dict with the BatchFlow’s own `params`. 552 | - It calls `flow.run(shared)` using the merged result. 553 | 3. This means the sub-Flow is run **repeatedly**, once for every param dict. 554 | 555 | --- 556 | 557 | ## 3. Nested or Multi-Level Batches 558 | 559 | You can nest a **BatchFlow** in another **BatchFlow**. For instance: 560 | - **Outer** batch: returns a list of diretory param dicts (e.g., `{"directory": "/pathA"}`, `{"directory": "/pathB"}`, ...). 561 | - **Inner** batch: returning a list of per-file param dicts. 562 | 563 | At each level, **BatchFlow** merges its own param dict with the parent’s. By the time you reach the **innermost** node, the final `params` is the merged result of **all** parents in the chain. This way, a nested structure can keep track of the entire context (e.g., directory + file name) at once. 564 | 565 | ```python 566 | 567 | class FileBatchFlow(BatchFlow): 568 | def prep(self, shared): 569 | directory = self.params["directory"] 570 | # e.g., files = ["file1.txt", "file2.txt", ...] 
571 | files = [f for f in os.listdir(directory) if f.endswith(".txt")] 572 | return [{"filename": f} for f in files] 573 | 574 | class DirectoryBatchFlow(BatchFlow): 575 | def prep(self, shared): 576 | directories = [ "/path/to/dirA", "/path/to/dirB"] 577 | return [{"directory": d} for d in directories] 578 | 579 | # MapSummaries have params like {"directory": "/path/to/dirA", "filename": "file1.txt"} 580 | inner_flow = FileBatchFlow(start=MapSummaries()) 581 | outer_flow = DirectoryBatchFlow(start=inner_flow) 582 | ``` 583 | 584 | ================================================ 585 | File: docs/core_abstraction/communication.md 586 | ================================================ 587 | --- 588 | layout: default 589 | title: "Communication" 590 | parent: "Core Abstraction" 591 | nav_order: 3 592 | --- 593 | 594 | # Communication 595 | 596 | Nodes and Flows **communicate** in 2 ways: 597 | 598 | 1. **Shared Store (for almost all the cases)** 599 | 600 | - A global data structure (often an in-mem dict) that all nodes can read ( `prep()`) and write (`post()`). 601 | - Great for data results, large content, or anything multiple nodes need. 602 | - You shall design the data structure and populate it ahead. 603 | 604 | - > **Separation of Concerns:** Use `Shared Store` for almost all cases to separate *Data Schema* from *Compute Logic*! This approach is both flexible and easy to manage, resulting in more maintainable code. `Params` is more a syntax sugar for [Batch](./batch.md). 605 | {: .best-practice } 606 | 607 | 2. **Params (only for [Batch](./batch.md))** 608 | - Each node has a local, ephemeral `params` dict passed in by the **parent Flow**, used as an identifier for tasks. Parameter keys and values shall be **immutable**. 609 | - Good for identifiers like filenames or numeric IDs, in Batch mode. 610 | 611 | If you know memory management, think of the **Shared Store** like a **heap** (shared by all function calls), and **Params** like a **stack** (assigned by the caller). 612 | 613 | --- 614 | 615 | ## 1. Shared Store 616 | 617 | ### Overview 618 | 619 | A shared store is typically an in-mem dictionary, like: 620 | ```python 621 | shared = {"data": {}, "summary": {}, "config": {...}, ...} 622 | ``` 623 | 624 | It can also contain local file handlers, DB connections, or a combination for persistence. We recommend deciding the data structure or DB schema first based on your app requirements. 625 | 626 | ### Example 627 | 628 | ```python 629 | class LoadData(Node): 630 | def post(self, shared, prep_res, exec_res): 631 | # We write data to shared store 632 | shared["data"] = "Some text content" 633 | return None 634 | 635 | class Summarize(Node): 636 | def prep(self, shared): 637 | # We read data from shared store 638 | return shared["data"] 639 | 640 | def exec(self, prep_res): 641 | # Call LLM to summarize 642 | prompt = f"Summarize: {prep_res}" 643 | summary = call_llm(prompt) 644 | return summary 645 | 646 | def post(self, shared, prep_res, exec_res): 647 | # We write summary to shared store 648 | shared["summary"] = exec_res 649 | return "default" 650 | 651 | load_data = LoadData() 652 | summarize = Summarize() 653 | load_data >> summarize 654 | flow = Flow(start=load_data) 655 | 656 | shared = {} 657 | flow.run(shared) 658 | ``` 659 | 660 | Here: 661 | - `LoadData` writes to `shared["data"]`. 662 | - `Summarize` reads from `shared["data"]`, summarizes, and writes to `shared["summary"]`. 663 | 664 | --- 665 | 666 | ## 2. 
Params 667 | 668 | **Params** let you store *per-Node* or *per-Flow* config that doesn't need to live in the shared store. They are: 669 | - **Immutable** during a Node's run cycle (i.e., they don't change mid-`prep->exec->post`). 670 | - **Set** via `set_params()`. 671 | - **Cleared** and updated each time a parent Flow calls it. 672 | 673 | > Only set the uppermost Flow params because others will be overwritten by the parent Flow. 674 | > 675 | > If you need to set child node params, see [Batch](./batch.md). 676 | {: .warning } 677 | 678 | Typically, **Params** are identifiers (e.g., file name, page number). Use them to fetch the task you assigned or write to a specific part of the shared store. 679 | 680 | ### Example 681 | 682 | ```python 683 | # 1) Create a Node that uses params 684 | class SummarizeFile(Node): 685 | def prep(self, shared): 686 | # Access the node's param 687 | filename = self.params["filename"] 688 | return shared["data"].get(filename, "") 689 | 690 | def exec(self, prep_res): 691 | prompt = f"Summarize: {prep_res}" 692 | return call_llm(prompt) 693 | 694 | def post(self, shared, prep_res, exec_res): 695 | filename = self.params["filename"] 696 | shared["summary"][filename] = exec_res 697 | return "default" 698 | 699 | # 2) Set params 700 | node = SummarizeFile() 701 | 702 | # 3) Set Node params directly (for testing) 703 | node.set_params({"filename": "doc1.txt"}) 704 | node.run(shared) 705 | 706 | # 4) Create Flow 707 | flow = Flow(start=node) 708 | 709 | # 5) Set Flow params (overwrites node params) 710 | flow.set_params({"filename": "doc2.txt"}) 711 | flow.run(shared) # The node summarizes doc2, not doc1 712 | ``` 713 | 714 | ================================================ 715 | File: docs/core_abstraction/flow.md 716 | ================================================ 717 | --- 718 | layout: default 719 | title: "Flow" 720 | parent: "Core Abstraction" 721 | nav_order: 2 722 | --- 723 | 724 | # Flow 725 | 726 | A **Flow** orchestrates a graph of Nodes. You can chain Nodes in a sequence or create branching depending on the **Actions** returned from each Node's `post()`. 727 | 728 | ## 1. Action-based Transitions 729 | 730 | Each Node's `post()` returns an **Action** string. By default, if `post()` doesn't return anything, we treat that as `"default"`. 731 | 732 | You define transitions with the syntax: 733 | 734 | 1. **Basic default transition**: `node_a >> node_b` 735 | This means if `node_a.post()` returns `"default"`, go to `node_b`. 736 | (Equivalent to `node_a - "default" >> node_b`) 737 | 738 | 2. **Named action transition**: `node_a - "action_name" >> node_b` 739 | This means if `node_a.post()` returns `"action_name"`, go to `node_b`. 740 | 741 | It's possible to create loops, branching, or multi-step flows. 742 | 743 | ## 2. Creating a Flow 744 | 745 | A **Flow** begins with a **start** node. You call `Flow(start=some_node)` to specify the entry point. When you call `flow.run(shared)`, it executes the start node, looks at its returned Action from `post()`, follows the transition, and continues until there's no next node. 746 | 747 | ### Example: Simple Sequence 748 | 749 | Here's a minimal flow of two nodes in a chain: 750 | 751 | ```python 752 | node_a >> node_b 753 | flow = Flow(start=node_a) 754 | flow.run(shared) 755 | ``` 756 | 757 | - When you run the flow, it executes `node_a`. 758 | - Suppose `node_a.post()` returns `"default"`. 759 | - The flow then sees `"default"` Action is linked to `node_b` and runs `node_b`. 
760 | - `node_b.post()` returns `"default"` but we didn't define `node_b >> something_else`. So the flow ends there. 761 | 762 | ### Example: Branching & Looping 763 | 764 | Here's a simple expense approval flow that demonstrates branching and looping. The `ReviewExpense` node can return three possible Actions: 765 | 766 | - `"approved"`: expense is approved, move to payment processing 767 | - `"needs_revision"`: expense needs changes, send back for revision 768 | - `"rejected"`: expense is denied, finish the process 769 | 770 | We can wire them like this: 771 | 772 | ```python 773 | # Define the flow connections 774 | review - "approved" >> payment # If approved, process payment 775 | review - "needs_revision" >> revise # If needs changes, go to revision 776 | review - "rejected" >> finish # If rejected, finish the process 777 | 778 | revise >> review # After revision, go back for another review 779 | payment >> finish # After payment, finish the process 780 | 781 | flow = Flow(start=review) 782 | ``` 783 | 784 | Let's see how it flows: 785 | 786 | 1. If `review.post()` returns `"approved"`, the expense moves to the `payment` node 787 | 2. If `review.post()` returns `"needs_revision"`, it goes to the `revise` node, which then loops back to `review` 788 | 3. If `review.post()` returns `"rejected"`, it moves to the `finish` node and stops 789 | 790 | ```mermaid 791 | flowchart TD 792 | review[Review Expense] -->|approved| payment[Process Payment] 793 | review -->|needs_revision| revise[Revise Report] 794 | review -->|rejected| finish[Finish Process] 795 | 796 | revise --> review 797 | payment --> finish 798 | ``` 799 | 800 | ### Running Individual Nodes vs. Running a Flow 801 | 802 | - `node.run(shared)`: Just runs that node alone (calls `prep->exec->post()`), returns an Action. 803 | - `flow.run(shared)`: Executes from the start node, follows Actions to the next node, and so on until the flow can't continue. 804 | 805 | > `node.run(shared)` **does not** proceed to the successor. 806 | > This is mainly for debugging or testing a single node. 807 | > 808 | > Always use `flow.run(...)` in production to ensure the full pipeline runs correctly. 809 | {: .warning } 810 | 811 | ## 3. Nested Flows 812 | 813 | A **Flow** can act like a Node, which enables powerful composition patterns. This means you can: 814 | 815 | 1. Use a Flow as a Node within another Flow's transitions. 816 | 2. Combine multiple smaller Flows into a larger Flow for reuse. 817 | 3. Node `params` will be a merging of **all** parents' `params`. 818 | 819 | ### Flow's Node Methods 820 | 821 | A **Flow** is also a **Node**, so it will run `prep()` and `post()`. However: 822 | 823 | - It **won't** run `exec()`, as its main logic is to orchestrate its nodes. 824 | - `post()` always receives `None` for `exec_res` and should instead get the flow execution results from the shared store. 825 | 826 | ### Basic Flow Nesting 827 | 828 | Here's how to connect a flow to another node: 829 | 830 | ```python 831 | # Create a sub-flow 832 | node_a >> node_b 833 | subflow = Flow(start=node_a) 834 | 835 | # Connect it to another node 836 | subflow >> node_c 837 | 838 | # Create the parent flow 839 | parent_flow = Flow(start=subflow) 840 | ``` 841 | 842 | When `parent_flow.run()` executes: 843 | 1. It starts `subflow` 844 | 2. `subflow` runs through its nodes (`node_a->node_b`) 845 | 3. 
After `subflow` completes, execution continues to `node_c` 846 | 847 | ### Example: Order Processing Pipeline 848 | 849 | Here's a practical example that breaks down order processing into nested flows: 850 | 851 | ```python 852 | # Payment processing sub-flow 853 | validate_payment >> process_payment >> payment_confirmation 854 | payment_flow = Flow(start=validate_payment) 855 | 856 | # Inventory sub-flow 857 | check_stock >> reserve_items >> update_inventory 858 | inventory_flow = Flow(start=check_stock) 859 | 860 | # Shipping sub-flow 861 | create_label >> assign_carrier >> schedule_pickup 862 | shipping_flow = Flow(start=create_label) 863 | 864 | # Connect the flows into a main order pipeline 865 | payment_flow >> inventory_flow >> shipping_flow 866 | 867 | # Create the master flow 868 | order_pipeline = Flow(start=payment_flow) 869 | 870 | # Run the entire pipeline 871 | order_pipeline.run(shared_data) 872 | ``` 873 | 874 | This creates a clean separation of concerns while maintaining a clear execution path: 875 | 876 | ```mermaid 877 | flowchart LR 878 | subgraph order_pipeline[Order Pipeline] 879 | subgraph paymentFlow["Payment Flow"] 880 | A[Validate Payment] --> B[Process Payment] --> C[Payment Confirmation] 881 | end 882 | 883 | subgraph inventoryFlow["Inventory Flow"] 884 | D[Check Stock] --> E[Reserve Items] --> F[Update Inventory] 885 | end 886 | 887 | subgraph shippingFlow["Shipping Flow"] 888 | G[Create Label] --> H[Assign Carrier] --> I[Schedule Pickup] 889 | end 890 | 891 | paymentFlow --> inventoryFlow 892 | inventoryFlow --> shippingFlow 893 | end 894 | ``` 895 | 896 | ================================================ 897 | File: docs/core_abstraction/node.md 898 | ================================================ 899 | --- 900 | layout: default 901 | title: "Node" 902 | parent: "Core Abstraction" 903 | nav_order: 1 904 | --- 905 | 906 | # Node 907 | 908 | A **Node** is the smallest building block. Each Node has 3 steps `prep->exec->post`: 909 | 910 |
913 | 914 | 1. `prep(shared)` 915 | - **Read and preprocess data** from `shared` store. 916 | - Examples: *query DB, read files, or serialize data into a string*. 917 | - Return `prep_res`, which is used by `exec()` and `post()`. 918 | 919 | 2. `exec(prep_res)` 920 | - **Execute compute logic**, with optional retries and error handling (below). 921 | - Examples: *(mostly) LLM calls, remote APIs, tool use*. 922 | - ⚠️ This shall be only for compute and **NOT** access `shared`. 923 | - ⚠️ If retries enabled, ensure idempotent implementation. 924 | - ⚠️ Defer exception handling to the Node's built-in retry mechanism. 925 | - Return `exec_res`, which is passed to `post()`. 926 | 927 | 3. `post(shared, prep_res, exec_res)` 928 | - **Postprocess and write data** back to `shared`. 929 | - Examples: *update DB, change states, log results*. 930 | - **Decide the next action** by returning a *string* (`action = "default"` if *None*). 931 | 932 | > **Why 3 steps?** To enforce the principle of *separation of concerns*. The data storage and data processing are operated separately. 933 | > 934 | > All steps are *optional*. E.g., you can only implement `prep` and `post` if you just need to process data. 935 | {: .note } 936 | 937 | ### Fault Tolerance & Retries 938 | 939 | You can **retry** `exec()` if it raises an exception via two parameters when define the Node: 940 | 941 | - `max_retries` (int): Max times to run `exec()`. The default is `1` (**no** retry). 942 | - `wait` (int): The time to wait (in **seconds**) before next retry. By default, `wait=0` (no waiting). 943 | `wait` is helpful when you encounter rate-limits or quota errors from your LLM provider and need to back off. 944 | 945 | ```python 946 | my_node = SummarizeFile(max_retries=3, wait=10) 947 | ``` 948 | 949 | When an exception occurs in `exec()`, the Node automatically retries until: 950 | 951 | - It either succeeds, or 952 | - The Node has retried `max_retries - 1` times already and fails on the last attempt. 953 | 954 | You can get the current retry times (0-based) from `self.cur_retry`. 955 | 956 | ```python 957 | class RetryNode(Node): 958 | def exec(self, prep_res): 959 | print(f"Retry {self.cur_retry} times") 960 | raise Exception("Failed") 961 | ``` 962 | 963 | ### Graceful Fallback 964 | 965 | To **gracefully handle** the exception (after all retries) rather than raising it, override: 966 | 967 | ```python 968 | def exec_fallback(self, prep_res, exc): 969 | raise exc 970 | ``` 971 | 972 | By default, it just re-raises exception. But you can return a fallback result instead, which becomes the `exec_res` passed to `post()`. 973 | 974 | ### Example: Summarize file 975 | 976 | ```python 977 | class SummarizeFile(Node): 978 | def prep(self, shared): 979 | return shared["data"] 980 | 981 | def exec(self, prep_res): 982 | if not prep_res: 983 | return "Empty file content" 984 | prompt = f"Summarize this text in 10 words: {prep_res}" 985 | summary = call_llm(prompt) # might fail 986 | return summary 987 | 988 | def exec_fallback(self, prep_res, exc): 989 | # Provide a simple fallback instead of crashing 990 | return "There was an error processing your request." 
991 | 992 | def post(self, shared, prep_res, exec_res): 993 | shared["summary"] = exec_res 994 | # Return "default" by not returning 995 | 996 | summarize_node = SummarizeFile(max_retries=3) 997 | 998 | # node.run() calls prep->exec->post 999 | # If exec() fails, it retries up to 3 times before calling exec_fallback() 1000 | action_result = summarize_node.run(shared) 1001 | 1002 | print("Action returned:", action_result) # "default" 1003 | print("Summary stored:", shared["summary"]) 1004 | ``` 1005 | 1006 | ================================================ 1007 | File: docs/core_abstraction/parallel.md 1008 | ================================================ 1009 | --- 1010 | layout: default 1011 | title: "(Advanced) Parallel" 1012 | parent: "Core Abstraction" 1013 | nav_order: 6 1014 | --- 1015 | 1016 | # (Advanced) Parallel 1017 | 1018 | **Parallel** Nodes and Flows let you run multiple **Async** Nodes and Flows **concurrently**—for example, summarizing multiple texts at once. This can improve performance by overlapping I/O and compute. 1019 | 1020 | > Because of Python’s GIL, parallel nodes and flows can’t truly parallelize CPU-bound tasks (e.g., heavy numerical computations). However, they excel at overlapping I/O-bound work—like LLM calls, database queries, API requests, or file I/O. 1021 | {: .warning } 1022 | 1023 | > - **Ensure Tasks Are Independent**: If each item depends on the output of a previous item, **do not** parallelize. 1024 | > 1025 | > - **Beware of Rate Limits**: Parallel calls can **quickly** trigger rate limits on LLM services. You may need a **throttling** mechanism (e.g., semaphores or sleep intervals). 1026 | > 1027 | > - **Consider Single-Node Batch APIs**: Some LLMs offer a **batch inference** API where you can send multiple prompts in a single call. This is more complex to implement but can be more efficient than launching many parallel requests and mitigates rate limits. 1028 | {: .best-practice } 1029 | 1030 | ## AsyncParallelBatchNode 1031 | 1032 | Like **AsyncBatchNode**, but run `exec_async()` in **parallel**: 1033 | 1034 | ```python 1035 | class ParallelSummaries(AsyncParallelBatchNode): 1036 | async def prep_async(self, shared): 1037 | # e.g., multiple texts 1038 | return shared["texts"] 1039 | 1040 | async def exec_async(self, text): 1041 | prompt = f"Summarize: {text}" 1042 | return await call_llm_async(prompt) 1043 | 1044 | async def post_async(self, shared, prep_res, exec_res_list): 1045 | shared["summary"] = "\n\n".join(exec_res_list) 1046 | return "default" 1047 | 1048 | node = ParallelSummaries() 1049 | flow = AsyncFlow(start=node) 1050 | ``` 1051 | 1052 | ## AsyncParallelBatchFlow 1053 | 1054 | Parallel version of **BatchFlow**. 
Each iteration of the sub-flow runs **concurrently** using different parameters: 1055 | 1056 | ```python 1057 | class SummarizeMultipleFiles(AsyncParallelBatchFlow): 1058 | async def prep_async(self, shared): 1059 | return [{"filename": f} for f in shared["files"]] 1060 | 1061 | sub_flow = AsyncFlow(start=LoadAndSummarizeFile()) 1062 | parallel_flow = SummarizeMultipleFiles(start=sub_flow) 1063 | await parallel_flow.run_async(shared) 1064 | ``` 1065 | 1066 | ================================================ 1067 | File: docs/design_pattern/agent.md 1068 | ================================================ 1069 | --- 1070 | layout: default 1071 | title: "Agent" 1072 | parent: "Design Pattern" 1073 | nav_order: 1 1074 | --- 1075 | 1076 | # Agent 1077 | 1078 | Agent is a powerful design pattern in which nodes can take dynamic actions based on the context. 1079 | 1080 |
1083 | 1084 | ## Implement Agent with Graph 1085 | 1086 | 1. **Context and Action:** Implement nodes that supply context and perform actions. 1087 | 2. **Branching:** Use branching to connect each action node to an agent node. Use action to allow the agent to direct the [flow](../core_abstraction/flow.md) between nodes—and potentially loop back for multi-step. 1088 | 3. **Agent Node:** Provide a prompt to decide action—for example: 1089 | 1090 | ```python 1091 | f""" 1092 | ### CONTEXT 1093 | Task: {task_description} 1094 | Previous Actions: {previous_actions} 1095 | Current State: {current_state} 1096 | 1097 | ### ACTION SPACE 1098 | [1] search 1099 | Description: Use web search to get results 1100 | Parameters: 1101 | - query (str): What to search for 1102 | 1103 | [2] answer 1104 | Description: Conclude based on the results 1105 | Parameters: 1106 | - result (str): Final answer to provide 1107 | 1108 | ### NEXT ACTION 1109 | Decide the next action based on the current context and available action space. 1110 | Return your response in the following format: 1111 | 1112 | ```yaml 1113 | thinking: | 1114 | 1115 | action: 1116 | parameters: 1117 | : 1118 | ```""" 1119 | ``` 1120 | 1121 | The core of building **high-performance** and **reliable** agents boils down to: 1122 | 1123 | 1. **Context Management:** Provide *relevant, minimal context.* For example, rather than including an entire chat history, retrieve the most relevant via [RAG](./rag.md). Even with larger context windows, LLMs still fall victim to ["lost in the middle"](https://arxiv.org/abs/2307.03172), overlooking mid-prompt content. 1124 | 1125 | 2. **Action Space:** Provide *a well-structured and unambiguous* set of actions—avoiding overlap like separate `read_databases` or `read_csvs`. Instead, import CSVs into the database. 1126 | 1127 | ## Example Good Action Design 1128 | 1129 | - **Incremental:** Feed content in manageable chunks (500 lines or 1 page) instead of all at once. 1130 | 1131 | - **Overview-zoom-in:** First provide high-level structure (table of contents, summary), then allow drilling into details (raw texts). 1132 | 1133 | - **Parameterized/Programmable:** Instead of fixed actions, enable parameterized (columns to select) or programmable (SQL queries) actions, for example, to read CSV files. 1134 | 1135 | - **Backtracking:** Let the agent undo the last step instead of restarting entirely, preserving progress when encountering errors or dead ends. 1136 | 1137 | ## Example: Search Agent 1138 | 1139 | This agent: 1140 | 1. Decides whether to search or answer 1141 | 2. If searches, loops back to decide if more search needed 1142 | 3. 
Answers when enough context gathered 1143 | 1144 | ```python 1145 | class DecideAction(Node): 1146 | def prep(self, shared): 1147 | context = shared.get("context", "No previous search") 1148 | query = shared["query"] 1149 | return query, context 1150 | 1151 | def exec(self, inputs): 1152 | query, context = inputs 1153 | prompt = f""" 1154 | Given input: {query} 1155 | Previous search results: {context} 1156 | Should I: 1) Search web for more info 2) Answer with current knowledge 1157 | Output in yaml: 1158 | ```yaml 1159 | action: search/answer 1160 | reason: why this action 1161 | search_term: search phrase if action is search 1162 | ```""" 1163 | resp = call_llm(prompt) 1164 | yaml_str = resp.split("```yaml")[1].split("```")[0].strip() 1165 | result = yaml.safe_load(yaml_str) 1166 | 1167 | assert isinstance(result, dict) 1168 | assert "action" in result 1169 | assert "reason" in result 1170 | assert result["action"] in ["search", "answer"] 1171 | if result["action"] == "search": 1172 | assert "search_term" in result 1173 | 1174 | return result 1175 | 1176 | def post(self, shared, prep_res, exec_res): 1177 | if exec_res["action"] == "search": 1178 | shared["search_term"] = exec_res["search_term"] 1179 | return exec_res["action"] 1180 | 1181 | class SearchWeb(Node): 1182 | def prep(self, shared): 1183 | return shared["search_term"] 1184 | 1185 | def exec(self, search_term): 1186 | return search_web(search_term) 1187 | 1188 | def post(self, shared, prep_res, exec_res): 1189 | prev_searches = shared.get("context", []) 1190 | shared["context"] = prev_searches + [ 1191 | {"term": shared["search_term"], "result": exec_res} 1192 | ] 1193 | return "decide" 1194 | 1195 | class DirectAnswer(Node): 1196 | def prep(self, shared): 1197 | return shared["query"], shared.get("context", "") 1198 | 1199 | def exec(self, inputs): 1200 | query, context = inputs 1201 | return call_llm(f"Context: {context}\nAnswer: {query}") 1202 | 1203 | def post(self, shared, prep_res, exec_res): 1204 | print(f"Answer: {exec_res}") 1205 | shared["answer"] = exec_res 1206 | 1207 | # Connect nodes 1208 | decide = DecideAction() 1209 | search = SearchWeb() 1210 | answer = DirectAnswer() 1211 | 1212 | decide - "search" >> search 1213 | decide - "answer" >> answer 1214 | search - "decide" >> decide # Loop back 1215 | 1216 | flow = Flow(start=decide) 1217 | flow.run({"query": "Who won the Nobel Prize in Physics 2024?"}) 1218 | ``` 1219 | 1220 | ================================================ 1221 | File: docs/design_pattern/mapreduce.md 1222 | ================================================ 1223 | --- 1224 | layout: default 1225 | title: "Map Reduce" 1226 | parent: "Design Pattern" 1227 | nav_order: 4 1228 | --- 1229 | 1230 | # Map Reduce 1231 | 1232 | MapReduce is a design pattern suitable when you have either: 1233 | - Large input data (e.g., multiple files to process), or 1234 | - Large output data (e.g., multiple forms to fill) 1235 | 1236 | and there is a logical way to break the task into smaller, ideally independent parts. 1237 | 1238 |
1239 | 1240 |
1241 | 1242 | You first break down the task using [BatchNode](../core_abstraction/batch.md) in the map phase, followed by aggregation in the reduce phase. 1243 | 1244 | ### Example: Document Summarization 1245 | 1246 | ```python 1247 | class SummarizeAllFiles(BatchNode): 1248 | def prep(self, shared): 1249 | files_dict = shared["files"] # e.g. 10 files 1250 | return list(files_dict.items()) # [("file1.txt", "aaa..."), ("file2.txt", "bbb..."), ...] 1251 | 1252 | def exec(self, one_file): 1253 | filename, file_content = one_file 1254 | summary_text = call_llm(f"Summarize the following file:\n{file_content}") 1255 | return (filename, summary_text) 1256 | 1257 | def post(self, shared, prep_res, exec_res_list): 1258 | shared["file_summaries"] = dict(exec_res_list) 1259 | 1260 | class CombineSummaries(Node): 1261 | def prep(self, shared): 1262 | return shared["file_summaries"] 1263 | 1264 | def exec(self, file_summaries): 1265 | # format as: "File1: summary\nFile2: summary...\n" 1266 | text_list = [] 1267 | for fname, summ in file_summaries.items(): 1268 | text_list.append(f"{fname} summary:\n{summ}\n") 1269 | big_text = "\n---\n".join(text_list) 1270 | 1271 | return call_llm(f"Combine these file summaries into one final summary:\n{big_text}") 1272 | 1273 | def post(self, shared, prep_res, final_summary): 1274 | shared["all_files_summary"] = final_summary 1275 | 1276 | batch_node = SummarizeAllFiles() 1277 | combine_node = CombineSummaries() 1278 | batch_node >> combine_node 1279 | 1280 | flow = Flow(start=batch_node) 1281 | 1282 | shared = { 1283 | "files": { 1284 | "file1.txt": "Alice was beginning to get very tired of sitting by her sister...", 1285 | "file2.txt": "Some other interesting text ...", 1286 | # ... 1287 | } 1288 | } 1289 | flow.run(shared) 1290 | print("Individual Summaries:", shared["file_summaries"]) 1291 | print("\nFinal Summary:\n", shared["all_files_summary"]) 1292 | ``` 1293 | 1294 | ================================================ 1295 | File: docs/design_pattern/rag.md 1296 | ================================================ 1297 | --- 1298 | layout: default 1299 | title: "RAG" 1300 | parent: "Design Pattern" 1301 | nav_order: 3 1302 | --- 1303 | 1304 | # RAG (Retrieval Augmented Generation) 1305 | 1306 | For certain LLM tasks like answering questions, providing relevant context is essential. One common architecture is a **two-stage** RAG pipeline: 1307 | 1308 |
1309 | 1310 |
1311 | 1312 | 1. **Offline stage**: Preprocess and index documents ("building the index"). 1313 | 2. **Online stage**: Given a question, generate answers by retrieving the most relevant context. 1314 | 1315 | --- 1316 | ## Stage 1: Offline Indexing 1317 | 1318 | We create three Nodes: 1319 | 1. `ChunkDocs` – [chunks](../utility_function/chunking.md) raw text. 1320 | 2. `EmbedDocs` – [embeds](../utility_function/embedding.md) each chunk. 1321 | 3. `StoreIndex` – stores embeddings into a [vector database](../utility_function/vector.md). 1322 | 1323 | ```python 1324 | class ChunkDocs(BatchNode): 1325 | def prep(self, shared): 1326 | # A list of file paths in shared["files"]. We process each file. 1327 | return shared["files"] 1328 | 1329 | def exec(self, filepath): 1330 | # read file content. In real usage, do error handling. 1331 | with open(filepath, "r", encoding="utf-8") as f: 1332 | text = f.read() 1333 | # chunk by 100 chars each 1334 | chunks = [] 1335 | size = 100 1336 | for i in range(0, len(text), size): 1337 | chunks.append(text[i : i + size]) 1338 | return chunks 1339 | 1340 | def post(self, shared, prep_res, exec_res_list): 1341 | # exec_res_list is a list of chunk-lists, one per file. 1342 | # flatten them all into a single list of chunks. 1343 | all_chunks = [] 1344 | for chunk_list in exec_res_list: 1345 | all_chunks.extend(chunk_list) 1346 | shared["all_chunks"] = all_chunks 1347 | 1348 | class EmbedDocs(BatchNode): 1349 | def prep(self, shared): 1350 | return shared["all_chunks"] 1351 | 1352 | def exec(self, chunk): 1353 | return get_embedding(chunk) 1354 | 1355 | def post(self, shared, prep_res, exec_res_list): 1356 | # Store the list of embeddings. 1357 | shared["all_embeds"] = exec_res_list 1358 | print(f"Total embeddings: {len(exec_res_list)}") 1359 | 1360 | class StoreIndex(Node): 1361 | def prep(self, shared): 1362 | # We'll read all embeds from shared. 1363 | return shared["all_embeds"] 1364 | 1365 | def exec(self, all_embeds): 1366 | # Create a vector index (faiss or other DB in real usage). 1367 | index = create_index(all_embeds) 1368 | return index 1369 | 1370 | def post(self, shared, prep_res, index): 1371 | shared["index"] = index 1372 | 1373 | # Wire them in sequence 1374 | chunk_node = ChunkDocs() 1375 | embed_node = EmbedDocs() 1376 | store_node = StoreIndex() 1377 | 1378 | chunk_node >> embed_node >> store_node 1379 | 1380 | OfflineFlow = Flow(start=chunk_node) 1381 | ``` 1382 | 1383 | Usage example: 1384 | 1385 | ```python 1386 | shared = { 1387 | "files": ["doc1.txt", "doc2.txt"], # any text files 1388 | } 1389 | OfflineFlow.run(shared) 1390 | ``` 1391 | 1392 | --- 1393 | ## Stage 2: Online Query & Answer 1394 | 1395 | We have 3 nodes: 1396 | 1. `EmbedQuery` – embeds the user’s question. 1397 | 2. `RetrieveDocs` – retrieves top chunk from the index. 1398 | 3. `GenerateAnswer` – calls the LLM with the question + chunk to produce the final answer. 
1399 | 1400 | ```python 1401 | class EmbedQuery(Node): 1402 | def prep(self, shared): 1403 | return shared["question"] 1404 | 1405 | def exec(self, question): 1406 | return get_embedding(question) 1407 | 1408 | def post(self, shared, prep_res, q_emb): 1409 | shared["q_emb"] = q_emb 1410 | 1411 | class RetrieveDocs(Node): 1412 | def prep(self, shared): 1413 | # We'll need the query embedding, plus the offline index/chunks 1414 | return shared["q_emb"], shared["index"], shared["all_chunks"] 1415 | 1416 | def exec(self, inputs): 1417 | q_emb, index, chunks = inputs 1418 | I, D = search_index(index, q_emb, top_k=1) 1419 | best_id = I[0][0] 1420 | relevant_chunk = chunks[best_id] 1421 | return relevant_chunk 1422 | 1423 | def post(self, shared, prep_res, relevant_chunk): 1424 | shared["retrieved_chunk"] = relevant_chunk 1425 | print("Retrieved chunk:", relevant_chunk[:60], "...") 1426 | 1427 | class GenerateAnswer(Node): 1428 | def prep(self, shared): 1429 | return shared["question"], shared["retrieved_chunk"] 1430 | 1431 | def exec(self, inputs): 1432 | question, chunk = inputs 1433 | prompt = f"Question: {question}\nContext: {chunk}\nAnswer:" 1434 | return call_llm(prompt) 1435 | 1436 | def post(self, shared, prep_res, answer): 1437 | shared["answer"] = answer 1438 | print("Answer:", answer) 1439 | 1440 | embed_qnode = EmbedQuery() 1441 | retrieve_node = RetrieveDocs() 1442 | generate_node = GenerateAnswer() 1443 | 1444 | embed_qnode >> retrieve_node >> generate_node 1445 | OnlineFlow = Flow(start=embed_qnode) 1446 | ``` 1447 | 1448 | Usage example: 1449 | 1450 | ```python 1451 | # Suppose we already ran OfflineFlow and have: 1452 | # shared["all_chunks"], shared["index"], etc. 1453 | shared["question"] = "Why do people like cats?" 1454 | 1455 | OnlineFlow.run(shared) 1456 | # final answer in shared["answer"] 1457 | ``` 1458 | 1459 | ================================================ 1460 | File: docs/design_pattern/structure.md 1461 | ================================================ 1462 | --- 1463 | layout: default 1464 | title: "Structured Output" 1465 | parent: "Design Pattern" 1466 | nav_order: 5 1467 | --- 1468 | 1469 | # Structured Output 1470 | 1471 | In many use cases, you may want the LLM to output a specific structure, such as a list or a dictionary with predefined keys. 1472 | 1473 | There are several approaches to achieve a structured output: 1474 | - **Prompting** the LLM to strictly return a defined structure. 1475 | - Using LLMs that natively support **schema enforcement**. 1476 | - **Post-processing** the LLM's response to extract structured content. 1477 | 1478 | In practice, **Prompting** is simple and reliable for modern LLMs. 1479 | 1480 | ### Example Use Cases 1481 | 1482 | - Extracting Key Information 1483 | 1484 | ```yaml 1485 | product: 1486 | name: Widget Pro 1487 | price: 199.99 1488 | description: | 1489 | A high-quality widget designed for professionals. 1490 | Recommended for advanced users. 1491 | ``` 1492 | 1493 | - Summarizing Documents into Bullet Points 1494 | 1495 | ```yaml 1496 | summary: 1497 | - This product is easy to use. 1498 | - It is cost-effective. 1499 | - Suitable for all skill levels. 1500 | ``` 1501 | 1502 | - Generating Configuration Files 1503 | 1504 | ```yaml 1505 | server: 1506 | host: 127.0.0.1 1507 | port: 8080 1508 | ssl: true 1509 | ``` 1510 | 1511 | ## Prompt Engineering 1512 | 1513 | When prompting the LLM to produce **structured** output: 1514 | 1. **Wrap** the structure in code fences (e.g., `yaml`). 1515 | 2. 
**Validate** that all required fields exist (and let `Node` handles retry). 1516 | 1517 | ### Example Text Summarization 1518 | 1519 | ```python 1520 | class SummarizeNode(Node): 1521 | def exec(self, prep_res): 1522 | # Suppose `prep_res` is the text to summarize. 1523 | prompt = f""" 1524 | Please summarize the following text as YAML, with exactly 3 bullet points 1525 | 1526 | {prep_res} 1527 | 1528 | Now, output: 1529 | ```yaml 1530 | summary: 1531 | - bullet 1 1532 | - bullet 2 1533 | - bullet 3 1534 | ```""" 1535 | response = call_llm(prompt) 1536 | yaml_str = response.split("```yaml")[1].split("```")[0].strip() 1537 | 1538 | import yaml 1539 | structured_result = yaml.safe_load(yaml_str) 1540 | 1541 | assert "summary" in structured_result 1542 | assert isinstance(structured_result["summary"], list) 1543 | 1544 | return structured_result 1545 | ``` 1546 | 1547 | > Besides using `assert` statements, another popular way to validate schemas is [Pydantic](https://github.com/pydantic/pydantic) 1548 | {: .note } 1549 | 1550 | ### Why YAML instead of JSON? 1551 | 1552 | Current LLMs struggle with escaping. YAML is easier with strings since they don't always need quotes. 1553 | 1554 | **In JSON** 1555 | 1556 | ```json 1557 | { 1558 | "dialogue": "Alice said: \"Hello Bob.\\nHow are you?\\nI am good.\"" 1559 | } 1560 | ``` 1561 | 1562 | - Every double quote inside the string must be escaped with `\"`. 1563 | - Each newline in the dialogue must be represented as `\n`. 1564 | 1565 | **In YAML** 1566 | 1567 | ```yaml 1568 | dialogue: | 1569 | Alice said: "Hello Bob. 1570 | How are you? 1571 | I am good." 1572 | ``` 1573 | 1574 | - No need to escape interior quotes—just place the entire text under a block literal (`|`). 1575 | - Newlines are naturally preserved without needing `\n`. 1576 | 1577 | ================================================ 1578 | File: docs/design_pattern/workflow.md 1579 | ================================================ 1580 | --- 1581 | layout: default 1582 | title: "Workflow" 1583 | parent: "Design Pattern" 1584 | nav_order: 2 1585 | --- 1586 | 1587 | # Workflow 1588 | 1589 | Many real-world tasks are too complex for one LLM call. The solution is to **Task Decomposition**: decompose them into a [chain](../core_abstraction/flow.md) of multiple Nodes. 1590 | 1591 |
1592 | 1593 |
1594 | 1595 | > - You don't want to make each task **too coarse**, because it may be *too complex for one LLM call*. 1596 | > - You don't want to make each task **too granular**, because then *the LLM call doesn't have enough context* and results are *not consistent across nodes*. 1597 | > 1598 | > You usually need multiple *iterations* to find the *sweet spot*. If the task has too many *edge cases*, consider using [Agents](./agent.md). 1599 | {: .best-practice } 1600 | 1601 | ### Example: Article Writing 1602 | 1603 | ```python 1604 | class GenerateOutline(Node): 1605 | def prep(self, shared): return shared["topic"] 1606 | def exec(self, topic): return call_llm(f"Create a detailed outline for an article about {topic}") 1607 | def post(self, shared, prep_res, exec_res): shared["outline"] = exec_res 1608 | 1609 | class WriteSection(Node): 1610 | def prep(self, shared): return shared["outline"] 1611 | def exec(self, outline): return call_llm(f"Write content based on this outline: {outline}") 1612 | def post(self, shared, prep_res, exec_res): shared["draft"] = exec_res 1613 | 1614 | class ReviewAndRefine(Node): 1615 | def prep(self, shared): return shared["draft"] 1616 | def exec(self, draft): return call_llm(f"Review and improve this draft: {draft}") 1617 | def post(self, shared, prep_res, exec_res): shared["final_article"] = exec_res 1618 | 1619 | # Connect nodes 1620 | outline = GenerateOutline() 1621 | write = WriteSection() 1622 | review = ReviewAndRefine() 1623 | 1624 | outline >> write >> review 1625 | 1626 | # Create and run flow 1627 | writing_flow = Flow(start=outline) 1628 | shared = {"topic": "AI Safety"} 1629 | writing_flow.run(shared) 1630 | ``` 1631 | 1632 | For *dynamic cases*, consider using [Agents](./agent.md). 1633 | 1634 | ================================================ 1635 | File: docs/utility_function/llm.md 1636 | ================================================ 1637 | --- 1638 | layout: default 1639 | title: "LLM Wrapper" 1640 | parent: "Utility Function" 1641 | nav_order: 1 1642 | --- 1643 | 1644 | # LLM Wrappers 1645 | 1646 | Check out libraries like [litellm](https://github.com/BerriAI/litellm). 1647 | Here, we provide some minimal example implementations: 1648 | 1649 | 1. OpenAI 1650 | ```python 1651 | def call_llm(prompt): 1652 | from openai import OpenAI 1653 | client = OpenAI(api_key="YOUR_API_KEY_HERE") 1654 | r = client.chat.completions.create( 1655 | model="gpt-4o", 1656 | messages=[{"role": "user", "content": prompt}] 1657 | ) 1658 | return r.choices[0].message.content 1659 | 1660 | # Example usage 1661 | call_llm("How are you?") 1662 | ``` 1663 | > Store the API key in an environment variable like OPENAI_API_KEY for security. 1664 | {: .best-practice } 1665 | 1666 | 2. Claude (Anthropic) 1667 | ```python 1668 | def call_llm(prompt): 1669 | from anthropic import Anthropic 1670 | client = Anthropic(api_key="YOUR_API_KEY_HERE") 1671 | r = client.messages.create( 1672 | model="claude-sonnet-4-0", 1673 | messages=[ 1674 | {"role": "user", "content": prompt} 1675 | ] 1676 | ) 1677 | return r.content[0].text 1678 | ``` 1679 | 1680 | 3. Google (Generative AI Studio / PaLM API) 1681 | ```python 1682 | def call_llm(prompt): 1683 | from google import genai 1684 | client = genai.Client(api_key='GEMINI_API_KEY') 1685 | response = client.models.generate_content( 1686 | model='gemini-2.5-pro', 1687 | contents=prompt 1688 | ) 1689 | return response.text 1690 | ``` 1691 | 1692 | 4. 
Azure (Azure OpenAI) 1693 | ```python 1694 | def call_llm(prompt): 1695 | from openai import AzureOpenAI 1696 | client = AzureOpenAI( 1697 | azure_endpoint="https://.openai.azure.com/", 1698 | api_key="YOUR_API_KEY_HERE", 1699 | api_version="2023-05-15" 1700 | ) 1701 | r = client.chat.completions.create( 1702 | model="", 1703 | messages=[{"role": "user", "content": prompt}] 1704 | ) 1705 | return r.choices[0].message.content 1706 | ``` 1707 | 1708 | 5. Ollama (Local LLM) 1709 | ```python 1710 | def call_llm(prompt): 1711 | from ollama import chat 1712 | response = chat( 1713 | model="llama2", 1714 | messages=[{"role": "user", "content": prompt}] 1715 | ) 1716 | return response.message.content 1717 | ``` 1718 | 1719 | ## Improvements 1720 | Feel free to enhance your `call_llm` function as needed. Here are examples: 1721 | 1722 | - Handle chat history: 1723 | 1724 | ```python 1725 | def call_llm(messages): 1726 | from openai import OpenAI 1727 | client = OpenAI(api_key="YOUR_API_KEY_HERE") 1728 | r = client.chat.completions.create( 1729 | model="gpt-4o", 1730 | messages=messages 1731 | ) 1732 | return r.choices[0].message.content 1733 | ``` 1734 | 1735 | - Add in-memory caching 1736 | 1737 | ```python 1738 | from functools import lru_cache 1739 | 1740 | @lru_cache(maxsize=1000) 1741 | def call_llm(prompt): 1742 | # Your implementation here 1743 | pass 1744 | ``` 1745 | 1746 | > ⚠️ Caching conflicts with Node retries, as retries yield the same result. 1747 | > 1748 | > To address this, you could use cached results only if not retried. 1749 | {: .warning } 1750 | 1751 | 1752 | ```python 1753 | from functools import lru_cache 1754 | 1755 | @lru_cache(maxsize=1000) 1756 | def cached_call(prompt): 1757 | pass 1758 | 1759 | def call_llm(prompt, use_cache): 1760 | if use_cache: 1761 | return cached_call(prompt) 1762 | # Call the underlying function directly 1763 | return cached_call.__wrapped__(prompt) 1764 | 1765 | class SummarizeNode(Node): 1766 | def exec(self, text): 1767 | return call_llm(f"Summarize: {text}", self.cur_retry==0) 1768 | ``` 1769 | 1770 | - Enable logging: 1771 | 1772 | ```python 1773 | def call_llm(prompt): 1774 | import logging 1775 | logging.info(f"Prompt: {prompt}") 1776 | response = ... # Your implementation here 1777 | logging.info(f"Response: {response}") 1778 | return response 1779 | ``` -------------------------------------------------------------------------------- /.cursorrules: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: "Agentic Coding" 4 | --- 5 | 6 | # Agentic Coding: Humans Design, Agents code! 7 | 8 | > If you are an AI agent involved in building LLM Systems, read this guide **VERY, VERY** carefully! This is the most important chapter in the entire document. Throughout development, you should always (1) start with a small and simple solution, (2) design at a high level (`docs/design.md`) before implementation, and (3) frequently ask humans for feedback and clarification. 9 | {: .warning } 10 | 11 | ## Agentic Coding Steps 12 | 13 | Agentic Coding should be a collaboration between Human System Design and Agent Implementation: 14 | 15 | | Steps | Human | AI | Comment | 16 | |:-----------------------|:----------:|:---------:|:------------------------------------------------------------------------| 17 | | 1. Requirements | ★★★ High | ★☆☆ Low | Humans understand the requirements and context. | 18 | | 2. 
Flow | ★★☆ Medium | ★★☆ Medium | Humans specify the high-level design, and the AI fills in the details. | 19 | | 3. Utilities | ★★☆ Medium | ★★☆ Medium | Humans provide available external APIs and integrations, and the AI helps with implementation. | 20 | | 4. Data | ★☆☆ Low | ★★★ High | AI designs the data schema, and humans verify. | 21 | | 5. Node | ★☆☆ Low | ★★★ High | The AI helps design the node based on the flow. | 22 | | 6. Implementation | ★☆☆ Low | ★★★ High | The AI implements the flow based on the design. | 23 | | 7. Optimization | ★★☆ Medium | ★★☆ Medium | Humans evaluate the results, and the AI helps optimize. | 24 | | 8. Reliability | ★☆☆ Low | ★★★ High | The AI writes test cases and addresses corner cases. | 25 | 26 | 1. **Requirements**: Clarify the requirements for your project, and evaluate whether an AI system is a good fit. 27 | - Understand AI systems' strengths and limitations: 28 | - **Good for**: Routine tasks requiring common sense (filling forms, replying to emails) 29 | - **Good for**: Creative tasks with well-defined inputs (building slides, writing SQL) 30 | - **Not good for**: Ambiguous problems requiring complex decision-making (business strategy, startup planning) 31 | - **Keep It User-Centric:** Explain the "problem" from the user's perspective rather than just listing features. 32 | - **Balance complexity vs. impact**: Aim to deliver the highest value features with minimal complexity early. 33 | 34 | 2. **Flow Design**: Outline at a high level, describe how your AI system orchestrates nodes. 35 | - Identify applicable design patterns (e.g., [Map Reduce](./design_pattern/mapreduce.md), [Agent](./design_pattern/agent.md), [RAG](./design_pattern/rag.md)). 36 | - For each node in the flow, start with a high-level one-line description of what it does. 37 | - If using **Map Reduce**, specify how to map (what to split) and how to reduce (how to combine). 38 | - If using **Agent**, specify what are the inputs (context) and what are the possible actions. 39 | - If using **RAG**, specify what to embed, noting that there's usually both offline (indexing) and online (retrieval) workflows. 40 | - Outline the flow and draw it in a mermaid diagram. For example: 41 | ```mermaid 42 | flowchart LR 43 | start[Start] --> batch[Batch] 44 | batch --> check[Check] 45 | check -->|OK| process 46 | check -->|Error| fix[Fix] 47 | fix --> check 48 | 49 | subgraph process[Process] 50 | step1[Step 1] --> step2[Step 2] 51 | end 52 | 53 | process --> endNode[End] 54 | ``` 55 | - > **If Humans can't specify the flow, AI Agents can't automate it!** Before building an LLM system, thoroughly understand the problem and potential solution by manually solving example inputs to develop intuition. 56 | {: .best-practice } 57 | 58 | 3. **Utilities**: Based on the Flow Design, identify and implement necessary utility functions. 59 | - Think of your AI system as the brain. It needs a body—these *external utility functions*—to interact with the real world: 60 |
61 | 62 | - Reading inputs (e.g., retrieving Slack messages, reading emails) 63 | - Writing outputs (e.g., generating reports, sending emails) 64 | - Using external tools (e.g., calling LLMs, searching the web) 65 | - **NOTE**: *LLM-based tasks* (e.g., summarizing text, analyzing sentiment) are **NOT** utility functions; rather, they are *core functions* internal in the AI system. 66 | - For each utility function, implement it and write a simple test. 67 | - Document their input/output, as well as why they are necessary. For example: 68 | - `name`: `get_embedding` (`utils/get_embedding.py`) 69 | - `input`: `str` 70 | - `output`: a vector of 3072 floats 71 | - `necessity`: Used by the second node to embed text 72 | - Example utility implementation: 73 | ```python 74 | # utils/call_llm.py 75 | from openai import OpenAI 76 | 77 | def call_llm(prompt): 78 | client = OpenAI(api_key="YOUR_API_KEY_HERE") 79 | r = client.chat.completions.create( 80 | model="gpt-4o", 81 | messages=[{"role": "user", "content": prompt}] 82 | ) 83 | return r.choices[0].message.content 84 | 85 | if __name__ == "__main__": 86 | prompt = "What is the meaning of life?" 87 | print(call_llm(prompt)) 88 | ``` 89 | - > **Sometimes, design Utilities before Flow:** For example, for an LLM project to automate a legacy system, the bottleneck will likely be the available interface to that system. Start by designing the hardest utilities for interfacing, and then build the flow around them. 90 | {: .best-practice } 91 | - > **Avoid Exception Handling in Utilities**: If a utility function is called from a Node's `exec()` method, avoid using `try...except` blocks within the utility. Let the Node's built-in retry mechanism handle failures. 92 | {: .warning } 93 | 94 | 4. **Data Design**: Design the shared store that nodes will use to communicate. 95 | - One core design principle for PocketFlow is to use a well-designed [shared store](./core_abstraction/communication.md)—a data contract that all nodes agree upon to retrieve and store data. 96 | - For simple systems, use an in-memory dictionary. 97 | - For more complex systems or when persistence is required, use a database. 98 | - **Don't Repeat Yourself**: Use in-memory references or foreign keys. 99 | - Example shared store design: 100 | ```python 101 | shared = { 102 | "user": { 103 | "id": "user123", 104 | "context": { # Another nested dict 105 | "weather": {"temp": 72, "condition": "sunny"}, 106 | "location": "San Francisco" 107 | } 108 | }, 109 | "results": {} # Empty dict to store outputs 110 | } 111 | ``` 112 | 113 | 5. **Node Design**: Plan how each node will read and write data, and use utility functions. 114 | - For each [Node](./core_abstraction/node.md), describe its type, how it reads and writes data, and which utility function it uses. Keep it specific but high-level without codes. For example: 115 | - `type`: Regular (or Batch, or Async) 116 | - `prep`: Read "text" from the shared store 117 | - `exec`: Call the embedding utility function. **Avoid exception handling here**; let the Node's retry mechanism manage failures. 118 | - `post`: Write "embedding" to the shared store 119 | 120 | 6. **Implementation**: Implement the initial nodes and flows based on the design. 121 | - 🎉 If you've reached this step, humans have finished the design. Now *Agentic Coding* begins! 122 | - **"Keep it simple, stupid!"** Avoid complex features and full-scale type checking. 123 | - **FAIL FAST**! 
Rely on the built-in [Node](./core_abstraction/node.md) retry and fallback mechanisms instead of defensive error handling, so weak points in the system surface quickly. 124 | - Add logging throughout the code to facilitate debugging. 125 | 126 | 7. **Optimization**: 127 | - **Use Intuition**: For a quick initial evaluation, human intuition is often a good start. 128 | - **Redesign Flow (Back to Step 2: Flow Design)**: Consider breaking down tasks further, introducing agentic decisions, or better managing input contexts. 129 | - If your flow design is already solid, move on to micro-optimizations: 130 | - **Prompt Engineering**: Use clear, specific instructions with examples to reduce ambiguity. 131 | - **In-Context Learning**: Provide robust examples for tasks that are difficult to specify with instructions alone. 132 | 133 | - > **You'll likely iterate a lot!** Expect to repeat Steps 3–6 hundreds of times. 134 | > 135 |
136 | {: .best-practice } 137 | 138 | 8. **Reliability** 139 | - **Node Retries**: Add checks in the node `exec` to ensure outputs meet requirements, and consider increasing `max_retries` and `wait` times. 140 | - **Logging and Visualization**: Maintain logs of all attempts and visualize node results for easier debugging. 141 | - **Self-Evaluation**: Add a separate node (powered by an LLM) to review outputs when results are uncertain. 142 | 143 | ## Example LLM Project File Structure 144 | 145 | ``` 146 | my_project/ 147 | ├── main.py 148 | ├── nodes.py 149 | ├── flow.py 150 | ├── utils/ 151 | │ ├── __init__.py 152 | │ ├── call_llm.py 153 | │ └── search_web.py 154 | ├── requirements.txt 155 | └── docs/ 156 | └── design.md 157 | ``` 158 | 159 | - **`requirements.txt`**: Lists the Python dependencies for the project. 160 | ``` 161 | PyYAML 162 | pocketflow 163 | ``` 164 | 165 | - **`docs/design.md`**: Contains project documentation for each step above. This should be *high-level* and *no-code*. 166 | ~~~ 167 | # Design Doc: Your Project Name 168 | 169 | > Please DON'T remove notes for AI 170 | 171 | ## Requirements 172 | 173 | > Notes for AI: Keep it simple and clear. 174 | > If the requirements are abstract, write concrete user stories 175 | 176 | 177 | ## Flow Design 178 | 179 | > Notes for AI: 180 | > 1. Consider the design patterns of agent, map-reduce, rag, and workflow. Apply them if they fit. 181 | > 2. Present a concise, high-level description of the workflow. 182 | 183 | ### Applicable Design Pattern: 184 | 185 | 1. Map the file summary into chunks, then reduce these chunks into a final summary. 186 | 2. Agentic file finder 187 | - *Context*: The entire summary of the file 188 | - *Action*: Find the file 189 | 190 | ### Flow high-level Design: 191 | 192 | 1. **First Node**: This node is for ... 193 | 2. **Second Node**: This node is for ... 194 | 3. **Third Node**: This node is for ... 195 | 196 | ```mermaid 197 | flowchart TD 198 | firstNode[First Node] --> secondNode[Second Node] 199 | secondNode --> thirdNode[Third Node] 200 | ``` 201 | ## Utility Functions 202 | 203 | > Notes for AI: 204 | > 1. Understand the utility function definition thoroughly by reviewing the doc. 205 | > 2. Include only the necessary utility functions, based on nodes in the flow. 206 | 207 | 1. **Call LLM** (`utils/call_llm.py`) 208 | - *Input*: prompt (str) 209 | - *Output*: response (str) 210 | - Generally used by most nodes for LLM tasks 211 | 212 | 2. **Embedding** (`utils/get_embedding.py`) 213 | - *Input*: str 214 | - *Output*: a vector of 3072 floats 215 | - Used by the second node to embed text 216 | 217 | ## Node Design 218 | 219 | ### Shared Store 220 | 221 | > Notes for AI: Try to minimize data redundancy 222 | 223 | The shared store structure is organized as follows: 224 | 225 | ```python 226 | shared = { 227 | "key": "value" 228 | } 229 | ``` 230 | 231 | ### Node Steps 232 | 233 | > Notes for AI: Carefully decide whether to use Batch/Async Node/Flow. 234 | 235 | 1. First Node 236 | - *Purpose*: Provide a short explanation of the node’s function 237 | - *Type*: Decide between Regular, Batch, or Async 238 | - *Steps*: 239 | - *prep*: Read "key" from the shared store 240 | - *exec*: Call the utility function 241 | - *post*: Write "key" to the shared store 242 | 243 | 2. Second Node 244 | ... 245 | ~~~ 246 | 247 | 248 | - **`utils/`**: Contains all utility functions. 249 | - It's recommended to dedicate one Python file to each API call, for example `call_llm.py` or `search_web.py`. 
250 | - Each file should also include a `main()` function to try that API call 251 | ```python 252 | from google import genai 253 | import os 254 | 255 | def call_llm(prompt: str) -> str: 256 | client = genai.Client( 257 | api_key=os.getenv("GEMINI_API_KEY", ""), 258 | ) 259 | model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash") 260 | response = client.models.generate_content(model=model, contents=[prompt]) 261 | return response.text 262 | 263 | if __name__ == "__main__": 264 | test_prompt = "Hello, how are you?" 265 | 266 | # Simple check that the API call works 267 | print("Making call...") 268 | response = call_llm(test_prompt) 269 | print(f"Response: {response}") 270 | ``` 271 | 272 | - **`nodes.py`**: Contains all the node definitions. 273 | ```python 274 | # nodes.py 275 | from pocketflow import Node 276 | from utils.call_llm import call_llm 277 | 278 | class GetQuestionNode(Node): 279 | def exec(self, _): 280 | # Get question directly from user input 281 | user_question = input("Enter your question: ") 282 | return user_question 283 | 284 | def post(self, shared, prep_res, exec_res): 285 | # Store the user's question 286 | shared["question"] = exec_res 287 | return "default" # Go to the next node 288 | 289 | class AnswerNode(Node): 290 | def prep(self, shared): 291 | # Read question from shared 292 | return shared["question"] 293 | 294 | def exec(self, question): 295 | # Call LLM to get the answer 296 | return call_llm(question) 297 | 298 | def post(self, shared, prep_res, exec_res): 299 | # Store the answer in shared 300 | shared["answer"] = exec_res 301 | ``` 302 | - **`flow.py`**: Implements functions that create flows by importing node definitions and connecting them. 303 | ```python 304 | # flow.py 305 | from pocketflow import Flow 306 | from nodes import GetQuestionNode, AnswerNode 307 | 308 | def create_qa_flow(): 309 | """Create and return a question-answering flow.""" 310 | # Create nodes 311 | get_question_node = GetQuestionNode() 312 | answer_node = AnswerNode() 313 | 314 | # Connect nodes in sequence 315 | get_question_node >> answer_node 316 | 317 | # Create flow starting with input node 318 | return Flow(start=get_question_node) 319 | ``` 320 | - **`main.py`**: Serves as the project's entry point. 321 | ```python 322 | # main.py 323 | from flow import create_qa_flow 324 | 325 | # Example main function 326 | # Please replace this with your own main function 327 | def main(): 328 | shared = { 329 | "question": None, # Will be populated by GetQuestionNode from user input 330 | "answer": None # Will be populated by AnswerNode 331 | } 332 | 333 | # Create the flow and run it 334 | qa_flow = create_qa_flow() 335 | qa_flow.run(shared) 336 | print(f"Question: {shared['question']}") 337 | print(f"Answer: {shared['answer']}") 338 | 339 | if __name__ == "__main__": 340 | main() 341 | ``` 342 | 343 | ================================================ 344 | File: docs/index.md 345 | ================================================ 346 | --- 347 | layout: default 348 | title: "Home" 349 | nav_order: 1 350 | --- 351 | 352 | # Pocket Flow 353 | 354 | A [100-line](https://github.com/the-pocket/PocketFlow/blob/main/pocketflow/__init__.py) minimalist LLM framework for *Agents, Task Decomposition, RAG, etc*. 355 | 356 | - **Lightweight**: Just the core graph abstraction in 100 lines. Zero dependencies, zero vendor lock-in. 
357 | - **Expressive**: Everything you love from larger frameworks—([Multi-](./design_pattern/multi_agent.html))[Agents](./design_pattern/agent.html), [Workflow](./design_pattern/workflow.html), [RAG](./design_pattern/rag.html), and more. 358 | - **Agentic-Coding**: Intuitive enough for AI agents to help humans build complex LLM applications. 359 | 360 |
361 | 362 |
363 | 364 | ## Core Abstraction 365 | 366 | We model the LLM workflow as a **Graph + Shared Store**: 367 | 368 | - [Node](./core_abstraction/node.md) handles simple (LLM) tasks. 369 | - [Flow](./core_abstraction/flow.md) connects nodes through **Actions** (labeled edges). 370 | - [Shared Store](./core_abstraction/communication.md) enables communication between nodes within flows. 371 | - [Batch](./core_abstraction/batch.md) nodes/flows allow for data-intensive tasks. 372 | - [Async](./core_abstraction/async.md) nodes/flows allow waiting for asynchronous tasks. 373 | - [(Advanced) Parallel](./core_abstraction/parallel.md) nodes/flows handle I/O-bound tasks. 374 | 375 |
376 | 377 |
378 | 379 | ## Design Pattern 380 | 381 | From there, it’s easy to implement popular design patterns: 382 | 383 | - [Agent](./design_pattern/agent.md) autonomously makes decisions. 384 | - [Workflow](./design_pattern/workflow.md) chains multiple tasks into pipelines. 385 | - [RAG](./design_pattern/rag.md) integrates data retrieval with generation. 386 | - [Map Reduce](./design_pattern/mapreduce.md) splits data tasks into Map and Reduce steps. 387 | - [Structured Output](./design_pattern/structure.md) formats outputs consistently. 388 | - [(Advanced) Multi-Agents](./design_pattern/multi_agent.md) coordinate multiple agents. 389 | 390 |
391 | 392 |
393 | 394 | ## Utility Function 395 | 396 | We **do not** provide built-in utilities. Instead, we offer *examples*—please *implement your own*: 397 | 398 | - [LLM Wrapper](./utility_function/llm.md) 399 | - [Viz and Debug](./utility_function/viz.md) 400 | - [Web Search](./utility_function/websearch.md) 401 | - [Chunking](./utility_function/chunking.md) 402 | - [Embedding](./utility_function/embedding.md) 403 | - [Vector Databases](./utility_function/vector.md) 404 | - [Text-to-Speech](./utility_function/text_to_speech.md) 405 | 406 | **Why not built-in?**: I believe it's a *bad practice* for vendor-specific APIs in a general framework: 407 | - *API Volatility*: Frequent changes lead to heavy maintenance for hardcoded APIs. 408 | - *Flexibility*: You may want to switch vendors, use fine-tuned models, or run them locally. 409 | - *Optimizations*: Prompt caching, batching, and streaming are easier without vendor lock-in. 410 | 411 | ## Ready to build your Apps? 412 | 413 | Check out [Agentic Coding Guidance](./guide.md), the fastest way to develop LLM projects with Pocket Flow! 414 | 415 | ================================================ 416 | File: docs/core_abstraction/async.md 417 | ================================================ 418 | --- 419 | layout: default 420 | title: "(Advanced) Async" 421 | parent: "Core Abstraction" 422 | nav_order: 5 423 | --- 424 | 425 | # (Advanced) Async 426 | 427 | **Async** Nodes implement `prep_async()`, `exec_async()`, `exec_fallback_async()`, and/or `post_async()`. This is useful for: 428 | 429 | 1. **prep_async()**: For *fetching/reading data (files, APIs, DB)* in an I/O-friendly way. 430 | 2. **exec_async()**: Typically used for async LLM calls. 431 | 3. **post_async()**: For *awaiting user feedback*, *coordinating across multi-agents* or any additional async steps after `exec_async()`. 432 | 433 | **Note**: `AsyncNode` must be wrapped in `AsyncFlow`. `AsyncFlow` can also include regular (sync) nodes. 434 | 435 | ### Example 436 | 437 | ```python 438 | class SummarizeThenVerify(AsyncNode): 439 | async def prep_async(self, shared): 440 | # Example: read a file asynchronously 441 | doc_text = await read_file_async(shared["doc_path"]) 442 | return doc_text 443 | 444 | async def exec_async(self, prep_res): 445 | # Example: async LLM call 446 | summary = await call_llm_async(f"Summarize: {prep_res}") 447 | return summary 448 | 449 | async def post_async(self, shared, prep_res, exec_res): 450 | # Example: wait for user feedback 451 | decision = await gather_user_feedback(exec_res) 452 | if decision == "approve": 453 | shared["summary"] = exec_res 454 | return "approve" 455 | return "deny" 456 | 457 | summarize_node = SummarizeThenVerify() 458 | final_node = Finalize() 459 | 460 | # Define transitions 461 | summarize_node - "approve" >> final_node 462 | summarize_node - "deny" >> summarize_node # retry 463 | 464 | flow = AsyncFlow(start=summarize_node) 465 | 466 | async def main(): 467 | shared = {"doc_path": "document.txt"} 468 | await flow.run_async(shared) 469 | print("Final Summary:", shared.get("summary")) 470 | 471 | asyncio.run(main()) 472 | ``` 473 | 474 | ================================================ 475 | File: docs/core_abstraction/batch.md 476 | ================================================ 477 | --- 478 | layout: default 479 | title: "Batch" 480 | parent: "Core Abstraction" 481 | nav_order: 4 482 | --- 483 | 484 | # Batch 485 | 486 | **Batch** makes it easier to handle large inputs in one Node or **rerun** a Flow multiple times. 
Example use cases: 487 | - **Chunk-based** processing (e.g., splitting large texts). 488 | - **Iterative** processing over lists of input items (e.g., user queries, files, URLs). 489 | 490 | ## 1. BatchNode 491 | 492 | A **BatchNode** extends `Node` but changes `prep()` and `exec()`: 493 | 494 | - **`prep(shared)`**: returns an **iterable** (e.g., list, generator). 495 | - **`exec(item)`**: called **once** per item in that iterable. 496 | - **`post(shared, prep_res, exec_res_list)`**: after all items are processed, receives a **list** of results (`exec_res_list`) and returns an **Action**. 497 | 498 | 499 | ### Example: Summarize a Large File 500 | 501 | ```python 502 | class MapSummaries(BatchNode): 503 | def prep(self, shared): 504 | # Suppose we have a big file; chunk it 505 | content = shared["data"] 506 | chunk_size = 10000 507 | chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)] 508 | return chunks 509 | 510 | def exec(self, chunk): 511 | prompt = f"Summarize this chunk in 10 words: {chunk}" 512 | summary = call_llm(prompt) 513 | return summary 514 | 515 | def post(self, shared, prep_res, exec_res_list): 516 | combined = "\n".join(exec_res_list) 517 | shared["summary"] = combined 518 | return "default" 519 | 520 | map_summaries = MapSummaries() 521 | flow = Flow(start=map_summaries) 522 | flow.run(shared) 523 | ``` 524 | 525 | --- 526 | 527 | ## 2. BatchFlow 528 | 529 | A **BatchFlow** runs a **Flow** multiple times, each time with different `params`. Think of it as a loop that replays the Flow for each parameter set. 530 | 531 | ### Example: Summarize Many Files 532 | 533 | ```python 534 | class SummarizeAllFiles(BatchFlow): 535 | def prep(self, shared): 536 | # Return a list of param dicts (one per file) 537 | filenames = list(shared["data"].keys()) # e.g., ["file1.txt", "file2.txt", ...] 538 | return [{"filename": fn} for fn in filenames] 539 | 540 | # Suppose we have a per-file Flow (e.g., load_file >> summarize >> reduce): 541 | summarize_file = SummarizeFile(start=load_file) 542 | 543 | # Wrap that flow into a BatchFlow: 544 | summarize_all_files = SummarizeAllFiles(start=summarize_file) 545 | summarize_all_files.run(shared) 546 | ``` 547 | 548 | ### Under the Hood 549 | 1. `prep(shared)` returns a list of param dicts—e.g., `[{filename: "file1.txt"}, {filename: "file2.txt"}, ...]`. 550 | 2. The **BatchFlow** loops through each dict. For each one: 551 | - It merges the dict with the BatchFlow’s own `params`. 552 | - It calls `flow.run(shared)` using the merged result. 553 | 3. This means the sub-Flow is run **repeatedly**, once for every param dict. 554 | 555 | --- 556 | 557 | ## 3. Nested or Multi-Level Batches 558 | 559 | You can nest a **BatchFlow** in another **BatchFlow**. For instance: 560 | - **Outer** batch: returns a list of diretory param dicts (e.g., `{"directory": "/pathA"}`, `{"directory": "/pathB"}`, ...). 561 | - **Inner** batch: returning a list of per-file param dicts. 562 | 563 | At each level, **BatchFlow** merges its own param dict with the parent’s. By the time you reach the **innermost** node, the final `params` is the merged result of **all** parents in the chain. This way, a nested structure can keep track of the entire context (e.g., directory + file name) at once. 564 | 565 | ```python 566 | 567 | class FileBatchFlow(BatchFlow): 568 | def prep(self, shared): 569 | directory = self.params["directory"] 570 | # e.g., files = ["file1.txt", "file2.txt", ...] 
571 | files = [f for f in os.listdir(directory) if f.endswith(".txt")] 572 | return [{"filename": f} for f in files] 573 | 574 | class DirectoryBatchFlow(BatchFlow): 575 | def prep(self, shared): 576 | directories = [ "/path/to/dirA", "/path/to/dirB"] 577 | return [{"directory": d} for d in directories] 578 | 579 | # MapSummaries have params like {"directory": "/path/to/dirA", "filename": "file1.txt"} 580 | inner_flow = FileBatchFlow(start=MapSummaries()) 581 | outer_flow = DirectoryBatchFlow(start=inner_flow) 582 | ``` 583 | 584 | ================================================ 585 | File: docs/core_abstraction/communication.md 586 | ================================================ 587 | --- 588 | layout: default 589 | title: "Communication" 590 | parent: "Core Abstraction" 591 | nav_order: 3 592 | --- 593 | 594 | # Communication 595 | 596 | Nodes and Flows **communicate** in 2 ways: 597 | 598 | 1. **Shared Store (for almost all the cases)** 599 | 600 | - A global data structure (often an in-mem dict) that all nodes can read ( `prep()`) and write (`post()`). 601 | - Great for data results, large content, or anything multiple nodes need. 602 | - You shall design the data structure and populate it ahead. 603 | 604 | - > **Separation of Concerns:** Use `Shared Store` for almost all cases to separate *Data Schema* from *Compute Logic*! This approach is both flexible and easy to manage, resulting in more maintainable code. `Params` is more a syntax sugar for [Batch](./batch.md). 605 | {: .best-practice } 606 | 607 | 2. **Params (only for [Batch](./batch.md))** 608 | - Each node has a local, ephemeral `params` dict passed in by the **parent Flow**, used as an identifier for tasks. Parameter keys and values shall be **immutable**. 609 | - Good for identifiers like filenames or numeric IDs, in Batch mode. 610 | 611 | If you know memory management, think of the **Shared Store** like a **heap** (shared by all function calls), and **Params** like a **stack** (assigned by the caller). 612 | 613 | --- 614 | 615 | ## 1. Shared Store 616 | 617 | ### Overview 618 | 619 | A shared store is typically an in-mem dictionary, like: 620 | ```python 621 | shared = {"data": {}, "summary": {}, "config": {...}, ...} 622 | ``` 623 | 624 | It can also contain local file handlers, DB connections, or a combination for persistence. We recommend deciding the data structure or DB schema first based on your app requirements. 625 | 626 | ### Example 627 | 628 | ```python 629 | class LoadData(Node): 630 | def post(self, shared, prep_res, exec_res): 631 | # We write data to shared store 632 | shared["data"] = "Some text content" 633 | return None 634 | 635 | class Summarize(Node): 636 | def prep(self, shared): 637 | # We read data from shared store 638 | return shared["data"] 639 | 640 | def exec(self, prep_res): 641 | # Call LLM to summarize 642 | prompt = f"Summarize: {prep_res}" 643 | summary = call_llm(prompt) 644 | return summary 645 | 646 | def post(self, shared, prep_res, exec_res): 647 | # We write summary to shared store 648 | shared["summary"] = exec_res 649 | return "default" 650 | 651 | load_data = LoadData() 652 | summarize = Summarize() 653 | load_data >> summarize 654 | flow = Flow(start=load_data) 655 | 656 | shared = {} 657 | flow.run(shared) 658 | ``` 659 | 660 | Here: 661 | - `LoadData` writes to `shared["data"]`. 662 | - `Summarize` reads from `shared["data"]`, summarizes, and writes to `shared["summary"]`. 663 | 664 | --- 665 | 666 | ## 2. 
Params 667 | 668 | **Params** let you store *per-Node* or *per-Flow* config that doesn't need to live in the shared store. They are: 669 | - **Immutable** during a Node's run cycle (i.e., they don't change mid-`prep->exec->post`). 670 | - **Set** via `set_params()`. 671 | - **Cleared** and updated each time a parent Flow calls it. 672 | 673 | > Only set the uppermost Flow params because others will be overwritten by the parent Flow. 674 | > 675 | > If you need to set child node params, see [Batch](./batch.md). 676 | {: .warning } 677 | 678 | Typically, **Params** are identifiers (e.g., file name, page number). Use them to fetch the task you assigned or write to a specific part of the shared store. 679 | 680 | ### Example 681 | 682 | ```python 683 | # 1) Create a Node that uses params 684 | class SummarizeFile(Node): 685 | def prep(self, shared): 686 | # Access the node's param 687 | filename = self.params["filename"] 688 | return shared["data"].get(filename, "") 689 | 690 | def exec(self, prep_res): 691 | prompt = f"Summarize: {prep_res}" 692 | return call_llm(prompt) 693 | 694 | def post(self, shared, prep_res, exec_res): 695 | filename = self.params["filename"] 696 | shared["summary"][filename] = exec_res 697 | return "default" 698 | 699 | # 2) Set params 700 | node = SummarizeFile() 701 | 702 | # 3) Set Node params directly (for testing) 703 | node.set_params({"filename": "doc1.txt"}) 704 | node.run(shared) 705 | 706 | # 4) Create Flow 707 | flow = Flow(start=node) 708 | 709 | # 5) Set Flow params (overwrites node params) 710 | flow.set_params({"filename": "doc2.txt"}) 711 | flow.run(shared) # The node summarizes doc2, not doc1 712 | ``` 713 | 714 | ================================================ 715 | File: docs/core_abstraction/flow.md 716 | ================================================ 717 | --- 718 | layout: default 719 | title: "Flow" 720 | parent: "Core Abstraction" 721 | nav_order: 2 722 | --- 723 | 724 | # Flow 725 | 726 | A **Flow** orchestrates a graph of Nodes. You can chain Nodes in a sequence or create branching depending on the **Actions** returned from each Node's `post()`. 727 | 728 | ## 1. Action-based Transitions 729 | 730 | Each Node's `post()` returns an **Action** string. By default, if `post()` doesn't return anything, we treat that as `"default"`. 731 | 732 | You define transitions with the syntax: 733 | 734 | 1. **Basic default transition**: `node_a >> node_b` 735 | This means if `node_a.post()` returns `"default"`, go to `node_b`. 736 | (Equivalent to `node_a - "default" >> node_b`) 737 | 738 | 2. **Named action transition**: `node_a - "action_name" >> node_b` 739 | This means if `node_a.post()` returns `"action_name"`, go to `node_b`. 740 | 741 | It's possible to create loops, branching, or multi-step flows. 742 | 743 | ## 2. Creating a Flow 744 | 745 | A **Flow** begins with a **start** node. You call `Flow(start=some_node)` to specify the entry point. When you call `flow.run(shared)`, it executes the start node, looks at its returned Action from `post()`, follows the transition, and continues until there's no next node. 746 | 747 | ### Example: Simple Sequence 748 | 749 | Here's a minimal flow of two nodes in a chain: 750 | 751 | ```python 752 | node_a >> node_b 753 | flow = Flow(start=node_a) 754 | flow.run(shared) 755 | ``` 756 | 757 | - When you run the flow, it executes `node_a`. 758 | - Suppose `node_a.post()` returns `"default"`. 759 | - The flow then sees `"default"` Action is linked to `node_b` and runs `node_b`. 
760 | - `node_b.post()` returns `"default"` but we didn't define `node_b >> something_else`. So the flow ends there. 761 | 762 | ### Example: Branching & Looping 763 | 764 | Here's a simple expense approval flow that demonstrates branching and looping. The `ReviewExpense` node can return three possible Actions: 765 | 766 | - `"approved"`: expense is approved, move to payment processing 767 | - `"needs_revision"`: expense needs changes, send back for revision 768 | - `"rejected"`: expense is denied, finish the process 769 | 770 | We can wire them like this: 771 | 772 | ```python 773 | # Define the flow connections 774 | review - "approved" >> payment # If approved, process payment 775 | review - "needs_revision" >> revise # If needs changes, go to revision 776 | review - "rejected" >> finish # If rejected, finish the process 777 | 778 | revise >> review # After revision, go back for another review 779 | payment >> finish # After payment, finish the process 780 | 781 | flow = Flow(start=review) 782 | ``` 783 | 784 | Let's see how it flows: 785 | 786 | 1. If `review.post()` returns `"approved"`, the expense moves to the `payment` node 787 | 2. If `review.post()` returns `"needs_revision"`, it goes to the `revise` node, which then loops back to `review` 788 | 3. If `review.post()` returns `"rejected"`, it moves to the `finish` node and stops 789 | 790 | ```mermaid 791 | flowchart TD 792 | review[Review Expense] -->|approved| payment[Process Payment] 793 | review -->|needs_revision| revise[Revise Report] 794 | review -->|rejected| finish[Finish Process] 795 | 796 | revise --> review 797 | payment --> finish 798 | ``` 799 | 800 | ### Running Individual Nodes vs. Running a Flow 801 | 802 | - `node.run(shared)`: Just runs that node alone (calls `prep->exec->post()`), returns an Action. 803 | - `flow.run(shared)`: Executes from the start node, follows Actions to the next node, and so on until the flow can't continue. 804 | 805 | > `node.run(shared)` **does not** proceed to the successor. 806 | > This is mainly for debugging or testing a single node. 807 | > 808 | > Always use `flow.run(...)` in production to ensure the full pipeline runs correctly. 809 | {: .warning } 810 | 811 | ## 3. Nested Flows 812 | 813 | A **Flow** can act like a Node, which enables powerful composition patterns. This means you can: 814 | 815 | 1. Use a Flow as a Node within another Flow's transitions. 816 | 2. Combine multiple smaller Flows into a larger Flow for reuse. 817 | 3. Node `params` will be a merging of **all** parents' `params`. 818 | 819 | ### Flow's Node Methods 820 | 821 | A **Flow** is also a **Node**, so it will run `prep()` and `post()`. However: 822 | 823 | - It **won't** run `exec()`, as its main logic is to orchestrate its nodes. 824 | - `post()` always receives `None` for `exec_res` and should instead get the flow execution results from the shared store. 825 | 826 | ### Basic Flow Nesting 827 | 828 | Here's how to connect a flow to another node: 829 | 830 | ```python 831 | # Create a sub-flow 832 | node_a >> node_b 833 | subflow = Flow(start=node_a) 834 | 835 | # Connect it to another node 836 | subflow >> node_c 837 | 838 | # Create the parent flow 839 | parent_flow = Flow(start=subflow) 840 | ``` 841 | 842 | When `parent_flow.run()` executes: 843 | 1. It starts `subflow` 844 | 2. `subflow` runs through its nodes (`node_a->node_b`) 845 | 3. 
After `subflow` completes, execution continues to `node_c` 846 | 847 | ### Example: Order Processing Pipeline 848 | 849 | Here's a practical example that breaks down order processing into nested flows: 850 | 851 | ```python 852 | # Payment processing sub-flow 853 | validate_payment >> process_payment >> payment_confirmation 854 | payment_flow = Flow(start=validate_payment) 855 | 856 | # Inventory sub-flow 857 | check_stock >> reserve_items >> update_inventory 858 | inventory_flow = Flow(start=check_stock) 859 | 860 | # Shipping sub-flow 861 | create_label >> assign_carrier >> schedule_pickup 862 | shipping_flow = Flow(start=create_label) 863 | 864 | # Connect the flows into a main order pipeline 865 | payment_flow >> inventory_flow >> shipping_flow 866 | 867 | # Create the master flow 868 | order_pipeline = Flow(start=payment_flow) 869 | 870 | # Run the entire pipeline 871 | order_pipeline.run(shared_data) 872 | ``` 873 | 874 | This creates a clean separation of concerns while maintaining a clear execution path: 875 | 876 | ```mermaid 877 | flowchart LR 878 | subgraph order_pipeline[Order Pipeline] 879 | subgraph paymentFlow["Payment Flow"] 880 | A[Validate Payment] --> B[Process Payment] --> C[Payment Confirmation] 881 | end 882 | 883 | subgraph inventoryFlow["Inventory Flow"] 884 | D[Check Stock] --> E[Reserve Items] --> F[Update Inventory] 885 | end 886 | 887 | subgraph shippingFlow["Shipping Flow"] 888 | G[Create Label] --> H[Assign Carrier] --> I[Schedule Pickup] 889 | end 890 | 891 | paymentFlow --> inventoryFlow 892 | inventoryFlow --> shippingFlow 893 | end 894 | ``` 895 | 896 | ================================================ 897 | File: docs/core_abstraction/node.md 898 | ================================================ 899 | --- 900 | layout: default 901 | title: "Node" 902 | parent: "Core Abstraction" 903 | nav_order: 1 904 | --- 905 | 906 | # Node 907 | 908 | A **Node** is the smallest building block. Each Node has 3 steps `prep->exec->post`: 909 | 910 |
911 | 912 |
913 | 914 | 1. `prep(shared)` 915 | - **Read and preprocess data** from `shared` store. 916 | - Examples: *query DB, read files, or serialize data into a string*. 917 | - Return `prep_res`, which is used by `exec()` and `post()`. 918 | 919 | 2. `exec(prep_res)` 920 | - **Execute compute logic**, with optional retries and error handling (below). 921 | - Examples: *(mostly) LLM calls, remote APIs, tool use*. 922 | - ⚠️ This shall be only for compute and **NOT** access `shared`. 923 | - ⚠️ If retries enabled, ensure idempotent implementation. 924 | - ⚠️ Defer exception handling to the Node's built-in retry mechanism. 925 | - Return `exec_res`, which is passed to `post()`. 926 | 927 | 3. `post(shared, prep_res, exec_res)` 928 | - **Postprocess and write data** back to `shared`. 929 | - Examples: *update DB, change states, log results*. 930 | - **Decide the next action** by returning a *string* (`action = "default"` if *None*). 931 | 932 | > **Why 3 steps?** To enforce the principle of *separation of concerns*. The data storage and data processing are operated separately. 933 | > 934 | > All steps are *optional*. E.g., you can only implement `prep` and `post` if you just need to process data. 935 | {: .note } 936 | 937 | ### Fault Tolerance & Retries 938 | 939 | You can **retry** `exec()` if it raises an exception via two parameters when define the Node: 940 | 941 | - `max_retries` (int): Max times to run `exec()`. The default is `1` (**no** retry). 942 | - `wait` (int): The time to wait (in **seconds**) before next retry. By default, `wait=0` (no waiting). 943 | `wait` is helpful when you encounter rate-limits or quota errors from your LLM provider and need to back off. 944 | 945 | ```python 946 | my_node = SummarizeFile(max_retries=3, wait=10) 947 | ``` 948 | 949 | When an exception occurs in `exec()`, the Node automatically retries until: 950 | 951 | - It either succeeds, or 952 | - The Node has retried `max_retries - 1` times already and fails on the last attempt. 953 | 954 | You can get the current retry times (0-based) from `self.cur_retry`. 955 | 956 | ```python 957 | class RetryNode(Node): 958 | def exec(self, prep_res): 959 | print(f"Retry {self.cur_retry} times") 960 | raise Exception("Failed") 961 | ``` 962 | 963 | ### Graceful Fallback 964 | 965 | To **gracefully handle** the exception (after all retries) rather than raising it, override: 966 | 967 | ```python 968 | def exec_fallback(self, prep_res, exc): 969 | raise exc 970 | ``` 971 | 972 | By default, it just re-raises exception. But you can return a fallback result instead, which becomes the `exec_res` passed to `post()`. 973 | 974 | ### Example: Summarize file 975 | 976 | ```python 977 | class SummarizeFile(Node): 978 | def prep(self, shared): 979 | return shared["data"] 980 | 981 | def exec(self, prep_res): 982 | if not prep_res: 983 | return "Empty file content" 984 | prompt = f"Summarize this text in 10 words: {prep_res}" 985 | summary = call_llm(prompt) # might fail 986 | return summary 987 | 988 | def exec_fallback(self, prep_res, exc): 989 | # Provide a simple fallback instead of crashing 990 | return "There was an error processing your request." 
991 | 992 | def post(self, shared, prep_res, exec_res): 993 | shared["summary"] = exec_res 994 | # Return "default" by not returning 995 | 996 | summarize_node = SummarizeFile(max_retries=3) 997 | 998 | # node.run() calls prep->exec->post 999 | # If exec() fails, it retries up to 3 times before calling exec_fallback() 1000 | action_result = summarize_node.run(shared) 1001 | 1002 | print("Action returned:", action_result) # "default" 1003 | print("Summary stored:", shared["summary"]) 1004 | ``` 1005 | 1006 | ================================================ 1007 | File: docs/core_abstraction/parallel.md 1008 | ================================================ 1009 | --- 1010 | layout: default 1011 | title: "(Advanced) Parallel" 1012 | parent: "Core Abstraction" 1013 | nav_order: 6 1014 | --- 1015 | 1016 | # (Advanced) Parallel 1017 | 1018 | **Parallel** Nodes and Flows let you run multiple **Async** Nodes and Flows **concurrently**—for example, summarizing multiple texts at once. This can improve performance by overlapping I/O and compute. 1019 | 1020 | > Because of Python’s GIL, parallel nodes and flows can’t truly parallelize CPU-bound tasks (e.g., heavy numerical computations). However, they excel at overlapping I/O-bound work—like LLM calls, database queries, API requests, or file I/O. 1021 | {: .warning } 1022 | 1023 | > - **Ensure Tasks Are Independent**: If each item depends on the output of a previous item, **do not** parallelize. 1024 | > 1025 | > - **Beware of Rate Limits**: Parallel calls can **quickly** trigger rate limits on LLM services. You may need a **throttling** mechanism (e.g., semaphores or sleep intervals). 1026 | > 1027 | > - **Consider Single-Node Batch APIs**: Some LLMs offer a **batch inference** API where you can send multiple prompts in a single call. This is more complex to implement but can be more efficient than launching many parallel requests and mitigates rate limits. 1028 | {: .best-practice } 1029 | 1030 | ## AsyncParallelBatchNode 1031 | 1032 | Like **AsyncBatchNode**, but run `exec_async()` in **parallel**: 1033 | 1034 | ```python 1035 | class ParallelSummaries(AsyncParallelBatchNode): 1036 | async def prep_async(self, shared): 1037 | # e.g., multiple texts 1038 | return shared["texts"] 1039 | 1040 | async def exec_async(self, text): 1041 | prompt = f"Summarize: {text}" 1042 | return await call_llm_async(prompt) 1043 | 1044 | async def post_async(self, shared, prep_res, exec_res_list): 1045 | shared["summary"] = "\n\n".join(exec_res_list) 1046 | return "default" 1047 | 1048 | node = ParallelSummaries() 1049 | flow = AsyncFlow(start=node) 1050 | ``` 1051 | 1052 | ## AsyncParallelBatchFlow 1053 | 1054 | Parallel version of **BatchFlow**. 
Each iteration of the sub-flow runs **concurrently** using different parameters: 1055 | 1056 | ```python 1057 | class SummarizeMultipleFiles(AsyncParallelBatchFlow): 1058 | async def prep_async(self, shared): 1059 | return [{"filename": f} for f in shared["files"]] 1060 | 1061 | sub_flow = AsyncFlow(start=LoadAndSummarizeFile()) 1062 | parallel_flow = SummarizeMultipleFiles(start=sub_flow) 1063 | await parallel_flow.run_async(shared) 1064 | ``` 1065 | 1066 | ================================================ 1067 | File: docs/design_pattern/agent.md 1068 | ================================================ 1069 | --- 1070 | layout: default 1071 | title: "Agent" 1072 | parent: "Design Pattern" 1073 | nav_order: 1 1074 | --- 1075 | 1076 | # Agent 1077 | 1078 | Agent is a powerful design pattern in which nodes can take dynamic actions based on the context. 1079 | 1080 |
1083 | 1084 | ## Implement Agent with Graph 1085 | 1086 | 1. **Context and Action:** Implement nodes that supply context and perform actions. 1087 | 2. **Branching:** Use branching to connect each action node to an agent node. Use action to allow the agent to direct the [flow](../core_abstraction/flow.md) between nodes—and potentially loop back for multi-step. 1088 | 3. **Agent Node:** Provide a prompt to decide action—for example: 1089 | 1090 | ```python 1091 | f""" 1092 | ### CONTEXT 1093 | Task: {task_description} 1094 | Previous Actions: {previous_actions} 1095 | Current State: {current_state} 1096 | 1097 | ### ACTION SPACE 1098 | [1] search 1099 | Description: Use web search to get results 1100 | Parameters: 1101 | - query (str): What to search for 1102 | 1103 | [2] answer 1104 | Description: Conclude based on the results 1105 | Parameters: 1106 | - result (str): Final answer to provide 1107 | 1108 | ### NEXT ACTION 1109 | Decide the next action based on the current context and available action space. 1110 | Return your response in the following format: 1111 | 1112 | ```yaml 1113 | thinking: | 1114 | 1115 | action: 1116 | parameters: 1117 | : 1118 | ```""" 1119 | ``` 1120 | 1121 | The core of building **high-performance** and **reliable** agents boils down to: 1122 | 1123 | 1. **Context Management:** Provide *relevant, minimal context.* For example, rather than including an entire chat history, retrieve the most relevant via [RAG](./rag.md). Even with larger context windows, LLMs still fall victim to ["lost in the middle"](https://arxiv.org/abs/2307.03172), overlooking mid-prompt content. 1124 | 1125 | 2. **Action Space:** Provide *a well-structured and unambiguous* set of actions—avoiding overlap like separate `read_databases` or `read_csvs`. Instead, import CSVs into the database. 1126 | 1127 | ## Example Good Action Design 1128 | 1129 | - **Incremental:** Feed content in manageable chunks (500 lines or 1 page) instead of all at once. 1130 | 1131 | - **Overview-zoom-in:** First provide high-level structure (table of contents, summary), then allow drilling into details (raw texts). 1132 | 1133 | - **Parameterized/Programmable:** Instead of fixed actions, enable parameterized (columns to select) or programmable (SQL queries) actions, for example, to read CSV files. 1134 | 1135 | - **Backtracking:** Let the agent undo the last step instead of restarting entirely, preserving progress when encountering errors or dead ends. 1136 | 1137 | ## Example: Search Agent 1138 | 1139 | This agent: 1140 | 1. Decides whether to search or answer 1141 | 2. If searches, loops back to decide if more search needed 1142 | 3. 
Answers when enough context gathered 1143 | 1144 | ```python 1145 | class DecideAction(Node): 1146 | def prep(self, shared): 1147 | context = shared.get("context", "No previous search") 1148 | query = shared["query"] 1149 | return query, context 1150 | 1151 | def exec(self, inputs): 1152 | query, context = inputs 1153 | prompt = f""" 1154 | Given input: {query} 1155 | Previous search results: {context} 1156 | Should I: 1) Search web for more info 2) Answer with current knowledge 1157 | Output in yaml: 1158 | ```yaml 1159 | action: search/answer 1160 | reason: why this action 1161 | search_term: search phrase if action is search 1162 | ```""" 1163 | resp = call_llm(prompt) 1164 | yaml_str = resp.split("```yaml")[1].split("```")[0].strip() 1165 | result = yaml.safe_load(yaml_str) 1166 | 1167 | assert isinstance(result, dict) 1168 | assert "action" in result 1169 | assert "reason" in result 1170 | assert result["action"] in ["search", "answer"] 1171 | if result["action"] == "search": 1172 | assert "search_term" in result 1173 | 1174 | return result 1175 | 1176 | def post(self, shared, prep_res, exec_res): 1177 | if exec_res["action"] == "search": 1178 | shared["search_term"] = exec_res["search_term"] 1179 | return exec_res["action"] 1180 | 1181 | class SearchWeb(Node): 1182 | def prep(self, shared): 1183 | return shared["search_term"] 1184 | 1185 | def exec(self, search_term): 1186 | return search_web(search_term) 1187 | 1188 | def post(self, shared, prep_res, exec_res): 1189 | prev_searches = shared.get("context", []) 1190 | shared["context"] = prev_searches + [ 1191 | {"term": shared["search_term"], "result": exec_res} 1192 | ] 1193 | return "decide" 1194 | 1195 | class DirectAnswer(Node): 1196 | def prep(self, shared): 1197 | return shared["query"], shared.get("context", "") 1198 | 1199 | def exec(self, inputs): 1200 | query, context = inputs 1201 | return call_llm(f"Context: {context}\nAnswer: {query}") 1202 | 1203 | def post(self, shared, prep_res, exec_res): 1204 | print(f"Answer: {exec_res}") 1205 | shared["answer"] = exec_res 1206 | 1207 | # Connect nodes 1208 | decide = DecideAction() 1209 | search = SearchWeb() 1210 | answer = DirectAnswer() 1211 | 1212 | decide - "search" >> search 1213 | decide - "answer" >> answer 1214 | search - "decide" >> decide # Loop back 1215 | 1216 | flow = Flow(start=decide) 1217 | flow.run({"query": "Who won the Nobel Prize in Physics 2024?"}) 1218 | ``` 1219 | 1220 | ================================================ 1221 | File: docs/design_pattern/mapreduce.md 1222 | ================================================ 1223 | --- 1224 | layout: default 1225 | title: "Map Reduce" 1226 | parent: "Design Pattern" 1227 | nav_order: 4 1228 | --- 1229 | 1230 | # Map Reduce 1231 | 1232 | MapReduce is a design pattern suitable when you have either: 1233 | - Large input data (e.g., multiple files to process), or 1234 | - Large output data (e.g., multiple forms to fill) 1235 | 1236 | and there is a logical way to break the task into smaller, ideally independent parts. 1237 | 1238 |
1241 | 1242 | You first break down the task using [BatchNode](../core_abstraction/batch.md) in the map phase, followed by aggregation in the reduce phase. 1243 | 1244 | ### Example: Document Summarization 1245 | 1246 | ```python 1247 | class SummarizeAllFiles(BatchNode): 1248 | def prep(self, shared): 1249 | files_dict = shared["files"] # e.g. 10 files 1250 | return list(files_dict.items()) # [("file1.txt", "aaa..."), ("file2.txt", "bbb..."), ...] 1251 | 1252 | def exec(self, one_file): 1253 | filename, file_content = one_file 1254 | summary_text = call_llm(f"Summarize the following file:\n{file_content}") 1255 | return (filename, summary_text) 1256 | 1257 | def post(self, shared, prep_res, exec_res_list): 1258 | shared["file_summaries"] = dict(exec_res_list) 1259 | 1260 | class CombineSummaries(Node): 1261 | def prep(self, shared): 1262 | return shared["file_summaries"] 1263 | 1264 | def exec(self, file_summaries): 1265 | # format as: "File1: summary\nFile2: summary...\n" 1266 | text_list = [] 1267 | for fname, summ in file_summaries.items(): 1268 | text_list.append(f"{fname} summary:\n{summ}\n") 1269 | big_text = "\n---\n".join(text_list) 1270 | 1271 | return call_llm(f"Combine these file summaries into one final summary:\n{big_text}") 1272 | 1273 | def post(self, shared, prep_res, final_summary): 1274 | shared["all_files_summary"] = final_summary 1275 | 1276 | batch_node = SummarizeAllFiles() 1277 | combine_node = CombineSummaries() 1278 | batch_node >> combine_node 1279 | 1280 | flow = Flow(start=batch_node) 1281 | 1282 | shared = { 1283 | "files": { 1284 | "file1.txt": "Alice was beginning to get very tired of sitting by her sister...", 1285 | "file2.txt": "Some other interesting text ...", 1286 | # ... 1287 | } 1288 | } 1289 | flow.run(shared) 1290 | print("Individual Summaries:", shared["file_summaries"]) 1291 | print("\nFinal Summary:\n", shared["all_files_summary"]) 1292 | ``` 1293 | 1294 | ================================================ 1295 | File: docs/design_pattern/rag.md 1296 | ================================================ 1297 | --- 1298 | layout: default 1299 | title: "RAG" 1300 | parent: "Design Pattern" 1301 | nav_order: 3 1302 | --- 1303 | 1304 | # RAG (Retrieval Augmented Generation) 1305 | 1306 | For certain LLM tasks like answering questions, providing relevant context is essential. One common architecture is a **two-stage** RAG pipeline: 1307 | 1308 |
1311 | 1312 | 1. **Offline stage**: Preprocess and index documents ("building the index"). 1313 | 2. **Online stage**: Given a question, generate answers by retrieving the most relevant context. 1314 | 1315 | --- 1316 | ## Stage 1: Offline Indexing 1317 | 1318 | We create three Nodes: 1319 | 1. `ChunkDocs` – [chunks](../utility_function/chunking.md) raw text. 1320 | 2. `EmbedDocs` – [embeds](../utility_function/embedding.md) each chunk. 1321 | 3. `StoreIndex` – stores embeddings into a [vector database](../utility_function/vector.md). 1322 | 1323 | ```python 1324 | class ChunkDocs(BatchNode): 1325 | def prep(self, shared): 1326 | # A list of file paths in shared["files"]. We process each file. 1327 | return shared["files"] 1328 | 1329 | def exec(self, filepath): 1330 | # read file content. In real usage, do error handling. 1331 | with open(filepath, "r", encoding="utf-8") as f: 1332 | text = f.read() 1333 | # chunk by 100 chars each 1334 | chunks = [] 1335 | size = 100 1336 | for i in range(0, len(text), size): 1337 | chunks.append(text[i : i + size]) 1338 | return chunks 1339 | 1340 | def post(self, shared, prep_res, exec_res_list): 1341 | # exec_res_list is a list of chunk-lists, one per file. 1342 | # flatten them all into a single list of chunks. 1343 | all_chunks = [] 1344 | for chunk_list in exec_res_list: 1345 | all_chunks.extend(chunk_list) 1346 | shared["all_chunks"] = all_chunks 1347 | 1348 | class EmbedDocs(BatchNode): 1349 | def prep(self, shared): 1350 | return shared["all_chunks"] 1351 | 1352 | def exec(self, chunk): 1353 | return get_embedding(chunk) 1354 | 1355 | def post(self, shared, prep_res, exec_res_list): 1356 | # Store the list of embeddings. 1357 | shared["all_embeds"] = exec_res_list 1358 | print(f"Total embeddings: {len(exec_res_list)}") 1359 | 1360 | class StoreIndex(Node): 1361 | def prep(self, shared): 1362 | # We'll read all embeds from shared. 1363 | return shared["all_embeds"] 1364 | 1365 | def exec(self, all_embeds): 1366 | # Create a vector index (faiss or other DB in real usage). 1367 | index = create_index(all_embeds) 1368 | return index 1369 | 1370 | def post(self, shared, prep_res, index): 1371 | shared["index"] = index 1372 | 1373 | # Wire them in sequence 1374 | chunk_node = ChunkDocs() 1375 | embed_node = EmbedDocs() 1376 | store_node = StoreIndex() 1377 | 1378 | chunk_node >> embed_node >> store_node 1379 | 1380 | OfflineFlow = Flow(start=chunk_node) 1381 | ``` 1382 | 1383 | Usage example: 1384 | 1385 | ```python 1386 | shared = { 1387 | "files": ["doc1.txt", "doc2.txt"], # any text files 1388 | } 1389 | OfflineFlow.run(shared) 1390 | ``` 1391 | 1392 | --- 1393 | ## Stage 2: Online Query & Answer 1394 | 1395 | We have 3 nodes: 1396 | 1. `EmbedQuery` – embeds the user’s question. 1397 | 2. `RetrieveDocs` – retrieves top chunk from the index. 1398 | 3. `GenerateAnswer` – calls the LLM with the question + chunk to produce the final answer. 
1399 | 1400 | ```python 1401 | class EmbedQuery(Node): 1402 | def prep(self, shared): 1403 | return shared["question"] 1404 | 1405 | def exec(self, question): 1406 | return get_embedding(question) 1407 | 1408 | def post(self, shared, prep_res, q_emb): 1409 | shared["q_emb"] = q_emb 1410 | 1411 | class RetrieveDocs(Node): 1412 | def prep(self, shared): 1413 | # We'll need the query embedding, plus the offline index/chunks 1414 | return shared["q_emb"], shared["index"], shared["all_chunks"] 1415 | 1416 | def exec(self, inputs): 1417 | q_emb, index, chunks = inputs 1418 | I, D = search_index(index, q_emb, top_k=1) 1419 | best_id = I[0][0] 1420 | relevant_chunk = chunks[best_id] 1421 | return relevant_chunk 1422 | 1423 | def post(self, shared, prep_res, relevant_chunk): 1424 | shared["retrieved_chunk"] = relevant_chunk 1425 | print("Retrieved chunk:", relevant_chunk[:60], "...") 1426 | 1427 | class GenerateAnswer(Node): 1428 | def prep(self, shared): 1429 | return shared["question"], shared["retrieved_chunk"] 1430 | 1431 | def exec(self, inputs): 1432 | question, chunk = inputs 1433 | prompt = f"Question: {question}\nContext: {chunk}\nAnswer:" 1434 | return call_llm(prompt) 1435 | 1436 | def post(self, shared, prep_res, answer): 1437 | shared["answer"] = answer 1438 | print("Answer:", answer) 1439 | 1440 | embed_qnode = EmbedQuery() 1441 | retrieve_node = RetrieveDocs() 1442 | generate_node = GenerateAnswer() 1443 | 1444 | embed_qnode >> retrieve_node >> generate_node 1445 | OnlineFlow = Flow(start=embed_qnode) 1446 | ``` 1447 | 1448 | Usage example: 1449 | 1450 | ```python 1451 | # Suppose we already ran OfflineFlow and have: 1452 | # shared["all_chunks"], shared["index"], etc. 1453 | shared["question"] = "Why do people like cats?" 1454 | 1455 | OnlineFlow.run(shared) 1456 | # final answer in shared["answer"] 1457 | ``` 1458 | 1459 | ================================================ 1460 | File: docs/design_pattern/structure.md 1461 | ================================================ 1462 | --- 1463 | layout: default 1464 | title: "Structured Output" 1465 | parent: "Design Pattern" 1466 | nav_order: 5 1467 | --- 1468 | 1469 | # Structured Output 1470 | 1471 | In many use cases, you may want the LLM to output a specific structure, such as a list or a dictionary with predefined keys. 1472 | 1473 | There are several approaches to achieve a structured output: 1474 | - **Prompting** the LLM to strictly return a defined structure. 1475 | - Using LLMs that natively support **schema enforcement**. 1476 | - **Post-processing** the LLM's response to extract structured content. 1477 | 1478 | In practice, **Prompting** is simple and reliable for modern LLMs. 1479 | 1480 | ### Example Use Cases 1481 | 1482 | - Extracting Key Information 1483 | 1484 | ```yaml 1485 | product: 1486 | name: Widget Pro 1487 | price: 199.99 1488 | description: | 1489 | A high-quality widget designed for professionals. 1490 | Recommended for advanced users. 1491 | ``` 1492 | 1493 | - Summarizing Documents into Bullet Points 1494 | 1495 | ```yaml 1496 | summary: 1497 | - This product is easy to use. 1498 | - It is cost-effective. 1499 | - Suitable for all skill levels. 1500 | ``` 1501 | 1502 | - Generating Configuration Files 1503 | 1504 | ```yaml 1505 | server: 1506 | host: 127.0.0.1 1507 | port: 8080 1508 | ssl: true 1509 | ``` 1510 | 1511 | ## Prompt Engineering 1512 | 1513 | When prompting the LLM to produce **structured** output: 1514 | 1. **Wrap** the structure in code fences (e.g., `yaml`). 1515 | 2. 
**Validate** that all required fields exist (and let `Node` handles retry). 1516 | 1517 | ### Example Text Summarization 1518 | 1519 | ```python 1520 | class SummarizeNode(Node): 1521 | def exec(self, prep_res): 1522 | # Suppose `prep_res` is the text to summarize. 1523 | prompt = f""" 1524 | Please summarize the following text as YAML, with exactly 3 bullet points 1525 | 1526 | {prep_res} 1527 | 1528 | Now, output: 1529 | ```yaml 1530 | summary: 1531 | - bullet 1 1532 | - bullet 2 1533 | - bullet 3 1534 | ```""" 1535 | response = call_llm(prompt) 1536 | yaml_str = response.split("```yaml")[1].split("```")[0].strip() 1537 | 1538 | import yaml 1539 | structured_result = yaml.safe_load(yaml_str) 1540 | 1541 | assert "summary" in structured_result 1542 | assert isinstance(structured_result["summary"], list) 1543 | 1544 | return structured_result 1545 | ``` 1546 | 1547 | > Besides using `assert` statements, another popular way to validate schemas is [Pydantic](https://github.com/pydantic/pydantic) 1548 | {: .note } 1549 | 1550 | ### Why YAML instead of JSON? 1551 | 1552 | Current LLMs struggle with escaping. YAML is easier with strings since they don't always need quotes. 1553 | 1554 | **In JSON** 1555 | 1556 | ```json 1557 | { 1558 | "dialogue": "Alice said: \"Hello Bob.\\nHow are you?\\nI am good.\"" 1559 | } 1560 | ``` 1561 | 1562 | - Every double quote inside the string must be escaped with `\"`. 1563 | - Each newline in the dialogue must be represented as `\n`. 1564 | 1565 | **In YAML** 1566 | 1567 | ```yaml 1568 | dialogue: | 1569 | Alice said: "Hello Bob. 1570 | How are you? 1571 | I am good." 1572 | ``` 1573 | 1574 | - No need to escape interior quotes—just place the entire text under a block literal (`|`). 1575 | - Newlines are naturally preserved without needing `\n`. 1576 | 1577 | ================================================ 1578 | File: docs/design_pattern/workflow.md 1579 | ================================================ 1580 | --- 1581 | layout: default 1582 | title: "Workflow" 1583 | parent: "Design Pattern" 1584 | nav_order: 2 1585 | --- 1586 | 1587 | # Workflow 1588 | 1589 | Many real-world tasks are too complex for one LLM call. The solution is to **Task Decomposition**: decompose them into a [chain](../core_abstraction/flow.md) of multiple Nodes. 1590 | 1591 |
1594 | 1595 | > - You don't want to make each task **too coarse**, because it may be *too complex for one LLM call*. 1596 | > - You don't want to make each task **too granular**, because then *the LLM call doesn't have enough context* and results are *not consistent across nodes*. 1597 | > 1598 | > You usually need multiple *iterations* to find the *sweet spot*. If the task has too many *edge cases*, consider using [Agents](./agent.md). 1599 | {: .best-practice } 1600 | 1601 | ### Example: Article Writing 1602 | 1603 | ```python 1604 | class GenerateOutline(Node): 1605 | def prep(self, shared): return shared["topic"] 1606 | def exec(self, topic): return call_llm(f"Create a detailed outline for an article about {topic}") 1607 | def post(self, shared, prep_res, exec_res): shared["outline"] = exec_res 1608 | 1609 | class WriteSection(Node): 1610 | def prep(self, shared): return shared["outline"] 1611 | def exec(self, outline): return call_llm(f"Write content based on this outline: {outline}") 1612 | def post(self, shared, prep_res, exec_res): shared["draft"] = exec_res 1613 | 1614 | class ReviewAndRefine(Node): 1615 | def prep(self, shared): return shared["draft"] 1616 | def exec(self, draft): return call_llm(f"Review and improve this draft: {draft}") 1617 | def post(self, shared, prep_res, exec_res): shared["final_article"] = exec_res 1618 | 1619 | # Connect nodes 1620 | outline = GenerateOutline() 1621 | write = WriteSection() 1622 | review = ReviewAndRefine() 1623 | 1624 | outline >> write >> review 1625 | 1626 | # Create and run flow 1627 | writing_flow = Flow(start=outline) 1628 | shared = {"topic": "AI Safety"} 1629 | writing_flow.run(shared) 1630 | ``` 1631 | 1632 | For *dynamic cases*, consider using [Agents](./agent.md). 1633 | 1634 | ================================================ 1635 | File: docs/utility_function/llm.md 1636 | ================================================ 1637 | --- 1638 | layout: default 1639 | title: "LLM Wrapper" 1640 | parent: "Utility Function" 1641 | nav_order: 1 1642 | --- 1643 | 1644 | # LLM Wrappers 1645 | 1646 | Check out libraries like [litellm](https://github.com/BerriAI/litellm). 1647 | Here, we provide some minimal example implementations: 1648 | 1649 | 1. OpenAI 1650 | ```python 1651 | def call_llm(prompt): 1652 | from openai import OpenAI 1653 | client = OpenAI(api_key="YOUR_API_KEY_HERE") 1654 | r = client.chat.completions.create( 1655 | model="gpt-4o", 1656 | messages=[{"role": "user", "content": prompt}] 1657 | ) 1658 | return r.choices[0].message.content 1659 | 1660 | # Example usage 1661 | call_llm("How are you?") 1662 | ``` 1663 | > Store the API key in an environment variable like OPENAI_API_KEY for security. 1664 | {: .best-practice } 1665 | 1666 | 2. Claude (Anthropic) 1667 | ```python 1668 | def call_llm(prompt): 1669 | from anthropic import Anthropic 1670 | client = Anthropic(api_key="YOUR_API_KEY_HERE") 1671 | r = client.messages.create( 1672 | model="claude-sonnet-4-0", 1673 | messages=[ 1674 | {"role": "user", "content": prompt} 1675 | ] 1676 | ) 1677 | return r.content[0].text 1678 | ``` 1679 | 1680 | 3. Google (Generative AI Studio / PaLM API) 1681 | ```python 1682 | def call_llm(prompt): 1683 | from google import genai 1684 | client = genai.Client(api_key='GEMINI_API_KEY') 1685 | response = client.models.generate_content( 1686 | model='gemini-2.5-pro', 1687 | contents=prompt 1688 | ) 1689 | return response.text 1690 | ``` 1691 | 1692 | 4. 
Azure (Azure OpenAI) 1693 | ```python 1694 | def call_llm(prompt): 1695 | from openai import AzureOpenAI 1696 | client = AzureOpenAI( 1697 | azure_endpoint="https://.openai.azure.com/", 1698 | api_key="YOUR_API_KEY_HERE", 1699 | api_version="2023-05-15" 1700 | ) 1701 | r = client.chat.completions.create( 1702 | model="", 1703 | messages=[{"role": "user", "content": prompt}] 1704 | ) 1705 | return r.choices[0].message.content 1706 | ``` 1707 | 1708 | 5. Ollama (Local LLM) 1709 | ```python 1710 | def call_llm(prompt): 1711 | from ollama import chat 1712 | response = chat( 1713 | model="llama2", 1714 | messages=[{"role": "user", "content": prompt}] 1715 | ) 1716 | return response.message.content 1717 | ``` 1718 | 1719 | ## Improvements 1720 | Feel free to enhance your `call_llm` function as needed. Here are examples: 1721 | 1722 | - Handle chat history: 1723 | 1724 | ```python 1725 | def call_llm(messages): 1726 | from openai import OpenAI 1727 | client = OpenAI(api_key="YOUR_API_KEY_HERE") 1728 | r = client.chat.completions.create( 1729 | model="gpt-4o", 1730 | messages=messages 1731 | ) 1732 | return r.choices[0].message.content 1733 | ``` 1734 | 1735 | - Add in-memory caching 1736 | 1737 | ```python 1738 | from functools import lru_cache 1739 | 1740 | @lru_cache(maxsize=1000) 1741 | def call_llm(prompt): 1742 | # Your implementation here 1743 | pass 1744 | ``` 1745 | 1746 | > ⚠️ Caching conflicts with Node retries, as retries yield the same result. 1747 | > 1748 | > To address this, you could use cached results only if not retried. 1749 | {: .warning } 1750 | 1751 | 1752 | ```python 1753 | from functools import lru_cache 1754 | 1755 | @lru_cache(maxsize=1000) 1756 | def cached_call(prompt): 1757 | pass 1758 | 1759 | def call_llm(prompt, use_cache): 1760 | if use_cache: 1761 | return cached_call(prompt) 1762 | # Call the underlying function directly 1763 | return cached_call.__wrapped__(prompt) 1764 | 1765 | class SummarizeNode(Node): 1766 | def exec(self, text): 1767 | return call_llm(f"Summarize: {text}", self.cur_retry==0) 1768 | ``` 1769 | 1770 | - Enable logging: 1771 | 1772 | ```python 1773 | def call_llm(prompt): 1774 | import logging 1775 | logging.info(f"Prompt: {prompt}") 1776 | response = ... # Your implementation here 1777 | logging.info(f"Response: {response}") 1778 | return response 1779 | ``` -------------------------------------------------------------------------------- /.goosehints: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: "Agentic Coding" 4 | --- 5 | 6 | # Agentic Coding: Humans Design, Agents code! 7 | 8 | > If you are an AI agent involved in building LLM Systems, read this guide **VERY, VERY** carefully! This is the most important chapter in the entire document. Throughout development, you should always (1) start with a small and simple solution, (2) design at a high level (`docs/design.md`) before implementation, and (3) frequently ask humans for feedback and clarification. 9 | {: .warning } 10 | 11 | ## Agentic Coding Steps 12 | 13 | Agentic Coding should be a collaboration between Human System Design and Agent Implementation: 14 | 15 | | Steps | Human | AI | Comment | 16 | |:-----------------------|:----------:|:---------:|:------------------------------------------------------------------------| 17 | | 1. Requirements | ★★★ High | ★☆☆ Low | Humans understand the requirements and context. | 18 | | 2. 
Flow | ★★☆ Medium | ★★☆ Medium | Humans specify the high-level design, and the AI fills in the details. | 19 | | 3. Utilities | ★★☆ Medium | ★★☆ Medium | Humans provide available external APIs and integrations, and the AI helps with implementation. | 20 | | 4. Data | ★☆☆ Low | ★★★ High | AI designs the data schema, and humans verify. | 21 | | 5. Node | ★☆☆ Low | ★★★ High | The AI helps design the node based on the flow. | 22 | | 6. Implementation | ★☆☆ Low | ★★★ High | The AI implements the flow based on the design. | 23 | | 7. Optimization | ★★☆ Medium | ★★☆ Medium | Humans evaluate the results, and the AI helps optimize. | 24 | | 8. Reliability | ★☆☆ Low | ★★★ High | The AI writes test cases and addresses corner cases. | 25 | 26 | 1. **Requirements**: Clarify the requirements for your project, and evaluate whether an AI system is a good fit. 27 | - Understand AI systems' strengths and limitations: 28 | - **Good for**: Routine tasks requiring common sense (filling forms, replying to emails) 29 | - **Good for**: Creative tasks with well-defined inputs (building slides, writing SQL) 30 | - **Not good for**: Ambiguous problems requiring complex decision-making (business strategy, startup planning) 31 | - **Keep It User-Centric:** Explain the "problem" from the user's perspective rather than just listing features. 32 | - **Balance complexity vs. impact**: Aim to deliver the highest value features with minimal complexity early. 33 | 34 | 2. **Flow Design**: Outline at a high level, describe how your AI system orchestrates nodes. 35 | - Identify applicable design patterns (e.g., [Map Reduce](./design_pattern/mapreduce.md), [Agent](./design_pattern/agent.md), [RAG](./design_pattern/rag.md)). 36 | - For each node in the flow, start with a high-level one-line description of what it does. 37 | - If using **Map Reduce**, specify how to map (what to split) and how to reduce (how to combine). 38 | - If using **Agent**, specify what are the inputs (context) and what are the possible actions. 39 | - If using **RAG**, specify what to embed, noting that there's usually both offline (indexing) and online (retrieval) workflows. 40 | - Outline the flow and draw it in a mermaid diagram. For example: 41 | ```mermaid 42 | flowchart LR 43 | start[Start] --> batch[Batch] 44 | batch --> check[Check] 45 | check -->|OK| process 46 | check -->|Error| fix[Fix] 47 | fix --> check 48 | 49 | subgraph process[Process] 50 | step1[Step 1] --> step2[Step 2] 51 | end 52 | 53 | process --> endNode[End] 54 | ``` 55 | - > **If Humans can't specify the flow, AI Agents can't automate it!** Before building an LLM system, thoroughly understand the problem and potential solution by manually solving example inputs to develop intuition. 56 | {: .best-practice } 57 | 58 | 3. **Utilities**: Based on the Flow Design, identify and implement necessary utility functions. 59 | - Think of your AI system as the brain. It needs a body—these *external utility functions*—to interact with the real world: 60 |
61 | 62 | - Reading inputs (e.g., retrieving Slack messages, reading emails) 63 | - Writing outputs (e.g., generating reports, sending emails) 64 | - Using external tools (e.g., calling LLMs, searching the web) 65 | - **NOTE**: *LLM-based tasks* (e.g., summarizing text, analyzing sentiment) are **NOT** utility functions; rather, they are *core functions* internal in the AI system. 66 | - For each utility function, implement it and write a simple test. 67 | - Document their input/output, as well as why they are necessary. For example: 68 | - `name`: `get_embedding` (`utils/get_embedding.py`) 69 | - `input`: `str` 70 | - `output`: a vector of 3072 floats 71 | - `necessity`: Used by the second node to embed text 72 | - Example utility implementation: 73 | ```python 74 | # utils/call_llm.py 75 | from openai import OpenAI 76 | 77 | def call_llm(prompt): 78 | client = OpenAI(api_key="YOUR_API_KEY_HERE") 79 | r = client.chat.completions.create( 80 | model="gpt-4o", 81 | messages=[{"role": "user", "content": prompt}] 82 | ) 83 | return r.choices[0].message.content 84 | 85 | if __name__ == "__main__": 86 | prompt = "What is the meaning of life?" 87 | print(call_llm(prompt)) 88 | ``` 89 | - > **Sometimes, design Utilities before Flow:** For example, for an LLM project to automate a legacy system, the bottleneck will likely be the available interface to that system. Start by designing the hardest utilities for interfacing, and then build the flow around them. 90 | {: .best-practice } 91 | - > **Avoid Exception Handling in Utilities**: If a utility function is called from a Node's `exec()` method, avoid using `try...except` blocks within the utility. Let the Node's built-in retry mechanism handle failures. 92 | {: .warning } 93 | 94 | 4. **Data Design**: Design the shared store that nodes will use to communicate. 95 | - One core design principle for PocketFlow is to use a well-designed [shared store](./core_abstraction/communication.md)—a data contract that all nodes agree upon to retrieve and store data. 96 | - For simple systems, use an in-memory dictionary. 97 | - For more complex systems or when persistence is required, use a database. 98 | - **Don't Repeat Yourself**: Use in-memory references or foreign keys. 99 | - Example shared store design: 100 | ```python 101 | shared = { 102 | "user": { 103 | "id": "user123", 104 | "context": { # Another nested dict 105 | "weather": {"temp": 72, "condition": "sunny"}, 106 | "location": "San Francisco" 107 | } 108 | }, 109 | "results": {} # Empty dict to store outputs 110 | } 111 | ``` 112 | 113 | 5. **Node Design**: Plan how each node will read and write data, and use utility functions. 114 | - For each [Node](./core_abstraction/node.md), describe its type, how it reads and writes data, and which utility function it uses. Keep it specific but high-level without codes. For example: 115 | - `type`: Regular (or Batch, or Async) 116 | - `prep`: Read "text" from the shared store 117 | - `exec`: Call the embedding utility function. **Avoid exception handling here**; let the Node's retry mechanism manage failures. 118 | - `post`: Write "embedding" to the shared store 119 | 120 | 6. **Implementation**: Implement the initial nodes and flows based on the design. 121 | - 🎉 If you've reached this step, humans have finished the design. Now *Agentic Coding* begins! 122 | - **"Keep it simple, stupid!"** Avoid complex features and full-scale type checking. 123 | - **FAIL FAST**! 
Leverage the built-in [Node](./core_abstraction/node.md) retry and fallback mechanisms to handle failures gracefully. This helps you quickly identify weak points in the system. 124 | - Add logging throughout the code to facilitate debugging. 125 | 126 | 7. **Optimization**: 127 | - **Use Intuition**: For a quick initial evaluation, human intuition is often a good start. 128 | - **Redesign Flow (Back to Step 3)**: Consider breaking down tasks further, introducing agentic decisions, or better managing input contexts. 129 | - If your flow design is already solid, move on to micro-optimizations: 130 | - **Prompt Engineering**: Use clear, specific instructions with examples to reduce ambiguity. 131 | - **In-Context Learning**: Provide robust examples for tasks that are difficult to specify with instructions alone. 132 | 133 | - > **You'll likely iterate a lot!** Expect to repeat Steps 3–6 hundreds of times. 134 | > 135 | >
136 | {: .best-practice } 137 | 138 | 8. **Reliability** 139 | - **Node Retries**: Add checks in the node `exec` to ensure outputs meet requirements, and consider increasing `max_retries` and `wait` times. 140 | - **Logging and Visualization**: Maintain logs of all attempts and visualize node results for easier debugging. 141 | - **Self-Evaluation**: Add a separate node (powered by an LLM) to review outputs when results are uncertain. 142 | 143 | ## Example LLM Project File Structure 144 | 145 | ``` 146 | my_project/ 147 | ├── main.py 148 | ├── nodes.py 149 | ├── flow.py 150 | ├── utils/ 151 | │ ├── __init__.py 152 | │ ├── call_llm.py 153 | │ └── search_web.py 154 | ├── requirements.txt 155 | └── docs/ 156 | └── design.md 157 | ``` 158 | 159 | - **`requirements.txt`**: Lists the Python dependencies for the project. 160 | ``` 161 | PyYAML 162 | pocketflow 163 | ``` 164 | 165 | - **`docs/design.md`**: Contains project documentation for each step above. This should be *high-level* and *no-code*. 166 | ~~~ 167 | # Design Doc: Your Project Name 168 | 169 | > Please DON'T remove notes for AI 170 | 171 | ## Requirements 172 | 173 | > Notes for AI: Keep it simple and clear. 174 | > If the requirements are abstract, write concrete user stories 175 | 176 | 177 | ## Flow Design 178 | 179 | > Notes for AI: 180 | > 1. Consider the design patterns of agent, map-reduce, rag, and workflow. Apply them if they fit. 181 | > 2. Present a concise, high-level description of the workflow. 182 | 183 | ### Applicable Design Pattern: 184 | 185 | 1. Map the file summary into chunks, then reduce these chunks into a final summary. 186 | 2. Agentic file finder 187 | - *Context*: The entire summary of the file 188 | - *Action*: Find the file 189 | 190 | ### Flow high-level Design: 191 | 192 | 1. **First Node**: This node is for ... 193 | 2. **Second Node**: This node is for ... 194 | 3. **Third Node**: This node is for ... 195 | 196 | ```mermaid 197 | flowchart TD 198 | firstNode[First Node] --> secondNode[Second Node] 199 | secondNode --> thirdNode[Third Node] 200 | ``` 201 | ## Utility Functions 202 | 203 | > Notes for AI: 204 | > 1. Understand the utility function definition thoroughly by reviewing the doc. 205 | > 2. Include only the necessary utility functions, based on nodes in the flow. 206 | 207 | 1. **Call LLM** (`utils/call_llm.py`) 208 | - *Input*: prompt (str) 209 | - *Output*: response (str) 210 | - Generally used by most nodes for LLM tasks 211 | 212 | 2. **Embedding** (`utils/get_embedding.py`) 213 | - *Input*: str 214 | - *Output*: a vector of 3072 floats 215 | - Used by the second node to embed text 216 | 217 | ## Node Design 218 | 219 | ### Shared Store 220 | 221 | > Notes for AI: Try to minimize data redundancy 222 | 223 | The shared store structure is organized as follows: 224 | 225 | ```python 226 | shared = { 227 | "key": "value" 228 | } 229 | ``` 230 | 231 | ### Node Steps 232 | 233 | > Notes for AI: Carefully decide whether to use Batch/Async Node/Flow. 234 | 235 | 1. First Node 236 | - *Purpose*: Provide a short explanation of the node’s function 237 | - *Type*: Decide between Regular, Batch, or Async 238 | - *Steps*: 239 | - *prep*: Read "key" from the shared store 240 | - *exec*: Call the utility function 241 | - *post*: Write "key" to the shared store 242 | 243 | 2. Second Node 244 | ... 245 | ~~~ 246 | 247 | 248 | - **`utils/`**: Contains all utility functions. 249 | - It's recommended to dedicate one Python file to each API call, for example `call_llm.py` or `search_web.py`. 
250 | - Each file should also include a `main()` function to try that API call 251 | ```python 252 | from google import genai 253 | import os 254 | 255 | def call_llm(prompt: str) -> str: 256 | client = genai.Client( 257 | api_key=os.getenv("GEMINI_API_KEY", ""), 258 | ) 259 | model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash") 260 | response = client.models.generate_content(model=model, contents=[prompt]) 261 | return response.text 262 | 263 | if __name__ == "__main__": 264 | test_prompt = "Hello, how are you?" 265 | 266 | # First call - should hit the API 267 | print("Making call...") 268 | response1 = call_llm(test_prompt, use_cache=False) 269 | print(f"Response: {response1}") 270 | ``` 271 | 272 | - **`nodes.py`**: Contains all the node definitions. 273 | ```python 274 | # nodes.py 275 | from pocketflow import Node 276 | from utils.call_llm import call_llm 277 | 278 | class GetQuestionNode(Node): 279 | def exec(self, _): 280 | # Get question directly from user input 281 | user_question = input("Enter your question: ") 282 | return user_question 283 | 284 | def post(self, shared, prep_res, exec_res): 285 | # Store the user's question 286 | shared["question"] = exec_res 287 | return "default" # Go to the next node 288 | 289 | class AnswerNode(Node): 290 | def prep(self, shared): 291 | # Read question from shared 292 | return shared["question"] 293 | 294 | def exec(self, question): 295 | # Call LLM to get the answer 296 | return call_llm(question) 297 | 298 | def post(self, shared, prep_res, exec_res): 299 | # Store the answer in shared 300 | shared["answer"] = exec_res 301 | ``` 302 | - **`flow.py`**: Implements functions that create flows by importing node definitions and connecting them. 303 | ```python 304 | # flow.py 305 | from pocketflow import Flow 306 | from nodes import GetQuestionNode, AnswerNode 307 | 308 | def create_qa_flow(): 309 | """Create and return a question-answering flow.""" 310 | # Create nodes 311 | get_question_node = GetQuestionNode() 312 | answer_node = AnswerNode() 313 | 314 | # Connect nodes in sequence 315 | get_question_node >> answer_node 316 | 317 | # Create flow starting with input node 318 | return Flow(start=get_question_node) 319 | ``` 320 | - **`main.py`**: Serves as the project's entry point. 321 | ```python 322 | # main.py 323 | from flow import create_qa_flow 324 | 325 | # Example main function 326 | # Please replace this with your own main function 327 | def main(): 328 | shared = { 329 | "question": None, # Will be populated by GetQuestionNode from user input 330 | "answer": None # Will be populated by AnswerNode 331 | } 332 | 333 | # Create the flow and run it 334 | qa_flow = create_qa_flow() 335 | qa_flow.run(shared) 336 | print(f"Question: {shared['question']}") 337 | print(f"Answer: {shared['answer']}") 338 | 339 | if __name__ == "__main__": 340 | main() 341 | ``` 342 | 343 | ================================================ 344 | File: docs/index.md 345 | ================================================ 346 | --- 347 | layout: default 348 | title: "Home" 349 | nav_order: 1 350 | --- 351 | 352 | # Pocket Flow 353 | 354 | A [100-line](https://github.com/the-pocket/PocketFlow/blob/main/pocketflow/__init__.py) minimalist LLM framework for *Agents, Task Decomposition, RAG, etc*. 355 | 356 | - **Lightweight**: Just the core graph abstraction in 100 lines. ZERO dependencies, and vendor lock-in. 
357 | - **Expressive**: Everything you love from larger frameworks—([Multi-](./design_pattern/multi_agent.html))[Agents](./design_pattern/agent.html), [Workflow](./design_pattern/workflow.html), [RAG](./design_pattern/rag.html), and more. 358 | - **Agentic-Coding**: Intuitive enough for AI agents to help humans build complex LLM applications. 359 | 360 |
363 | 364 | ## Core Abstraction 365 | 366 | We model the LLM workflow as a **Graph + Shared Store**: 367 | 368 | - [Node](./core_abstraction/node.md) handles simple (LLM) tasks. 369 | - [Flow](./core_abstraction/flow.md) connects nodes through **Actions** (labeled edges). 370 | - [Shared Store](./core_abstraction/communication.md) enables communication between nodes within flows. 371 | - [Batch](./core_abstraction/batch.md) nodes/flows allow for data-intensive tasks. 372 | - [Async](./core_abstraction/async.md) nodes/flows allow waiting for asynchronous tasks. 373 | - [(Advanced) Parallel](./core_abstraction/parallel.md) nodes/flows handle I/O-bound tasks. 374 | 375 |
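For a quick feel of how these pieces fit together, here is a minimal sketch mirroring the example in the [Communication](./core_abstraction/communication.md) chapter. It assumes `pocketflow` is installed and that `call_llm` is your own wrapper (see the LLM Wrapper docs):

```python
from pocketflow import Node, Flow
from utils.call_llm import call_llm  # your own wrapper (see LLM Wrapper docs)

class LoadData(Node):
    def post(self, shared, prep_res, exec_res):
        shared["data"] = "Some text content"  # write to the shared store

class Summarize(Node):
    def prep(self, shared):
        return shared["data"]                 # read from the shared store

    def exec(self, prep_res):
        return call_llm(f"Summarize: {prep_res}")

    def post(self, shared, prep_res, exec_res):
        shared["summary"] = exec_res          # write the result back

load_data, summarize = LoadData(), Summarize()
load_data >> summarize                        # "default" Action edge
flow = Flow(start=load_data)

shared = {}
flow.run(shared)                              # shared["summary"] now holds the result
```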
378 | 379 | ## Design Pattern 380 | 381 | From there, it’s easy to implement popular design patterns: 382 | 383 | - [Agent](./design_pattern/agent.md) autonomously makes decisions. 384 | - [Workflow](./design_pattern/workflow.md) chains multiple tasks into pipelines. 385 | - [RAG](./design_pattern/rag.md) integrates data retrieval with generation. 386 | - [Map Reduce](./design_pattern/mapreduce.md) splits data tasks into Map and Reduce steps. 387 | - [Structured Output](./design_pattern/structure.md) formats outputs consistently. 388 | - [(Advanced) Multi-Agents](./design_pattern/multi_agent.md) coordinate multiple agents. 389 | 390 |
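To give a flavor of how these patterns build on Actions, an Agent's branching is just labeled edges between nodes. A sketch, where `decide`, `search`, and `answer` are assumed to be node instances like those in the [Agent](./design_pattern/agent.md) chapter:

```python
decide - "search" >> search   # the agent chose to search
decide - "answer" >> answer   # the agent chose to answer
search - "decide" >> decide   # loop back with the new context

flow = Flow(start=decide)
```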
393 | 394 | ## Utility Function 395 | 396 | We **do not** provide built-in utilities. Instead, we offer *examples*—please *implement your own*: 397 | 398 | - [LLM Wrapper](./utility_function/llm.md) 399 | - [Viz and Debug](./utility_function/viz.md) 400 | - [Web Search](./utility_function/websearch.md) 401 | - [Chunking](./utility_function/chunking.md) 402 | - [Embedding](./utility_function/embedding.md) 403 | - [Vector Databases](./utility_function/vector.md) 404 | - [Text-to-Speech](./utility_function/text_to_speech.md) 405 | 406 | **Why not built-in?**: I believe it's a *bad practice* for vendor-specific APIs in a general framework: 407 | - *API Volatility*: Frequent changes lead to heavy maintenance for hardcoded APIs. 408 | - *Flexibility*: You may want to switch vendors, use fine-tuned models, or run them locally. 409 | - *Optimizations*: Prompt caching, batching, and streaming are easier without vendor lock-in. 410 | 411 | ## Ready to build your Apps? 412 | 413 | Check out [Agentic Coding Guidance](./guide.md), the fastest way to develop LLM projects with Pocket Flow! 414 | 415 | ================================================ 416 | File: docs/core_abstraction/async.md 417 | ================================================ 418 | --- 419 | layout: default 420 | title: "(Advanced) Async" 421 | parent: "Core Abstraction" 422 | nav_order: 5 423 | --- 424 | 425 | # (Advanced) Async 426 | 427 | **Async** Nodes implement `prep_async()`, `exec_async()`, `exec_fallback_async()`, and/or `post_async()`. This is useful for: 428 | 429 | 1. **prep_async()**: For *fetching/reading data (files, APIs, DB)* in an I/O-friendly way. 430 | 2. **exec_async()**: Typically used for async LLM calls. 431 | 3. **post_async()**: For *awaiting user feedback*, *coordinating across multi-agents* or any additional async steps after `exec_async()`. 432 | 433 | **Note**: `AsyncNode` must be wrapped in `AsyncFlow`. `AsyncFlow` can also include regular (sync) nodes. 434 | 435 | ### Example 436 | 437 | ```python 438 | class SummarizeThenVerify(AsyncNode): 439 | async def prep_async(self, shared): 440 | # Example: read a file asynchronously 441 | doc_text = await read_file_async(shared["doc_path"]) 442 | return doc_text 443 | 444 | async def exec_async(self, prep_res): 445 | # Example: async LLM call 446 | summary = await call_llm_async(f"Summarize: {prep_res}") 447 | return summary 448 | 449 | async def post_async(self, shared, prep_res, exec_res): 450 | # Example: wait for user feedback 451 | decision = await gather_user_feedback(exec_res) 452 | if decision == "approve": 453 | shared["summary"] = exec_res 454 | return "approve" 455 | return "deny" 456 | 457 | summarize_node = SummarizeThenVerify() 458 | final_node = Finalize() 459 | 460 | # Define transitions 461 | summarize_node - "approve" >> final_node 462 | summarize_node - "deny" >> summarize_node # retry 463 | 464 | flow = AsyncFlow(start=summarize_node) 465 | 466 | async def main(): 467 | shared = {"doc_path": "document.txt"} 468 | await flow.run_async(shared) 469 | print("Final Summary:", shared.get("summary")) 470 | 471 | asyncio.run(main()) 472 | ``` 473 | 474 | ================================================ 475 | File: docs/core_abstraction/batch.md 476 | ================================================ 477 | --- 478 | layout: default 479 | title: "Batch" 480 | parent: "Core Abstraction" 481 | nav_order: 4 482 | --- 483 | 484 | # Batch 485 | 486 | **Batch** makes it easier to handle large inputs in one Node or **rerun** a Flow multiple times. 
Example use cases: 487 | - **Chunk-based** processing (e.g., splitting large texts). 488 | - **Iterative** processing over lists of input items (e.g., user queries, files, URLs). 489 | 490 | ## 1. BatchNode 491 | 492 | A **BatchNode** extends `Node` but changes `prep()` and `exec()`: 493 | 494 | - **`prep(shared)`**: returns an **iterable** (e.g., list, generator). 495 | - **`exec(item)`**: called **once** per item in that iterable. 496 | - **`post(shared, prep_res, exec_res_list)`**: after all items are processed, receives a **list** of results (`exec_res_list`) and returns an **Action**. 497 | 498 | 499 | ### Example: Summarize a Large File 500 | 501 | ```python 502 | class MapSummaries(BatchNode): 503 | def prep(self, shared): 504 | # Suppose we have a big file; chunk it 505 | content = shared["data"] 506 | chunk_size = 10000 507 | chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)] 508 | return chunks 509 | 510 | def exec(self, chunk): 511 | prompt = f"Summarize this chunk in 10 words: {chunk}" 512 | summary = call_llm(prompt) 513 | return summary 514 | 515 | def post(self, shared, prep_res, exec_res_list): 516 | combined = "\n".join(exec_res_list) 517 | shared["summary"] = combined 518 | return "default" 519 | 520 | map_summaries = MapSummaries() 521 | flow = Flow(start=map_summaries) 522 | flow.run(shared) 523 | ``` 524 | 525 | --- 526 | 527 | ## 2. BatchFlow 528 | 529 | A **BatchFlow** runs a **Flow** multiple times, each time with different `params`. Think of it as a loop that replays the Flow for each parameter set. 530 | 531 | ### Example: Summarize Many Files 532 | 533 | ```python 534 | class SummarizeAllFiles(BatchFlow): 535 | def prep(self, shared): 536 | # Return a list of param dicts (one per file) 537 | filenames = list(shared["data"].keys()) # e.g., ["file1.txt", "file2.txt", ...] 538 | return [{"filename": fn} for fn in filenames] 539 | 540 | # Suppose we have a per-file Flow (e.g., load_file >> summarize >> reduce): 541 | summarize_file = SummarizeFile(start=load_file) 542 | 543 | # Wrap that flow into a BatchFlow: 544 | summarize_all_files = SummarizeAllFiles(start=summarize_file) 545 | summarize_all_files.run(shared) 546 | ``` 547 | 548 | ### Under the Hood 549 | 1. `prep(shared)` returns a list of param dicts—e.g., `[{filename: "file1.txt"}, {filename: "file2.txt"}, ...]`. 550 | 2. The **BatchFlow** loops through each dict. For each one: 551 | - It merges the dict with the BatchFlow’s own `params`. 552 | - It calls `flow.run(shared)` using the merged result. 553 | 3. This means the sub-Flow is run **repeatedly**, once for every param dict. 554 | 555 | --- 556 | 557 | ## 3. Nested or Multi-Level Batches 558 | 559 | You can nest a **BatchFlow** in another **BatchFlow**. For instance: 560 | - **Outer** batch: returns a list of diretory param dicts (e.g., `{"directory": "/pathA"}`, `{"directory": "/pathB"}`, ...). 561 | - **Inner** batch: returning a list of per-file param dicts. 562 | 563 | At each level, **BatchFlow** merges its own param dict with the parent’s. By the time you reach the **innermost** node, the final `params` is the merged result of **all** parents in the chain. This way, a nested structure can keep track of the entire context (e.g., directory + file name) at once. 564 | 565 | ```python 566 | 567 | class FileBatchFlow(BatchFlow): 568 | def prep(self, shared): 569 | directory = self.params["directory"] 570 | # e.g., files = ["file1.txt", "file2.txt", ...] 
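        # (assumes `import os` at the top of the file)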
571 | files = [f for f in os.listdir(directory) if f.endswith(".txt")] 572 | return [{"filename": f} for f in files] 573 | 574 | class DirectoryBatchFlow(BatchFlow): 575 | def prep(self, shared): 576 | directories = [ "/path/to/dirA", "/path/to/dirB"] 577 | return [{"directory": d} for d in directories] 578 | 579 | # MapSummaries have params like {"directory": "/path/to/dirA", "filename": "file1.txt"} 580 | inner_flow = FileBatchFlow(start=MapSummaries()) 581 | outer_flow = DirectoryBatchFlow(start=inner_flow) 582 | ``` 583 | 584 | ================================================ 585 | File: docs/core_abstraction/communication.md 586 | ================================================ 587 | --- 588 | layout: default 589 | title: "Communication" 590 | parent: "Core Abstraction" 591 | nav_order: 3 592 | --- 593 | 594 | # Communication 595 | 596 | Nodes and Flows **communicate** in 2 ways: 597 | 598 | 1. **Shared Store (for almost all the cases)** 599 | 600 | - A global data structure (often an in-mem dict) that all nodes can read ( `prep()`) and write (`post()`). 601 | - Great for data results, large content, or anything multiple nodes need. 602 | - You shall design the data structure and populate it ahead. 603 | 604 | - > **Separation of Concerns:** Use `Shared Store` for almost all cases to separate *Data Schema* from *Compute Logic*! This approach is both flexible and easy to manage, resulting in more maintainable code. `Params` is more a syntax sugar for [Batch](./batch.md). 605 | {: .best-practice } 606 | 607 | 2. **Params (only for [Batch](./batch.md))** 608 | - Each node has a local, ephemeral `params` dict passed in by the **parent Flow**, used as an identifier for tasks. Parameter keys and values shall be **immutable**. 609 | - Good for identifiers like filenames or numeric IDs, in Batch mode. 610 | 611 | If you know memory management, think of the **Shared Store** like a **heap** (shared by all function calls), and **Params** like a **stack** (assigned by the caller). 612 | 613 | --- 614 | 615 | ## 1. Shared Store 616 | 617 | ### Overview 618 | 619 | A shared store is typically an in-mem dictionary, like: 620 | ```python 621 | shared = {"data": {}, "summary": {}, "config": {...}, ...} 622 | ``` 623 | 624 | It can also contain local file handlers, DB connections, or a combination for persistence. We recommend deciding the data structure or DB schema first based on your app requirements. 625 | 626 | ### Example 627 | 628 | ```python 629 | class LoadData(Node): 630 | def post(self, shared, prep_res, exec_res): 631 | # We write data to shared store 632 | shared["data"] = "Some text content" 633 | return None 634 | 635 | class Summarize(Node): 636 | def prep(self, shared): 637 | # We read data from shared store 638 | return shared["data"] 639 | 640 | def exec(self, prep_res): 641 | # Call LLM to summarize 642 | prompt = f"Summarize: {prep_res}" 643 | summary = call_llm(prompt) 644 | return summary 645 | 646 | def post(self, shared, prep_res, exec_res): 647 | # We write summary to shared store 648 | shared["summary"] = exec_res 649 | return "default" 650 | 651 | load_data = LoadData() 652 | summarize = Summarize() 653 | load_data >> summarize 654 | flow = Flow(start=load_data) 655 | 656 | shared = {} 657 | flow.run(shared) 658 | ``` 659 | 660 | Here: 661 | - `LoadData` writes to `shared["data"]`. 662 | - `Summarize` reads from `shared["data"]`, summarizes, and writes to `shared["summary"]`. 663 | 664 | --- 665 | 666 | ## 2. 
Params 667 | 668 | **Params** let you store *per-Node* or *per-Flow* config that doesn't need to live in the shared store. They are: 669 | - **Immutable** during a Node's run cycle (i.e., they don't change mid-`prep->exec->post`). 670 | - **Set** via `set_params()`. 671 | - **Cleared** and updated each time a parent Flow calls it. 672 | 673 | > Only set the uppermost Flow params because others will be overwritten by the parent Flow. 674 | > 675 | > If you need to set child node params, see [Batch](./batch.md). 676 | {: .warning } 677 | 678 | Typically, **Params** are identifiers (e.g., file name, page number). Use them to fetch the task you assigned or write to a specific part of the shared store. 679 | 680 | ### Example 681 | 682 | ```python 683 | # 1) Create a Node that uses params 684 | class SummarizeFile(Node): 685 | def prep(self, shared): 686 | # Access the node's param 687 | filename = self.params["filename"] 688 | return shared["data"].get(filename, "") 689 | 690 | def exec(self, prep_res): 691 | prompt = f"Summarize: {prep_res}" 692 | return call_llm(prompt) 693 | 694 | def post(self, shared, prep_res, exec_res): 695 | filename = self.params["filename"] 696 | shared["summary"][filename] = exec_res 697 | return "default" 698 | 699 | # 2) Set params 700 | node = SummarizeFile() 701 | 702 | # 3) Set Node params directly (for testing) 703 | node.set_params({"filename": "doc1.txt"}) 704 | node.run(shared) 705 | 706 | # 4) Create Flow 707 | flow = Flow(start=node) 708 | 709 | # 5) Set Flow params (overwrites node params) 710 | flow.set_params({"filename": "doc2.txt"}) 711 | flow.run(shared) # The node summarizes doc2, not doc1 712 | ``` 713 | 714 | ================================================ 715 | File: docs/core_abstraction/flow.md 716 | ================================================ 717 | --- 718 | layout: default 719 | title: "Flow" 720 | parent: "Core Abstraction" 721 | nav_order: 2 722 | --- 723 | 724 | # Flow 725 | 726 | A **Flow** orchestrates a graph of Nodes. You can chain Nodes in a sequence or create branching depending on the **Actions** returned from each Node's `post()`. 727 | 728 | ## 1. Action-based Transitions 729 | 730 | Each Node's `post()` returns an **Action** string. By default, if `post()` doesn't return anything, we treat that as `"default"`. 731 | 732 | You define transitions with the syntax: 733 | 734 | 1. **Basic default transition**: `node_a >> node_b` 735 | This means if `node_a.post()` returns `"default"`, go to `node_b`. 736 | (Equivalent to `node_a - "default" >> node_b`) 737 | 738 | 2. **Named action transition**: `node_a - "action_name" >> node_b` 739 | This means if `node_a.post()` returns `"action_name"`, go to `node_b`. 740 | 741 | It's possible to create loops, branching, or multi-step flows. 742 | 743 | ## 2. Creating a Flow 744 | 745 | A **Flow** begins with a **start** node. You call `Flow(start=some_node)` to specify the entry point. When you call `flow.run(shared)`, it executes the start node, looks at its returned Action from `post()`, follows the transition, and continues until there's no next node. 746 | 747 | ### Example: Simple Sequence 748 | 749 | Here's a minimal flow of two nodes in a chain: 750 | 751 | ```python 752 | node_a >> node_b 753 | flow = Flow(start=node_a) 754 | flow.run(shared) 755 | ``` 756 | 757 | - When you run the flow, it executes `node_a`. 758 | - Suppose `node_a.post()` returns `"default"`. 759 | - The flow then sees `"default"` Action is linked to `node_b` and runs `node_b`. 
760 | - `node_b.post()` returns `"default"` but we didn't define `node_b >> something_else`. So the flow ends there. 761 | 762 | ### Example: Branching & Looping 763 | 764 | Here's a simple expense approval flow that demonstrates branching and looping. The `ReviewExpense` node can return three possible Actions: 765 | 766 | - `"approved"`: expense is approved, move to payment processing 767 | - `"needs_revision"`: expense needs changes, send back for revision 768 | - `"rejected"`: expense is denied, finish the process 769 | 770 | We can wire them like this: 771 | 772 | ```python 773 | # Define the flow connections 774 | review - "approved" >> payment # If approved, process payment 775 | review - "needs_revision" >> revise # If needs changes, go to revision 776 | review - "rejected" >> finish # If rejected, finish the process 777 | 778 | revise >> review # After revision, go back for another review 779 | payment >> finish # After payment, finish the process 780 | 781 | flow = Flow(start=review) 782 | ``` 783 | 784 | Let's see how it flows: 785 | 786 | 1. If `review.post()` returns `"approved"`, the expense moves to the `payment` node 787 | 2. If `review.post()` returns `"needs_revision"`, it goes to the `revise` node, which then loops back to `review` 788 | 3. If `review.post()` returns `"rejected"`, it moves to the `finish` node and stops 789 | 790 | ```mermaid 791 | flowchart TD 792 | review[Review Expense] -->|approved| payment[Process Payment] 793 | review -->|needs_revision| revise[Revise Report] 794 | review -->|rejected| finish[Finish Process] 795 | 796 | revise --> review 797 | payment --> finish 798 | ``` 799 | 800 | ### Running Individual Nodes vs. Running a Flow 801 | 802 | - `node.run(shared)`: Just runs that node alone (calls `prep->exec->post()`), returns an Action. 803 | - `flow.run(shared)`: Executes from the start node, follows Actions to the next node, and so on until the flow can't continue. 804 | 805 | > `node.run(shared)` **does not** proceed to the successor. 806 | > This is mainly for debugging or testing a single node. 807 | > 808 | > Always use `flow.run(...)` in production to ensure the full pipeline runs correctly. 809 | {: .warning } 810 | 811 | ## 3. Nested Flows 812 | 813 | A **Flow** can act like a Node, which enables powerful composition patterns. This means you can: 814 | 815 | 1. Use a Flow as a Node within another Flow's transitions. 816 | 2. Combine multiple smaller Flows into a larger Flow for reuse. 817 | 3. Node `params` will be a merging of **all** parents' `params`. 818 | 819 | ### Flow's Node Methods 820 | 821 | A **Flow** is also a **Node**, so it will run `prep()` and `post()`. However: 822 | 823 | - It **won't** run `exec()`, as its main logic is to orchestrate its nodes. 824 | - `post()` always receives `None` for `exec_res` and should instead get the flow execution results from the shared store. 825 | 826 | ### Basic Flow Nesting 827 | 828 | Here's how to connect a flow to another node: 829 | 830 | ```python 831 | # Create a sub-flow 832 | node_a >> node_b 833 | subflow = Flow(start=node_a) 834 | 835 | # Connect it to another node 836 | subflow >> node_c 837 | 838 | # Create the parent flow 839 | parent_flow = Flow(start=subflow) 840 | ``` 841 | 842 | When `parent_flow.run()` executes: 843 | 1. It starts `subflow` 844 | 2. `subflow` runs through its nodes (`node_a->node_b`) 845 | 3. 
After `subflow` completes, execution continues to `node_c` 846 | 847 | ### Example: Order Processing Pipeline 848 | 849 | Here's a practical example that breaks down order processing into nested flows: 850 | 851 | ```python 852 | # Payment processing sub-flow 853 | validate_payment >> process_payment >> payment_confirmation 854 | payment_flow = Flow(start=validate_payment) 855 | 856 | # Inventory sub-flow 857 | check_stock >> reserve_items >> update_inventory 858 | inventory_flow = Flow(start=check_stock) 859 | 860 | # Shipping sub-flow 861 | create_label >> assign_carrier >> schedule_pickup 862 | shipping_flow = Flow(start=create_label) 863 | 864 | # Connect the flows into a main order pipeline 865 | payment_flow >> inventory_flow >> shipping_flow 866 | 867 | # Create the master flow 868 | order_pipeline = Flow(start=payment_flow) 869 | 870 | # Run the entire pipeline 871 | order_pipeline.run(shared_data) 872 | ``` 873 | 874 | This creates a clean separation of concerns while maintaining a clear execution path: 875 | 876 | ```mermaid 877 | flowchart LR 878 | subgraph order_pipeline[Order Pipeline] 879 | subgraph paymentFlow["Payment Flow"] 880 | A[Validate Payment] --> B[Process Payment] --> C[Payment Confirmation] 881 | end 882 | 883 | subgraph inventoryFlow["Inventory Flow"] 884 | D[Check Stock] --> E[Reserve Items] --> F[Update Inventory] 885 | end 886 | 887 | subgraph shippingFlow["Shipping Flow"] 888 | G[Create Label] --> H[Assign Carrier] --> I[Schedule Pickup] 889 | end 890 | 891 | paymentFlow --> inventoryFlow 892 | inventoryFlow --> shippingFlow 893 | end 894 | ``` 895 | 896 | ================================================ 897 | File: docs/core_abstraction/node.md 898 | ================================================ 899 | --- 900 | layout: default 901 | title: "Node" 902 | parent: "Core Abstraction" 903 | nav_order: 1 904 | --- 905 | 906 | # Node 907 | 908 | A **Node** is the smallest building block. Each Node has 3 steps `prep->exec->post`: 909 | 910 |
913 | 914 | 1. `prep(shared)` 915 | - **Read and preprocess data** from `shared` store. 916 | - Examples: *query DB, read files, or serialize data into a string*. 917 | - Return `prep_res`, which is used by `exec()` and `post()`. 918 | 919 | 2. `exec(prep_res)` 920 | - **Execute compute logic**, with optional retries and error handling (below). 921 | - Examples: *(mostly) LLM calls, remote APIs, tool use*. 922 | - ⚠️ This step should contain compute logic only and must **NOT** access `shared`. 923 | - ⚠️ If retries are enabled, make sure the implementation is idempotent. 924 | - ⚠️ Defer exception handling to the Node's built-in retry mechanism. 925 | - Return `exec_res`, which is passed to `post()`. 926 | 927 | 3. `post(shared, prep_res, exec_res)` 928 | - **Postprocess and write data** back to `shared`. 929 | - Examples: *update DB, change states, log results*. 930 | - **Decide the next action** by returning a *string* (`action = "default"` if *None*). 931 | 932 | > **Why 3 steps?** To enforce the principle of *separation of concerns*: data storage and data processing are handled separately. 933 | > 934 | > All steps are *optional*. E.g., you can implement only `prep` and `post` if you just need to process data. 935 | {: .note } 936 | 937 | ### Fault Tolerance & Retries 938 | 939 | You can **retry** `exec()` if it raises an exception via two parameters when defining the Node: 940 | 941 | - `max_retries` (int): Max times to run `exec()`. The default is `1` (**no** retry). 942 | - `wait` (int): The time to wait (in **seconds**) before the next retry. By default, `wait=0` (no waiting). 943 | `wait` is helpful when you encounter rate limits or quota errors from your LLM provider and need to back off. 944 | 945 | ```python 946 | my_node = SummarizeFile(max_retries=3, wait=10) 947 | ``` 948 | 949 | When an exception occurs in `exec()`, the Node automatically retries until: 950 | 951 | - It either succeeds, or 952 | - The Node has already retried `max_retries - 1` times and fails on the last attempt. 953 | 954 | You can get the current retry count (0-based) from `self.cur_retry`. 955 | 956 | ```python 957 | class RetryNode(Node): 958 | def exec(self, prep_res): 959 | print(f"Retry attempt {self.cur_retry}") 960 | raise Exception("Failed") 961 | ``` 962 | 963 | ### Graceful Fallback 964 | 965 | To **gracefully handle** the exception (after all retries) rather than raising it, override: 966 | 967 | ```python 968 | def exec_fallback(self, prep_res, exc): 969 | raise exc 970 | ``` 971 | 972 | By default, it just re-raises the exception. But you can return a fallback result instead, which becomes the `exec_res` passed to `post()`. 973 | 974 | ### Example: Summarize file 975 | 976 | ```python 977 | class SummarizeFile(Node): 978 | def prep(self, shared): 979 | return shared["data"] 980 | 981 | def exec(self, prep_res): 982 | if not prep_res: 983 | return "Empty file content" 984 | prompt = f"Summarize this text in 10 words: {prep_res}" 985 | summary = call_llm(prompt) # might fail 986 | return summary 987 | 988 | def exec_fallback(self, prep_res, exc): 989 | # Provide a simple fallback instead of crashing 990 | return "There was an error processing your request."
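        # After all retries are exhausted, this fallback string becomes exec_res and is passed to post() below.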
991 | 992 | def post(self, shared, prep_res, exec_res): 993 | shared["summary"] = exec_res 994 | # Return "default" by not returning 995 | 996 | summarize_node = SummarizeFile(max_retries=3) 997 | 998 | # node.run() calls prep->exec->post 999 | # If exec() fails, it retries up to 3 times before calling exec_fallback() 1000 | action_result = summarize_node.run(shared) 1001 | 1002 | print("Action returned:", action_result) # "default" 1003 | print("Summary stored:", shared["summary"]) 1004 | ``` 1005 | 1006 | ================================================ 1007 | File: docs/core_abstraction/parallel.md 1008 | ================================================ 1009 | --- 1010 | layout: default 1011 | title: "(Advanced) Parallel" 1012 | parent: "Core Abstraction" 1013 | nav_order: 6 1014 | --- 1015 | 1016 | # (Advanced) Parallel 1017 | 1018 | **Parallel** Nodes and Flows let you run multiple **Async** Nodes and Flows **concurrently**—for example, summarizing multiple texts at once. This can improve performance by overlapping I/O and compute. 1019 | 1020 | > Because of Python’s GIL, parallel nodes and flows can’t truly parallelize CPU-bound tasks (e.g., heavy numerical computations). However, they excel at overlapping I/O-bound work—like LLM calls, database queries, API requests, or file I/O. 1021 | {: .warning } 1022 | 1023 | > - **Ensure Tasks Are Independent**: If each item depends on the output of a previous item, **do not** parallelize. 1024 | > 1025 | > - **Beware of Rate Limits**: Parallel calls can **quickly** trigger rate limits on LLM services. You may need a **throttling** mechanism (e.g., semaphores or sleep intervals). 1026 | > 1027 | > - **Consider Single-Node Batch APIs**: Some LLMs offer a **batch inference** API where you can send multiple prompts in a single call. This is more complex to implement but can be more efficient than launching many parallel requests and mitigates rate limits. 1028 | {: .best-practice } 1029 | 1030 | ## AsyncParallelBatchNode 1031 | 1032 | Like **AsyncBatchNode**, but run `exec_async()` in **parallel**: 1033 | 1034 | ```python 1035 | class ParallelSummaries(AsyncParallelBatchNode): 1036 | async def prep_async(self, shared): 1037 | # e.g., multiple texts 1038 | return shared["texts"] 1039 | 1040 | async def exec_async(self, text): 1041 | prompt = f"Summarize: {text}" 1042 | return await call_llm_async(prompt) 1043 | 1044 | async def post_async(self, shared, prep_res, exec_res_list): 1045 | shared["summary"] = "\n\n".join(exec_res_list) 1046 | return "default" 1047 | 1048 | node = ParallelSummaries() 1049 | flow = AsyncFlow(start=node) 1050 | ``` 1051 | 1052 | ## AsyncParallelBatchFlow 1053 | 1054 | Parallel version of **BatchFlow**. 
Each iteration of the sub-flow runs **concurrently** using different parameters: 1055 | 1056 | ```python 1057 | class SummarizeMultipleFiles(AsyncParallelBatchFlow): 1058 | async def prep_async(self, shared): 1059 | return [{"filename": f} for f in shared["files"]] 1060 | 1061 | sub_flow = AsyncFlow(start=LoadAndSummarizeFile()) 1062 | parallel_flow = SummarizeMultipleFiles(start=sub_flow) 1063 | await parallel_flow.run_async(shared) 1064 | ``` 1065 | 1066 | ================================================ 1067 | File: docs/design_pattern/agent.md 1068 | ================================================ 1069 | --- 1070 | layout: default 1071 | title: "Agent" 1072 | parent: "Design Pattern" 1073 | nav_order: 1 1074 | --- 1075 | 1076 | # Agent 1077 | 1078 | Agent is a powerful design pattern in which nodes can take dynamic actions based on the context. 1079 | 1080 |
1083 | 1084 | ## Implement Agent with Graph 1085 | 1086 | 1. **Context and Action:** Implement nodes that supply context and perform actions. 1087 | 2. **Branching:** Use branching to connect each action node to an agent node. Use action to allow the agent to direct the [flow](../core_abstraction/flow.md) between nodes—and potentially loop back for multi-step. 1088 | 3. **Agent Node:** Provide a prompt to decide action—for example: 1089 | 1090 | ```python 1091 | f""" 1092 | ### CONTEXT 1093 | Task: {task_description} 1094 | Previous Actions: {previous_actions} 1095 | Current State: {current_state} 1096 | 1097 | ### ACTION SPACE 1098 | [1] search 1099 | Description: Use web search to get results 1100 | Parameters: 1101 | - query (str): What to search for 1102 | 1103 | [2] answer 1104 | Description: Conclude based on the results 1105 | Parameters: 1106 | - result (str): Final answer to provide 1107 | 1108 | ### NEXT ACTION 1109 | Decide the next action based on the current context and available action space. 1110 | Return your response in the following format: 1111 | 1112 | ```yaml 1113 | thinking: | 1114 | 1115 | action: 1116 | parameters: 1117 | : 1118 | ```""" 1119 | ``` 1120 | 1121 | The core of building **high-performance** and **reliable** agents boils down to: 1122 | 1123 | 1. **Context Management:** Provide *relevant, minimal context.* For example, rather than including an entire chat history, retrieve the most relevant via [RAG](./rag.md). Even with larger context windows, LLMs still fall victim to ["lost in the middle"](https://arxiv.org/abs/2307.03172), overlooking mid-prompt content. 1124 | 1125 | 2. **Action Space:** Provide *a well-structured and unambiguous* set of actions—avoiding overlap like separate `read_databases` or `read_csvs`. Instead, import CSVs into the database. 1126 | 1127 | ## Example Good Action Design 1128 | 1129 | - **Incremental:** Feed content in manageable chunks (500 lines or 1 page) instead of all at once. 1130 | 1131 | - **Overview-zoom-in:** First provide high-level structure (table of contents, summary), then allow drilling into details (raw texts). 1132 | 1133 | - **Parameterized/Programmable:** Instead of fixed actions, enable parameterized (columns to select) or programmable (SQL queries) actions, for example, to read CSV files. 1134 | 1135 | - **Backtracking:** Let the agent undo the last step instead of restarting entirely, preserving progress when encountering errors or dead ends. 1136 | 1137 | ## Example: Search Agent 1138 | 1139 | This agent: 1140 | 1. Decides whether to search or answer 1141 | 2. If searches, loops back to decide if more search needed 1142 | 3. 
Answers when enough context gathered 1143 | 1144 | ```python 1145 | class DecideAction(Node): 1146 | def prep(self, shared): 1147 | context = shared.get("context", "No previous search") 1148 | query = shared["query"] 1149 | return query, context 1150 | 1151 | def exec(self, inputs): 1152 | query, context = inputs 1153 | prompt = f""" 1154 | Given input: {query} 1155 | Previous search results: {context} 1156 | Should I: 1) Search web for more info 2) Answer with current knowledge 1157 | Output in yaml: 1158 | ```yaml 1159 | action: search/answer 1160 | reason: why this action 1161 | search_term: search phrase if action is search 1162 | ```""" 1163 | resp = call_llm(prompt) 1164 | yaml_str = resp.split("```yaml")[1].split("```")[0].strip() 1165 | result = yaml.safe_load(yaml_str) 1166 | 1167 | assert isinstance(result, dict) 1168 | assert "action" in result 1169 | assert "reason" in result 1170 | assert result["action"] in ["search", "answer"] 1171 | if result["action"] == "search": 1172 | assert "search_term" in result 1173 | 1174 | return result 1175 | 1176 | def post(self, shared, prep_res, exec_res): 1177 | if exec_res["action"] == "search": 1178 | shared["search_term"] = exec_res["search_term"] 1179 | return exec_res["action"] 1180 | 1181 | class SearchWeb(Node): 1182 | def prep(self, shared): 1183 | return shared["search_term"] 1184 | 1185 | def exec(self, search_term): 1186 | return search_web(search_term) 1187 | 1188 | def post(self, shared, prep_res, exec_res): 1189 | prev_searches = shared.get("context", []) 1190 | shared["context"] = prev_searches + [ 1191 | {"term": shared["search_term"], "result": exec_res} 1192 | ] 1193 | return "decide" 1194 | 1195 | class DirectAnswer(Node): 1196 | def prep(self, shared): 1197 | return shared["query"], shared.get("context", "") 1198 | 1199 | def exec(self, inputs): 1200 | query, context = inputs 1201 | return call_llm(f"Context: {context}\nAnswer: {query}") 1202 | 1203 | def post(self, shared, prep_res, exec_res): 1204 | print(f"Answer: {exec_res}") 1205 | shared["answer"] = exec_res 1206 | 1207 | # Connect nodes 1208 | decide = DecideAction() 1209 | search = SearchWeb() 1210 | answer = DirectAnswer() 1211 | 1212 | decide - "search" >> search 1213 | decide - "answer" >> answer 1214 | search - "decide" >> decide # Loop back 1215 | 1216 | flow = Flow(start=decide) 1217 | flow.run({"query": "Who won the Nobel Prize in Physics 2024?"}) 1218 | ``` 1219 | 1220 | ================================================ 1221 | File: docs/design_pattern/mapreduce.md 1222 | ================================================ 1223 | --- 1224 | layout: default 1225 | title: "Map Reduce" 1226 | parent: "Design Pattern" 1227 | nav_order: 4 1228 | --- 1229 | 1230 | # Map Reduce 1231 | 1232 | MapReduce is a design pattern suitable when you have either: 1233 | - Large input data (e.g., multiple files to process), or 1234 | - Large output data (e.g., multiple forms to fill) 1235 | 1236 | and there is a logical way to break the task into smaller, ideally independent parts. 1237 | 1238 |
1241 | 1242 | You first break down the task using [BatchNode](../core_abstraction/batch.md) in the map phase, followed by aggregation in the reduce phase. 1243 | 1244 | ### Example: Document Summarization 1245 | 1246 | ```python 1247 | class SummarizeAllFiles(BatchNode): 1248 | def prep(self, shared): 1249 | files_dict = shared["files"] # e.g. 10 files 1250 | return list(files_dict.items()) # [("file1.txt", "aaa..."), ("file2.txt", "bbb..."), ...] 1251 | 1252 | def exec(self, one_file): 1253 | filename, file_content = one_file 1254 | summary_text = call_llm(f"Summarize the following file:\n{file_content}") 1255 | return (filename, summary_text) 1256 | 1257 | def post(self, shared, prep_res, exec_res_list): 1258 | shared["file_summaries"] = dict(exec_res_list) 1259 | 1260 | class CombineSummaries(Node): 1261 | def prep(self, shared): 1262 | return shared["file_summaries"] 1263 | 1264 | def exec(self, file_summaries): 1265 | # format as: "File1: summary\nFile2: summary...\n" 1266 | text_list = [] 1267 | for fname, summ in file_summaries.items(): 1268 | text_list.append(f"{fname} summary:\n{summ}\n") 1269 | big_text = "\n---\n".join(text_list) 1270 | 1271 | return call_llm(f"Combine these file summaries into one final summary:\n{big_text}") 1272 | 1273 | def post(self, shared, prep_res, final_summary): 1274 | shared["all_files_summary"] = final_summary 1275 | 1276 | batch_node = SummarizeAllFiles() 1277 | combine_node = CombineSummaries() 1278 | batch_node >> combine_node 1279 | 1280 | flow = Flow(start=batch_node) 1281 | 1282 | shared = { 1283 | "files": { 1284 | "file1.txt": "Alice was beginning to get very tired of sitting by her sister...", 1285 | "file2.txt": "Some other interesting text ...", 1286 | # ... 1287 | } 1288 | } 1289 | flow.run(shared) 1290 | print("Individual Summaries:", shared["file_summaries"]) 1291 | print("\nFinal Summary:\n", shared["all_files_summary"]) 1292 | ``` 1293 | 1294 | ================================================ 1295 | File: docs/design_pattern/rag.md 1296 | ================================================ 1297 | --- 1298 | layout: default 1299 | title: "RAG" 1300 | parent: "Design Pattern" 1301 | nav_order: 3 1302 | --- 1303 | 1304 | # RAG (Retrieval Augmented Generation) 1305 | 1306 | For certain LLM tasks like answering questions, providing relevant context is essential. One common architecture is a **two-stage** RAG pipeline: 1307 | 1308 |
1311 | 1312 | 1. **Offline stage**: Preprocess and index documents ("building the index"). 1313 | 2. **Online stage**: Given a question, generate answers by retrieving the most relevant context. 1314 | 1315 | --- 1316 | ## Stage 1: Offline Indexing 1317 | 1318 | We create three Nodes: 1319 | 1. `ChunkDocs` – [chunks](../utility_function/chunking.md) raw text. 1320 | 2. `EmbedDocs` – [embeds](../utility_function/embedding.md) each chunk. 1321 | 3. `StoreIndex` – stores embeddings into a [vector database](../utility_function/vector.md). 1322 | 1323 | ```python 1324 | class ChunkDocs(BatchNode): 1325 | def prep(self, shared): 1326 | # A list of file paths in shared["files"]. We process each file. 1327 | return shared["files"] 1328 | 1329 | def exec(self, filepath): 1330 | # read file content. In real usage, do error handling. 1331 | with open(filepath, "r", encoding="utf-8") as f: 1332 | text = f.read() 1333 | # chunk by 100 chars each 1334 | chunks = [] 1335 | size = 100 1336 | for i in range(0, len(text), size): 1337 | chunks.append(text[i : i + size]) 1338 | return chunks 1339 | 1340 | def post(self, shared, prep_res, exec_res_list): 1341 | # exec_res_list is a list of chunk-lists, one per file. 1342 | # flatten them all into a single list of chunks. 1343 | all_chunks = [] 1344 | for chunk_list in exec_res_list: 1345 | all_chunks.extend(chunk_list) 1346 | shared["all_chunks"] = all_chunks 1347 | 1348 | class EmbedDocs(BatchNode): 1349 | def prep(self, shared): 1350 | return shared["all_chunks"] 1351 | 1352 | def exec(self, chunk): 1353 | return get_embedding(chunk) 1354 | 1355 | def post(self, shared, prep_res, exec_res_list): 1356 | # Store the list of embeddings. 1357 | shared["all_embeds"] = exec_res_list 1358 | print(f"Total embeddings: {len(exec_res_list)}") 1359 | 1360 | class StoreIndex(Node): 1361 | def prep(self, shared): 1362 | # We'll read all embeds from shared. 1363 | return shared["all_embeds"] 1364 | 1365 | def exec(self, all_embeds): 1366 | # Create a vector index (faiss or other DB in real usage). 1367 | index = create_index(all_embeds) 1368 | return index 1369 | 1370 | def post(self, shared, prep_res, index): 1371 | shared["index"] = index 1372 | 1373 | # Wire them in sequence 1374 | chunk_node = ChunkDocs() 1375 | embed_node = EmbedDocs() 1376 | store_node = StoreIndex() 1377 | 1378 | chunk_node >> embed_node >> store_node 1379 | 1380 | OfflineFlow = Flow(start=chunk_node) 1381 | ``` 1382 | 1383 | Usage example: 1384 | 1385 | ```python 1386 | shared = { 1387 | "files": ["doc1.txt", "doc2.txt"], # any text files 1388 | } 1389 | OfflineFlow.run(shared) 1390 | ``` 1391 | 1392 | --- 1393 | ## Stage 2: Online Query & Answer 1394 | 1395 | We have 3 nodes: 1396 | 1. `EmbedQuery` – embeds the user’s question. 1397 | 2. `RetrieveDocs` – retrieves top chunk from the index. 1398 | 3. `GenerateAnswer` – calls the LLM with the question + chunk to produce the final answer. 
1399 | 1400 | ```python 1401 | class EmbedQuery(Node): 1402 | def prep(self, shared): 1403 | return shared["question"] 1404 | 1405 | def exec(self, question): 1406 | return get_embedding(question) 1407 | 1408 | def post(self, shared, prep_res, q_emb): 1409 | shared["q_emb"] = q_emb 1410 | 1411 | class RetrieveDocs(Node): 1412 | def prep(self, shared): 1413 | # We'll need the query embedding, plus the offline index/chunks 1414 | return shared["q_emb"], shared["index"], shared["all_chunks"] 1415 | 1416 | def exec(self, inputs): 1417 | q_emb, index, chunks = inputs 1418 | I, D = search_index(index, q_emb, top_k=1) 1419 | best_id = I[0][0] 1420 | relevant_chunk = chunks[best_id] 1421 | return relevant_chunk 1422 | 1423 | def post(self, shared, prep_res, relevant_chunk): 1424 | shared["retrieved_chunk"] = relevant_chunk 1425 | print("Retrieved chunk:", relevant_chunk[:60], "...") 1426 | 1427 | class GenerateAnswer(Node): 1428 | def prep(self, shared): 1429 | return shared["question"], shared["retrieved_chunk"] 1430 | 1431 | def exec(self, inputs): 1432 | question, chunk = inputs 1433 | prompt = f"Question: {question}\nContext: {chunk}\nAnswer:" 1434 | return call_llm(prompt) 1435 | 1436 | def post(self, shared, prep_res, answer): 1437 | shared["answer"] = answer 1438 | print("Answer:", answer) 1439 | 1440 | embed_qnode = EmbedQuery() 1441 | retrieve_node = RetrieveDocs() 1442 | generate_node = GenerateAnswer() 1443 | 1444 | embed_qnode >> retrieve_node >> generate_node 1445 | OnlineFlow = Flow(start=embed_qnode) 1446 | ``` 1447 | 1448 | Usage example: 1449 | 1450 | ```python 1451 | # Suppose we already ran OfflineFlow and have: 1452 | # shared["all_chunks"], shared["index"], etc. 1453 | shared["question"] = "Why do people like cats?" 1454 | 1455 | OnlineFlow.run(shared) 1456 | # final answer in shared["answer"] 1457 | ``` 1458 | 1459 | ================================================ 1460 | File: docs/design_pattern/structure.md 1461 | ================================================ 1462 | --- 1463 | layout: default 1464 | title: "Structured Output" 1465 | parent: "Design Pattern" 1466 | nav_order: 5 1467 | --- 1468 | 1469 | # Structured Output 1470 | 1471 | In many use cases, you may want the LLM to output a specific structure, such as a list or a dictionary with predefined keys. 1472 | 1473 | There are several approaches to achieve a structured output: 1474 | - **Prompting** the LLM to strictly return a defined structure. 1475 | - Using LLMs that natively support **schema enforcement**. 1476 | - **Post-processing** the LLM's response to extract structured content. 1477 | 1478 | In practice, **Prompting** is simple and reliable for modern LLMs. 1479 | 1480 | ### Example Use Cases 1481 | 1482 | - Extracting Key Information 1483 | 1484 | ```yaml 1485 | product: 1486 | name: Widget Pro 1487 | price: 199.99 1488 | description: | 1489 | A high-quality widget designed for professionals. 1490 | Recommended for advanced users. 1491 | ``` 1492 | 1493 | - Summarizing Documents into Bullet Points 1494 | 1495 | ```yaml 1496 | summary: 1497 | - This product is easy to use. 1498 | - It is cost-effective. 1499 | - Suitable for all skill levels. 1500 | ``` 1501 | 1502 | - Generating Configuration Files 1503 | 1504 | ```yaml 1505 | server: 1506 | host: 127.0.0.1 1507 | port: 8080 1508 | ssl: true 1509 | ``` 1510 | 1511 | ## Prompt Engineering 1512 | 1513 | When prompting the LLM to produce **structured** output: 1514 | 1. **Wrap** the structure in code fences (e.g., `yaml`). 1515 | 2. 
**Validate** that all required fields exist (and let the `Node` handle retries). 1516 | 1517 | ### Example Text Summarization 1518 | 1519 | ```python 1520 | class SummarizeNode(Node): 1521 | def exec(self, prep_res): 1522 | # Suppose `prep_res` is the text to summarize. 1523 | prompt = f""" 1524 | Please summarize the following text as YAML, with exactly 3 bullet points 1525 | 1526 | {prep_res} 1527 | 1528 | Now, output: 1529 | ```yaml 1530 | summary: 1531 | - bullet 1 1532 | - bullet 2 1533 | - bullet 3 1534 | ```""" 1535 | response = call_llm(prompt) 1536 | yaml_str = response.split("```yaml")[1].split("```")[0].strip() 1537 | 1538 | import yaml 1539 | structured_result = yaml.safe_load(yaml_str) 1540 | 1541 | assert "summary" in structured_result 1542 | assert isinstance(structured_result["summary"], list) 1543 | 1544 | return structured_result 1545 | ``` 1546 | 1547 | > Besides using `assert` statements, another popular way to validate schemas is [Pydantic](https://github.com/pydantic/pydantic). 1548 | {: .note } 1549 | 1550 | ### Why YAML instead of JSON? 1551 | 1552 | Current LLMs struggle with escaping. YAML is easier for strings since they don't always need quotes. 1553 | 1554 | **In JSON** 1555 | 1556 | ```json 1557 | { 1558 | "dialogue": "Alice said: \"Hello Bob.\\nHow are you?\\nI am good.\"" 1559 | } 1560 | ``` 1561 | 1562 | - Every double quote inside the string must be escaped with `\"`. 1563 | - Each newline in the dialogue must be represented as `\n`. 1564 | 1565 | **In YAML** 1566 | 1567 | ```yaml 1568 | dialogue: | 1569 | Alice said: "Hello Bob. 1570 | How are you? 1571 | I am good." 1572 | ``` 1573 | 1574 | - No need to escape interior quotes—just place the entire text under a block literal (`|`). 1575 | - Newlines are naturally preserved without needing `\n`. 1576 | 1577 | ================================================ 1578 | File: docs/design_pattern/workflow.md 1579 | ================================================ 1580 | --- 1581 | layout: default 1582 | title: "Workflow" 1583 | parent: "Design Pattern" 1584 | nav_order: 2 1585 | --- 1586 | 1587 | # Workflow 1588 | 1589 | Many real-world tasks are too complex for one LLM call. The solution is **Task Decomposition**: decompose them into a [chain](../core_abstraction/flow.md) of multiple Nodes. 1590 | 1591 |
1594 | 1595 | > - You don't want to make each task **too coarse**, because it may be *too complex for one LLM call*. 1596 | > - You don't want to make each task **too granular**, because then *the LLM call doesn't have enough context* and results are *not consistent across nodes*. 1597 | > 1598 | > You usually need multiple *iterations* to find the *sweet spot*. If the task has too many *edge cases*, consider using [Agents](./agent.md). 1599 | {: .best-practice } 1600 | 1601 | ### Example: Article Writing 1602 | 1603 | ```python 1604 | class GenerateOutline(Node): 1605 | def prep(self, shared): return shared["topic"] 1606 | def exec(self, topic): return call_llm(f"Create a detailed outline for an article about {topic}") 1607 | def post(self, shared, prep_res, exec_res): shared["outline"] = exec_res 1608 | 1609 | class WriteSection(Node): 1610 | def prep(self, shared): return shared["outline"] 1611 | def exec(self, outline): return call_llm(f"Write content based on this outline: {outline}") 1612 | def post(self, shared, prep_res, exec_res): shared["draft"] = exec_res 1613 | 1614 | class ReviewAndRefine(Node): 1615 | def prep(self, shared): return shared["draft"] 1616 | def exec(self, draft): return call_llm(f"Review and improve this draft: {draft}") 1617 | def post(self, shared, prep_res, exec_res): shared["final_article"] = exec_res 1618 | 1619 | # Connect nodes 1620 | outline = GenerateOutline() 1621 | write = WriteSection() 1622 | review = ReviewAndRefine() 1623 | 1624 | outline >> write >> review 1625 | 1626 | # Create and run flow 1627 | writing_flow = Flow(start=outline) 1628 | shared = {"topic": "AI Safety"} 1629 | writing_flow.run(shared) 1630 | ``` 1631 | 1632 | For *dynamic cases*, consider using [Agents](./agent.md). 1633 | 1634 | ================================================ 1635 | File: docs/utility_function/llm.md 1636 | ================================================ 1637 | --- 1638 | layout: default 1639 | title: "LLM Wrapper" 1640 | parent: "Utility Function" 1641 | nav_order: 1 1642 | --- 1643 | 1644 | # LLM Wrappers 1645 | 1646 | Check out libraries like [litellm](https://github.com/BerriAI/litellm). 1647 | Here, we provide some minimal example implementations: 1648 | 1649 | 1. OpenAI 1650 | ```python 1651 | def call_llm(prompt): 1652 | from openai import OpenAI 1653 | client = OpenAI(api_key="YOUR_API_KEY_HERE") 1654 | r = client.chat.completions.create( 1655 | model="gpt-4o", 1656 | messages=[{"role": "user", "content": prompt}] 1657 | ) 1658 | return r.choices[0].message.content 1659 | 1660 | # Example usage 1661 | call_llm("How are you?") 1662 | ``` 1663 | > Store the API key in an environment variable like OPENAI_API_KEY for security. 1664 | {: .best-practice } 1665 | 1666 | 2. Claude (Anthropic) 1667 | ```python 1668 | def call_llm(prompt): 1669 | from anthropic import Anthropic 1670 | client = Anthropic(api_key="YOUR_API_KEY_HERE") 1671 | r = client.messages.create( 1672 | model="claude-sonnet-4-0", 1673 | messages=[ 1674 | {"role": "user", "content": prompt} 1675 | ] 1676 | ) 1677 | return r.content[0].text 1678 | ``` 1679 | 1680 | 3. Google (Generative AI Studio / PaLM API) 1681 | ```python 1682 | def call_llm(prompt): 1683 | from google import genai 1684 | client = genai.Client(api_key='GEMINI_API_KEY') 1685 | response = client.models.generate_content( 1686 | model='gemini-2.5-pro', 1687 | contents=prompt 1688 | ) 1689 | return response.text 1690 | ``` 1691 | 1692 | 4. 
Azure (Azure OpenAI) 1693 | ```python 1694 | def call_llm(prompt): 1695 | from openai import AzureOpenAI 1696 | client = AzureOpenAI( 1697 | azure_endpoint="https://.openai.azure.com/", 1698 | api_key="YOUR_API_KEY_HERE", 1699 | api_version="2023-05-15" 1700 | ) 1701 | r = client.chat.completions.create( 1702 | model="", 1703 | messages=[{"role": "user", "content": prompt}] 1704 | ) 1705 | return r.choices[0].message.content 1706 | ``` 1707 | 1708 | 5. Ollama (Local LLM) 1709 | ```python 1710 | def call_llm(prompt): 1711 | from ollama import chat 1712 | response = chat( 1713 | model="llama2", 1714 | messages=[{"role": "user", "content": prompt}] 1715 | ) 1716 | return response.message.content 1717 | ``` 1718 | 1719 | ## Improvements 1720 | Feel free to enhance your `call_llm` function as needed. Here are examples: 1721 | 1722 | - Handle chat history: 1723 | 1724 | ```python 1725 | def call_llm(messages): 1726 | from openai import OpenAI 1727 | client = OpenAI(api_key="YOUR_API_KEY_HERE") 1728 | r = client.chat.completions.create( 1729 | model="gpt-4o", 1730 | messages=messages 1731 | ) 1732 | return r.choices[0].message.content 1733 | ``` 1734 | 1735 | - Add in-memory caching 1736 | 1737 | ```python 1738 | from functools import lru_cache 1739 | 1740 | @lru_cache(maxsize=1000) 1741 | def call_llm(prompt): 1742 | # Your implementation here 1743 | pass 1744 | ``` 1745 | 1746 | > ⚠️ Caching conflicts with Node retries, as retries yield the same result. 1747 | > 1748 | > To address this, you could use cached results only if not retried. 1749 | {: .warning } 1750 | 1751 | 1752 | ```python 1753 | from functools import lru_cache 1754 | 1755 | @lru_cache(maxsize=1000) 1756 | def cached_call(prompt): 1757 | pass 1758 | 1759 | def call_llm(prompt, use_cache): 1760 | if use_cache: 1761 | return cached_call(prompt) 1762 | # Call the underlying function directly 1763 | return cached_call.__wrapped__(prompt) 1764 | 1765 | class SummarizeNode(Node): 1766 | def exec(self, text): 1767 | return call_llm(f"Summarize: {text}", self.cur_retry==0) 1768 | ``` 1769 | 1770 | - Enable logging: 1771 | 1772 | ```python 1773 | def call_llm(prompt): 1774 | import logging 1775 | logging.info(f"Prompt: {prompt}") 1776 | response = ... # Your implementation here 1777 | logging.info(f"Response: {response}") 1778 | return response 1779 | ``` --------------------------------------------------------------------------------