├── utils
│   ├── __init__.py
│   └── call_llm.py
├── requirements.txt
├── assets
│   └── banner.png
├── main.py
├── flow.py
├── nodes.py
├── README.md
├── .gitignore
├── docs
│   └── design.md
├── .clinerules
├── .cursorrules
└── .goosehints
/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pocketflow>=0.0.1
--------------------------------------------------------------------------------
/assets/banner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/The-Pocket/PocketFlow-Template-Python/main/assets/banner.png
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | from flow import create_qa_flow
2 |
3 | # Example main function
4 | # Please replace this with your own main function
5 | def main():
6 | shared = {
7 | "question": "In one sentence, what's the end of universe?",
8 | "answer": None
9 | }
10 |
11 | qa_flow = create_qa_flow()
12 | qa_flow.run(shared)
13 | print("Question:", shared["question"])
14 | print("Answer:", shared["answer"])
15 |
16 | if __name__ == "__main__":
17 | main()
18 |
--------------------------------------------------------------------------------
/flow.py:
--------------------------------------------------------------------------------
1 | from pocketflow import Flow
2 | from nodes import GetQuestionNode, AnswerNode
3 |
4 | def create_qa_flow():
5 | """Create and return a question-answering flow."""
6 | # Create nodes
7 | get_question_node = GetQuestionNode()
8 | answer_node = AnswerNode()
9 |
10 | # Connect nodes in sequence
11 | get_question_node >> answer_node
12 |
13 | # Create flow starting with input node
14 | return Flow(start=get_question_node)
15 |
16 | qa_flow = create_qa_flow()
--------------------------------------------------------------------------------
/utils/call_llm.py:
--------------------------------------------------------------------------------
1 | from openai import OpenAI
2 | import os
3 |
4 | # Learn more about calling the LLM: https://the-pocket.github.io/PocketFlow/utility_function/llm.html
5 | def call_llm(prompt):
6 | client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "your-api-key"))
7 | r = client.chat.completions.create(
8 | model="gpt-4o",
9 | messages=[{"role": "user", "content": prompt}]
10 | )
11 | return r.choices[0].message.content
12 |
13 | if __name__ == "__main__":
14 | prompt = "What is the meaning of life?"
15 | print(call_llm(prompt))
16 |
--------------------------------------------------------------------------------
/nodes.py:
--------------------------------------------------------------------------------
1 | from pocketflow import Node
2 | from utils.call_llm import call_llm
3 |
4 | class GetQuestionNode(Node):
5 | def exec(self, _):
6 | # Get question directly from user input
7 | user_question = input("Enter your question: ")
8 | return user_question
9 |
10 | def post(self, shared, prep_res, exec_res):
11 | # Store the user's question
12 | shared["question"] = exec_res
13 | return "default" # Go to the next node
14 |
15 | class AnswerNode(Node):
16 | def prep(self, shared):
17 | # Read question from shared
18 | return shared["question"]
19 |
20 | def exec(self, question):
21 | # Call LLM to get the answer
22 | return call_llm(question)
23 |
24 | def post(self, shared, prep_res, exec_res):
25 | # Store the answer in shared
26 | shared["answer"] = exec_res
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Pocket Flow Project Template: Agentic Coding
2 |
11 | This is a project template for Agentic Coding with [Pocket Flow](https://github.com/The-Pocket/PocketFlow), a 100-line LLM framework, and your editor of choice.
12 |
13 | - We have included rules files for various AI coding assistants to help you build LLM projects:
14 | - [.cursorrules](.cursorrules) for Cursor AI
15 | - [.clinerules](.clinerules) for Cline
16 | - [.windsurfrules](.windsurfrules) for Windsurf
17 | - [.goosehints](.goosehints) for Goose
18 | - Configuration in [.github](.github) for GitHub Copilot
19 | - [CLAUDE.md](CLAUDE.md) for Claude Code
20 | - [GEMINI.md](GEMINI.md) for Gemini
21 |
22 | - Want to learn how to build LLM projects with Agentic Coding?
23 |
24 | - Check out the [Agentic Coding Guidance](https://the-pocket.github.io/PocketFlow/guide.html)
25 |
26 | - Check out the [YouTube Tutorial](https://www.youtube.com/@ZacharyLLM?sub_confirmation=1)
27 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Dependencies
2 | node_modules/
3 | vendor/
4 | .pnp/
5 | .pnp.js
6 |
7 | # Build outputs
8 | dist/
9 | build/
10 | out/
11 | *.pyc
12 | __pycache__/
13 |
14 | # Environment files
15 | .env
16 | .env.local
17 | .env.*.local
18 | .env.development
19 | .env.test
20 | .env.production
21 |
22 | # IDE - VSCode
23 | .vscode/*
24 | !.vscode/settings.json
25 | !.vscode/tasks.json
26 | !.vscode/launch.json
27 | !.vscode/extensions.json
28 |
29 | # IDE - JetBrains
30 | .idea/
31 | *.iml
32 | *.iws
33 | *.ipr
34 |
35 | # IDE - Eclipse
36 | .project
37 | .classpath
38 | .settings/
39 |
40 | # Logs
41 | logs/
42 | *.log
43 | npm-debug.log*
44 | yarn-debug.log*
45 | yarn-error.log*
46 |
47 | # Operating System
48 | .DS_Store
49 | Thumbs.db
50 | *.swp
51 | *.swo
52 |
53 | # Testing
54 | coverage/
55 | .nyc_output/
56 |
57 | # Temporary files
58 | *.tmp
59 | *.temp
60 | .cache/
61 |
62 | # Compiled files
63 | *.com
64 | *.class
65 | *.dll
66 | *.exe
67 | *.o
68 | *.so
69 |
70 | # Package files
71 | *.7z
72 | *.dmg
73 | *.gz
74 | *.iso
75 | *.jar
76 | *.rar
77 | *.tar
78 | *.zip
79 |
80 | # Database
81 | *.sqlite
82 | *.sqlite3
83 | *.db
84 |
85 | # Optional npm cache directory
86 | .npm
87 |
88 | # Optional eslint cache
89 | .eslintcache
90 |
91 | # Optional REPL history
92 | .node_repl_history
--------------------------------------------------------------------------------
/docs/design.md:
--------------------------------------------------------------------------------
1 | # Design Doc: Your Project Name
2 |
3 | > Please DON'T remove notes for AI
4 |
5 | ## Requirements
6 |
7 | > Notes for AI: Keep it simple and clear.
8 | > If the requirements are abstract, write concrete user stories
9 |
10 |
11 | ## Flow Design
12 |
13 | > Notes for AI:
14 | > 1. Consider the design patterns of agent, map-reduce, rag, and workflow. Apply them if they fit.
15 | > 2. Present a concise, high-level description of the workflow.
16 |
17 | ### Applicable Design Pattern:
18 |
19 | 1. Map the file summary into chunks, then reduce these chunks into a final summary.
20 | 2. Agentic file finder
21 | - *Context*: The entire summary of the file
22 | - *Action*: Find the file
23 |
24 | ### Flow high-level Design:
25 |
26 | 1. **First Node**: This node is for ...
27 | 2. **Second Node**: This node is for ...
28 | 3. **Third Node**: This node is for ...
29 |
30 | ```mermaid
31 | flowchart TD
32 | firstNode[First Node] --> secondNode[Second Node]
33 | secondNode --> thirdNode[Third Node]
34 | ```
35 | ## Utility Functions
36 |
37 | > Notes for AI:
38 | > 1. Understand the utility function definition thoroughly by reviewing the doc.
39 | > 2. Include only the necessary utility functions, based on nodes in the flow.
40 |
41 | 1. **Call LLM** (`utils/call_llm.py`)
42 | - *Input*: prompt (str)
43 | - *Output*: response (str)
44 | - Generally used by most nodes for LLM tasks
45 |
46 | 2. **Embedding** (`utils/get_embedding.py`)
47 | - *Input*: str
48 | - *Output*: a vector of 3072 floats
49 | - Used by the second node to embed text
50 |
51 | ## Node Design
52 |
53 | ### Shared Store
54 |
55 | > Notes for AI: Try to minimize data redundancy
56 |
57 | The shared store structure is organized as follows:
58 |
59 | ```python
60 | shared = {
61 | "key": "value"
62 | }
63 | ```
64 |
65 | ### Node Steps
66 |
67 | > Notes for AI: Carefully decide whether to use Batch/Async Node/Flow.
68 |
69 | 1. First Node
70 | - *Purpose*: Provide a short explanation of the node’s function
71 | - *Type*: Decide between Regular, Batch, or Async
72 | - *Steps*:
73 | - *prep*: Read "key" from the shared store
74 | - *exec*: Call the utility function
75 | - *post*: Write "key" to the shared store
76 |
77 | 2. Second Node
78 | ...
79 |
80 |
--------------------------------------------------------------------------------
/.clinerules:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | title: "Agentic Coding"
4 | ---
5 |
6 | # Agentic Coding: Humans Design, Agents code!
7 |
8 | > If you are an AI agent involved in building LLM Systems, read this guide **VERY, VERY** carefully! This is the most important chapter in the entire document. Throughout development, you should always (1) start with a small and simple solution, (2) design at a high level (`docs/design.md`) before implementation, and (3) frequently ask humans for feedback and clarification.
9 | {: .warning }
10 |
11 | ## Agentic Coding Steps
12 |
13 | Agentic Coding should be a collaboration between Human System Design and Agent Implementation:
14 |
15 | | Steps | Human | AI | Comment |
16 | |:-----------------------|:----------:|:---------:|:------------------------------------------------------------------------|
17 | | 1. Requirements | ★★★ High | ★☆☆ Low | Humans understand the requirements and context. |
18 | | 2. Flow | ★★☆ Medium | ★★☆ Medium | Humans specify the high-level design, and the AI fills in the details. |
19 | | 3. Utilities | ★★☆ Medium | ★★☆ Medium | Humans provide available external APIs and integrations, and the AI helps with implementation. |
20 | | 4. Data | ★☆☆ Low | ★★★ High | AI designs the data schema, and humans verify. |
21 | | 5. Node | ★☆☆ Low | ★★★ High | The AI helps design the node based on the flow. |
22 | | 6. Implementation | ★☆☆ Low | ★★★ High | The AI implements the flow based on the design. |
23 | | 7. Optimization | ★★☆ Medium | ★★☆ Medium | Humans evaluate the results, and the AI helps optimize. |
24 | | 8. Reliability | ★☆☆ Low | ★★★ High | The AI writes test cases and addresses corner cases. |
25 |
26 | 1. **Requirements**: Clarify the requirements for your project, and evaluate whether an AI system is a good fit.
27 | - Understand AI systems' strengths and limitations:
28 | - **Good for**: Routine tasks requiring common sense (filling forms, replying to emails)
29 | - **Good for**: Creative tasks with well-defined inputs (building slides, writing SQL)
30 | - **Not good for**: Ambiguous problems requiring complex decision-making (business strategy, startup planning)
31 | - **Keep It User-Centric:** Explain the "problem" from the user's perspective rather than just listing features.
32 | - **Balance complexity vs. impact**: Aim to deliver the highest value features with minimal complexity early.
33 |
34 | 2. **Flow Design**: Outline at a high level, describe how your AI system orchestrates nodes.
35 | - Identify applicable design patterns (e.g., [Map Reduce](./design_pattern/mapreduce.md), [Agent](./design_pattern/agent.md), [RAG](./design_pattern/rag.md)).
36 | - For each node in the flow, start with a high-level one-line description of what it does.
37 | - If using **Map Reduce**, specify how to map (what to split) and how to reduce (how to combine).
38 | - If using **Agent**, specify what are the inputs (context) and what are the possible actions.
39 | - If using **RAG**, specify what to embed, noting that there are usually both offline (indexing) and online (retrieval) workflows.
40 | - Outline the flow and draw it in a mermaid diagram. For example:
41 | ```mermaid
42 | flowchart LR
43 | start[Start] --> batch[Batch]
44 | batch --> check[Check]
45 | check -->|OK| process
46 | check -->|Error| fix[Fix]
47 | fix --> check
48 |
49 | subgraph process[Process]
50 | step1[Step 1] --> step2[Step 2]
51 | end
52 |
53 | process --> endNode[End]
54 | ```
55 | - > **If Humans can't specify the flow, AI Agents can't automate it!** Before building an LLM system, thoroughly understand the problem and potential solution by manually solving example inputs to develop intuition.
56 | {: .best-practice }
57 |
58 | 3. **Utilities**: Based on the Flow Design, identify and implement necessary utility functions.
59 | - Think of your AI system as the brain. It needs a body—these *external utility functions*—to interact with the real world:
60 |
61 |
62 | - Reading inputs (e.g., retrieving Slack messages, reading emails)
63 | - Writing outputs (e.g., generating reports, sending emails)
64 | - Using external tools (e.g., calling LLMs, searching the web)
65 | - **NOTE**: *LLM-based tasks* (e.g., summarizing text, analyzing sentiment) are **NOT** utility functions; rather, they are *core functions* internal to the AI system.
66 | - For each utility function, implement it and write a simple test.
67 | - Document their input/output, as well as why they are necessary. For example:
68 | - `name`: `get_embedding` (`utils/get_embedding.py`)
69 | - `input`: `str`
70 | - `output`: a vector of 3072 floats
71 | - `necessity`: Used by the second node to embed text
72 | - Example utility implementation:
73 | ```python
74 | # utils/call_llm.py
75 | from openai import OpenAI
76 |
77 | def call_llm(prompt):
78 | client = OpenAI(api_key="YOUR_API_KEY_HERE")
79 | r = client.chat.completions.create(
80 | model="gpt-4o",
81 | messages=[{"role": "user", "content": prompt}]
82 | )
83 | return r.choices[0].message.content
84 |
85 | if __name__ == "__main__":
86 | prompt = "What is the meaning of life?"
87 | print(call_llm(prompt))
88 | ```
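- For the `get_embedding` utility documented above, a minimal sketch might look like the following (an illustrative assumption: it uses OpenAI's `text-embedding-3-large`, which returns 3072-dimensional vectors; swap in whatever provider you actually use):
```python
# utils/get_embedding.py (sketch)
from openai import OpenAI
import os

def get_embedding(text: str) -> list[float]:
    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "your-api-key"))
    r = client.embeddings.create(model="text-embedding-3-large", input=text)
    return r.data[0].embedding  # 3072 floats for text-embedding-3-large

if __name__ == "__main__":
    emb = get_embedding("Hello, world!")
    print(f"Embedding length: {len(emb)}")
```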
89 | - > **Sometimes, design Utilities before Flow:** For example, for an LLM project to automate a legacy system, the bottleneck will likely be the available interface to that system. Start by designing the hardest utilities for interfacing, and then build the flow around them.
90 | {: .best-practice }
91 | - > **Avoid Exception Handling in Utilities**: If a utility function is called from a Node's `exec()` method, avoid using `try...except` blocks within the utility. Let the Node's built-in retry mechanism handle failures.
92 | {: .warning }
93 |
94 | 4. **Data Design**: Design the shared store that nodes will use to communicate.
95 | - One core design principle for PocketFlow is to use a well-designed [shared store](./core_abstraction/communication.md)—a data contract that all nodes agree upon to retrieve and store data.
96 | - For simple systems, use an in-memory dictionary.
97 | - For more complex systems or when persistence is required, use a database.
98 | - **Don't Repeat Yourself**: Use in-memory references or foreign keys.
99 | - Example shared store design:
100 | ```python
101 | shared = {
102 | "user": {
103 | "id": "user123",
104 | "context": { # Another nested dict
105 | "weather": {"temp": 72, "condition": "sunny"},
106 | "location": "San Francisco"
107 | }
108 | },
109 | "results": {} # Empty dict to store outputs
110 | }
111 | ```
112 |
113 | 5. **Node Design**: Plan how each node will read and write data, and use utility functions.
114 | - For each [Node](./core_abstraction/node.md), describe its type, how it reads and writes data, and which utility function it uses. Keep it specific but high-level, without code. For example:
115 | - `type`: Regular (or Batch, or Async)
116 | - `prep`: Read "text" from the shared store
117 | - `exec`: Call the embedding utility function. **Avoid exception handling here**; let the Node's retry mechanism manage failures.
118 | - `post`: Write "embedding" to the shared store
119 |
120 | 6. **Implementation**: Implement the initial nodes and flows based on the design.
121 | - 🎉 If you've reached this step, humans have finished the design. Now *Agentic Coding* begins!
122 | - **"Keep it simple, stupid!"** Avoid complex features and full-scale type checking.
123 | - **FAIL FAST**! Leverage the built-in [Node](./core_abstraction/node.md) retry and fallback mechanisms to handle failures gracefully. This helps you quickly identify weak points in the system.
124 | - Add logging throughout the code to facilitate debugging.
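- For instance, a minimal logging sketch (a hypothetical `SummarizeNode`; adapt the messages to your own nodes):
```python
# nodes.py (sketch)
import logging
from pocketflow import Node
from utils.call_llm import call_llm

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class SummarizeNode(Node):
    def prep(self, shared):
        logger.info("prep: reading %d characters from shared['text']", len(shared["text"]))
        return shared["text"]

    def exec(self, text):
        logger.info("exec: calling the LLM")
        return call_llm(f"Summarize: {text}")

    def post(self, shared, prep_res, exec_res):
        logger.info("post: storing a %d-character summary", len(exec_res))
        shared["summary"] = exec_res
```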
125 |
126 | 7. **Optimization**:
127 | - **Use Intuition**: For a quick initial evaluation, human intuition is often a good start.
128 | - **Redesign Flow (Back to Step 3)**: Consider breaking down tasks further, introducing agentic decisions, or better managing input contexts.
129 | - If your flow design is already solid, move on to micro-optimizations:
130 | - **Prompt Engineering**: Use clear, specific instructions with examples to reduce ambiguity.
131 | - **In-Context Learning**: Provide robust examples for tasks that are difficult to specify with instructions alone.
132 |
133 | - > **You'll likely iterate a lot!** Expect to repeat Steps 3–6 hundreds of times.
136 | {: .best-practice }
137 |
138 | 8. **Reliability**
139 | - **Node Retries**: Add checks in the node `exec` to ensure outputs meet requirements, and consider increasing `max_retries` and `wait` times (see the sketch after this list).
140 | - **Logging and Visualization**: Maintain logs of all attempts and visualize node results for easier debugging.
141 | - **Self-Evaluation**: Add a separate node (powered by an LLM) to review outputs when results are uncertain.
142 |
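A minimal sketch of the **Node Retries** idea above, assuming a hypothetical `SummarizeNode` and the `call_llm` utility (the validation check and fallback message are illustrative):

```python
from pocketflow import Node
from utils.call_llm import call_llm

class SummarizeNode(Node):
    def prep(self, shared):
        return shared["text"]

    def exec(self, text):
        summary = call_llm(f"Summarize in one sentence: {text}")
        # Validate the output; raising here triggers the built-in retry
        if not summary or len(summary) > 300:
            raise ValueError("Summary missing or too long")
        return summary

    def exec_fallback(self, prep_res, exc):
        # Graceful fallback once all retries are exhausted
        return "Summary unavailable."

    def post(self, shared, prep_res, exec_res):
        shared["summary"] = exec_res

# Retry up to 3 times, waiting 10 seconds between attempts
summarize = SummarizeNode(max_retries=3, wait=10)
```
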
143 | ## Example LLM Project File Structure
144 |
145 | ```
146 | my_project/
147 | ├── main.py
148 | ├── nodes.py
149 | ├── flow.py
150 | ├── utils/
151 | │ ├── __init__.py
152 | │ ├── call_llm.py
153 | │ └── search_web.py
154 | ├── requirements.txt
155 | └── docs/
156 | └── design.md
157 | ```
158 |
159 | - **`requirements.txt`**: Lists the Python dependencies for the project.
160 | ```
161 | PyYAML
162 | pocketflow
163 | ```
164 |
165 | - **`docs/design.md`**: Contains project documentation for each step above. This should be *high-level* and *no-code*.
166 | ~~~
167 | # Design Doc: Your Project Name
168 |
169 | > Please DON'T remove notes for AI
170 |
171 | ## Requirements
172 |
173 | > Notes for AI: Keep it simple and clear.
174 | > If the requirements are abstract, write concrete user stories
175 |
176 |
177 | ## Flow Design
178 |
179 | > Notes for AI:
180 | > 1. Consider the design patterns of agent, map-reduce, rag, and workflow. Apply them if they fit.
181 | > 2. Present a concise, high-level description of the workflow.
182 |
183 | ### Applicable Design Pattern:
184 |
185 | 1. Map the file summary into chunks, then reduce these chunks into a final summary.
186 | 2. Agentic file finder
187 | - *Context*: The entire summary of the file
188 | - *Action*: Find the file
189 |
190 | ### Flow high-level Design:
191 |
192 | 1. **First Node**: This node is for ...
193 | 2. **Second Node**: This node is for ...
194 | 3. **Third Node**: This node is for ...
195 |
196 | ```mermaid
197 | flowchart TD
198 | firstNode[First Node] --> secondNode[Second Node]
199 | secondNode --> thirdNode[Third Node]
200 | ```
201 | ## Utility Functions
202 |
203 | > Notes for AI:
204 | > 1. Understand the utility function definition thoroughly by reviewing the doc.
205 | > 2. Include only the necessary utility functions, based on nodes in the flow.
206 |
207 | 1. **Call LLM** (`utils/call_llm.py`)
208 | - *Input*: prompt (str)
209 | - *Output*: response (str)
210 | - Generally used by most nodes for LLM tasks
211 |
212 | 2. **Embedding** (`utils/get_embedding.py`)
213 | - *Input*: str
214 | - *Output*: a vector of 3072 floats
215 | - Used by the second node to embed text
216 |
217 | ## Node Design
218 |
219 | ### Shared Store
220 |
221 | > Notes for AI: Try to minimize data redundancy
222 |
223 | The shared store structure is organized as follows:
224 |
225 | ```python
226 | shared = {
227 | "key": "value"
228 | }
229 | ```
230 |
231 | ### Node Steps
232 |
233 | > Notes for AI: Carefully decide whether to use Batch/Async Node/Flow.
234 |
235 | 1. First Node
236 | - *Purpose*: Provide a short explanation of the node’s function
237 | - *Type*: Decide between Regular, Batch, or Async
238 | - *Steps*:
239 | - *prep*: Read "key" from the shared store
240 | - *exec*: Call the utility function
241 | - *post*: Write "key" to the shared store
242 |
243 | 2. Second Node
244 | ...
245 | ~~~
246 |
247 |
248 | - **`utils/`**: Contains all utility functions.
249 | - It's recommended to dedicate one Python file to each API call, for example `call_llm.py` or `search_web.py`.
250 | - Each file should also include a `main()` function to try that API call
251 | ```python
252 | from google import genai
253 | import os
254 |
255 | def call_llm(prompt: str) -> str:
256 | client = genai.Client(
257 | api_key=os.getenv("GEMINI_API_KEY", ""),
258 | )
259 | model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
260 | response = client.models.generate_content(model=model, contents=[prompt])
261 | return response.text
262 |
263 | if __name__ == "__main__":
264 | test_prompt = "Hello, how are you?"
265 |
266 |     # Call the LLM once as a quick smoke test
267 |     print("Making call...")
268 |     response1 = call_llm(test_prompt)
269 | print(f"Response: {response1}")
270 | ```
271 |
272 | - **`nodes.py`**: Contains all the node definitions.
273 | ```python
274 | # nodes.py
275 | from pocketflow import Node
276 | from utils.call_llm import call_llm
277 |
278 | class GetQuestionNode(Node):
279 | def exec(self, _):
280 | # Get question directly from user input
281 | user_question = input("Enter your question: ")
282 | return user_question
283 |
284 | def post(self, shared, prep_res, exec_res):
285 | # Store the user's question
286 | shared["question"] = exec_res
287 | return "default" # Go to the next node
288 |
289 | class AnswerNode(Node):
290 | def prep(self, shared):
291 | # Read question from shared
292 | return shared["question"]
293 |
294 | def exec(self, question):
295 | # Call LLM to get the answer
296 | return call_llm(question)
297 |
298 | def post(self, shared, prep_res, exec_res):
299 | # Store the answer in shared
300 | shared["answer"] = exec_res
301 | ```
302 | - **`flow.py`**: Implements functions that create flows by importing node definitions and connecting them.
303 | ```python
304 | # flow.py
305 | from pocketflow import Flow
306 | from nodes import GetQuestionNode, AnswerNode
307 |
308 | def create_qa_flow():
309 | """Create and return a question-answering flow."""
310 | # Create nodes
311 | get_question_node = GetQuestionNode()
312 | answer_node = AnswerNode()
313 |
314 | # Connect nodes in sequence
315 | get_question_node >> answer_node
316 |
317 | # Create flow starting with input node
318 | return Flow(start=get_question_node)
319 | ```
320 | - **`main.py`**: Serves as the project's entry point.
321 | ```python
322 | # main.py
323 | from flow import create_qa_flow
324 |
325 | # Example main function
326 | # Please replace this with your own main function
327 | def main():
328 | shared = {
329 | "question": None, # Will be populated by GetQuestionNode from user input
330 | "answer": None # Will be populated by AnswerNode
331 | }
332 |
333 | # Create the flow and run it
334 | qa_flow = create_qa_flow()
335 | qa_flow.run(shared)
336 | print(f"Question: {shared['question']}")
337 | print(f"Answer: {shared['answer']}")
338 |
339 | if __name__ == "__main__":
340 | main()
341 | ```
342 |
343 | ================================================
344 | File: docs/index.md
345 | ================================================
346 | ---
347 | layout: default
348 | title: "Home"
349 | nav_order: 1
350 | ---
351 |
352 | # Pocket Flow
353 |
354 | A [100-line](https://github.com/the-pocket/PocketFlow/blob/main/pocketflow/__init__.py) minimalist LLM framework for *Agents, Task Decomposition, RAG, etc*.
355 |
356 | - **Lightweight**: Just the core graph abstraction in 100 lines. ZERO dependencies, ZERO vendor lock-in.
357 | - **Expressive**: Everything you love from larger frameworks—([Multi-](./design_pattern/multi_agent.html))[Agents](./design_pattern/agent.html), [Workflow](./design_pattern/workflow.html), [RAG](./design_pattern/rag.html), and more.
358 | - **Agentic-Coding**: Intuitive enough for AI agents to help humans build complex LLM applications.
359 |
360 |
361 |
362 |
363 |
364 | ## Core Abstraction
365 |
366 | We model the LLM workflow as a **Graph + Shared Store**:
367 |
368 | - [Node](./core_abstraction/node.md) handles simple (LLM) tasks.
369 | - [Flow](./core_abstraction/flow.md) connects nodes through **Actions** (labeled edges).
370 | - [Shared Store](./core_abstraction/communication.md) enables communication between nodes within flows.
371 | - [Batch](./core_abstraction/batch.md) nodes/flows allow for data-intensive tasks.
372 | - [Async](./core_abstraction/async.md) nodes/flows allow waiting for asynchronous tasks.
373 | - [(Advanced) Parallel](./core_abstraction/parallel.md) nodes/flows handle I/O-bound tasks.
374 |
375 |
376 |
377 |
378 |
379 | ## Design Pattern
380 |
381 | From there, it’s easy to implement popular design patterns:
382 |
383 | - [Agent](./design_pattern/agent.md) autonomously makes decisions.
384 | - [Workflow](./design_pattern/workflow.md) chains multiple tasks into pipelines.
385 | - [RAG](./design_pattern/rag.md) integrates data retrieval with generation.
386 | - [Map Reduce](./design_pattern/mapreduce.md) splits data tasks into Map and Reduce steps.
387 | - [Structured Output](./design_pattern/structure.md) formats outputs consistently.
388 | - [(Advanced) Multi-Agents](./design_pattern/multi_agent.md) coordinate multiple agents.
389 |
390 |
391 |
392 |
393 |
394 | ## Utility Function
395 |
396 | We **do not** provide built-in utilities. Instead, we offer *examples*—please *implement your own*:
397 |
398 | - [LLM Wrapper](./utility_function/llm.md)
399 | - [Viz and Debug](./utility_function/viz.md)
400 | - [Web Search](./utility_function/websearch.md)
401 | - [Chunking](./utility_function/chunking.md)
402 | - [Embedding](./utility_function/embedding.md)
403 | - [Vector Databases](./utility_function/vector.md)
404 | - [Text-to-Speech](./utility_function/text_to_speech.md)
405 |
406 | **Why not built-in?**: I believe it's *bad practice* to include vendor-specific APIs in a general framework:
407 | - *API Volatility*: Frequent changes lead to heavy maintenance for hardcoded APIs.
408 | - *Flexibility*: You may want to switch vendors, use fine-tuned models, or run them locally.
409 | - *Optimizations*: Prompt caching, batching, and streaming are easier without vendor lock-in.
410 |
411 | ## Ready to build your Apps?
412 |
413 | Check out [Agentic Coding Guidance](./guide.md), the fastest way to develop LLM projects with Pocket Flow!
414 |
415 | ================================================
416 | File: docs/core_abstraction/async.md
417 | ================================================
418 | ---
419 | layout: default
420 | title: "(Advanced) Async"
421 | parent: "Core Abstraction"
422 | nav_order: 5
423 | ---
424 |
425 | # (Advanced) Async
426 |
427 | **Async** Nodes implement `prep_async()`, `exec_async()`, `exec_fallback_async()`, and/or `post_async()`. This is useful for:
428 |
429 | 1. **prep_async()**: For *fetching/reading data (files, APIs, DB)* in an I/O-friendly way.
430 | 2. **exec_async()**: Typically used for async LLM calls.
431 | 3. **post_async()**: For *awaiting user feedback*, *coordinating across multi-agents* or any additional async steps after `exec_async()`.
432 |
433 | **Note**: `AsyncNode` must be wrapped in `AsyncFlow`. `AsyncFlow` can also include regular (sync) nodes.
434 |
435 | ### Example
436 |
437 | ```python
438 | class SummarizeThenVerify(AsyncNode):
439 | async def prep_async(self, shared):
440 | # Example: read a file asynchronously
441 | doc_text = await read_file_async(shared["doc_path"])
442 | return doc_text
443 |
444 | async def exec_async(self, prep_res):
445 | # Example: async LLM call
446 | summary = await call_llm_async(f"Summarize: {prep_res}")
447 | return summary
448 |
449 | async def post_async(self, shared, prep_res, exec_res):
450 | # Example: wait for user feedback
451 | decision = await gather_user_feedback(exec_res)
452 | if decision == "approve":
453 | shared["summary"] = exec_res
454 | return "approve"
455 | return "deny"
456 |
457 | summarize_node = SummarizeThenVerify()
458 | final_node = Finalize()
459 |
460 | # Define transitions
461 | summarize_node - "approve" >> final_node
462 | summarize_node - "deny" >> summarize_node # retry
463 |
464 | flow = AsyncFlow(start=summarize_node)
465 |
466 | async def main():
467 | shared = {"doc_path": "document.txt"}
468 | await flow.run_async(shared)
469 | print("Final Summary:", shared.get("summary"))
470 |
471 | asyncio.run(main())
472 | ```
473 |
474 | ================================================
475 | File: docs/core_abstraction/batch.md
476 | ================================================
477 | ---
478 | layout: default
479 | title: "Batch"
480 | parent: "Core Abstraction"
481 | nav_order: 4
482 | ---
483 |
484 | # Batch
485 |
486 | **Batch** makes it easier to handle large inputs in one Node or **rerun** a Flow multiple times. Example use cases:
487 | - **Chunk-based** processing (e.g., splitting large texts).
488 | - **Iterative** processing over lists of input items (e.g., user queries, files, URLs).
489 |
490 | ## 1. BatchNode
491 |
492 | A **BatchNode** extends `Node` but changes `prep()` and `exec()`:
493 |
494 | - **`prep(shared)`**: returns an **iterable** (e.g., list, generator).
495 | - **`exec(item)`**: called **once** per item in that iterable.
496 | - **`post(shared, prep_res, exec_res_list)`**: after all items are processed, receives a **list** of results (`exec_res_list`) and returns an **Action**.
497 |
498 |
499 | ### Example: Summarize a Large File
500 |
501 | ```python
502 | class MapSummaries(BatchNode):
503 | def prep(self, shared):
504 | # Suppose we have a big file; chunk it
505 | content = shared["data"]
506 | chunk_size = 10000
507 | chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)]
508 | return chunks
509 |
510 | def exec(self, chunk):
511 | prompt = f"Summarize this chunk in 10 words: {chunk}"
512 | summary = call_llm(prompt)
513 | return summary
514 |
515 | def post(self, shared, prep_res, exec_res_list):
516 | combined = "\n".join(exec_res_list)
517 | shared["summary"] = combined
518 | return "default"
519 |
520 | map_summaries = MapSummaries()
521 | flow = Flow(start=map_summaries)
522 | flow.run(shared)
523 | ```
524 |
525 | ---
526 |
527 | ## 2. BatchFlow
528 |
529 | A **BatchFlow** runs a **Flow** multiple times, each time with different `params`. Think of it as a loop that replays the Flow for each parameter set.
530 |
531 | ### Example: Summarize Many Files
532 |
533 | ```python
534 | class SummarizeAllFiles(BatchFlow):
535 | def prep(self, shared):
536 | # Return a list of param dicts (one per file)
537 | filenames = list(shared["data"].keys()) # e.g., ["file1.txt", "file2.txt", ...]
538 | return [{"filename": fn} for fn in filenames]
539 |
540 | # Suppose we have a per-file Flow (e.g., load_file >> summarize >> reduce):
541 | summarize_file = SummarizeFile(start=load_file)
542 |
543 | # Wrap that flow into a BatchFlow:
544 | summarize_all_files = SummarizeAllFiles(start=summarize_file)
545 | summarize_all_files.run(shared)
546 | ```
547 |
548 | ### Under the Hood
549 | 1. `prep(shared)` returns a list of param dicts—e.g., `[{filename: "file1.txt"}, {filename: "file2.txt"}, ...]`.
550 | 2. The **BatchFlow** loops through each dict. For each one:
551 | - It merges the dict with the BatchFlow’s own `params`.
552 | - It calls `flow.run(shared)` using the merged result.
553 | 3. This means the sub-Flow is run **repeatedly**, once for every param dict.
554 |
555 | ---
556 |
557 | ## 3. Nested or Multi-Level Batches
558 |
559 | You can nest a **BatchFlow** in another **BatchFlow**. For instance:
560 | - **Outer** batch: returns a list of directory param dicts (e.g., `{"directory": "/pathA"}`, `{"directory": "/pathB"}`, ...).
561 | - **Inner** batch: returns a list of per-file param dicts.
562 |
563 | At each level, **BatchFlow** merges its own param dict with the parent’s. By the time you reach the **innermost** node, the final `params` is the merged result of **all** parents in the chain. This way, a nested structure can keep track of the entire context (e.g., directory + file name) at once.
564 |
565 | ```python
566 |
567 | class FileBatchFlow(BatchFlow):
568 | def prep(self, shared):
569 | directory = self.params["directory"]
570 | # e.g., files = ["file1.txt", "file2.txt", ...]
571 | files = [f for f in os.listdir(directory) if f.endswith(".txt")]
572 | return [{"filename": f} for f in files]
573 |
574 | class DirectoryBatchFlow(BatchFlow):
575 | def prep(self, shared):
576 | directories = [ "/path/to/dirA", "/path/to/dirB"]
577 | return [{"directory": d} for d in directories]
578 |
579 | # MapSummaries has params like {"directory": "/path/to/dirA", "filename": "file1.txt"}
580 | inner_flow = FileBatchFlow(start=MapSummaries())
581 | outer_flow = DirectoryBatchFlow(start=inner_flow)
582 | ```
583 |
584 | ================================================
585 | File: docs/core_abstraction/communication.md
586 | ================================================
587 | ---
588 | layout: default
589 | title: "Communication"
590 | parent: "Core Abstraction"
591 | nav_order: 3
592 | ---
593 |
594 | # Communication
595 |
596 | Nodes and Flows **communicate** in 2 ways:
597 |
598 | 1. **Shared Store (for almost all the cases)**
599 |
600 | - A global data structure (often an in-mem dict) that all nodes can read (`prep()`) and write (`post()`).
601 | - Great for data results, large content, or anything multiple nodes need.
602 | - You should design the data structure and populate it ahead of time.
603 |
604 | - > **Separation of Concerns:** Use `Shared Store` for almost all cases to separate *Data Schema* from *Compute Logic*! This approach is both flexible and easy to manage, resulting in more maintainable code. `Params` is more a syntax sugar for [Batch](./batch.md).
605 | {: .best-practice }
606 |
607 | 2. **Params (only for [Batch](./batch.md))**
608 | - Each node has a local, ephemeral `params` dict passed in by the **parent Flow**, used as an identifier for tasks. Parameter keys and values shall be **immutable**.
609 | - Good for identifiers like filenames or numeric IDs, in Batch mode.
610 |
611 | If you know memory management, think of the **Shared Store** like a **heap** (shared by all function calls), and **Params** like a **stack** (assigned by the caller).
612 |
613 | ---
614 |
615 | ## 1. Shared Store
616 |
617 | ### Overview
618 |
619 | A shared store is typically an in-mem dictionary, like:
620 | ```python
621 | shared = {"data": {}, "summary": {}, "config": {...}, ...}
622 | ```
623 |
624 | It can also contain local file handlers, DB connections, or a combination for persistence. We recommend deciding the data structure or DB schema first based on your app requirements.
625 |
626 | ### Example
627 |
628 | ```python
629 | class LoadData(Node):
630 | def post(self, shared, prep_res, exec_res):
631 | # We write data to shared store
632 | shared["data"] = "Some text content"
633 | return None
634 |
635 | class Summarize(Node):
636 | def prep(self, shared):
637 | # We read data from shared store
638 | return shared["data"]
639 |
640 | def exec(self, prep_res):
641 | # Call LLM to summarize
642 | prompt = f"Summarize: {prep_res}"
643 | summary = call_llm(prompt)
644 | return summary
645 |
646 | def post(self, shared, prep_res, exec_res):
647 | # We write summary to shared store
648 | shared["summary"] = exec_res
649 | return "default"
650 |
651 | load_data = LoadData()
652 | summarize = Summarize()
653 | load_data >> summarize
654 | flow = Flow(start=load_data)
655 |
656 | shared = {}
657 | flow.run(shared)
658 | ```
659 |
660 | Here:
661 | - `LoadData` writes to `shared["data"]`.
662 | - `Summarize` reads from `shared["data"]`, summarizes, and writes to `shared["summary"]`.
663 |
664 | ---
665 |
666 | ## 2. Params
667 |
668 | **Params** let you store *per-Node* or *per-Flow* config that doesn't need to live in the shared store. They are:
669 | - **Immutable** during a Node's run cycle (i.e., they don't change mid-`prep->exec->post`).
670 | - **Set** via `set_params()`.
671 | - **Cleared** and updated each time a parent Flow calls it.
672 |
673 | > Only set the uppermost Flow params because others will be overwritten by the parent Flow.
674 | >
675 | > If you need to set child node params, see [Batch](./batch.md).
676 | {: .warning }
677 |
678 | Typically, **Params** are identifiers (e.g., file name, page number). Use them to fetch the task you assigned or write to a specific part of the shared store.
679 |
680 | ### Example
681 |
682 | ```python
683 | # 1) Create a Node that uses params
684 | class SummarizeFile(Node):
685 | def prep(self, shared):
686 | # Access the node's param
687 | filename = self.params["filename"]
688 | return shared["data"].get(filename, "")
689 |
690 | def exec(self, prep_res):
691 | prompt = f"Summarize: {prep_res}"
692 | return call_llm(prompt)
693 |
694 | def post(self, shared, prep_res, exec_res):
695 | filename = self.params["filename"]
696 | shared["summary"][filename] = exec_res
697 | return "default"
698 |
699 | # 2) Set params
700 | node = SummarizeFile()
701 |
702 | # 3) Set Node params directly (for testing)
703 | node.set_params({"filename": "doc1.txt"})
704 | node.run(shared)
705 |
706 | # 4) Create Flow
707 | flow = Flow(start=node)
708 |
709 | # 5) Set Flow params (overwrites node params)
710 | flow.set_params({"filename": "doc2.txt"})
711 | flow.run(shared) # The node summarizes doc2, not doc1
712 | ```
713 |
714 | ================================================
715 | File: docs/core_abstraction/flow.md
716 | ================================================
717 | ---
718 | layout: default
719 | title: "Flow"
720 | parent: "Core Abstraction"
721 | nav_order: 2
722 | ---
723 |
724 | # Flow
725 |
726 | A **Flow** orchestrates a graph of Nodes. You can chain Nodes in a sequence or create branching depending on the **Actions** returned from each Node's `post()`.
727 |
728 | ## 1. Action-based Transitions
729 |
730 | Each Node's `post()` returns an **Action** string. By default, if `post()` doesn't return anything, we treat that as `"default"`.
731 |
732 | You define transitions with the syntax:
733 |
734 | 1. **Basic default transition**: `node_a >> node_b`
735 | This means if `node_a.post()` returns `"default"`, go to `node_b`.
736 | (Equivalent to `node_a - "default" >> node_b`)
737 |
738 | 2. **Named action transition**: `node_a - "action_name" >> node_b`
739 | This means if `node_a.post()` returns `"action_name"`, go to `node_b`.
740 |
741 | It's possible to create loops, branching, or multi-step flows.
742 |
743 | ## 2. Creating a Flow
744 |
745 | A **Flow** begins with a **start** node. You call `Flow(start=some_node)` to specify the entry point. When you call `flow.run(shared)`, it executes the start node, looks at its returned Action from `post()`, follows the transition, and continues until there's no next node.
746 |
747 | ### Example: Simple Sequence
748 |
749 | Here's a minimal flow of two nodes in a chain:
750 |
751 | ```python
752 | node_a >> node_b
753 | flow = Flow(start=node_a)
754 | flow.run(shared)
755 | ```
756 |
757 | - When you run the flow, it executes `node_a`.
758 | - Suppose `node_a.post()` returns `"default"`.
759 | - The flow then sees `"default"` Action is linked to `node_b` and runs `node_b`.
760 | - `node_b.post()` returns `"default"` but we didn't define `node_b >> something_else`. So the flow ends there.
761 |
762 | ### Example: Branching & Looping
763 |
764 | Here's a simple expense approval flow that demonstrates branching and looping. The `ReviewExpense` node can return three possible Actions:
765 |
766 | - `"approved"`: expense is approved, move to payment processing
767 | - `"needs_revision"`: expense needs changes, send back for revision
768 | - `"rejected"`: expense is denied, finish the process
769 |
770 | We can wire them like this:
771 |
772 | ```python
773 | # Define the flow connections
774 | review - "approved" >> payment # If approved, process payment
775 | review - "needs_revision" >> revise # If needs changes, go to revision
776 | review - "rejected" >> finish # If rejected, finish the process
777 |
778 | revise >> review # After revision, go back for another review
779 | payment >> finish # After payment, finish the process
780 |
781 | flow = Flow(start=review)
782 | ```
783 |
784 | Let's see how it flows:
785 |
786 | 1. If `review.post()` returns `"approved"`, the expense moves to the `payment` node
787 | 2. If `review.post()` returns `"needs_revision"`, it goes to the `revise` node, which then loops back to `review`
788 | 3. If `review.post()` returns `"rejected"`, it moves to the `finish` node and stops
789 |
790 | ```mermaid
791 | flowchart TD
792 | review[Review Expense] -->|approved| payment[Process Payment]
793 | review -->|needs_revision| revise[Revise Report]
794 | review -->|rejected| finish[Finish Process]
795 |
796 | revise --> review
797 | payment --> finish
798 | ```
799 |
800 | ### Running Individual Nodes vs. Running a Flow
801 |
802 | - `node.run(shared)`: Just runs that node alone (calls `prep->exec->post()`), returns an Action.
803 | - `flow.run(shared)`: Executes from the start node, follows Actions to the next node, and so on until the flow can't continue.
804 |
805 | > `node.run(shared)` **does not** proceed to the successor.
806 | > This is mainly for debugging or testing a single node.
807 | >
808 | > Always use `flow.run(...)` in production to ensure the full pipeline runs correctly.
809 | {: .warning }
810 |
811 | ## 3. Nested Flows
812 |
813 | A **Flow** can act like a Node, which enables powerful composition patterns. This means you can:
814 |
815 | 1. Use a Flow as a Node within another Flow's transitions.
816 | 2. Combine multiple smaller Flows into a larger Flow for reuse.
817 | 3. Node `params` will be a merging of **all** parents' `params`.
818 |
819 | ### Flow's Node Methods
820 |
821 | A **Flow** is also a **Node**, so it will run `prep()` and `post()`. However:
822 |
823 | - It **won't** run `exec()`, as its main logic is to orchestrate its nodes.
824 | - `post()` always receives `None` for `exec_res` and should instead get the flow execution results from the shared store, as sketched below.
825 |
826 | ### Basic Flow Nesting
827 |
828 | Here's how to connect a flow to another node:
829 |
830 | ```python
831 | # Create a sub-flow
832 | node_a >> node_b
833 | subflow = Flow(start=node_a)
834 |
835 | # Connect it to another node
836 | subflow >> node_c
837 |
838 | # Create the parent flow
839 | parent_flow = Flow(start=subflow)
840 | ```
841 |
842 | When `parent_flow.run()` executes:
843 | 1. It starts `subflow`
844 | 2. `subflow` runs through its nodes (`node_a->node_b`)
845 | 3. After `subflow` completes, execution continues to `node_c`
846 |
847 | ### Example: Order Processing Pipeline
848 |
849 | Here's a practical example that breaks down order processing into nested flows:
850 |
851 | ```python
852 | # Payment processing sub-flow
853 | validate_payment >> process_payment >> payment_confirmation
854 | payment_flow = Flow(start=validate_payment)
855 |
856 | # Inventory sub-flow
857 | check_stock >> reserve_items >> update_inventory
858 | inventory_flow = Flow(start=check_stock)
859 |
860 | # Shipping sub-flow
861 | create_label >> assign_carrier >> schedule_pickup
862 | shipping_flow = Flow(start=create_label)
863 |
864 | # Connect the flows into a main order pipeline
865 | payment_flow >> inventory_flow >> shipping_flow
866 |
867 | # Create the master flow
868 | order_pipeline = Flow(start=payment_flow)
869 |
870 | # Run the entire pipeline
871 | order_pipeline.run(shared_data)
872 | ```
873 |
874 | This creates a clean separation of concerns while maintaining a clear execution path:
875 |
876 | ```mermaid
877 | flowchart LR
878 | subgraph order_pipeline[Order Pipeline]
879 | subgraph paymentFlow["Payment Flow"]
880 | A[Validate Payment] --> B[Process Payment] --> C[Payment Confirmation]
881 | end
882 |
883 | subgraph inventoryFlow["Inventory Flow"]
884 | D[Check Stock] --> E[Reserve Items] --> F[Update Inventory]
885 | end
886 |
887 | subgraph shippingFlow["Shipping Flow"]
888 | G[Create Label] --> H[Assign Carrier] --> I[Schedule Pickup]
889 | end
890 |
891 | paymentFlow --> inventoryFlow
892 | inventoryFlow --> shippingFlow
893 | end
894 | ```
895 |
896 | ================================================
897 | File: docs/core_abstraction/node.md
898 | ================================================
899 | ---
900 | layout: default
901 | title: "Node"
902 | parent: "Core Abstraction"
903 | nav_order: 1
904 | ---
905 |
906 | # Node
907 |
908 | A **Node** is the smallest building block. Each Node has 3 steps `prep->exec->post`:
909 |
910 |
911 |
912 |
913 |
914 | 1. `prep(shared)`
915 | - **Read and preprocess data** from `shared` store.
916 | - Examples: *query DB, read files, or serialize data into a string*.
917 | - Return `prep_res`, which is used by `exec()` and `post()`.
918 |
919 | 2. `exec(prep_res)`
920 | - **Execute compute logic**, with optional retries and error handling (below).
921 | - Examples: *(mostly) LLM calls, remote APIs, tool use*.
922 | - ⚠️ This should be used only for compute and must **NOT** access `shared`.
923 | - ⚠️ If retries enabled, ensure idempotent implementation.
924 | - ⚠️ Defer exception handling to the Node's built-in retry mechanism.
925 | - Return `exec_res`, which is passed to `post()`.
926 |
927 | 3. `post(shared, prep_res, exec_res)`
928 | - **Postprocess and write data** back to `shared`.
929 | - Examples: *update DB, change states, log results*.
930 | - **Decide the next action** by returning a *string* (`action = "default"` if *None*).
931 |
932 | > **Why 3 steps?** To enforce the principle of *separation of concerns*. Data storage and data processing are handled separately.
933 | >
934 | > All steps are *optional*. E.g., you can only implement `prep` and `post` if you just need to process data.
935 | {: .note }
936 |
937 | ### Fault Tolerance & Retries
938 |
939 | You can **retry** `exec()` if it raises an exception via two parameters when defining the Node:
940 |
941 | - `max_retries` (int): Max times to run `exec()`. The default is `1` (**no** retry).
942 | - `wait` (int): The time to wait (in **seconds**) before next retry. By default, `wait=0` (no waiting).
943 | `wait` is helpful when you encounter rate-limits or quota errors from your LLM provider and need to back off.
944 |
945 | ```python
946 | my_node = SummarizeFile(max_retries=3, wait=10)
947 | ```
948 |
949 | When an exception occurs in `exec()`, the Node automatically retries until:
950 |
951 | - It either succeeds, or
952 | - The Node has retried `max_retries - 1` times already and fails on the last attempt.
953 |
954 | You can get the current retry times (0-based) from `self.cur_retry`.
955 |
956 | ```python
957 | class RetryNode(Node):
958 | def exec(self, prep_res):
959 | print(f"Retry {self.cur_retry} times")
960 | raise Exception("Failed")
961 | ```
962 |
963 | ### Graceful Fallback
964 |
965 | To **gracefully handle** the exception (after all retries) rather than raising it, override:
966 |
967 | ```python
968 | def exec_fallback(self, prep_res, exc):
969 | raise exc
970 | ```
971 |
972 | By default, it just re-raises the exception. But you can return a fallback result instead, which becomes the `exec_res` passed to `post()`.
973 |
974 | ### Example: Summarize file
975 |
976 | ```python
977 | class SummarizeFile(Node):
978 | def prep(self, shared):
979 | return shared["data"]
980 |
981 | def exec(self, prep_res):
982 | if not prep_res:
983 | return "Empty file content"
984 | prompt = f"Summarize this text in 10 words: {prep_res}"
985 | summary = call_llm(prompt) # might fail
986 | return summary
987 |
988 | def exec_fallback(self, prep_res, exc):
989 | # Provide a simple fallback instead of crashing
990 | return "There was an error processing your request."
991 |
992 | def post(self, shared, prep_res, exec_res):
993 | shared["summary"] = exec_res
994 | # Return "default" by not returning
995 |
996 | summarize_node = SummarizeFile(max_retries=3)
997 |
998 | # node.run() calls prep->exec->post
999 | # If exec() fails, it retries up to 3 times before calling exec_fallback()
1000 | action_result = summarize_node.run(shared)
1001 |
1002 | print("Action returned:", action_result) # "default"
1003 | print("Summary stored:", shared["summary"])
1004 | ```
1005 |
1006 | ================================================
1007 | File: docs/core_abstraction/parallel.md
1008 | ================================================
1009 | ---
1010 | layout: default
1011 | title: "(Advanced) Parallel"
1012 | parent: "Core Abstraction"
1013 | nav_order: 6
1014 | ---
1015 |
1016 | # (Advanced) Parallel
1017 |
1018 | **Parallel** Nodes and Flows let you run multiple **Async** Nodes and Flows **concurrently**—for example, summarizing multiple texts at once. This can improve performance by overlapping I/O and compute.
1019 |
1020 | > Because of Python’s GIL, parallel nodes and flows can’t truly parallelize CPU-bound tasks (e.g., heavy numerical computations). However, they excel at overlapping I/O-bound work—like LLM calls, database queries, API requests, or file I/O.
1021 | {: .warning }
1022 |
1023 | > - **Ensure Tasks Are Independent**: If each item depends on the output of a previous item, **do not** parallelize.
1024 | >
1025 | > - **Beware of Rate Limits**: Parallel calls can **quickly** trigger rate limits on LLM services. You may need a **throttling** mechanism (e.g., semaphores or sleep intervals).
1026 | >
1027 | > - **Consider Single-Node Batch APIs**: Some LLMs offer a **batch inference** API where you can send multiple prompts in a single call. This is more complex to implement but can be more efficient than launching many parallel requests and mitigates rate limits.
1028 | {: .best-practice }
1029 |
1030 | ## AsyncParallelBatchNode
1031 |
1032 | Like **AsyncBatchNode**, but runs `exec_async()` in **parallel**:
1033 |
1034 | ```python
1035 | class ParallelSummaries(AsyncParallelBatchNode):
1036 | async def prep_async(self, shared):
1037 | # e.g., multiple texts
1038 | return shared["texts"]
1039 |
1040 | async def exec_async(self, text):
1041 | prompt = f"Summarize: {text}"
1042 | return await call_llm_async(prompt)
1043 |
1044 | async def post_async(self, shared, prep_res, exec_res_list):
1045 | shared["summary"] = "\n\n".join(exec_res_list)
1046 | return "default"
1047 |
1048 | node = ParallelSummaries()
1049 | flow = AsyncFlow(start=node)
1050 | ```
1051 |
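If parallel calls run into the rate limits mentioned above, one option (a sketch, assuming the same `AsyncParallelBatchNode` and `call_llm_async` as in the example) is to cap concurrency with an `asyncio.Semaphore` inside `exec_async()`:

```python
import asyncio

class ThrottledSummaries(AsyncParallelBatchNode):
    async def prep_async(self, shared):
        # Hypothetical cap: at most 5 LLM calls in flight at once
        self.semaphore = asyncio.Semaphore(5)
        return shared["texts"]

    async def exec_async(self, text):
        async with self.semaphore:  # wait for a free slot before calling the LLM
            return await call_llm_async(f"Summarize: {text}")

    async def post_async(self, shared, prep_res, exec_res_list):
        shared["summary"] = "\n\n".join(exec_res_list)
        return "default"
```
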
1052 | ## AsyncParallelBatchFlow
1053 |
1054 | Parallel version of **BatchFlow**. Each iteration of the sub-flow runs **concurrently** using different parameters:
1055 |
1056 | ```python
1057 | class SummarizeMultipleFiles(AsyncParallelBatchFlow):
1058 | async def prep_async(self, shared):
1059 | return [{"filename": f} for f in shared["files"]]
1060 |
1061 | sub_flow = AsyncFlow(start=LoadAndSummarizeFile())
1062 | parallel_flow = SummarizeMultipleFiles(start=sub_flow)
1063 | await parallel_flow.run_async(shared)
1064 | ```
1065 |
1066 | ================================================
1067 | File: docs/design_pattern/agent.md
1068 | ================================================
1069 | ---
1070 | layout: default
1071 | title: "Agent"
1072 | parent: "Design Pattern"
1073 | nav_order: 1
1074 | ---
1075 |
1076 | # Agent
1077 |
1078 | Agent is a powerful design pattern in which nodes can take dynamic actions based on the context.
1079 |
1080 |
1081 |
1082 |
1083 |
1084 | ## Implement Agent with Graph
1085 |
1086 | 1. **Context and Action:** Implement nodes that supply context and perform actions.
1087 | 2. **Branching:** Use branching to connect each action node to an agent node. Use action to allow the agent to direct the [flow](../core_abstraction/flow.md) between nodes—and potentially loop back for multi-step.
1088 | 3. **Agent Node:** Provide a prompt to decide action—for example:
1089 |
1090 | ```python
1091 | f"""
1092 | ### CONTEXT
1093 | Task: {task_description}
1094 | Previous Actions: {previous_actions}
1095 | Current State: {current_state}
1096 |
1097 | ### ACTION SPACE
1098 | [1] search
1099 | Description: Use web search to get results
1100 | Parameters:
1101 | - query (str): What to search for
1102 |
1103 | [2] answer
1104 | Description: Conclude based on the results
1105 | Parameters:
1106 | - result (str): Final answer to provide
1107 |
1108 | ### NEXT ACTION
1109 | Decide the next action based on the current context and available action space.
1110 | Return your response in the following format:
1111 |
1112 | ```yaml
1113 | thinking: |
1114 |     <your step-by-step reasoning>
1115 | action: <action name>
1116 | parameters:
1117 |     <parameter name>: <parameter value>
1118 | ```"""
1119 | ```
1120 |
1121 | The core of building **high-performance** and **reliable** agents boils down to:
1122 |
1123 | 1. **Context Management:** Provide *relevant, minimal context.* For example, rather than including an entire chat history, retrieve the most relevant via [RAG](./rag.md). Even with larger context windows, LLMs still fall victim to ["lost in the middle"](https://arxiv.org/abs/2307.03172), overlooking mid-prompt content.
1124 |
1125 | 2. **Action Space:** Provide *a well-structured and unambiguous* set of actions—avoiding overlap like separate `read_databases` or `read_csvs`. Instead, import CSVs into the database.
1126 |
1127 | ## Example Good Action Design
1128 |
1129 | - **Incremental:** Feed content in manageable chunks (500 lines or 1 page) instead of all at once.
1130 |
1131 | - **Overview-zoom-in:** First provide high-level structure (table of contents, summary), then allow drilling into details (raw texts).
1132 |
1133 | - **Parameterized/Programmable:** Instead of fixed actions, enable parameterized (columns to select) or programmable (SQL queries) actions, for example, to read CSV files.
1134 |
1135 | - **Backtracking:** Let the agent undo the last step instead of restarting entirely, preserving progress when encountering errors or dead ends.
1136 |
1137 | ## Example: Search Agent
1138 |
1139 | This agent:
1140 | 1. Decides whether to search or answer
1141 | 2. If it searches, loops back to decide whether more searching is needed
1142 | 3. Answers when enough context has been gathered
1143 |
1144 | ```python
1145 | class DecideAction(Node):
1146 | def prep(self, shared):
1147 | context = shared.get("context", "No previous search")
1148 | query = shared["query"]
1149 | return query, context
1150 |
1151 | def exec(self, inputs):
1152 | query, context = inputs
1153 | prompt = f"""
1154 | Given input: {query}
1155 | Previous search results: {context}
1156 | Should I: 1) Search web for more info 2) Answer with current knowledge
1157 | Output in yaml:
1158 | ```yaml
1159 | action: search/answer
1160 | reason: why this action
1161 | search_term: search phrase if action is search
1162 | ```"""
1163 | resp = call_llm(prompt)
1164 | yaml_str = resp.split("```yaml")[1].split("```")[0].strip()
1165 | result = yaml.safe_load(yaml_str)
1166 |
1167 | assert isinstance(result, dict)
1168 | assert "action" in result
1169 | assert "reason" in result
1170 | assert result["action"] in ["search", "answer"]
1171 | if result["action"] == "search":
1172 | assert "search_term" in result
1173 |
1174 | return result
1175 |
1176 | def post(self, shared, prep_res, exec_res):
1177 | if exec_res["action"] == "search":
1178 | shared["search_term"] = exec_res["search_term"]
1179 | return exec_res["action"]
1180 |
1181 | class SearchWeb(Node):
1182 | def prep(self, shared):
1183 | return shared["search_term"]
1184 |
1185 | def exec(self, search_term):
1186 | return search_web(search_term)
1187 |
1188 | def post(self, shared, prep_res, exec_res):
1189 | prev_searches = shared.get("context", [])
1190 | shared["context"] = prev_searches + [
1191 | {"term": shared["search_term"], "result": exec_res}
1192 | ]
1193 | return "decide"
1194 |
1195 | class DirectAnswer(Node):
1196 | def prep(self, shared):
1197 | return shared["query"], shared.get("context", "")
1198 |
1199 | def exec(self, inputs):
1200 | query, context = inputs
1201 | return call_llm(f"Context: {context}\nAnswer: {query}")
1202 |
1203 | def post(self, shared, prep_res, exec_res):
1204 | print(f"Answer: {exec_res}")
1205 | shared["answer"] = exec_res
1206 |
1207 | # Connect nodes
1208 | decide = DecideAction()
1209 | search = SearchWeb()
1210 | answer = DirectAnswer()
1211 |
1212 | decide - "search" >> search
1213 | decide - "answer" >> answer
1214 | search - "decide" >> decide # Loop back
1215 |
1216 | flow = Flow(start=decide)
1217 | flow.run({"query": "Who won the Nobel Prize in Physics 2024?"})
1218 | ```
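
The example above assumes a `search_web` utility (see [Web Search](../utility_function/websearch.md)). A minimal sketch using the `duckduckgo_search` package; the package and its result fields are assumptions here, so adapt it to whatever search provider you actually use:

```python
# Hypothetical utils/search_web.py
from duckduckgo_search import DDGS

def search_web(search_term):
    results = DDGS().text(search_term, max_results=5)
    # Flatten the results into a plain-text block for the LLM prompt.
    return "\n".join(f"{r['title']}: {r['body']}" for r in results)
```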
1219 |
1220 | ================================================
1221 | File: docs/design_pattern/mapreduce.md
1222 | ================================================
1223 | ---
1224 | layout: default
1225 | title: "Map Reduce"
1226 | parent: "Design Pattern"
1227 | nav_order: 4
1228 | ---
1229 |
1230 | # Map Reduce
1231 |
1232 | MapReduce is a design pattern suitable when you have either:
1233 | - Large input data (e.g., multiple files to process), or
1234 | - Large output data (e.g., multiple forms to fill)
1235 |
1236 | and there is a logical way to break the task into smaller, ideally independent parts.
1237 |
1238 |
1239 |

1240 |
1241 |
1242 | You first break down the task using [BatchNode](../core_abstraction/batch.md) in the map phase, followed by aggregation in the reduce phase.
1243 |
1244 | ### Example: Document Summarization
1245 |
1246 | ```python
1247 | class SummarizeAllFiles(BatchNode):
1248 | def prep(self, shared):
1249 | files_dict = shared["files"] # e.g. 10 files
1250 | return list(files_dict.items()) # [("file1.txt", "aaa..."), ("file2.txt", "bbb..."), ...]
1251 |
1252 | def exec(self, one_file):
1253 | filename, file_content = one_file
1254 | summary_text = call_llm(f"Summarize the following file:\n{file_content}")
1255 | return (filename, summary_text)
1256 |
1257 | def post(self, shared, prep_res, exec_res_list):
1258 | shared["file_summaries"] = dict(exec_res_list)
1259 |
1260 | class CombineSummaries(Node):
1261 | def prep(self, shared):
1262 | return shared["file_summaries"]
1263 |
1264 | def exec(self, file_summaries):
1265 | # format as: "File1: summary\nFile2: summary...\n"
1266 | text_list = []
1267 | for fname, summ in file_summaries.items():
1268 | text_list.append(f"{fname} summary:\n{summ}\n")
1269 | big_text = "\n---\n".join(text_list)
1270 |
1271 | return call_llm(f"Combine these file summaries into one final summary:\n{big_text}")
1272 |
1273 | def post(self, shared, prep_res, final_summary):
1274 | shared["all_files_summary"] = final_summary
1275 |
1276 | batch_node = SummarizeAllFiles()
1277 | combine_node = CombineSummaries()
1278 | batch_node >> combine_node
1279 |
1280 | flow = Flow(start=batch_node)
1281 |
1282 | shared = {
1283 | "files": {
1284 | "file1.txt": "Alice was beginning to get very tired of sitting by her sister...",
1285 | "file2.txt": "Some other interesting text ...",
1286 | # ...
1287 | }
1288 | }
1289 | flow.run(shared)
1290 | print("Individual Summaries:", shared["file_summaries"])
1291 | print("\nFinal Summary:\n", shared["all_files_summary"])
1292 | ```
1293 |
1294 | ================================================
1295 | File: docs/design_pattern/rag.md
1296 | ================================================
1297 | ---
1298 | layout: default
1299 | title: "RAG"
1300 | parent: "Design Pattern"
1301 | nav_order: 3
1302 | ---
1303 |
1304 | # RAG (Retrieval Augmented Generation)
1305 |
1306 | For certain LLM tasks like answering questions, providing relevant context is essential. One common architecture is a **two-stage** RAG pipeline:
1307 |
1308 |
1309 |

1310 |
1311 |
1312 | 1. **Offline stage**: Preprocess and index documents ("building the index").
1313 | 2. **Online stage**: Given a question, generate answers by retrieving the most relevant context.
1314 |
1315 | ---
1316 | ## Stage 1: Offline Indexing
1317 |
1318 | We create three Nodes:
1319 | 1. `ChunkDocs` – [chunks](../utility_function/chunking.md) raw text.
1320 | 2. `EmbedDocs` – [embeds](../utility_function/embedding.md) each chunk.
1321 | 3. `StoreIndex` – stores embeddings into a [vector database](../utility_function/vector.md).
1322 |
1323 | ```python
1324 | class ChunkDocs(BatchNode):
1325 | def prep(self, shared):
1326 | # A list of file paths in shared["files"]. We process each file.
1327 | return shared["files"]
1328 |
1329 | def exec(self, filepath):
1330 | # read file content. In real usage, do error handling.
1331 | with open(filepath, "r", encoding="utf-8") as f:
1332 | text = f.read()
1333 | # chunk by 100 chars each
1334 | chunks = []
1335 | size = 100
1336 | for i in range(0, len(text), size):
1337 | chunks.append(text[i : i + size])
1338 | return chunks
1339 |
1340 | def post(self, shared, prep_res, exec_res_list):
1341 | # exec_res_list is a list of chunk-lists, one per file.
1342 | # flatten them all into a single list of chunks.
1343 | all_chunks = []
1344 | for chunk_list in exec_res_list:
1345 | all_chunks.extend(chunk_list)
1346 | shared["all_chunks"] = all_chunks
1347 |
1348 | class EmbedDocs(BatchNode):
1349 | def prep(self, shared):
1350 | return shared["all_chunks"]
1351 |
1352 | def exec(self, chunk):
1353 | return get_embedding(chunk)
1354 |
1355 | def post(self, shared, prep_res, exec_res_list):
1356 | # Store the list of embeddings.
1357 | shared["all_embeds"] = exec_res_list
1358 | print(f"Total embeddings: {len(exec_res_list)}")
1359 |
1360 | class StoreIndex(Node):
1361 | def prep(self, shared):
1362 | # We'll read all embeds from shared.
1363 | return shared["all_embeds"]
1364 |
1365 | def exec(self, all_embeds):
1366 | # Create a vector index (faiss or other DB in real usage).
1367 | index = create_index(all_embeds)
1368 | return index
1369 |
1370 | def post(self, shared, prep_res, index):
1371 | shared["index"] = index
1372 |
1373 | # Wire them in sequence
1374 | chunk_node = ChunkDocs()
1375 | embed_node = EmbedDocs()
1376 | store_node = StoreIndex()
1377 |
1378 | chunk_node >> embed_node >> store_node
1379 |
1380 | OfflineFlow = Flow(start=chunk_node)
1381 | ```
1382 |
1383 | Usage example:
1384 |
1385 | ```python
1386 | shared = {
1387 | "files": ["doc1.txt", "doc2.txt"], # any text files
1388 | }
1389 | OfflineFlow.run(shared)
1390 | ```
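
The offline and online snippets assume `get_embedding`, `create_index`, and `search_index` utilities (see the [embedding](../utility_function/embedding.md) and [vector database](../utility_function/vector.md) docs). A minimal in-memory sketch, using OpenAI embeddings and brute-force NumPy search instead of a real vector database, might look like this:

```python
# Hypothetical in-memory helpers for the RAG example above.
import os
import numpy as np
from openai import OpenAI

def get_embedding(text):
    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))
    r = client.embeddings.create(model="text-embedding-3-small", input=text)
    return np.array(r.data[0].embedding, dtype=np.float32)

def create_index(embeddings):
    # The "index" is just a matrix of embeddings; swap in faiss or a vector DB for real usage.
    return np.stack(embeddings)

def search_index(index, query_embedding, top_k=1):
    # Brute-force similarity search; returns (ids, scores) shaped like [[...]] to match the example.
    scores = index @ query_embedding
    ids = np.argsort(-scores)[:top_k]
    return [ids.tolist()], [scores[ids].tolist()]
```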
1391 |
1392 | ---
1393 | ## Stage 2: Online Query & Answer
1394 |
1395 | We have 3 nodes:
1396 | 1. `EmbedQuery` – embeds the user’s question.
1397 | 2. `RetrieveDocs` – retrieves the top chunk from the index.
1398 | 3. `GenerateAnswer` – calls the LLM with the question + chunk to produce the final answer.
1399 |
1400 | ```python
1401 | class EmbedQuery(Node):
1402 | def prep(self, shared):
1403 | return shared["question"]
1404 |
1405 | def exec(self, question):
1406 | return get_embedding(question)
1407 |
1408 | def post(self, shared, prep_res, q_emb):
1409 | shared["q_emb"] = q_emb
1410 |
1411 | class RetrieveDocs(Node):
1412 | def prep(self, shared):
1413 | # We'll need the query embedding, plus the offline index/chunks
1414 | return shared["q_emb"], shared["index"], shared["all_chunks"]
1415 |
1416 | def exec(self, inputs):
1417 | q_emb, index, chunks = inputs
1418 | I, D = search_index(index, q_emb, top_k=1)
1419 | best_id = I[0][0]
1420 | relevant_chunk = chunks[best_id]
1421 | return relevant_chunk
1422 |
1423 | def post(self, shared, prep_res, relevant_chunk):
1424 | shared["retrieved_chunk"] = relevant_chunk
1425 | print("Retrieved chunk:", relevant_chunk[:60], "...")
1426 |
1427 | class GenerateAnswer(Node):
1428 | def prep(self, shared):
1429 | return shared["question"], shared["retrieved_chunk"]
1430 |
1431 | def exec(self, inputs):
1432 | question, chunk = inputs
1433 | prompt = f"Question: {question}\nContext: {chunk}\nAnswer:"
1434 | return call_llm(prompt)
1435 |
1436 | def post(self, shared, prep_res, answer):
1437 | shared["answer"] = answer
1438 | print("Answer:", answer)
1439 |
1440 | embed_qnode = EmbedQuery()
1441 | retrieve_node = RetrieveDocs()
1442 | generate_node = GenerateAnswer()
1443 |
1444 | embed_qnode >> retrieve_node >> generate_node
1445 | OnlineFlow = Flow(start=embed_qnode)
1446 | ```
1447 |
1448 | Usage example:
1449 |
1450 | ```python
1451 | # Suppose we already ran OfflineFlow and have:
1452 | # shared["all_chunks"], shared["index"], etc.
1453 | shared["question"] = "Why do people like cats?"
1454 |
1455 | OnlineFlow.run(shared)
1456 | # final answer in shared["answer"]
1457 | ```
1458 |
1459 | ================================================
1460 | File: docs/design_pattern/structure.md
1461 | ================================================
1462 | ---
1463 | layout: default
1464 | title: "Structured Output"
1465 | parent: "Design Pattern"
1466 | nav_order: 5
1467 | ---
1468 |
1469 | # Structured Output
1470 |
1471 | In many use cases, you may want the LLM to output a specific structure, such as a list or a dictionary with predefined keys.
1472 |
1473 | There are several approaches to achieve a structured output:
1474 | - **Prompting** the LLM to strictly return a defined structure.
1475 | - Using LLMs that natively support **schema enforcement**.
1476 | - **Post-processing** the LLM's response to extract structured content.
1477 |
1478 | In practice, **Prompting** is simple and reliable for modern LLMs.
1479 |
1480 | ### Example Use Cases
1481 |
1482 | - Extracting Key Information
1483 |
1484 | ```yaml
1485 | product:
1486 | name: Widget Pro
1487 | price: 199.99
1488 | description: |
1489 | A high-quality widget designed for professionals.
1490 | Recommended for advanced users.
1491 | ```
1492 |
1493 | - Summarizing Documents into Bullet Points
1494 |
1495 | ```yaml
1496 | summary:
1497 | - This product is easy to use.
1498 | - It is cost-effective.
1499 | - Suitable for all skill levels.
1500 | ```
1501 |
1502 | - Generating Configuration Files
1503 |
1504 | ```yaml
1505 | server:
1506 | host: 127.0.0.1
1507 | port: 8080
1508 | ssl: true
1509 | ```
1510 |
1511 | ## Prompt Engineering
1512 |
1513 | When prompting the LLM to produce **structured** output:
1514 | 1. **Wrap** the structure in code fences (e.g., `yaml`).
1515 | 2. **Validate** that all required fields exist (and let the `Node` retry mechanism handle failures).
1516 |
1517 | ### Example Text Summarization
1518 |
1519 | ```python
1520 | class SummarizeNode(Node):
1521 | def exec(self, prep_res):
1522 | # Suppose `prep_res` is the text to summarize.
1523 | prompt = f"""
1524 | Please summarize the following text as YAML, with exactly 3 bullet points
1525 |
1526 | {prep_res}
1527 |
1528 | Now, output:
1529 | ```yaml
1530 | summary:
1531 | - bullet 1
1532 | - bullet 2
1533 | - bullet 3
1534 | ```"""
1535 | response = call_llm(prompt)
1536 | yaml_str = response.split("```yaml")[1].split("```")[0].strip()
1537 |
1538 | import yaml
1539 | structured_result = yaml.safe_load(yaml_str)
1540 |
1541 | assert "summary" in structured_result
1542 | assert isinstance(structured_result["summary"], list)
1543 |
1544 | return structured_result
1545 | ```
1546 |
1547 | > Besides using `assert` statements, another popular way to validate schemas is [Pydantic](https://github.com/pydantic/pydantic)
1548 | {: .note }
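
For instance, a minimal Pydantic-based check of the same structure (a sketch, not part of the framework) could replace the `assert` statements:

```python
# Hypothetical Pydantic validation of the summary structure.
from typing import List
import yaml
from pydantic import BaseModel

class Summary(BaseModel):
    summary: List[str]

def parse_summary(yaml_str: str) -> Summary:
    data = yaml.safe_load(yaml_str)
    # Raises pydantic.ValidationError if "summary" is missing or not a list,
    # which the Node's built-in retry mechanism can then handle.
    return Summary(**data)
```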
1549 |
1550 | ### Why YAML instead of JSON?
1551 |
1552 | Current LLMs struggle with escaping. YAML is easier for strings because multi-line text doesn't need quoting or escape sequences.
1553 |
1554 | **In JSON**
1555 |
1556 | ```json
1557 | {
1558 | "dialogue": "Alice said: \"Hello Bob.\\nHow are you?\\nI am good.\""
1559 | }
1560 | ```
1561 |
1562 | - Every double quote inside the string must be escaped with `\"`.
1563 | - Each newline in the dialogue must be represented as `\n`.
1564 |
1565 | **In YAML**
1566 |
1567 | ```yaml
1568 | dialogue: |
1569 | Alice said: "Hello Bob.
1570 | How are you?
1571 | I am good."
1572 | ```
1573 |
1574 | - No need to escape interior quotes—just place the entire text under a block literal (`|`).
1575 | - Newlines are naturally preserved without needing `\n`.
1576 |
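
To see the difference in practice, here is a quick sketch of loading the YAML block above:

```python
import yaml

yaml_text = '''dialogue: |
  Alice said: "Hello Bob.
  How are you?
  I am good."
'''

data = yaml.safe_load(yaml_text)
print(data["dialogue"])  # interior quotes and newlines come through without any escaping
```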
1577 | ================================================
1578 | File: docs/design_pattern/workflow.md
1579 | ================================================
1580 | ---
1581 | layout: default
1582 | title: "Workflow"
1583 | parent: "Design Pattern"
1584 | nav_order: 2
1585 | ---
1586 |
1587 | # Workflow
1588 |
1589 | Many real-world tasks are too complex for one LLM call. The solution is **Task Decomposition**: break them into a [chain](../core_abstraction/flow.md) of multiple Nodes.
1590 |
1591 |
1592 |

1593 |
1594 |
1595 | > - You don't want to make each task **too coarse**, because it may be *too complex for one LLM call*.
1596 | > - You don't want to make each task **too granular**, because then *the LLM call doesn't have enough context* and results are *not consistent across nodes*.
1597 | >
1598 | > You usually need multiple *iterations* to find the *sweet spot*. If the task has too many *edge cases*, consider using [Agents](./agent.md).
1599 | {: .best-practice }
1600 |
1601 | ### Example: Article Writing
1602 |
1603 | ```python
1604 | class GenerateOutline(Node):
1605 | def prep(self, shared): return shared["topic"]
1606 | def exec(self, topic): return call_llm(f"Create a detailed outline for an article about {topic}")
1607 | def post(self, shared, prep_res, exec_res): shared["outline"] = exec_res
1608 |
1609 | class WriteSection(Node):
1610 | def prep(self, shared): return shared["outline"]
1611 | def exec(self, outline): return call_llm(f"Write content based on this outline: {outline}")
1612 | def post(self, shared, prep_res, exec_res): shared["draft"] = exec_res
1613 |
1614 | class ReviewAndRefine(Node):
1615 | def prep(self, shared): return shared["draft"]
1616 | def exec(self, draft): return call_llm(f"Review and improve this draft: {draft}")
1617 | def post(self, shared, prep_res, exec_res): shared["final_article"] = exec_res
1618 |
1619 | # Connect nodes
1620 | outline = GenerateOutline()
1621 | write = WriteSection()
1622 | review = ReviewAndRefine()
1623 |
1624 | outline >> write >> review
1625 |
1626 | # Create and run flow
1627 | writing_flow = Flow(start=outline)
1628 | shared = {"topic": "AI Safety"}
1629 | writing_flow.run(shared)
1630 | ```
1631 |
1632 | For *dynamic cases*, consider using [Agents](./agent.md).
1633 |
1634 | ================================================
1635 | File: docs/utility_function/llm.md
1636 | ================================================
1637 | ---
1638 | layout: default
1639 | title: "LLM Wrapper"
1640 | parent: "Utility Function"
1641 | nav_order: 1
1642 | ---
1643 |
1644 | # LLM Wrappers
1645 |
1646 | Check out libraries like [litellm](https://github.com/BerriAI/litellm).
1647 | Here, we provide some minimal example implementations:
1648 |
1649 | 1. OpenAI
1650 | ```python
1651 | def call_llm(prompt):
1652 | from openai import OpenAI
1653 | client = OpenAI(api_key="YOUR_API_KEY_HERE")
1654 | r = client.chat.completions.create(
1655 | model="gpt-4o",
1656 | messages=[{"role": "user", "content": prompt}]
1657 | )
1658 | return r.choices[0].message.content
1659 |
1660 | # Example usage
1661 | call_llm("How are you?")
1662 | ```
1663 | > Store the API key in an environment variable like OPENAI_API_KEY for security.
1664 | {: .best-practice }
1665 |
1666 | 2. Claude (Anthropic)
1667 | ```python
1668 | def call_llm(prompt):
1669 | from anthropic import Anthropic
1670 | client = Anthropic(api_key="YOUR_API_KEY_HERE")
1671 | r = client.messages.create(
1672 | model="claude-sonnet-4-0",
1673 | messages=[
1674 | {"role": "user", "content": prompt}
1675 | ]
1676 | )
1677 | return r.content[0].text
1678 | ```
1679 |
1680 | 3. Google (Gemini via Google AI Studio)
1681 | ```python
1682 | def call_llm(prompt):
1683 | from google import genai
1684 | client = genai.Client(api_key='GEMINI_API_KEY')
1685 | response = client.models.generate_content(
1686 | model='gemini-2.5-pro',
1687 | contents=prompt
1688 | )
1689 | return response.text
1690 | ```
1691 |
1692 | 4. Azure (Azure OpenAI)
1693 | ```python
1694 | def call_llm(prompt):
1695 | from openai import AzureOpenAI
1696 | client = AzureOpenAI(
1697 | azure_endpoint="https://.openai.azure.com/",
1698 | api_key="YOUR_API_KEY_HERE",
1699 | api_version="2023-05-15"
1700 | )
1701 | r = client.chat.completions.create(
1702 | model="",
1703 | messages=[{"role": "user", "content": prompt}]
1704 | )
1705 | return r.choices[0].message.content
1706 | ```
1707 |
1708 | 5. Ollama (Local LLM)
1709 | ```python
1710 | def call_llm(prompt):
1711 | from ollama import chat
1712 | response = chat(
1713 | model="llama2",
1714 | messages=[{"role": "user", "content": prompt}]
1715 | )
1716 | return response.message.content
1717 | ```
1718 |
1719 | ## Improvements
1720 | Feel free to enhance your `call_llm` function as needed. Here are examples:
1721 |
1722 | - Handle chat history:
1723 |
1724 | ```python
1725 | def call_llm(messages):
1726 | from openai import OpenAI
1727 | client = OpenAI(api_key="YOUR_API_KEY_HERE")
1728 | r = client.chat.completions.create(
1729 | model="gpt-4o",
1730 | messages=messages
1731 | )
1732 | return r.choices[0].message.content
1733 | ```
1734 |
1735 | - Add in-memory caching
1736 |
1737 | ```python
1738 | from functools import lru_cache
1739 |
1740 | @lru_cache(maxsize=1000)
1741 | def call_llm(prompt):
1742 | # Your implementation here
1743 | pass
1744 | ```
1745 |
1746 | > ⚠️ Caching conflicts with Node retries: a retry would return the same cached result instead of making a fresh call.
1747 | >
1748 | > To address this, you could use cached results only if not retried.
1749 | {: .warning }
1750 |
1751 |
1752 | ```python
1753 | from functools import lru_cache
1754 |
1755 | @lru_cache(maxsize=1000)
1756 | def cached_call(prompt):
1757 | pass
1758 |
1759 | def call_llm(prompt, use_cache):
1760 | if use_cache:
1761 | return cached_call(prompt)
1762 | # Call the underlying function directly
1763 | return cached_call.__wrapped__(prompt)
1764 |
1765 | class SummarizeNode(Node):
1766 | def exec(self, text):
1767 | return call_llm(f"Summarize: {text}", self.cur_retry==0)
1768 | ```
1769 |
1770 | - Enable logging:
1771 |
1772 | ```python
1773 | def call_llm(prompt):
1774 | import logging
1775 | logging.info(f"Prompt: {prompt}")
1776 | response = ... # Your implementation here
1777 | logging.info(f"Response: {response}")
1778 | return response
1779 | ```
--------------------------------------------------------------------------------
/.cursorrules:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | title: "Agentic Coding"
4 | ---
5 |
6 | # Agentic Coding: Humans Design, Agents code!
7 |
8 | > If you are an AI agent involved in building LLM Systems, read this guide **VERY, VERY** carefully! This is the most important chapter in the entire document. Throughout development, you should always (1) start with a small and simple solution, (2) design at a high level (`docs/design.md`) before implementation, and (3) frequently ask humans for feedback and clarification.
9 | {: .warning }
10 |
11 | ## Agentic Coding Steps
12 |
13 | Agentic Coding should be a collaboration between Human System Design and Agent Implementation:
14 |
15 | | Steps | Human | AI | Comment |
16 | |:-----------------------|:----------:|:---------:|:------------------------------------------------------------------------|
17 | | 1. Requirements | ★★★ High | ★☆☆ Low | Humans understand the requirements and context. |
18 | | 2. Flow | ★★☆ Medium | ★★☆ Medium | Humans specify the high-level design, and the AI fills in the details. |
19 | | 3. Utilities | ★★☆ Medium | ★★☆ Medium | Humans provide available external APIs and integrations, and the AI helps with implementation. |
20 | | 4. Data | ★☆☆ Low | ★★★ High | AI designs the data schema, and humans verify. |
21 | | 5. Node | ★☆☆ Low | ★★★ High | The AI helps design the node based on the flow. |
22 | | 6. Implementation | ★☆☆ Low | ★★★ High | The AI implements the flow based on the design. |
23 | | 7. Optimization | ★★☆ Medium | ★★☆ Medium | Humans evaluate the results, and the AI helps optimize. |
24 | | 8. Reliability | ★☆☆ Low | ★★★ High | The AI writes test cases and addresses corner cases. |
25 |
26 | 1. **Requirements**: Clarify the requirements for your project, and evaluate whether an AI system is a good fit.
27 | - Understand AI systems' strengths and limitations:
28 | - **Good for**: Routine tasks requiring common sense (filling forms, replying to emails)
29 | - **Good for**: Creative tasks with well-defined inputs (building slides, writing SQL)
30 | - **Not good for**: Ambiguous problems requiring complex decision-making (business strategy, startup planning)
31 | - **Keep It User-Centric:** Explain the "problem" from the user's perspective rather than just listing features.
32 | - **Balance complexity vs. impact**: Aim to deliver the highest value features with minimal complexity early.
33 |
34 | 2. **Flow Design**: Outline at a high level how your AI system orchestrates nodes.
35 | - Identify applicable design patterns (e.g., [Map Reduce](./design_pattern/mapreduce.md), [Agent](./design_pattern/agent.md), [RAG](./design_pattern/rag.md)).
36 | - For each node in the flow, start with a high-level one-line description of what it does.
37 | - If using **Map Reduce**, specify how to map (what to split) and how to reduce (how to combine).
38 | - If using **Agent**, specify what are the inputs (context) and what are the possible actions.
39 | - If using **RAG**, specify what to embed, noting that there's usually both offline (indexing) and online (retrieval) workflows.
40 | - Outline the flow and draw it in a mermaid diagram. For example:
41 | ```mermaid
42 | flowchart LR
43 | start[Start] --> batch[Batch]
44 | batch --> check[Check]
45 | check -->|OK| process
46 | check -->|Error| fix[Fix]
47 | fix --> check
48 |
49 | subgraph process[Process]
50 | step1[Step 1] --> step2[Step 2]
51 | end
52 |
53 | process --> endNode[End]
54 | ```
55 | - > **If Humans can't specify the flow, AI Agents can't automate it!** Before building an LLM system, thoroughly understand the problem and potential solution by manually solving example inputs to develop intuition.
56 | {: .best-practice }
57 |
58 | 3. **Utilities**: Based on the Flow Design, identify and implement necessary utility functions.
59 | - Think of your AI system as the brain. It needs a body—these *external utility functions*—to interact with the real world:
60 |
61 |
62 | - Reading inputs (e.g., retrieving Slack messages, reading emails)
63 | - Writing outputs (e.g., generating reports, sending emails)
64 | - Using external tools (e.g., calling LLMs, searching the web)
65 |      - **NOTE**: *LLM-based tasks* (e.g., summarizing text, analyzing sentiment) are **NOT** utility functions; rather, they are *core functions* internal to the AI system.
66 | - For each utility function, implement it and write a simple test.
67 | - Document their input/output, as well as why they are necessary. For example:
68 | - `name`: `get_embedding` (`utils/get_embedding.py`)
69 | - `input`: `str`
70 | - `output`: a vector of 3072 floats
71 | - `necessity`: Used by the second node to embed text
72 | - Example utility implementation:
73 | ```python
74 | # utils/call_llm.py
75 | from openai import OpenAI
76 |
77 | def call_llm(prompt):
78 | client = OpenAI(api_key="YOUR_API_KEY_HERE")
79 | r = client.chat.completions.create(
80 | model="gpt-4o",
81 | messages=[{"role": "user", "content": prompt}]
82 | )
83 | return r.choices[0].message.content
84 |
85 | if __name__ == "__main__":
86 | prompt = "What is the meaning of life?"
87 | print(call_llm(prompt))
88 | ```
89 | - > **Sometimes, design Utilities before Flow:** For example, for an LLM project to automate a legacy system, the bottleneck will likely be the available interface to that system. Start by designing the hardest utilities for interfacing, and then build the flow around them.
90 | {: .best-practice }
91 | - > **Avoid Exception Handling in Utilities**: If a utility function is called from a Node's `exec()` method, avoid using `try...except` blocks within the utility. Let the Node's built-in retry mechanism handle failures.
92 | {: .warning }
93 |
94 | 4. **Data Design**: Design the shared store that nodes will use to communicate.
95 | - One core design principle for PocketFlow is to use a well-designed [shared store](./core_abstraction/communication.md)—a data contract that all nodes agree upon to retrieve and store data.
96 | - For simple systems, use an in-memory dictionary.
97 | - For more complex systems or when persistence is required, use a database.
98 | - **Don't Repeat Yourself**: Use in-memory references or foreign keys.
99 | - Example shared store design:
100 | ```python
101 | shared = {
102 | "user": {
103 | "id": "user123",
104 | "context": { # Another nested dict
105 | "weather": {"temp": 72, "condition": "sunny"},
106 | "location": "San Francisco"
107 | }
108 | },
109 | "results": {} # Empty dict to store outputs
110 | }
111 | ```
112 |
113 | 5. **Node Design**: Plan how each node will read and write data, and use utility functions.
114 |    - For each [Node](./core_abstraction/node.md), describe its type, how it reads and writes data, and which utility function it uses. Keep it specific but high-level without code. For example:
115 | - `type`: Regular (or Batch, or Async)
116 | - `prep`: Read "text" from the shared store
117 | - `exec`: Call the embedding utility function. **Avoid exception handling here**; let the Node's retry mechanism manage failures.
118 | - `post`: Write "embedding" to the shared store
119 |
120 | 6. **Implementation**: Implement the initial nodes and flows based on the design.
121 | - 🎉 If you've reached this step, humans have finished the design. Now *Agentic Coding* begins!
122 | - **"Keep it simple, stupid!"** Avoid complex features and full-scale type checking.
123 | - **FAIL FAST**! Leverage the built-in [Node](./core_abstraction/node.md) retry and fallback mechanisms to handle failures gracefully. This helps you quickly identify weak points in the system.
124 | - Add logging throughout the code to facilitate debugging.
125 |
126 | 7. **Optimization**:
127 | - **Use Intuition**: For a quick initial evaluation, human intuition is often a good start.
128 |    - **Redesign Flow (back to Step 2: Flow Design)**: Consider breaking down tasks further, introducing agentic decisions, or better managing input contexts.
129 | - If your flow design is already solid, move on to micro-optimizations:
130 | - **Prompt Engineering**: Use clear, specific instructions with examples to reduce ambiguity.
131 | - **In-Context Learning**: Provide robust examples for tasks that are difficult to specify with instructions alone.
132 |
133 | - > **You'll likely iterate a lot!** Expect to repeat Steps 3–6 hundreds of times.
134 | >
135 | >
136 | {: .best-practice }
137 |
138 | 8. **Reliability**
139 |    - **Node Retries**: Add checks in the node `exec` to ensure outputs meet requirements, and consider increasing `max_retries` and `wait` times (see the sketch below).
140 | - **Logging and Visualization**: Maintain logs of all attempts and visualize node results for easier debugging.
141 | - **Self-Evaluation**: Add a separate node (powered by an LLM) to review outputs when results are uncertain.
142 |
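
A minimal sketch of the **Node Retries** point above, assuming the project's `call_llm` utility and a node that expects YAML output:

```python
# Hypothetical node showing output checks plus retry/backoff configuration.
import yaml
from pocketflow import Node
from utils.call_llm import call_llm

class ExtractInfo(Node):
    def exec(self, prep_res):
        # Ask for plain YAML to keep parsing simple.
        response = call_llm(f"Return name and age as YAML only:\n{prep_res}")
        result = yaml.safe_load(response)
        # These checks raise on malformed output, which triggers the built-in retry.
        assert isinstance(result, dict)
        assert "name" in result and "age" in result
        return result

# Retry up to 3 times, waiting 10 seconds between attempts (helps with rate limits).
extract_node = ExtractInfo(max_retries=3, wait=10)
```
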
143 | ## Example LLM Project File Structure
144 |
145 | ```
146 | my_project/
147 | ├── main.py
148 | ├── nodes.py
149 | ├── flow.py
150 | ├── utils/
151 | │ ├── __init__.py
152 | │ ├── call_llm.py
153 | │ └── search_web.py
154 | ├── requirements.txt
155 | └── docs/
156 | └── design.md
157 | ```
158 |
159 | - **`requirements.txt`**: Lists the Python dependencies for the project.
160 | ```
161 | PyYAML
162 | pocketflow
163 | ```
164 |
165 | - **`docs/design.md`**: Contains project documentation for each step above. This should be *high-level* and *no-code*.
166 | ~~~
167 | # Design Doc: Your Project Name
168 |
169 | > Please DON'T remove notes for AI
170 |
171 | ## Requirements
172 |
173 | > Notes for AI: Keep it simple and clear.
174 | > If the requirements are abstract, write concrete user stories
175 |
176 |
177 | ## Flow Design
178 |
179 | > Notes for AI:
180 | > 1. Consider the design patterns of agent, map-reduce, rag, and workflow. Apply them if they fit.
181 | > 2. Present a concise, high-level description of the workflow.
182 |
183 | ### Applicable Design Pattern:
184 |
185 | 1. Map the file summary into chunks, then reduce these chunks into a final summary.
186 | 2. Agentic file finder
187 | - *Context*: The entire summary of the file
188 | - *Action*: Find the file
189 |
190 | ### Flow high-level Design:
191 |
192 | 1. **First Node**: This node is for ...
193 | 2. **Second Node**: This node is for ...
194 | 3. **Third Node**: This node is for ...
195 |
196 | ```mermaid
197 | flowchart TD
198 | firstNode[First Node] --> secondNode[Second Node]
199 | secondNode --> thirdNode[Third Node]
200 | ```
201 | ## Utility Functions
202 |
203 | > Notes for AI:
204 | > 1. Understand the utility function definition thoroughly by reviewing the doc.
205 | > 2. Include only the necessary utility functions, based on nodes in the flow.
206 |
207 | 1. **Call LLM** (`utils/call_llm.py`)
208 | - *Input*: prompt (str)
209 | - *Output*: response (str)
210 | - Generally used by most nodes for LLM tasks
211 |
212 | 2. **Embedding** (`utils/get_embedding.py`)
213 | - *Input*: str
214 | - *Output*: a vector of 3072 floats
215 | - Used by the second node to embed text
216 |
217 | ## Node Design
218 |
219 | ### Shared Store
220 |
221 | > Notes for AI: Try to minimize data redundancy
222 |
223 | The shared store structure is organized as follows:
224 |
225 | ```python
226 | shared = {
227 | "key": "value"
228 | }
229 | ```
230 |
231 | ### Node Steps
232 |
233 | > Notes for AI: Carefully decide whether to use Batch/Async Node/Flow.
234 |
235 | 1. First Node
236 | - *Purpose*: Provide a short explanation of the node’s function
237 | - *Type*: Decide between Regular, Batch, or Async
238 | - *Steps*:
239 | - *prep*: Read "key" from the shared store
240 | - *exec*: Call the utility function
241 | - *post*: Write "key" to the shared store
242 |
243 | 2. Second Node
244 | ...
245 | ~~~
246 |
247 |
248 | - **`utils/`**: Contains all utility functions.
249 | - It's recommended to dedicate one Python file to each API call, for example `call_llm.py` or `search_web.py`.
250 |   - Each file should also include a `main()` function (or `__main__` block) to try that API call. For example:
251 | ```python
252 | from google import genai
253 | import os
254 |
255 | def call_llm(prompt: str) -> str:
256 | client = genai.Client(
257 | api_key=os.getenv("GEMINI_API_KEY", ""),
258 | )
259 | model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
260 | response = client.models.generate_content(model=model, contents=[prompt])
261 | return response.text
262 |
263 | if __name__ == "__main__":
264 | test_prompt = "Hello, how are you?"
265 |
266 |     # Call the LLM once with the test prompt
267 |     print("Making call...")
268 |     response = call_llm(test_prompt)
269 |     print(f"Response: {response}")
270 | ```
271 |
272 | - **`nodes.py`**: Contains all the node definitions.
273 | ```python
274 | # nodes.py
275 | from pocketflow import Node
276 | from utils.call_llm import call_llm
277 |
278 | class GetQuestionNode(Node):
279 | def exec(self, _):
280 | # Get question directly from user input
281 | user_question = input("Enter your question: ")
282 | return user_question
283 |
284 | def post(self, shared, prep_res, exec_res):
285 | # Store the user's question
286 | shared["question"] = exec_res
287 | return "default" # Go to the next node
288 |
289 | class AnswerNode(Node):
290 | def prep(self, shared):
291 | # Read question from shared
292 | return shared["question"]
293 |
294 | def exec(self, question):
295 | # Call LLM to get the answer
296 | return call_llm(question)
297 |
298 | def post(self, shared, prep_res, exec_res):
299 | # Store the answer in shared
300 | shared["answer"] = exec_res
301 | ```
302 | - **`flow.py`**: Implements functions that create flows by importing node definitions and connecting them.
303 | ```python
304 | # flow.py
305 | from pocketflow import Flow
306 | from nodes import GetQuestionNode, AnswerNode
307 |
308 | def create_qa_flow():
309 | """Create and return a question-answering flow."""
310 | # Create nodes
311 | get_question_node = GetQuestionNode()
312 | answer_node = AnswerNode()
313 |
314 | # Connect nodes in sequence
315 | get_question_node >> answer_node
316 |
317 | # Create flow starting with input node
318 | return Flow(start=get_question_node)
319 | ```
320 | - **`main.py`**: Serves as the project's entry point.
321 | ```python
322 | # main.py
323 | from flow import create_qa_flow
324 |
325 | # Example main function
326 | # Please replace this with your own main function
327 | def main():
328 | shared = {
329 | "question": None, # Will be populated by GetQuestionNode from user input
330 | "answer": None # Will be populated by AnswerNode
331 | }
332 |
333 | # Create the flow and run it
334 | qa_flow = create_qa_flow()
335 | qa_flow.run(shared)
336 | print(f"Question: {shared['question']}")
337 | print(f"Answer: {shared['answer']}")
338 |
339 | if __name__ == "__main__":
340 | main()
341 | ```
342 |
343 | ================================================
344 | File: docs/index.md
345 | ================================================
346 | ---
347 | layout: default
348 | title: "Home"
349 | nav_order: 1
350 | ---
351 |
352 | # Pocket Flow
353 |
354 | A [100-line](https://github.com/the-pocket/PocketFlow/blob/main/pocketflow/__init__.py) minimalist LLM framework for *Agents, Task Decomposition, RAG, etc*.
355 |
356 | - **Lightweight**: Just the core graph abstraction in 100 lines. ZERO dependencies and ZERO vendor lock-in.
357 | - **Expressive**: Everything you love from larger frameworks—([Multi-](./design_pattern/multi_agent.html))[Agents](./design_pattern/agent.html), [Workflow](./design_pattern/workflow.html), [RAG](./design_pattern/rag.html), and more.
358 | - **Agentic-Coding**: Intuitive enough for AI agents to help humans build complex LLM applications.
359 |
360 |
361 |

362 |
363 |
364 | ## Core Abstraction
365 |
366 | We model the LLM workflow as a **Graph + Shared Store**:
367 |
368 | - [Node](./core_abstraction/node.md) handles simple (LLM) tasks.
369 | - [Flow](./core_abstraction/flow.md) connects nodes through **Actions** (labeled edges).
370 | - [Shared Store](./core_abstraction/communication.md) enables communication between nodes within flows.
371 | - [Batch](./core_abstraction/batch.md) nodes/flows allow for data-intensive tasks.
372 | - [Async](./core_abstraction/async.md) nodes/flows allow waiting for asynchronous tasks.
373 | - [(Advanced) Parallel](./core_abstraction/parallel.md) nodes/flows handle I/O-bound tasks.
374 |
375 |
376 |

377 |
378 |
379 | ## Design Pattern
380 |
381 | From there, it’s easy to implement popular design patterns:
382 |
383 | - [Agent](./design_pattern/agent.md) autonomously makes decisions.
384 | - [Workflow](./design_pattern/workflow.md) chains multiple tasks into pipelines.
385 | - [RAG](./design_pattern/rag.md) integrates data retrieval with generation.
386 | - [Map Reduce](./design_pattern/mapreduce.md) splits data tasks into Map and Reduce steps.
387 | - [Structured Output](./design_pattern/structure.md) formats outputs consistently.
388 | - [(Advanced) Multi-Agents](./design_pattern/multi_agent.md) coordinate multiple agents.
389 |
390 |
391 |

392 |
393 |
394 | ## Utility Function
395 |
396 | We **do not** provide built-in utilities. Instead, we offer *examples*—please *implement your own*:
397 |
398 | - [LLM Wrapper](./utility_function/llm.md)
399 | - [Viz and Debug](./utility_function/viz.md)
400 | - [Web Search](./utility_function/websearch.md)
401 | - [Chunking](./utility_function/chunking.md)
402 | - [Embedding](./utility_function/embedding.md)
403 | - [Vector Databases](./utility_function/vector.md)
404 | - [Text-to-Speech](./utility_function/text_to_speech.md)
405 |
406 | **Why not built-in?**: I believe it's a *bad practice* to hardcode vendor-specific APIs in a general framework:
407 | - *API Volatility*: Frequent changes lead to heavy maintenance for hardcoded APIs.
408 | - *Flexibility*: You may want to switch vendors, use fine-tuned models, or run them locally.
409 | - *Optimizations*: Prompt caching, batching, and streaming are easier without vendor lock-in.
410 |
411 | ## Ready to build your Apps?
412 |
413 | Check out [Agentic Coding Guidance](./guide.md), the fastest way to develop LLM projects with Pocket Flow!
414 |
415 | ================================================
416 | File: docs/core_abstraction/async.md
417 | ================================================
418 | ---
419 | layout: default
420 | title: "(Advanced) Async"
421 | parent: "Core Abstraction"
422 | nav_order: 5
423 | ---
424 |
425 | # (Advanced) Async
426 |
427 | **Async** Nodes implement `prep_async()`, `exec_async()`, `exec_fallback_async()`, and/or `post_async()`. This is useful for:
428 |
429 | 1. **prep_async()**: For *fetching/reading data (files, APIs, DB)* in an I/O-friendly way.
430 | 2. **exec_async()**: Typically used for async LLM calls.
431 | 3. **post_async()**: For *awaiting user feedback*, *coordinating across multiple agents*, or any additional async steps after `exec_async()`.
432 |
433 | **Note**: `AsyncNode` must be wrapped in `AsyncFlow`. `AsyncFlow` can also include regular (sync) nodes.
434 |
435 | ### Example
436 |
437 | ```python
438 | class SummarizeThenVerify(AsyncNode):
439 | async def prep_async(self, shared):
440 | # Example: read a file asynchronously
441 | doc_text = await read_file_async(shared["doc_path"])
442 | return doc_text
443 |
444 | async def exec_async(self, prep_res):
445 | # Example: async LLM call
446 | summary = await call_llm_async(f"Summarize: {prep_res}")
447 | return summary
448 |
449 | async def post_async(self, shared, prep_res, exec_res):
450 | # Example: wait for user feedback
451 | decision = await gather_user_feedback(exec_res)
452 | if decision == "approve":
453 | shared["summary"] = exec_res
454 | return "approve"
455 | return "deny"
456 |
457 | summarize_node = SummarizeThenVerify()
458 | final_node = Finalize()
459 |
460 | # Define transitions
461 | summarize_node - "approve" >> final_node
462 | summarize_node - "deny" >> summarize_node # retry
463 |
464 | flow = AsyncFlow(start=summarize_node)
465 |
466 | async def main():
467 | shared = {"doc_path": "document.txt"}
468 | await flow.run_async(shared)
469 | print("Final Summary:", shared.get("summary"))
470 |
471 | asyncio.run(main())
472 | ```
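
The example assumes async helpers such as `read_file_async` and `call_llm_async`, which PocketFlow does not provide. A minimal sketch using the standard library and the `AsyncOpenAI` client:

```python
# Hypothetical async utilities assumed by the example above.
import asyncio
from openai import AsyncOpenAI

async def read_file_async(path):
    # Offload blocking file I/O to a worker thread.
    def _read():
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    return await asyncio.to_thread(_read)

async def call_llm_async(prompt):
    client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment
    r = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}]
    )
    return r.choices[0].message.content
```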
473 |
474 | ================================================
475 | File: docs/core_abstraction/batch.md
476 | ================================================
477 | ---
478 | layout: default
479 | title: "Batch"
480 | parent: "Core Abstraction"
481 | nav_order: 4
482 | ---
483 |
484 | # Batch
485 |
486 | **Batch** makes it easier to handle large inputs in one Node or **rerun** a Flow multiple times. Example use cases:
487 | - **Chunk-based** processing (e.g., splitting large texts).
488 | - **Iterative** processing over lists of input items (e.g., user queries, files, URLs).
489 |
490 | ## 1. BatchNode
491 |
492 | A **BatchNode** extends `Node` but changes `prep()` and `exec()`:
493 |
494 | - **`prep(shared)`**: returns an **iterable** (e.g., list, generator).
495 | - **`exec(item)`**: called **once** per item in that iterable.
496 | - **`post(shared, prep_res, exec_res_list)`**: after all items are processed, receives a **list** of results (`exec_res_list`) and returns an **Action**.
497 |
498 |
499 | ### Example: Summarize a Large File
500 |
501 | ```python
502 | class MapSummaries(BatchNode):
503 | def prep(self, shared):
504 | # Suppose we have a big file; chunk it
505 | content = shared["data"]
506 | chunk_size = 10000
507 | chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)]
508 | return chunks
509 |
510 | def exec(self, chunk):
511 | prompt = f"Summarize this chunk in 10 words: {chunk}"
512 | summary = call_llm(prompt)
513 | return summary
514 |
515 | def post(self, shared, prep_res, exec_res_list):
516 | combined = "\n".join(exec_res_list)
517 | shared["summary"] = combined
518 | return "default"
519 |
520 | map_summaries = MapSummaries()
521 | flow = Flow(start=map_summaries)
522 | flow.run(shared)
523 | ```
524 |
525 | ---
526 |
527 | ## 2. BatchFlow
528 |
529 | A **BatchFlow** runs a **Flow** multiple times, each time with different `params`. Think of it as a loop that replays the Flow for each parameter set.
530 |
531 | ### Example: Summarize Many Files
532 |
533 | ```python
534 | class SummarizeAllFiles(BatchFlow):
535 | def prep(self, shared):
536 | # Return a list of param dicts (one per file)
537 | filenames = list(shared["data"].keys()) # e.g., ["file1.txt", "file2.txt", ...]
538 | return [{"filename": fn} for fn in filenames]
539 |
540 | # Suppose we have a per-file Flow (e.g., load_file >> summarize >> reduce):
541 | summarize_file = SummarizeFile(start=load_file)
542 |
543 | # Wrap that flow into a BatchFlow:
544 | summarize_all_files = SummarizeAllFiles(start=summarize_file)
545 | summarize_all_files.run(shared)
546 | ```
547 |
548 | ### Under the Hood
549 | 1. `prep(shared)` returns a list of param dicts—e.g., `[{filename: "file1.txt"}, {filename: "file2.txt"}, ...]`.
550 | 2. The **BatchFlow** loops through each dict. For each one:
551 | - It merges the dict with the BatchFlow’s own `params`.
552 | - It calls `flow.run(shared)` using the merged result.
553 | 3. This means the sub-Flow is run **repeatedly**, once for every param dict.
554 |
555 | ---
556 |
557 | ## 3. Nested or Multi-Level Batches
558 |
559 | You can nest a **BatchFlow** in another **BatchFlow**. For instance:
560 | - **Outer** batch: returns a list of directory param dicts (e.g., `{"directory": "/pathA"}`, `{"directory": "/pathB"}`, ...).
561 | - **Inner** batch: returns a list of per-file param dicts.
562 |
563 | At each level, **BatchFlow** merges its own param dict with the parent’s. By the time you reach the **innermost** node, the final `params` is the merged result of **all** parents in the chain. This way, a nested structure can keep track of the entire context (e.g., directory + file name) at once.
564 |
565 | ```python
566 |
567 | class FileBatchFlow(BatchFlow):
568 | def prep(self, shared):
569 | directory = self.params["directory"]
570 | # e.g., files = ["file1.txt", "file2.txt", ...]
571 | files = [f for f in os.listdir(directory) if f.endswith(".txt")]
572 | return [{"filename": f} for f in files]
573 |
574 | class DirectoryBatchFlow(BatchFlow):
575 | def prep(self, shared):
576 | directories = [ "/path/to/dirA", "/path/to/dirB"]
577 | return [{"directory": d} for d in directories]
578 |
579 | # MapSummaries has params like {"directory": "/path/to/dirA", "filename": "file1.txt"}
580 | inner_flow = FileBatchFlow(start=MapSummaries())
581 | outer_flow = DirectoryBatchFlow(start=inner_flow)
582 | ```
583 |
584 | ================================================
585 | File: docs/core_abstraction/communication.md
586 | ================================================
587 | ---
588 | layout: default
589 | title: "Communication"
590 | parent: "Core Abstraction"
591 | nav_order: 3
592 | ---
593 |
594 | # Communication
595 |
596 | Nodes and Flows **communicate** in 2 ways:
597 |
598 | 1. **Shared Store (for almost all the cases)**
599 |
600 |   - A global data structure (often an in-mem dict) that all nodes can read (`prep()`) and write (`post()`).
601 | - Great for data results, large content, or anything multiple nodes need.
602 |   - You should design the data structure ahead of time and populate it before the flow runs.
603 |
604 |   - > **Separation of Concerns:** Use `Shared Store` for almost all cases to separate *Data Schema* from *Compute Logic*! This approach is both flexible and easy to manage, resulting in more maintainable code. `Params` is mostly syntactic sugar for [Batch](./batch.md).
605 | {: .best-practice }
606 |
607 | 2. **Params (only for [Batch](./batch.md))**
608 | - Each node has a local, ephemeral `params` dict passed in by the **parent Flow**, used as an identifier for tasks. Parameter keys and values shall be **immutable**.
609 | - Good for identifiers like filenames or numeric IDs, in Batch mode.
610 |
611 | If you're familiar with memory management, think of the **Shared Store** like a **heap** (shared by all function calls), and **Params** like a **stack** (assigned by the caller).
612 |
613 | ---
614 |
615 | ## 1. Shared Store
616 |
617 | ### Overview
618 |
619 | A shared store is typically an in-mem dictionary, like:
620 | ```python
621 | shared = {"data": {}, "summary": {}, "config": {...}, ...}
622 | ```
623 |
624 | It can also contain local file handles, DB connections, or a combination of these for persistence. We recommend deciding the data structure or DB schema first based on your app requirements.
625 |
626 | ### Example
627 |
628 | ```python
629 | class LoadData(Node):
630 | def post(self, shared, prep_res, exec_res):
631 | # We write data to shared store
632 | shared["data"] = "Some text content"
633 | return None
634 |
635 | class Summarize(Node):
636 | def prep(self, shared):
637 | # We read data from shared store
638 | return shared["data"]
639 |
640 | def exec(self, prep_res):
641 | # Call LLM to summarize
642 | prompt = f"Summarize: {prep_res}"
643 | summary = call_llm(prompt)
644 | return summary
645 |
646 | def post(self, shared, prep_res, exec_res):
647 | # We write summary to shared store
648 | shared["summary"] = exec_res
649 | return "default"
650 |
651 | load_data = LoadData()
652 | summarize = Summarize()
653 | load_data >> summarize
654 | flow = Flow(start=load_data)
655 |
656 | shared = {}
657 | flow.run(shared)
658 | ```
659 |
660 | Here:
661 | - `LoadData` writes to `shared["data"]`.
662 | - `Summarize` reads from `shared["data"]`, summarizes, and writes to `shared["summary"]`.
663 |
664 | ---
665 |
666 | ## 2. Params
667 |
668 | **Params** let you store *per-Node* or *per-Flow* config that doesn't need to live in the shared store. They are:
669 | - **Immutable** during a Node's run cycle (i.e., they don't change mid-`prep->exec->post`).
670 | - **Set** via `set_params()`.
671 | - **Cleared** and re-set each time a parent Flow runs the node.
672 |
673 | > Only set the uppermost Flow params because others will be overwritten by the parent Flow.
674 | >
675 | > If you need to set child node params, see [Batch](./batch.md).
676 | {: .warning }
677 |
678 | Typically, **Params** are identifiers (e.g., file name, page number). Use them to fetch the task you assigned or write to a specific part of the shared store.
679 |
680 | ### Example
681 |
682 | ```python
683 | # 1) Create a Node that uses params
684 | class SummarizeFile(Node):
685 | def prep(self, shared):
686 | # Access the node's param
687 | filename = self.params["filename"]
688 | return shared["data"].get(filename, "")
689 |
690 | def exec(self, prep_res):
691 | prompt = f"Summarize: {prep_res}"
692 | return call_llm(prompt)
693 |
694 | def post(self, shared, prep_res, exec_res):
695 | filename = self.params["filename"]
696 | shared["summary"][filename] = exec_res
697 | return "default"
698 |
699 | # 2) Set params
700 | node = SummarizeFile()
701 |
702 | # 3) Set Node params directly (for testing)
703 | node.set_params({"filename": "doc1.txt"})
704 | node.run(shared)
705 |
706 | # 4) Create Flow
707 | flow = Flow(start=node)
708 |
709 | # 5) Set Flow params (overwrites node params)
710 | flow.set_params({"filename": "doc2.txt"})
711 | flow.run(shared) # The node summarizes doc2, not doc1
712 | ```
713 |
714 | ================================================
715 | File: docs/core_abstraction/flow.md
716 | ================================================
717 | ---
718 | layout: default
719 | title: "Flow"
720 | parent: "Core Abstraction"
721 | nav_order: 2
722 | ---
723 |
724 | # Flow
725 |
726 | A **Flow** orchestrates a graph of Nodes. You can chain Nodes in a sequence or create branching depending on the **Actions** returned from each Node's `post()`.
727 |
728 | ## 1. Action-based Transitions
729 |
730 | Each Node's `post()` returns an **Action** string. By default, if `post()` doesn't return anything, we treat that as `"default"`.
731 |
732 | You define transitions with the syntax:
733 |
734 | 1. **Basic default transition**: `node_a >> node_b`
735 | This means if `node_a.post()` returns `"default"`, go to `node_b`.
736 | (Equivalent to `node_a - "default" >> node_b`)
737 |
738 | 2. **Named action transition**: `node_a - "action_name" >> node_b`
739 | This means if `node_a.post()` returns `"action_name"`, go to `node_b`.
740 |
741 | It's possible to create loops, branching, or multi-step flows.
742 |
743 | ## 2. Creating a Flow
744 |
745 | A **Flow** begins with a **start** node. You call `Flow(start=some_node)` to specify the entry point. When you call `flow.run(shared)`, it executes the start node, looks at its returned Action from `post()`, follows the transition, and continues until there's no next node.
746 |
747 | ### Example: Simple Sequence
748 |
749 | Here's a minimal flow of two nodes in a chain:
750 |
751 | ```python
752 | node_a >> node_b
753 | flow = Flow(start=node_a)
754 | flow.run(shared)
755 | ```
756 |
757 | - When you run the flow, it executes `node_a`.
758 | - Suppose `node_a.post()` returns `"default"`.
759 | - The flow then sees `"default"` Action is linked to `node_b` and runs `node_b`.
760 | - `node_b.post()` returns `"default"` but we didn't define `node_b >> something_else`. So the flow ends there.
761 |
762 | ### Example: Branching & Looping
763 |
764 | Here's a simple expense approval flow that demonstrates branching and looping. The `ReviewExpense` node can return three possible Actions:
765 |
766 | - `"approved"`: expense is approved, move to payment processing
767 | - `"needs_revision"`: expense needs changes, send back for revision
768 | - `"rejected"`: expense is denied, finish the process
769 |
770 | We can wire them like this:
771 |
772 | ```python
773 | # Define the flow connections
774 | review - "approved" >> payment # If approved, process payment
775 | review - "needs_revision" >> revise # If needs changes, go to revision
776 | review - "rejected" >> finish # If rejected, finish the process
777 |
778 | revise >> review # After revision, go back for another review
779 | payment >> finish # After payment, finish the process
780 |
781 | flow = Flow(start=review)
782 | ```
783 |
784 | Let's see how it flows:
785 |
786 | 1. If `review.post()` returns `"approved"`, the expense moves to the `payment` node
787 | 2. If `review.post()` returns `"needs_revision"`, it goes to the `revise` node, which then loops back to `review`
788 | 3. If `review.post()` returns `"rejected"`, it moves to the `finish` node and stops
789 |
790 | ```mermaid
791 | flowchart TD
792 | review[Review Expense] -->|approved| payment[Process Payment]
793 | review -->|needs_revision| revise[Revise Report]
794 | review -->|rejected| finish[Finish Process]
795 |
796 | revise --> review
797 | payment --> finish
798 | ```
799 |
800 | ### Running Individual Nodes vs. Running a Flow
801 |
802 | - `node.run(shared)`: Just runs that node alone (calls `prep->exec->post()`), returns an Action.
803 | - `flow.run(shared)`: Executes from the start node, follows Actions to the next node, and so on until the flow can't continue.
804 |
805 | > `node.run(shared)` **does not** proceed to the successor.
806 | > This is mainly for debugging or testing a single node.
807 | >
808 | > Always use `flow.run(...)` in production to ensure the full pipeline runs correctly.
809 | {: .warning }
810 |
811 | ## 3. Nested Flows
812 |
813 | A **Flow** can act like a Node, which enables powerful composition patterns. This means you can:
814 |
815 | 1. Use a Flow as a Node within another Flow's transitions.
816 | 2. Combine multiple smaller Flows into a larger Flow for reuse.
817 | 3. Node `params` will be the merged result of **all** parents' `params`.
818 |
819 | ### Flow's Node Methods
820 |
821 | A **Flow** is also a **Node**, so it will run `prep()` and `post()`. However:
822 |
823 | - It **won't** run `exec()`, as its main logic is to orchestrate its nodes.
824 | - `post()` always receives `None` for `exec_res` and should instead get the flow execution results from the shared store.
825 |
826 | ### Basic Flow Nesting
827 |
828 | Here's how to connect a flow to another node:
829 |
830 | ```python
831 | # Create a sub-flow
832 | node_a >> node_b
833 | subflow = Flow(start=node_a)
834 |
835 | # Connect it to another node
836 | subflow >> node_c
837 |
838 | # Create the parent flow
839 | parent_flow = Flow(start=subflow)
840 | ```
841 |
842 | When `parent_flow.run()` executes:
843 | 1. It starts `subflow`
844 | 2. `subflow` runs through its nodes (`node_a->node_b`)
845 | 3. After `subflow` completes, execution continues to `node_c`
846 |
847 | ### Example: Order Processing Pipeline
848 |
849 | Here's a practical example that breaks down order processing into nested flows:
850 |
851 | ```python
852 | # Payment processing sub-flow
853 | validate_payment >> process_payment >> payment_confirmation
854 | payment_flow = Flow(start=validate_payment)
855 |
856 | # Inventory sub-flow
857 | check_stock >> reserve_items >> update_inventory
858 | inventory_flow = Flow(start=check_stock)
859 |
860 | # Shipping sub-flow
861 | create_label >> assign_carrier >> schedule_pickup
862 | shipping_flow = Flow(start=create_label)
863 |
864 | # Connect the flows into a main order pipeline
865 | payment_flow >> inventory_flow >> shipping_flow
866 |
867 | # Create the master flow
868 | order_pipeline = Flow(start=payment_flow)
869 |
870 | # Run the entire pipeline
871 | order_pipeline.run(shared_data)
872 | ```
873 |
874 | This creates a clean separation of concerns while maintaining a clear execution path:
875 |
876 | ```mermaid
877 | flowchart LR
878 | subgraph order_pipeline[Order Pipeline]
879 | subgraph paymentFlow["Payment Flow"]
880 | A[Validate Payment] --> B[Process Payment] --> C[Payment Confirmation]
881 | end
882 |
883 | subgraph inventoryFlow["Inventory Flow"]
884 | D[Check Stock] --> E[Reserve Items] --> F[Update Inventory]
885 | end
886 |
887 | subgraph shippingFlow["Shipping Flow"]
888 | G[Create Label] --> H[Assign Carrier] --> I[Schedule Pickup]
889 | end
890 |
891 | paymentFlow --> inventoryFlow
892 | inventoryFlow --> shippingFlow
893 | end
894 | ```
895 |
896 | ================================================
897 | File: docs/core_abstraction/node.md
898 | ================================================
899 | ---
900 | layout: default
901 | title: "Node"
902 | parent: "Core Abstraction"
903 | nav_order: 1
904 | ---
905 |
906 | # Node
907 |
908 | A **Node** is the smallest building block. Each Node has 3 steps `prep->exec->post`:
909 |
910 |
911 |

912 |
913 |
914 | 1. `prep(shared)`
915 | - **Read and preprocess data** from `shared` store.
916 | - Examples: *query DB, read files, or serialize data into a string*.
917 | - Return `prep_res`, which is used by `exec()` and `post()`.
918 |
919 | 2. `exec(prep_res)`
920 | - **Execute compute logic**, with optional retries and error handling (below).
921 | - Examples: *(mostly) LLM calls, remote APIs, tool use*.
922 |    - ⚠️ This should be used only for computation and must **NOT** access `shared`.
923 |    - ⚠️ If retries are enabled, make sure the implementation is idempotent.
924 | - ⚠️ Defer exception handling to the Node's built-in retry mechanism.
925 | - Return `exec_res`, which is passed to `post()`.
926 |
927 | 3. `post(shared, prep_res, exec_res)`
928 | - **Postprocess and write data** back to `shared`.
929 | - Examples: *update DB, change states, log results*.
930 | - **Decide the next action** by returning a *string* (`action = "default"` if *None*).
931 |
932 | > **Why 3 steps?** To enforce the principle of *separation of concerns*: data storage and data processing are handled in separate steps.
933 | >
934 | > All steps are *optional*. E.g., you can only implement `prep` and `post` if you just need to process data.
935 | {: .note }
936 |
937 | ### Fault Tolerance & Retries
938 |
939 | You can **retry** `exec()` if it raises an exception via two parameters when you define the Node:
940 |
941 | - `max_retries` (int): Max times to run `exec()`. The default is `1` (**no** retry).
942 | - `wait` (int): The time to wait (in **seconds**) before next retry. By default, `wait=0` (no waiting).
943 | `wait` is helpful when you encounter rate-limits or quota errors from your LLM provider and need to back off.
944 |
945 | ```python
946 | my_node = SummarizeFile(max_retries=3, wait=10)
947 | ```
948 |
949 | When an exception occurs in `exec()`, the Node automatically retries until:
950 |
951 | - It either succeeds, or
952 | - The Node has retried `max_retries - 1` times already and fails on the last attempt.
953 |
954 | You can read the current retry count (0-based) from `self.cur_retry`.
955 |
956 | ```python
957 | class RetryNode(Node):
958 | def exec(self, prep_res):
959 | print(f"Retry {self.cur_retry} times")
960 | raise Exception("Failed")
961 | ```
962 |
963 | ### Graceful Fallback
964 |
965 | To **gracefully handle** the exception (after all retries) rather than raising it, override:
966 |
967 | ```python
968 | def exec_fallback(self, prep_res, exc):
969 | raise exc
970 | ```
971 |
972 | By default, it just re-raises the exception. But you can return a fallback result instead, which becomes the `exec_res` passed to `post()`.
973 |
974 | ### Example: Summarize file
975 |
976 | ```python
977 | class SummarizeFile(Node):
978 | def prep(self, shared):
979 | return shared["data"]
980 |
981 | def exec(self, prep_res):
982 | if not prep_res:
983 | return "Empty file content"
984 | prompt = f"Summarize this text in 10 words: {prep_res}"
985 | summary = call_llm(prompt) # might fail
986 | return summary
987 |
988 | def exec_fallback(self, prep_res, exc):
989 | # Provide a simple fallback instead of crashing
990 | return "There was an error processing your request."
991 |
992 | def post(self, shared, prep_res, exec_res):
993 | shared["summary"] = exec_res
994 | # Return "default" by not returning
995 |
996 | summarize_node = SummarizeFile(max_retries=3)
997 |
998 | # node.run() calls prep->exec->post
999 | # If exec() fails, it retries up to 3 times before calling exec_fallback()
1000 | action_result = summarize_node.run(shared)
1001 |
1002 | print("Action returned:", action_result) # "default"
1003 | print("Summary stored:", shared["summary"])
1004 | ```
1005 |
1006 | ================================================
1007 | File: docs/core_abstraction/parallel.md
1008 | ================================================
1009 | ---
1010 | layout: default
1011 | title: "(Advanced) Parallel"
1012 | parent: "Core Abstraction"
1013 | nav_order: 6
1014 | ---
1015 |
1016 | # (Advanced) Parallel
1017 |
1018 | **Parallel** Nodes and Flows let you run multiple **Async** Nodes and Flows **concurrently**—for example, summarizing multiple texts at once. This can improve performance by overlapping I/O and compute.
1019 |
1020 | > Because of Python’s GIL, parallel nodes and flows can’t truly parallelize CPU-bound tasks (e.g., heavy numerical computations). However, they excel at overlapping I/O-bound work—like LLM calls, database queries, API requests, or file I/O.
1021 | {: .warning }
1022 |
1023 | > - **Ensure Tasks Are Independent**: If each item depends on the output of a previous item, **do not** parallelize.
1024 | >
1025 | > - **Beware of Rate Limits**: Parallel calls can **quickly** trigger rate limits on LLM services. You may need a **throttling** mechanism (e.g., semaphores or sleep intervals); a minimal sketch follows after this list.
1026 | >
1027 | > - **Consider Single-Node Batch APIs**: Some LLMs offer a **batch inference** API where you can send multiple prompts in a single call. This is more complex to implement but can be more efficient than launching many parallel requests and mitigates rate limits.
1028 | {: .best-practice }
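
The throttling sketch referenced above, as a minimal framework-agnostic example: an `asyncio.Semaphore` caps the number of in-flight LLM calls. Here `call_llm_async` is a stand-in stub, and the limit and sleep are placeholder values.

```python
import asyncio

# Stand-in for your real async LLM wrapper.
async def call_llm_async(prompt):
    await asyncio.sleep(0.1)  # simulates network latency
    return f"summary of: {prompt[:30]}"

async def throttled_call_llm(prompt, semaphore):
    # The semaphore limits how many calls run concurrently.
    async with semaphore:
        return await call_llm_async(prompt)

async def main():
    semaphore = asyncio.Semaphore(3)  # placeholder limit
    prompts = [f"Summarize text #{i}" for i in range(10)]
    results = await asyncio.gather(*(throttled_call_llm(p, semaphore) for p in prompts))
    print(results)

asyncio.run(main())
```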
1029 |
1030 | ## AsyncParallelBatchNode
1031 |
1032 | Like **AsyncBatchNode**, but runs `exec_async()` in **parallel**:
1033 |
1034 | ```python
1035 | class ParallelSummaries(AsyncParallelBatchNode):
1036 | async def prep_async(self, shared):
1037 | # e.g., multiple texts
1038 | return shared["texts"]
1039 |
1040 | async def exec_async(self, text):
1041 | prompt = f"Summarize: {text}"
1042 | return await call_llm_async(prompt)
1043 |
1044 | async def post_async(self, shared, prep_res, exec_res_list):
1045 | shared["summary"] = "\n\n".join(exec_res_list)
1046 | return "default"
1047 |
1048 | node = ParallelSummaries()
1049 | flow = AsyncFlow(start=node)
1050 | ```
1051 |
1052 | ## AsyncParallelBatchFlow
1053 |
1054 | Parallel version of **BatchFlow**. Each iteration of the sub-flow runs **concurrently** using different parameters:
1055 |
1056 | ```python
1057 | class SummarizeMultipleFiles(AsyncParallelBatchFlow):
1058 | async def prep_async(self, shared):
1059 | return [{"filename": f} for f in shared["files"]]
1060 |
1061 | sub_flow = AsyncFlow(start=LoadAndSummarizeFile())
1062 | parallel_flow = SummarizeMultipleFiles(start=sub_flow)
1063 | await parallel_flow.run_async(shared)
1064 | ```
1065 |
1066 | ================================================
1067 | File: docs/design_pattern/agent.md
1068 | ================================================
1069 | ---
1070 | layout: default
1071 | title: "Agent"
1072 | parent: "Design Pattern"
1073 | nav_order: 1
1074 | ---
1075 |
1076 | # Agent
1077 |
1078 | Agent is a powerful design pattern in which nodes can take dynamic actions based on the context.
1079 |
1080 |
1081 |

1082 |
1083 |
1084 | ## Implement Agent with Graph
1085 |
1086 | 1. **Context and Action:** Implement nodes that supply context and perform actions.
1087 | 2. **Branching:** Use branching to connect each action node to an agent node. Use actions to allow the agent to direct the [flow](../core_abstraction/flow.md) between nodes—and potentially loop back for multi-step reasoning.
1088 | 3. **Agent Node:** Provide a prompt to decide action—for example:
1089 |
1090 | ```python
1091 | f"""
1092 | ### CONTEXT
1093 | Task: {task_description}
1094 | Previous Actions: {previous_actions}
1095 | Current State: {current_state}
1096 |
1097 | ### ACTION SPACE
1098 | [1] search
1099 | Description: Use web search to get results
1100 | Parameters:
1101 | - query (str): What to search for
1102 |
1103 | [2] answer
1104 | Description: Conclude based on the results
1105 | Parameters:
1106 | - result (str): Final answer to provide
1107 |
1108 | ### NEXT ACTION
1109 | Decide the next action based on the current context and available action space.
1110 | Return your response in the following format:
1111 |
1112 | ```yaml
1113 | thinking: |
1114 |
1115 | action:
1116 | parameters:
1117 | :
1118 | ```"""
1119 | ```
1120 |
1121 | The core of building **high-performance** and **reliable** agents boils down to:
1122 |
1123 | 1. **Context Management:** Provide *relevant, minimal context.* For example, rather than including an entire chat history, retrieve the most relevant context via [RAG](./rag.md). Even with larger context windows, LLMs still fall victim to ["lost in the middle"](https://arxiv.org/abs/2307.03172), overlooking mid-prompt content. A small sketch of this idea follows after this list.
1124 |
1125 | 2. **Action Space:** Provide *a well-structured and unambiguous* set of actions—avoiding overlap like separate `read_databases` or `read_csvs`. Instead, import CSVs into the database.
1126 |
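A hedged sketch of the context-management point above, assuming hypothetical `retrieve_relevant` and `call_llm` utilities (not part of the framework):

```python
def answer_with_minimal_context(question, history_chunks):
    # Keep only the few most relevant pieces of history instead of the full transcript.
    relevant = retrieve_relevant(question, history_chunks, top_k=3)  # hypothetical retrieval utility
    context = "\n".join(relevant)
    return call_llm(f"Context:\n{context}\n\nQuestion: {question}")
```
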
1127 | ## Example Good Action Design
1128 |
1129 | - **Incremental:** Feed content in manageable chunks (500 lines or 1 page) instead of all at once.
1130 |
1131 | - **Overview-zoom-in:** First provide high-level structure (table of contents, summary), then allow drilling into details (raw texts).
1132 |
1133 | - **Parameterized/Programmable:** Instead of fixed actions, enable parameterized (columns to select) or programmable (SQL queries) actions, for example, to read CSV files.
1134 |
1135 | - **Backtracking:** Let the agent undo the last step instead of restarting entirely, preserving progress when encountering errors or dead ends.
1136 |
1137 | ## Example: Search Agent
1138 |
1139 | This agent:
1140 | 1. Decides whether to search or answer
1141 | 2. If it searches, loops back to decide whether more searching is needed
1142 | 3. Answers once enough context has been gathered
1143 |
1144 | ```python
1145 | class DecideAction(Node):
1146 | def prep(self, shared):
1147 | context = shared.get("context", "No previous search")
1148 | query = shared["query"]
1149 | return query, context
1150 |
1151 | def exec(self, inputs):
1152 | query, context = inputs
1153 | prompt = f"""
1154 | Given input: {query}
1155 | Previous search results: {context}
1156 | Should I: 1) Search web for more info 2) Answer with current knowledge
1157 | Output in yaml:
1158 | ```yaml
1159 | action: search/answer
1160 | reason: why this action
1161 | search_term: search phrase if action is search
1162 | ```"""
1163 | resp = call_llm(prompt)
1164 | yaml_str = resp.split("```yaml")[1].split("```")[0].strip()
1165 | result = yaml.safe_load(yaml_str)
1166 |
1167 | assert isinstance(result, dict)
1168 | assert "action" in result
1169 | assert "reason" in result
1170 | assert result["action"] in ["search", "answer"]
1171 | if result["action"] == "search":
1172 | assert "search_term" in result
1173 |
1174 | return result
1175 |
1176 | def post(self, shared, prep_res, exec_res):
1177 | if exec_res["action"] == "search":
1178 | shared["search_term"] = exec_res["search_term"]
1179 | return exec_res["action"]
1180 |
1181 | class SearchWeb(Node):
1182 | def prep(self, shared):
1183 | return shared["search_term"]
1184 |
1185 | def exec(self, search_term):
1186 | return search_web(search_term)
1187 |
1188 | def post(self, shared, prep_res, exec_res):
1189 | prev_searches = shared.get("context", [])
1190 | shared["context"] = prev_searches + [
1191 | {"term": shared["search_term"], "result": exec_res}
1192 | ]
1193 | return "decide"
1194 |
1195 | class DirectAnswer(Node):
1196 | def prep(self, shared):
1197 | return shared["query"], shared.get("context", "")
1198 |
1199 | def exec(self, inputs):
1200 | query, context = inputs
1201 | return call_llm(f"Context: {context}\nAnswer: {query}")
1202 |
1203 | def post(self, shared, prep_res, exec_res):
1204 | print(f"Answer: {exec_res}")
1205 | shared["answer"] = exec_res
1206 |
1207 | # Connect nodes
1208 | decide = DecideAction()
1209 | search = SearchWeb()
1210 | answer = DirectAnswer()
1211 |
1212 | decide - "search" >> search
1213 | decide - "answer" >> answer
1214 | search - "decide" >> decide # Loop back
1215 |
1216 | flow = Flow(start=decide)
1217 | flow.run({"query": "Who won the Nobel Prize in Physics 2024?"})
1218 | ```
1219 |
1220 | ================================================
1221 | File: docs/design_pattern/mapreduce.md
1222 | ================================================
1223 | ---
1224 | layout: default
1225 | title: "Map Reduce"
1226 | parent: "Design Pattern"
1227 | nav_order: 4
1228 | ---
1229 |
1230 | # Map Reduce
1231 |
1232 | MapReduce is a design pattern suitable when you have either:
1233 | - Large input data (e.g., multiple files to process), or
1234 | - Large output data (e.g., multiple forms to fill)
1235 |
1236 | and there is a logical way to break the task into smaller, ideally independent parts.
1237 |
1238 |
1239 |

1240 |
1241 |
1242 | You first break down the task using [BatchNode](../core_abstraction/batch.md) in the map phase, followed by aggregation in the reduce phase.
1243 |
1244 | ### Example: Document Summarization
1245 |
1246 | ```python
1247 | class SummarizeAllFiles(BatchNode):
1248 | def prep(self, shared):
1249 | files_dict = shared["files"] # e.g. 10 files
1250 | return list(files_dict.items()) # [("file1.txt", "aaa..."), ("file2.txt", "bbb..."), ...]
1251 |
1252 | def exec(self, one_file):
1253 | filename, file_content = one_file
1254 | summary_text = call_llm(f"Summarize the following file:\n{file_content}")
1255 | return (filename, summary_text)
1256 |
1257 | def post(self, shared, prep_res, exec_res_list):
1258 | shared["file_summaries"] = dict(exec_res_list)
1259 |
1260 | class CombineSummaries(Node):
1261 | def prep(self, shared):
1262 | return shared["file_summaries"]
1263 |
1264 | def exec(self, file_summaries):
1265 | # format as: "File1: summary\nFile2: summary...\n"
1266 | text_list = []
1267 | for fname, summ in file_summaries.items():
1268 | text_list.append(f"{fname} summary:\n{summ}\n")
1269 | big_text = "\n---\n".join(text_list)
1270 |
1271 | return call_llm(f"Combine these file summaries into one final summary:\n{big_text}")
1272 |
1273 | def post(self, shared, prep_res, final_summary):
1274 | shared["all_files_summary"] = final_summary
1275 |
1276 | batch_node = SummarizeAllFiles()
1277 | combine_node = CombineSummaries()
1278 | batch_node >> combine_node
1279 |
1280 | flow = Flow(start=batch_node)
1281 |
1282 | shared = {
1283 | "files": {
1284 | "file1.txt": "Alice was beginning to get very tired of sitting by her sister...",
1285 | "file2.txt": "Some other interesting text ...",
1286 | # ...
1287 | }
1288 | }
1289 | flow.run(shared)
1290 | print("Individual Summaries:", shared["file_summaries"])
1291 | print("\nFinal Summary:\n", shared["all_files_summary"])
1292 | ```
1293 |
1294 | ================================================
1295 | File: docs/design_pattern/rag.md
1296 | ================================================
1297 | ---
1298 | layout: default
1299 | title: "RAG"
1300 | parent: "Design Pattern"
1301 | nav_order: 3
1302 | ---
1303 |
1304 | # RAG (Retrieval Augmented Generation)
1305 |
1306 | For certain LLM tasks like answering questions, providing relevant context is essential. One common architecture is a **two-stage** RAG pipeline:
1307 |
1308 |
1309 |

1310 |
1311 |
1312 | 1. **Offline stage**: Preprocess and index documents ("building the index").
1313 | 2. **Online stage**: Given a question, generate answers by retrieving the most relevant context.
1314 |
1315 | ---
1316 | ## Stage 1: Offline Indexing
1317 |
1318 | We create three Nodes:
1319 | 1. `ChunkDocs` – [chunks](../utility_function/chunking.md) raw text.
1320 | 2. `EmbedDocs` – [embeds](../utility_function/embedding.md) each chunk.
1321 | 3. `StoreIndex` – stores embeddings into a [vector database](../utility_function/vector.md).
1322 |
1323 | ```python
1324 | class ChunkDocs(BatchNode):
1325 | def prep(self, shared):
1326 | # A list of file paths in shared["files"]. We process each file.
1327 | return shared["files"]
1328 |
1329 | def exec(self, filepath):
1330 | # read file content. In real usage, do error handling.
1331 | with open(filepath, "r", encoding="utf-8") as f:
1332 | text = f.read()
1333 | # chunk by 100 chars each
1334 | chunks = []
1335 | size = 100
1336 | for i in range(0, len(text), size):
1337 | chunks.append(text[i : i + size])
1338 | return chunks
1339 |
1340 | def post(self, shared, prep_res, exec_res_list):
1341 | # exec_res_list is a list of chunk-lists, one per file.
1342 | # flatten them all into a single list of chunks.
1343 | all_chunks = []
1344 | for chunk_list in exec_res_list:
1345 | all_chunks.extend(chunk_list)
1346 | shared["all_chunks"] = all_chunks
1347 |
1348 | class EmbedDocs(BatchNode):
1349 | def prep(self, shared):
1350 | return shared["all_chunks"]
1351 |
1352 | def exec(self, chunk):
1353 | return get_embedding(chunk)
1354 |
1355 | def post(self, shared, prep_res, exec_res_list):
1356 | # Store the list of embeddings.
1357 | shared["all_embeds"] = exec_res_list
1358 | print(f"Total embeddings: {len(exec_res_list)}")
1359 |
1360 | class StoreIndex(Node):
1361 | def prep(self, shared):
1362 | # We'll read all embeds from shared.
1363 | return shared["all_embeds"]
1364 |
1365 | def exec(self, all_embeds):
1366 | # Create a vector index (faiss or other DB in real usage).
1367 | index = create_index(all_embeds)
1368 | return index
1369 |
1370 | def post(self, shared, prep_res, index):
1371 | shared["index"] = index
1372 |
1373 | # Wire them in sequence
1374 | chunk_node = ChunkDocs()
1375 | embed_node = EmbedDocs()
1376 | store_node = StoreIndex()
1377 |
1378 | chunk_node >> embed_node >> store_node
1379 |
1380 | OfflineFlow = Flow(start=chunk_node)
1381 | ```
1382 |
1383 | Usage example:
1384 |
1385 | ```python
1386 | shared = {
1387 | "files": ["doc1.txt", "doc2.txt"], # any text files
1388 | }
1389 | OfflineFlow.run(shared)
1390 | ```
1391 |
1392 | ---
1393 | ## Stage 2: Online Query & Answer
1394 |
1395 | We have 3 nodes:
1396 | 1. `EmbedQuery` – embeds the user’s question.
1397 | 2. `RetrieveDocs` – retrieves the top chunk from the index.
1398 | 3. `GenerateAnswer` – calls the LLM with the question + chunk to produce the final answer.
1399 |
1400 | ```python
1401 | class EmbedQuery(Node):
1402 | def prep(self, shared):
1403 | return shared["question"]
1404 |
1405 | def exec(self, question):
1406 | return get_embedding(question)
1407 |
1408 | def post(self, shared, prep_res, q_emb):
1409 | shared["q_emb"] = q_emb
1410 |
1411 | class RetrieveDocs(Node):
1412 | def prep(self, shared):
1413 | # We'll need the query embedding, plus the offline index/chunks
1414 | return shared["q_emb"], shared["index"], shared["all_chunks"]
1415 |
1416 | def exec(self, inputs):
1417 | q_emb, index, chunks = inputs
1418 | I, D = search_index(index, q_emb, top_k=1)
1419 | best_id = I[0][0]
1420 | relevant_chunk = chunks[best_id]
1421 | return relevant_chunk
1422 |
1423 | def post(self, shared, prep_res, relevant_chunk):
1424 | shared["retrieved_chunk"] = relevant_chunk
1425 | print("Retrieved chunk:", relevant_chunk[:60], "...")
1426 |
1427 | class GenerateAnswer(Node):
1428 | def prep(self, shared):
1429 | return shared["question"], shared["retrieved_chunk"]
1430 |
1431 | def exec(self, inputs):
1432 | question, chunk = inputs
1433 | prompt = f"Question: {question}\nContext: {chunk}\nAnswer:"
1434 | return call_llm(prompt)
1435 |
1436 | def post(self, shared, prep_res, answer):
1437 | shared["answer"] = answer
1438 | print("Answer:", answer)
1439 |
1440 | embed_qnode = EmbedQuery()
1441 | retrieve_node = RetrieveDocs()
1442 | generate_node = GenerateAnswer()
1443 |
1444 | embed_qnode >> retrieve_node >> generate_node
1445 | OnlineFlow = Flow(start=embed_qnode)
1446 | ```
1447 |
1448 | Usage example:
1449 |
1450 | ```python
1451 | # Suppose we already ran OfflineFlow and have:
1452 | # shared["all_chunks"], shared["index"], etc.
1453 | shared["question"] = "Why do people like cats?"
1454 |
1455 | OnlineFlow.run(shared)
1456 | # final answer in shared["answer"]
1457 | ```
1458 |
1459 | ================================================
1460 | File: docs/design_pattern/structure.md
1461 | ================================================
1462 | ---
1463 | layout: default
1464 | title: "Structured Output"
1465 | parent: "Design Pattern"
1466 | nav_order: 5
1467 | ---
1468 |
1469 | # Structured Output
1470 |
1471 | In many use cases, you may want the LLM to output a specific structure, such as a list or a dictionary with predefined keys.
1472 |
1473 | There are several approaches to achieve a structured output:
1474 | - **Prompting** the LLM to strictly return a defined structure.
1475 | - Using LLMs that natively support **schema enforcement**.
1476 | - **Post-processing** the LLM's response to extract structured content.
1477 |
1478 | In practice, **Prompting** is simple and reliable for modern LLMs.
1479 |
1480 | ### Example Use Cases
1481 |
1482 | - Extracting Key Information
1483 |
1484 | ```yaml
1485 | product:
1486 | name: Widget Pro
1487 | price: 199.99
1488 | description: |
1489 | A high-quality widget designed for professionals.
1490 | Recommended for advanced users.
1491 | ```
1492 |
1493 | - Summarizing Documents into Bullet Points
1494 |
1495 | ```yaml
1496 | summary:
1497 | - This product is easy to use.
1498 | - It is cost-effective.
1499 | - Suitable for all skill levels.
1500 | ```
1501 |
1502 | - Generating Configuration Files
1503 |
1504 | ```yaml
1505 | server:
1506 | host: 127.0.0.1
1507 | port: 8080
1508 | ssl: true
1509 | ```
1510 |
1511 | ## Prompt Engineering
1512 |
1513 | When prompting the LLM to produce **structured** output:
1514 | 1. **Wrap** the structure in code fences (e.g., `yaml`).
1515 | 2. **Validate** that all required fields exist (and let the `Node`'s retry mechanism handle failures).
1516 |
1517 | ### Example Text Summarization
1518 |
1519 | ```python
1520 | class SummarizeNode(Node):
1521 | def exec(self, prep_res):
1522 | # Suppose `prep_res` is the text to summarize.
1523 | prompt = f"""
1524 | Please summarize the following text as YAML, with exactly 3 bullet points
1525 |
1526 | {prep_res}
1527 |
1528 | Now, output:
1529 | ```yaml
1530 | summary:
1531 | - bullet 1
1532 | - bullet 2
1533 | - bullet 3
1534 | ```"""
1535 | response = call_llm(prompt)
1536 | yaml_str = response.split("```yaml")[1].split("```")[0].strip()
1537 |
1538 | import yaml
1539 | structured_result = yaml.safe_load(yaml_str)
1540 |
1541 | assert "summary" in structured_result
1542 | assert isinstance(structured_result["summary"], list)
1543 |
1544 | return structured_result
1545 | ```
1546 |
1547 | > Besides using `assert` statements, another popular way to validate schemas is [Pydantic](https://github.com/pydantic/pydantic)
1548 | {: .note }
1549 |
1550 | ### Why YAML instead of JSON?
1551 |
1552 | Current LLMs struggle with escaping. YAML is easier for strings since they don't always need quotes.
1553 |
1554 | **In JSON**
1555 |
1556 | ```json
1557 | {
1558 | "dialogue": "Alice said: \"Hello Bob.\\nHow are you?\\nI am good.\""
1559 | }
1560 | ```
1561 |
1562 | - Every double quote inside the string must be escaped with `\"`.
1563 | - Each newline in the dialogue must be represented as `\n`.
1564 |
1565 | **In YAML**
1566 |
1567 | ```yaml
1568 | dialogue: |
1569 | Alice said: "Hello Bob.
1570 | How are you?
1571 | I am good."
1572 | ```
1573 |
1574 | - No need to escape interior quotes—just place the entire text under a block literal (`|`).
1575 | - Newlines are naturally preserved without needing `\n`.
1576 |
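As a quick sanity check (assuming PyYAML is installed), the block above parses cleanly without any escaping:

```python
import yaml

doc = '''
dialogue: |
  Alice said: "Hello Bob.
  How are you?
  I am good."
'''
# Quotes and newlines survive as-is; no \" or \n sequences needed.
print(yaml.safe_load(doc)["dialogue"])
```
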
1577 | ================================================
1578 | File: docs/design_pattern/workflow.md
1579 | ================================================
1580 | ---
1581 | layout: default
1582 | title: "Workflow"
1583 | parent: "Design Pattern"
1584 | nav_order: 2
1585 | ---
1586 |
1587 | # Workflow
1588 |
1589 | Many real-world tasks are too complex for one LLM call. The solution is **Task Decomposition**: decompose them into a [chain](../core_abstraction/flow.md) of multiple Nodes.
1590 |
1591 |
1592 |

1593 |
1594 |
1595 | > - You don't want to make each task **too coarse**, because it may be *too complex for one LLM call*.
1596 | > - You don't want to make each task **too granular**, because then *the LLM call doesn't have enough context* and results are *not consistent across nodes*.
1597 | >
1598 | > You usually need multiple *iterations* to find the *sweet spot*. If the task has too many *edge cases*, consider using [Agents](./agent.md).
1599 | {: .best-practice }
1600 |
1601 | ### Example: Article Writing
1602 |
1603 | ```python
1604 | class GenerateOutline(Node):
1605 | def prep(self, shared): return shared["topic"]
1606 | def exec(self, topic): return call_llm(f"Create a detailed outline for an article about {topic}")
1607 | def post(self, shared, prep_res, exec_res): shared["outline"] = exec_res
1608 |
1609 | class WriteSection(Node):
1610 | def prep(self, shared): return shared["outline"]
1611 | def exec(self, outline): return call_llm(f"Write content based on this outline: {outline}")
1612 | def post(self, shared, prep_res, exec_res): shared["draft"] = exec_res
1613 |
1614 | class ReviewAndRefine(Node):
1615 | def prep(self, shared): return shared["draft"]
1616 | def exec(self, draft): return call_llm(f"Review and improve this draft: {draft}")
1617 | def post(self, shared, prep_res, exec_res): shared["final_article"] = exec_res
1618 |
1619 | # Connect nodes
1620 | outline = GenerateOutline()
1621 | write = WriteSection()
1622 | review = ReviewAndRefine()
1623 |
1624 | outline >> write >> review
1625 |
1626 | # Create and run flow
1627 | writing_flow = Flow(start=outline)
1628 | shared = {"topic": "AI Safety"}
1629 | writing_flow.run(shared)
1630 | ```
1631 |
1632 | For *dynamic cases*, consider using [Agents](./agent.md).
1633 |
1634 | ================================================
1635 | File: docs/utility_function/llm.md
1636 | ================================================
1637 | ---
1638 | layout: default
1639 | title: "LLM Wrapper"
1640 | parent: "Utility Function"
1641 | nav_order: 1
1642 | ---
1643 |
1644 | # LLM Wrappers
1645 |
1646 | Check out libraries like [litellm](https://github.com/BerriAI/litellm).
1647 | Here, we provide some minimal example implementations:
1648 |
1649 | 1. OpenAI
1650 | ```python
1651 | def call_llm(prompt):
1652 | from openai import OpenAI
1653 | client = OpenAI(api_key="YOUR_API_KEY_HERE")
1654 | r = client.chat.completions.create(
1655 | model="gpt-4o",
1656 | messages=[{"role": "user", "content": prompt}]
1657 | )
1658 | return r.choices[0].message.content
1659 |
1660 | # Example usage
1661 | call_llm("How are you?")
1662 | ```
1663 | > Store the API key in an environment variable like OPENAI_API_KEY for security.
1664 | {: .best-practice }
1665 |
1666 | 2. Claude (Anthropic)
1667 | ```python
1668 | def call_llm(prompt):
1669 | from anthropic import Anthropic
1670 | client = Anthropic(api_key="YOUR_API_KEY_HERE")
1671 | r = client.messages.create(
1672 | model="claude-sonnet-4-0",
1673 | messages=[
1674 | {"role": "user", "content": prompt}
1675 | ]
1676 | )
1677 | return r.content[0].text
1678 | ```
1679 |
1680 | 3. Google (Generative AI Studio / PaLM API)
1681 | ```python
1682 | def call_llm(prompt):
1683 | from google import genai
1684 | client = genai.Client(api_key='GEMINI_API_KEY')
1685 | response = client.models.generate_content(
1686 | model='gemini-2.5-pro',
1687 | contents=prompt
1688 | )
1689 | return response.text
1690 | ```
1691 |
1692 | 4. Azure (Azure OpenAI)
1693 | ```python
1694 | def call_llm(prompt):
1695 | from openai import AzureOpenAI
1696 | client = AzureOpenAI(
1697 | azure_endpoint="https://.openai.azure.com/",
1698 | api_key="YOUR_API_KEY_HERE",
1699 | api_version="2023-05-15"
1700 | )
1701 | r = client.chat.completions.create(
1702 | model="",
1703 | messages=[{"role": "user", "content": prompt}]
1704 | )
1705 | return r.choices[0].message.content
1706 | ```
1707 |
1708 | 5. Ollama (Local LLM)
1709 | ```python
1710 | def call_llm(prompt):
1711 | from ollama import chat
1712 | response = chat(
1713 | model="llama2",
1714 | messages=[{"role": "user", "content": prompt}]
1715 | )
1716 | return response.message.content
1717 | ```
1718 |
1719 | ## Improvements
1720 | Feel free to enhance your `call_llm` function as needed. Here are examples:
1721 |
1722 | - Handle chat history:
1723 |
1724 | ```python
1725 | def call_llm(messages):
1726 | from openai import OpenAI
1727 | client = OpenAI(api_key="YOUR_API_KEY_HERE")
1728 | r = client.chat.completions.create(
1729 | model="gpt-4o",
1730 | messages=messages
1731 | )
1732 | return r.choices[0].message.content
1733 | ```
1734 |
1735 | - Add in-memory caching
1736 |
1737 | ```python
1738 | from functools import lru_cache
1739 |
1740 | @lru_cache(maxsize=1000)
1741 | def call_llm(prompt):
1742 | # Your implementation here
1743 | pass
1744 | ```
1745 |
1746 | > ⚠️ Caching conflicts with Node retries, as retries yield the same result.
1747 | >
1748 | > To address this, you could use cached results only if not retried.
1749 | {: .warning }
1750 |
1751 |
1752 | ```python
1753 | from functools import lru_cache
1754 |
1755 | @lru_cache(maxsize=1000)
1756 | def cached_call(prompt):
1757 | pass
1758 |
1759 | def call_llm(prompt, use_cache):
1760 | if use_cache:
1761 | return cached_call(prompt)
1762 | # Call the underlying function directly
1763 | return cached_call.__wrapped__(prompt)
1764 |
1765 | class SummarizeNode(Node):
1766 | def exec(self, text):
1767 | return call_llm(f"Summarize: {text}", self.cur_retry==0)
1768 | ```
1769 |
1770 | - Enable logging:
1771 |
1772 | ```python
1773 | def call_llm(prompt):
1774 | import logging
1775 | logging.info(f"Prompt: {prompt}")
1776 | response = ... # Your implementation here
1777 | logging.info(f"Response: {response}")
1778 | return response
1779 | ```
--------------------------------------------------------------------------------
/.goosehints:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | title: "Agentic Coding"
4 | ---
5 |
6 | # Agentic Coding: Humans Design, Agents code!
7 |
8 | > If you are an AI agent involved in building LLM Systems, read this guide **VERY, VERY** carefully! This is the most important chapter in the entire document. Throughout development, you should always (1) start with a small and simple solution, (2) design at a high level (`docs/design.md`) before implementation, and (3) frequently ask humans for feedback and clarification.
9 | {: .warning }
10 |
11 | ## Agentic Coding Steps
12 |
13 | Agentic Coding should be a collaboration between Human System Design and Agent Implementation:
14 |
15 | | Steps | Human | AI | Comment |
16 | |:-----------------------|:----------:|:---------:|:------------------------------------------------------------------------|
17 | | 1. Requirements | ★★★ High | ★☆☆ Low | Humans understand the requirements and context. |
18 | | 2. Flow | ★★☆ Medium | ★★☆ Medium | Humans specify the high-level design, and the AI fills in the details. |
19 | | 3. Utilities | ★★☆ Medium | ★★☆ Medium | Humans provide available external APIs and integrations, and the AI helps with implementation. |
20 | | 4. Data | ★☆☆ Low | ★★★ High | AI designs the data schema, and humans verify. |
21 | | 5. Node | ★☆☆ Low | ★★★ High | The AI helps design the node based on the flow. |
22 | | 6. Implementation | ★☆☆ Low | ★★★ High | The AI implements the flow based on the design. |
23 | | 7. Optimization | ★★☆ Medium | ★★☆ Medium | Humans evaluate the results, and the AI helps optimize. |
24 | | 8. Reliability | ★☆☆ Low | ★★★ High | The AI writes test cases and addresses corner cases. |
25 |
26 | 1. **Requirements**: Clarify the requirements for your project, and evaluate whether an AI system is a good fit.
27 | - Understand AI systems' strengths and limitations:
28 | - **Good for**: Routine tasks requiring common sense (filling forms, replying to emails)
29 | - **Good for**: Creative tasks with well-defined inputs (building slides, writing SQL)
30 | - **Not good for**: Ambiguous problems requiring complex decision-making (business strategy, startup planning)
31 | - **Keep It User-Centric:** Explain the "problem" from the user's perspective rather than just listing features.
32 | - **Balance complexity vs. impact**: Aim to deliver the highest value features with minimal complexity early.
33 |
34 | 2. **Flow Design**: Outline at a high level, describe how your AI system orchestrates nodes.
35 | - Identify applicable design patterns (e.g., [Map Reduce](./design_pattern/mapreduce.md), [Agent](./design_pattern/agent.md), [RAG](./design_pattern/rag.md)).
36 | - For each node in the flow, start with a high-level one-line description of what it does.
37 | - If using **Map Reduce**, specify how to map (what to split) and how to reduce (how to combine).
38 | - If using **Agent**, specify what are the inputs (context) and what are the possible actions.
39 | - If using **RAG**, specify what to embed, noting that there's usually both offline (indexing) and online (retrieval) workflows.
40 | - Outline the flow and draw it in a mermaid diagram. For example:
41 | ```mermaid
42 | flowchart LR
43 | start[Start] --> batch[Batch]
44 | batch --> check[Check]
45 | check -->|OK| process
46 | check -->|Error| fix[Fix]
47 | fix --> check
48 |
49 | subgraph process[Process]
50 | step1[Step 1] --> step2[Step 2]
51 | end
52 |
53 | process --> endNode[End]
54 | ```
55 | - > **If Humans can't specify the flow, AI Agents can't automate it!** Before building an LLM system, thoroughly understand the problem and potential solution by manually solving example inputs to develop intuition.
56 | {: .best-practice }
57 |
58 | 3. **Utilities**: Based on the Flow Design, identify and implement necessary utility functions.
59 | - Think of your AI system as the brain. It needs a body—these *external utility functions*—to interact with the real world:
60 |
61 |
62 | - Reading inputs (e.g., retrieving Slack messages, reading emails)
63 | - Writing outputs (e.g., generating reports, sending emails)
64 | - Using external tools (e.g., calling LLMs, searching the web)
65 |       - **NOTE**: *LLM-based tasks* (e.g., summarizing text, analyzing sentiment) are **NOT** utility functions; rather, they are *core functions* internal to the AI system.
66 | - For each utility function, implement it and write a simple test.
67 | - Document their input/output, as well as why they are necessary. For example:
68 | - `name`: `get_embedding` (`utils/get_embedding.py`)
69 | - `input`: `str`
70 | - `output`: a vector of 3072 floats
71 | - `necessity`: Used by the second node to embed text
72 | - Example utility implementation:
73 | ```python
74 | # utils/call_llm.py
75 | from openai import OpenAI
76 |
77 | def call_llm(prompt):
78 | client = OpenAI(api_key="YOUR_API_KEY_HERE")
79 | r = client.chat.completions.create(
80 | model="gpt-4o",
81 | messages=[{"role": "user", "content": prompt}]
82 | )
83 | return r.choices[0].message.content
84 |
85 | if __name__ == "__main__":
86 | prompt = "What is the meaning of life?"
87 | print(call_llm(prompt))
88 | ```
89 | - > **Sometimes, design Utilities before Flow:** For example, for an LLM project to automate a legacy system, the bottleneck will likely be the available interface to that system. Start by designing the hardest utilities for interfacing, and then build the flow around them.
90 | {: .best-practice }
91 | - > **Avoid Exception Handling in Utilities**: If a utility function is called from a Node's `exec()` method, avoid using `try...except` blocks within the utility. Let the Node's built-in retry mechanism handle failures.
92 | {: .warning }
93 |
94 | 4. **Data Design**: Design the shared store that nodes will use to communicate.
95 | - One core design principle for PocketFlow is to use a well-designed [shared store](./core_abstraction/communication.md)—a data contract that all nodes agree upon to retrieve and store data.
96 | - For simple systems, use an in-memory dictionary.
97 | - For more complex systems or when persistence is required, use a database.
98 | - **Don't Repeat Yourself**: Use in-memory references or foreign keys.
99 | - Example shared store design:
100 | ```python
101 | shared = {
102 | "user": {
103 | "id": "user123",
104 | "context": { # Another nested dict
105 | "weather": {"temp": 72, "condition": "sunny"},
106 | "location": "San Francisco"
107 | }
108 | },
109 | "results": {} # Empty dict to store outputs
110 | }
111 | ```
112 |
113 | 5. **Node Design**: Plan how each node will read and write data, and use utility functions.
114 | - For each [Node](./core_abstraction/node.md), describe its type, how it reads and writes data, and which utility function it uses. Keep it specific but high-level without codes. For example:
115 | - `type`: Regular (or Batch, or Async)
116 | - `prep`: Read "text" from the shared store
117 | - `exec`: Call the embedding utility function. **Avoid exception handling here**; let the Node's retry mechanism manage failures.
118 | - `post`: Write "embedding" to the shared store
119 |
120 | 6. **Implementation**: Implement the initial nodes and flows based on the design.
121 | - 🎉 If you've reached this step, humans have finished the design. Now *Agentic Coding* begins!
122 | - **"Keep it simple, stupid!"** Avoid complex features and full-scale type checking.
123 | - **FAIL FAST**! Leverage the built-in [Node](./core_abstraction/node.md) retry and fallback mechanisms to handle failures gracefully. This helps you quickly identify weak points in the system.
124 | - Add logging throughout the code to facilitate debugging.
125 |
126 | 7. **Optimization**:
127 | - **Use Intuition**: For a quick initial evaluation, human intuition is often a good start.
128 | - **Redesign Flow (Back to Step 3)**: Consider breaking down tasks further, introducing agentic decisions, or better managing input contexts.
129 | - If your flow design is already solid, move on to micro-optimizations:
130 | - **Prompt Engineering**: Use clear, specific instructions with examples to reduce ambiguity.
131 | - **In-Context Learning**: Provide robust examples for tasks that are difficult to specify with instructions alone.
132 |
133 | - > **You'll likely iterate a lot!** Expect to repeat Steps 3–6 hundreds of times.
134 | >
135 | >
136 | {: .best-practice }
137 |
138 | 8. **Reliability**
139 |    - **Node Retries**: Add checks in the node `exec` to ensure outputs meet requirements, and consider increasing `max_retries` and `wait` times; a minimal sketch follows after this list.
140 | - **Logging and Visualization**: Maintain logs of all attempts and visualize node results for easier debugging.
141 | - **Self-Evaluation**: Add a separate node (powered by an LLM) to review outputs when results are uncertain.
142 |
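The retry-check sketch referenced above (a hypothetical node; `call_llm` is the project's own LLM utility as used throughout these docs, and the retry values are placeholders):

```python
from pocketflow import Node
from utils.call_llm import call_llm

class ExtractKeywords(Node):
    def prep(self, shared):
        return shared["text"]

    def exec(self, text):
        resp = call_llm(f"List exactly 3 keywords for:\n{text}\nOne per line.")
        keywords = [line.strip() for line in resp.splitlines() if line.strip()]
        # Output check: raising here hands control to the built-in retry mechanism.
        assert len(keywords) == 3, f"expected 3 keywords, got {len(keywords)}"
        return keywords

    def post(self, shared, prep_res, exec_res):
        shared["keywords"] = exec_res

# More attempts and a short back-off for flaky outputs.
extract_node = ExtractKeywords(max_retries=3, wait=5)
```
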
143 | ## Example LLM Project File Structure
144 |
145 | ```
146 | my_project/
147 | ├── main.py
148 | ├── nodes.py
149 | ├── flow.py
150 | ├── utils/
151 | │ ├── __init__.py
152 | │ ├── call_llm.py
153 | │ └── search_web.py
154 | ├── requirements.txt
155 | └── docs/
156 | └── design.md
157 | ```
158 |
159 | - **`requirements.txt`**: Lists the Python dependencies for the project.
160 | ```
161 | PyYAML
162 | pocketflow
163 | ```
164 |
165 | - **`docs/design.md`**: Contains project documentation for each step above. This should be *high-level* and *no-code*.
166 | ~~~
167 | # Design Doc: Your Project Name
168 |
169 | > Please DON'T remove notes for AI
170 |
171 | ## Requirements
172 |
173 | > Notes for AI: Keep it simple and clear.
174 | > If the requirements are abstract, write concrete user stories
175 |
176 |
177 | ## Flow Design
178 |
179 | > Notes for AI:
180 | > 1. Consider the design patterns of agent, map-reduce, rag, and workflow. Apply them if they fit.
181 | > 2. Present a concise, high-level description of the workflow.
182 |
183 | ### Applicable Design Pattern:
184 |
185 | 1. Map the file summary into chunks, then reduce these chunks into a final summary.
186 | 2. Agentic file finder
187 | - *Context*: The entire summary of the file
188 | - *Action*: Find the file
189 |
190 | ### Flow high-level Design:
191 |
192 | 1. **First Node**: This node is for ...
193 | 2. **Second Node**: This node is for ...
194 | 3. **Third Node**: This node is for ...
195 |
196 | ```mermaid
197 | flowchart TD
198 | firstNode[First Node] --> secondNode[Second Node]
199 | secondNode --> thirdNode[Third Node]
200 | ```
201 | ## Utility Functions
202 |
203 | > Notes for AI:
204 | > 1. Understand the utility function definition thoroughly by reviewing the doc.
205 | > 2. Include only the necessary utility functions, based on nodes in the flow.
206 |
207 | 1. **Call LLM** (`utils/call_llm.py`)
208 | - *Input*: prompt (str)
209 | - *Output*: response (str)
210 | - Generally used by most nodes for LLM tasks
211 |
212 | 2. **Embedding** (`utils/get_embedding.py`)
213 | - *Input*: str
214 | - *Output*: a vector of 3072 floats
215 | - Used by the second node to embed text
216 |
217 | ## Node Design
218 |
219 | ### Shared Store
220 |
221 | > Notes for AI: Try to minimize data redundancy
222 |
223 | The shared store structure is organized as follows:
224 |
225 | ```python
226 | shared = {
227 | "key": "value"
228 | }
229 | ```
230 |
231 | ### Node Steps
232 |
233 | > Notes for AI: Carefully decide whether to use Batch/Async Node/Flow.
234 |
235 | 1. First Node
236 | - *Purpose*: Provide a short explanation of the node’s function
237 | - *Type*: Decide between Regular, Batch, or Async
238 | - *Steps*:
239 | - *prep*: Read "key" from the shared store
240 | - *exec*: Call the utility function
241 | - *post*: Write "key" to the shared store
242 |
243 | 2. Second Node
244 | ...
245 | ~~~
246 |
247 |
248 | - **`utils/`**: Contains all utility functions.
249 | - It's recommended to dedicate one Python file to each API call, for example `call_llm.py` or `search_web.py`.
250 |   - Each file should also include a `main` entry point (e.g., an `if __name__ == "__main__":` block) to try that API call, for example:
251 | ```python
252 | from google import genai
253 | import os
254 |
255 | def call_llm(prompt: str) -> str:
256 | client = genai.Client(
257 | api_key=os.getenv("GEMINI_API_KEY", ""),
258 | )
259 | model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
260 | response = client.models.generate_content(model=model, contents=[prompt])
261 | return response.text
262 |
263 | if __name__ == "__main__":
264 | test_prompt = "Hello, how are you?"
265 |
266 |     # Try the API call
267 |     print("Making call...")
268 |     response1 = call_llm(test_prompt)
269 | print(f"Response: {response1}")
270 | ```
271 |
272 | - **`nodes.py`**: Contains all the node definitions.
273 | ```python
274 | # nodes.py
275 | from pocketflow import Node
276 | from utils.call_llm import call_llm
277 |
278 | class GetQuestionNode(Node):
279 | def exec(self, _):
280 | # Get question directly from user input
281 | user_question = input("Enter your question: ")
282 | return user_question
283 |
284 | def post(self, shared, prep_res, exec_res):
285 | # Store the user's question
286 | shared["question"] = exec_res
287 | return "default" # Go to the next node
288 |
289 | class AnswerNode(Node):
290 | def prep(self, shared):
291 | # Read question from shared
292 | return shared["question"]
293 |
294 | def exec(self, question):
295 | # Call LLM to get the answer
296 | return call_llm(question)
297 |
298 | def post(self, shared, prep_res, exec_res):
299 | # Store the answer in shared
300 | shared["answer"] = exec_res
301 | ```
302 | - **`flow.py`**: Implements functions that create flows by importing node definitions and connecting them.
303 | ```python
304 | # flow.py
305 | from pocketflow import Flow
306 | from nodes import GetQuestionNode, AnswerNode
307 |
308 | def create_qa_flow():
309 | """Create and return a question-answering flow."""
310 | # Create nodes
311 | get_question_node = GetQuestionNode()
312 | answer_node = AnswerNode()
313 |
314 | # Connect nodes in sequence
315 | get_question_node >> answer_node
316 |
317 | # Create flow starting with input node
318 | return Flow(start=get_question_node)
319 | ```
320 | - **`main.py`**: Serves as the project's entry point.
321 | ```python
322 | # main.py
323 | from flow import create_qa_flow
324 |
325 | # Example main function
326 | # Please replace this with your own main function
327 | def main():
328 | shared = {
329 | "question": None, # Will be populated by GetQuestionNode from user input
330 | "answer": None # Will be populated by AnswerNode
331 | }
332 |
333 | # Create the flow and run it
334 | qa_flow = create_qa_flow()
335 | qa_flow.run(shared)
336 | print(f"Question: {shared['question']}")
337 | print(f"Answer: {shared['answer']}")
338 |
339 | if __name__ == "__main__":
340 | main()
341 | ```
342 |
343 | ================================================
344 | File: docs/index.md
345 | ================================================
346 | ---
347 | layout: default
348 | title: "Home"
349 | nav_order: 1
350 | ---
351 |
352 | # Pocket Flow
353 |
354 | A [100-line](https://github.com/the-pocket/PocketFlow/blob/main/pocketflow/__init__.py) minimalist LLM framework for *Agents, Task Decomposition, RAG, etc*.
355 |
356 | - **Lightweight**: Just the core graph abstraction in 100 lines. ZERO dependencies and NO vendor lock-in.
357 | - **Expressive**: Everything you love from larger frameworks—([Multi-](./design_pattern/multi_agent.html))[Agents](./design_pattern/agent.html), [Workflow](./design_pattern/workflow.html), [RAG](./design_pattern/rag.html), and more.
358 | - **Agentic-Coding**: Intuitive enough for AI agents to help humans build complex LLM applications.
359 |
360 |
361 |

362 |
363 |
364 | ## Core Abstraction
365 |
366 | We model the LLM workflow as a **Graph + Shared Store**:
367 |
368 | - [Node](./core_abstraction/node.md) handles simple (LLM) tasks.
369 | - [Flow](./core_abstraction/flow.md) connects nodes through **Actions** (labeled edges).
370 | - [Shared Store](./core_abstraction/communication.md) enables communication between nodes within flows.
371 | - [Batch](./core_abstraction/batch.md) nodes/flows allow for data-intensive tasks.
372 | - [Async](./core_abstraction/async.md) nodes/flows allow waiting for asynchronous tasks.
373 | - [(Advanced) Parallel](./core_abstraction/parallel.md) nodes/flows handle I/O-bound tasks.
374 |
375 |
376 |

377 |
378 |
379 | ## Design Pattern
380 |
381 | From there, it’s easy to implement popular design patterns:
382 |
383 | - [Agent](./design_pattern/agent.md) autonomously makes decisions.
384 | - [Workflow](./design_pattern/workflow.md) chains multiple tasks into pipelines.
385 | - [RAG](./design_pattern/rag.md) integrates data retrieval with generation.
386 | - [Map Reduce](./design_pattern/mapreduce.md) splits data tasks into Map and Reduce steps.
387 | - [Structured Output](./design_pattern/structure.md) formats outputs consistently.
388 | - [(Advanced) Multi-Agents](./design_pattern/multi_agent.md) coordinate multiple agents.
389 |
390 |
391 |

392 |
393 |
394 | ## Utility Function
395 |
396 | We **do not** provide built-in utilities. Instead, we offer *examples*—please *implement your own*:
397 |
398 | - [LLM Wrapper](./utility_function/llm.md)
399 | - [Viz and Debug](./utility_function/viz.md)
400 | - [Web Search](./utility_function/websearch.md)
401 | - [Chunking](./utility_function/chunking.md)
402 | - [Embedding](./utility_function/embedding.md)
403 | - [Vector Databases](./utility_function/vector.md)
404 | - [Text-to-Speech](./utility_function/text_to_speech.md)
405 |
406 | **Why not built-in?**: I believe it's a *bad practice* to bake vendor-specific APIs into a general framework:
407 | - *API Volatility*: Frequent changes lead to heavy maintenance for hardcoded APIs.
408 | - *Flexibility*: You may want to switch vendors, use fine-tuned models, or run them locally.
409 | - *Optimizations*: Prompt caching, batching, and streaming are easier without vendor lock-in.
410 |
411 | ## Ready to build your Apps?
412 |
413 | Check out [Agentic Coding Guidance](./guide.md), the fastest way to develop LLM projects with Pocket Flow!
414 |
415 | ================================================
416 | File: docs/core_abstraction/async.md
417 | ================================================
418 | ---
419 | layout: default
420 | title: "(Advanced) Async"
421 | parent: "Core Abstraction"
422 | nav_order: 5
423 | ---
424 |
425 | # (Advanced) Async
426 |
427 | **Async** Nodes implement `prep_async()`, `exec_async()`, `exec_fallback_async()`, and/or `post_async()`. This is useful for:
428 |
429 | 1. **prep_async()**: For *fetching/reading data (files, APIs, DB)* in an I/O-friendly way.
430 | 2. **exec_async()**: Typically used for async LLM calls.
431 | 3. **post_async()**: For *awaiting user feedback*, *coordinating across multiple agents*, or any additional async steps after `exec_async()`.
432 |
433 | **Note**: `AsyncNode` must be wrapped in `AsyncFlow`. `AsyncFlow` can also include regular (sync) nodes.
434 |
435 | ### Example
436 |
437 | ```python
438 | class SummarizeThenVerify(AsyncNode):
439 | async def prep_async(self, shared):
440 | # Example: read a file asynchronously
441 | doc_text = await read_file_async(shared["doc_path"])
442 | return doc_text
443 |
444 | async def exec_async(self, prep_res):
445 | # Example: async LLM call
446 | summary = await call_llm_async(f"Summarize: {prep_res}")
447 | return summary
448 |
449 | async def post_async(self, shared, prep_res, exec_res):
450 | # Example: wait for user feedback
451 | decision = await gather_user_feedback(exec_res)
452 | if decision == "approve":
453 | shared["summary"] = exec_res
454 | return "approve"
455 | return "deny"
456 |
457 | summarize_node = SummarizeThenVerify()
458 | final_node = Finalize()
459 |
460 | # Define transitions
461 | summarize_node - "approve" >> final_node
462 | summarize_node - "deny" >> summarize_node # retry
463 |
464 | flow = AsyncFlow(start=summarize_node)
465 |
466 | async def main():
467 | shared = {"doc_path": "document.txt"}
468 | await flow.run_async(shared)
469 | print("Final Summary:", shared.get("summary"))
470 |
471 | asyncio.run(main())
472 | ```
473 |
474 | ================================================
475 | File: docs/core_abstraction/batch.md
476 | ================================================
477 | ---
478 | layout: default
479 | title: "Batch"
480 | parent: "Core Abstraction"
481 | nav_order: 4
482 | ---
483 |
484 | # Batch
485 |
486 | **Batch** makes it easier to handle large inputs in one Node or **rerun** a Flow multiple times. Example use cases:
487 | - **Chunk-based** processing (e.g., splitting large texts).
488 | - **Iterative** processing over lists of input items (e.g., user queries, files, URLs).
489 |
490 | ## 1. BatchNode
491 |
492 | A **BatchNode** extends `Node` but changes `prep()` and `exec()`:
493 |
494 | - **`prep(shared)`**: returns an **iterable** (e.g., list, generator).
495 | - **`exec(item)`**: called **once** per item in that iterable.
496 | - **`post(shared, prep_res, exec_res_list)`**: after all items are processed, receives a **list** of results (`exec_res_list`) and returns an **Action**.
497 |
498 |
499 | ### Example: Summarize a Large File
500 |
501 | ```python
502 | class MapSummaries(BatchNode):
503 | def prep(self, shared):
504 | # Suppose we have a big file; chunk it
505 | content = shared["data"]
506 | chunk_size = 10000
507 | chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)]
508 | return chunks
509 |
510 | def exec(self, chunk):
511 | prompt = f"Summarize this chunk in 10 words: {chunk}"
512 | summary = call_llm(prompt)
513 | return summary
514 |
515 | def post(self, shared, prep_res, exec_res_list):
516 | combined = "\n".join(exec_res_list)
517 | shared["summary"] = combined
518 | return "default"
519 |
520 | map_summaries = MapSummaries()
521 | flow = Flow(start=map_summaries)
522 | flow.run(shared)
523 | ```
524 |
525 | ---
526 |
527 | ## 2. BatchFlow
528 |
529 | A **BatchFlow** runs a **Flow** multiple times, each time with different `params`. Think of it as a loop that replays the Flow for each parameter set.
530 |
531 | ### Example: Summarize Many Files
532 |
533 | ```python
534 | class SummarizeAllFiles(BatchFlow):
535 | def prep(self, shared):
536 | # Return a list of param dicts (one per file)
537 | filenames = list(shared["data"].keys()) # e.g., ["file1.txt", "file2.txt", ...]
538 | return [{"filename": fn} for fn in filenames]
539 |
540 | # Suppose we have a per-file Flow (e.g., load_file >> summarize >> reduce):
541 | summarize_file = SummarizeFile(start=load_file)
542 |
543 | # Wrap that flow into a BatchFlow:
544 | summarize_all_files = SummarizeAllFiles(start=summarize_file)
545 | summarize_all_files.run(shared)
546 | ```
547 |
548 | ### Under the Hood
549 | 1. `prep(shared)` returns a list of param dicts—e.g., `[{filename: "file1.txt"}, {filename: "file2.txt"}, ...]`.
550 | 2. The **BatchFlow** loops through each dict. For each one:
551 | - It merges the dict with the BatchFlow’s own `params`.
552 | - It calls `flow.run(shared)` using the merged result.
553 | 3. This means the sub-Flow is run **repeatedly**, once for every param dict.
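
For instance, a node inside the per-file sub-flow might read the merged params like this (a hypothetical `LoadFile` node, assuming `shared["data"]` maps filenames to contents):

```python
class LoadFile(Node):
    def prep(self, shared):
        # "filename" was injected by SummarizeAllFiles via params.
        filename = self.params["filename"]
        return shared["data"][filename]

    def post(self, shared, prep_res, exec_res):
        # Stash the raw content for the next nodes in this sub-flow run.
        shared["current_file"] = {"name": self.params["filename"], "content": prep_res}
```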
554 |
555 | ---
556 |
557 | ## 3. Nested or Multi-Level Batches
558 |
559 | You can nest a **BatchFlow** in another **BatchFlow**. For instance:
560 | - **Outer** batch: returns a list of directory param dicts (e.g., `{"directory": "/pathA"}`, `{"directory": "/pathB"}`, ...).
561 | - **Inner** batch: returns a list of per-file param dicts.
562 |
563 | At each level, **BatchFlow** merges its own param dict with the parent’s. By the time you reach the **innermost** node, the final `params` is the merged result of **all** parents in the chain. This way, a nested structure can keep track of the entire context (e.g., directory + file name) at once.
564 |
565 | ```python
566 |
567 | class FileBatchFlow(BatchFlow):
568 | def prep(self, shared):
569 | directory = self.params["directory"]
570 | # e.g., files = ["file1.txt", "file2.txt", ...]
571 | files = [f for f in os.listdir(directory) if f.endswith(".txt")]
572 | return [{"filename": f} for f in files]
573 |
574 | class DirectoryBatchFlow(BatchFlow):
575 | def prep(self, shared):
576 | directories = [ "/path/to/dirA", "/path/to/dirB"]
577 | return [{"directory": d} for d in directories]
578 |
579 | # MapSummaries have params like {"directory": "/path/to/dirA", "filename": "file1.txt"}
580 | inner_flow = FileBatchFlow(start=MapSummaries())
581 | outer_flow = DirectoryBatchFlow(start=inner_flow)
582 | ```
583 |
584 | ================================================
585 | File: docs/core_abstraction/communication.md
586 | ================================================
587 | ---
588 | layout: default
589 | title: "Communication"
590 | parent: "Core Abstraction"
591 | nav_order: 3
592 | ---
593 |
594 | # Communication
595 |
596 | Nodes and Flows **communicate** in 2 ways:
597 |
598 | 1. **Shared Store (for almost all the cases)**
599 |
600 |    - A global data structure (often an in-mem dict) that all nodes can read (`prep()`) and write (`post()`).
601 | - Great for data results, large content, or anything multiple nodes need.
602 |    - You should design the data structure and populate it ahead of time.
603 |
604 |    - > **Separation of Concerns:** Use `Shared Store` for almost all cases to separate *Data Schema* from *Compute Logic*! This approach is both flexible and easy to manage, resulting in more maintainable code. `Params` are mostly syntactic sugar for [Batch](./batch.md).
605 | {: .best-practice }
606 |
607 | 2. **Params (only for [Batch](./batch.md))**
608 | - Each node has a local, ephemeral `params` dict passed in by the **parent Flow**, used as an identifier for tasks. Parameter keys and values shall be **immutable**.
609 | - Good for identifiers like filenames or numeric IDs, in Batch mode.
610 |
611 | If you know memory management, think of the **Shared Store** like a **heap** (shared by all function calls), and **Params** like a **stack** (assigned by the caller).
612 |
613 | ---
614 |
615 | ## 1. Shared Store
616 |
617 | ### Overview
618 |
619 | A shared store is typically an in-mem dictionary, like:
620 | ```python
621 | shared = {"data": {}, "summary": {}, "config": {...}, ...}
622 | ```
623 |
624 | It can also contain local file handlers, DB connections, or a combination for persistence. We recommend deciding the data structure or DB schema first based on your app requirements.
625 |
626 | ### Example
627 |
628 | ```python
629 | class LoadData(Node):
630 | def post(self, shared, prep_res, exec_res):
631 | # We write data to shared store
632 | shared["data"] = "Some text content"
633 | return None
634 |
635 | class Summarize(Node):
636 | def prep(self, shared):
637 | # We read data from shared store
638 | return shared["data"]
639 |
640 | def exec(self, prep_res):
641 | # Call LLM to summarize
642 | prompt = f"Summarize: {prep_res}"
643 | summary = call_llm(prompt)
644 | return summary
645 |
646 | def post(self, shared, prep_res, exec_res):
647 | # We write summary to shared store
648 | shared["summary"] = exec_res
649 | return "default"
650 |
651 | load_data = LoadData()
652 | summarize = Summarize()
653 | load_data >> summarize
654 | flow = Flow(start=load_data)
655 |
656 | shared = {}
657 | flow.run(shared)
658 | ```
659 |
660 | Here:
661 | - `LoadData` writes to `shared["data"]`.
662 | - `Summarize` reads from `shared["data"]`, summarizes, and writes to `shared["summary"]`.
663 |
664 | ---
665 |
666 | ## 2. Params
667 |
668 | **Params** let you store *per-Node* or *per-Flow* config that doesn't need to live in the shared store. They are:
669 | - **Immutable** during a Node's run cycle (i.e., they don't change mid-`prep->exec->post`).
670 | - **Set** via `set_params()`.
671 | - **Cleared** and updated each time a parent Flow runs the node.
672 |
673 | > Only set the uppermost Flow params because others will be overwritten by the parent Flow.
674 | >
675 | > If you need to set child node params, see [Batch](./batch.md).
676 | {: .warning }
677 |
678 | Typically, **Params** are identifiers (e.g., file name, page number). Use them to fetch the task you assigned or write to a specific part of the shared store.
679 |
680 | ### Example
681 |
682 | ```python
683 | # 1) Create a Node that uses params
684 | class SummarizeFile(Node):
685 | def prep(self, shared):
686 | # Access the node's param
687 | filename = self.params["filename"]
688 | return shared["data"].get(filename, "")
689 |
690 | def exec(self, prep_res):
691 | prompt = f"Summarize: {prep_res}"
692 | return call_llm(prompt)
693 |
694 | def post(self, shared, prep_res, exec_res):
695 | filename = self.params["filename"]
696 | shared["summary"][filename] = exec_res
697 | return "default"
698 |
699 | # 2) Set params
700 | node = SummarizeFile()
701 |
702 | # 3) Set Node params directly (for testing)
703 | node.set_params({"filename": "doc1.txt"})
704 | node.run(shared)
705 |
706 | # 4) Create Flow
707 | flow = Flow(start=node)
708 |
709 | # 5) Set Flow params (overwrites node params)
710 | flow.set_params({"filename": "doc2.txt"})
711 | flow.run(shared) # The node summarizes doc2, not doc1
712 | ```
713 |
714 | ================================================
715 | File: docs/core_abstraction/flow.md
716 | ================================================
717 | ---
718 | layout: default
719 | title: "Flow"
720 | parent: "Core Abstraction"
721 | nav_order: 2
722 | ---
723 |
724 | # Flow
725 |
726 | A **Flow** orchestrates a graph of Nodes. You can chain Nodes in a sequence or create branching depending on the **Actions** returned from each Node's `post()`.
727 |
728 | ## 1. Action-based Transitions
729 |
730 | Each Node's `post()` returns an **Action** string. By default, if `post()` doesn't return anything, we treat that as `"default"`.
731 |
732 | You define transitions with the syntax:
733 |
734 | 1. **Basic default transition**: `node_a >> node_b`
735 | This means if `node_a.post()` returns `"default"`, go to `node_b`.
736 | (Equivalent to `node_a - "default" >> node_b`)
737 |
738 | 2. **Named action transition**: `node_a - "action_name" >> node_b`
739 | This means if `node_a.post()` returns `"action_name"`, go to `node_b`.
740 |
741 | It's possible to create loops, branching, or multi-step flows.
742 |
743 | ## 2. Creating a Flow
744 |
745 | A **Flow** begins with a **start** node. You call `Flow(start=some_node)` to specify the entry point. When you call `flow.run(shared)`, it executes the start node, looks at its returned Action from `post()`, follows the transition, and continues until there's no next node.
746 |
747 | ### Example: Simple Sequence
748 |
749 | Here's a minimal flow of two nodes in a chain:
750 |
751 | ```python
752 | node_a >> node_b
753 | flow = Flow(start=node_a)
754 | flow.run(shared)
755 | ```
756 |
757 | - When you run the flow, it executes `node_a`.
758 | - Suppose `node_a.post()` returns `"default"`.
759 | - The flow then sees that the `"default"` Action is linked to `node_b` and runs `node_b`.
760 | - `node_b.post()` returns `"default"` but we didn't define `node_b >> something_else`. So the flow ends there.
761 |
762 | ### Example: Branching & Looping
763 |
764 | Here's a simple expense approval flow that demonstrates branching and looping. The `ReviewExpense` node can return three possible Actions:
765 |
766 | - `"approved"`: expense is approved, move to payment processing
767 | - `"needs_revision"`: expense needs changes, send back for revision
768 | - `"rejected"`: expense is denied, finish the process
769 |
770 | We can wire them like this:
771 |
772 | ```python
773 | # Define the flow connections
774 | review - "approved" >> payment # If approved, process payment
775 | review - "needs_revision" >> revise # If needs changes, go to revision
776 | review - "rejected" >> finish # If rejected, finish the process
777 |
778 | revise >> review # After revision, go back for another review
779 | payment >> finish # After payment, finish the process
780 |
781 | flow = Flow(start=review)
782 | ```
783 |
784 | Let's see how it flows:
785 |
786 | 1. If `review.post()` returns `"approved"`, the expense moves to the `payment` node
787 | 2. If `review.post()` returns `"needs_revision"`, it goes to the `revise` node, which then loops back to `review`
788 | 3. If `review.post()` returns `"rejected"`, it moves to the `finish` node and stops
789 |
790 | ```mermaid
791 | flowchart TD
792 | review[Review Expense] -->|approved| payment[Process Payment]
793 | review -->|needs_revision| revise[Revise Report]
794 | review -->|rejected| finish[Finish Process]
795 |
796 | revise --> review
797 | payment --> finish
798 | ```
799 |
800 | ### Running Individual Nodes vs. Running a Flow
801 |
802 | - `node.run(shared)`: Just runs that node alone (calls `prep->exec->post()`), returns an Action.
803 | - `flow.run(shared)`: Executes from the start node, follows Actions to the next node, and so on until the flow can't continue.
804 |
805 | > `node.run(shared)` **does not** proceed to the successor.
806 | > This is mainly for debugging or testing a single node.
807 | >
808 | > Always use `flow.run(...)` in production to ensure the full pipeline runs correctly.
809 | {: .warning }
810 |
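As a quick illustration, here is a minimal sketch reusing the `node_a >> node_b` chain from the sequence example above:

```python
node_a >> node_b

# Runs node_a alone: prep->exec->post, then returns its Action.
# node_b is NOT executed, even though a "default" transition exists.
action = node_a.run(shared)

# Runs node_a, then follows the returned Action to node_b, and so on.
flow = Flow(start=node_a)
flow.run(shared)
```
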
811 | ## 3. Nested Flows
812 |
813 | A **Flow** can act like a Node, which enables powerful composition patterns. This means you can:
814 |
815 | 1. Use a Flow as a Node within another Flow's transitions.
816 | 2. Combine multiple smaller Flows into a larger Flow for reuse.
817 | 3. A Node's `params` will be a merge of **all** parent Flows' `params`.
818 |
819 | ### Flow's Node Methods
820 |
821 | A **Flow** is also a **Node**, so it will run `prep()` and `post()`. However:
822 |
823 | - It **won't** run `exec()`, as its main logic is to orchestrate its nodes.
824 | - `post()` always receives `None` for `exec_res` and should instead get the flow execution results from the shared store.
825 |
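For example, a minimal sketch of a Flow that overrides `post()` to report results from the shared store (assuming its nodes write a `summary` key):

```python
class SummaryFlow(Flow):
    def post(self, shared, prep_res, exec_res):
        # exec_res is always None for a Flow; read results from the shared store instead.
        print("Summary length:", len(shared.get("summary", "")))
        return "default"

node_a >> node_b
flow = SummaryFlow(start=node_a)
```
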
826 | ### Basic Flow Nesting
827 |
828 | Here's how to connect a flow to another node:
829 |
830 | ```python
831 | # Create a sub-flow
832 | node_a >> node_b
833 | subflow = Flow(start=node_a)
834 |
835 | # Connect it to another node
836 | subflow >> node_c
837 |
838 | # Create the parent flow
839 | parent_flow = Flow(start=subflow)
840 | ```
841 |
842 | When `parent_flow.run()` executes:
843 | 1. It starts `subflow`
844 | 2. `subflow` runs through its nodes (`node_a->node_b`)
845 | 3. After `subflow` completes, execution continues to `node_c`
846 |
847 | ### Example: Order Processing Pipeline
848 |
849 | Here's a practical example that breaks down order processing into nested flows:
850 |
851 | ```python
852 | # Payment processing sub-flow
853 | validate_payment >> process_payment >> payment_confirmation
854 | payment_flow = Flow(start=validate_payment)
855 |
856 | # Inventory sub-flow
857 | check_stock >> reserve_items >> update_inventory
858 | inventory_flow = Flow(start=check_stock)
859 |
860 | # Shipping sub-flow
861 | create_label >> assign_carrier >> schedule_pickup
862 | shipping_flow = Flow(start=create_label)
863 |
864 | # Connect the flows into a main order pipeline
865 | payment_flow >> inventory_flow >> shipping_flow
866 |
867 | # Create the master flow
868 | order_pipeline = Flow(start=payment_flow)
869 |
870 | # Run the entire pipeline
871 | order_pipeline.run(shared_data)
872 | ```
873 |
874 | This creates a clean separation of concerns while maintaining a clear execution path:
875 |
876 | ```mermaid
877 | flowchart LR
878 | subgraph order_pipeline[Order Pipeline]
879 | subgraph paymentFlow["Payment Flow"]
880 | A[Validate Payment] --> B[Process Payment] --> C[Payment Confirmation]
881 | end
882 |
883 | subgraph inventoryFlow["Inventory Flow"]
884 | D[Check Stock] --> E[Reserve Items] --> F[Update Inventory]
885 | end
886 |
887 | subgraph shippingFlow["Shipping Flow"]
888 | G[Create Label] --> H[Assign Carrier] --> I[Schedule Pickup]
889 | end
890 |
891 | paymentFlow --> inventoryFlow
892 | inventoryFlow --> shippingFlow
893 | end
894 | ```
895 |
896 | ================================================
897 | File: docs/core_abstraction/node.md
898 | ================================================
899 | ---
900 | layout: default
901 | title: "Node"
902 | parent: "Core Abstraction"
903 | nav_order: 1
904 | ---
905 |
906 | # Node
907 |
908 | A **Node** is the smallest building block. Each Node has 3 steps `prep->exec->post`:
909 |
910 |
911 |
912 |
913 |
914 | 1. `prep(shared)`
915 | - **Read and preprocess data** from `shared` store.
916 | - Examples: *query DB, read files, or serialize data into a string*.
917 | - Return `prep_res`, which is used by `exec()` and `post()`.
918 |
919 | 2. `exec(prep_res)`
920 | - **Execute compute logic**, with optional retries and error handling (below).
921 | - Examples: *(mostly) LLM calls, remote APIs, tool use*.
922 | - ⚠️ This step should contain only compute logic and must **NOT** access `shared`.
923 | - ⚠️ If retries are enabled, make sure the implementation is idempotent.
924 | - ⚠️ Defer exception handling to the Node's built-in retry mechanism.
925 | - Return `exec_res`, which is passed to `post()`.
926 |
927 | 3. `post(shared, prep_res, exec_res)`
928 | - **Postprocess and write data** back to `shared`.
929 | - Examples: *update DB, change states, log results*.
930 | - **Decide the next action** by returning a *string* (`action = "default"` if *None*).
931 |
932 | > **Why 3 steps?** To enforce the principle of *separation of concerns*: data storage and data processing are handled in separate steps.
933 | >
934 | > All steps are *optional*. E.g., you can only implement `prep` and `post` if you just need to process data.
935 | {: .note }
936 |
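For instance, a minimal sketch of a Node that only implements `prep` and `post` (assuming an earlier node placed a dict under `shared["raw_config"]`):

```python
class NormalizeConfig(Node):
    def prep(self, shared):
        # Read raw settings from the shared store.
        return shared["raw_config"]

    def post(self, shared, prep_res, exec_res):
        # No exec() needed: just reshape the data and write it back.
        shared["config"] = {k.lower(): v for k, v in prep_res.items()}
        # Returning None means the "default" action.
```
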
937 | ### Fault Tolerance & Retries
938 |
939 | You can **retry** `exec()` when it raises an exception by passing two parameters when defining the Node:
940 |
941 | - `max_retries` (int): Max times to run `exec()`. The default is `1` (**no** retry).
942 | - `wait` (int): The time to wait (in **seconds**) before the next retry. By default, `wait=0` (no waiting).
943 | `wait` is helpful when you encounter rate-limits or quota errors from your LLM provider and need to back off.
944 |
945 | ```python
946 | my_node = SummarizeFile(max_retries=3, wait=10)
947 | ```
948 |
949 | When an exception occurs in `exec()`, the Node automatically retries until:
950 |
951 | - It either succeeds, or
952 | - The Node has retried `max_retries - 1` times already and fails on the last attempt.
953 |
954 | You can read the current retry count (0-based) from `self.cur_retry`.
955 |
956 | ```python
957 | class RetryNode(Node):
958 | def exec(self, prep_res):
959 |         print(f"Retry attempt {self.cur_retry}")
960 | raise Exception("Failed")
961 | ```
962 |
963 | ### Graceful Fallback
964 |
965 | To **gracefully handle** the exception (after all retries) rather than raising it, override:
966 |
967 | ```python
968 | def exec_fallback(self, prep_res, exc):
969 | raise exc
970 | ```
971 |
972 | By default, it just re-raises the exception. But you can return a fallback result instead, which becomes the `exec_res` passed to `post()`.
973 |
974 | ### Example: Summarize file
975 |
976 | ```python
977 | class SummarizeFile(Node):
978 | def prep(self, shared):
979 | return shared["data"]
980 |
981 | def exec(self, prep_res):
982 | if not prep_res:
983 | return "Empty file content"
984 | prompt = f"Summarize this text in 10 words: {prep_res}"
985 | summary = call_llm(prompt) # might fail
986 | return summary
987 |
988 | def exec_fallback(self, prep_res, exc):
989 | # Provide a simple fallback instead of crashing
990 | return "There was an error processing your request."
991 |
992 | def post(self, shared, prep_res, exec_res):
993 | shared["summary"] = exec_res
994 | # Return "default" by not returning
995 |
996 | summarize_node = SummarizeFile(max_retries=3)
997 |
998 | # node.run() calls prep->exec->post
999 | # If exec() fails, it retries up to 3 times before calling exec_fallback()
1000 | action_result = summarize_node.run(shared)
1001 |
1002 | print("Action returned:", action_result) # "default"
1003 | print("Summary stored:", shared["summary"])
1004 | ```
1005 |
1006 | ================================================
1007 | File: docs/core_abstraction/parallel.md
1008 | ================================================
1009 | ---
1010 | layout: default
1011 | title: "(Advanced) Parallel"
1012 | parent: "Core Abstraction"
1013 | nav_order: 6
1014 | ---
1015 |
1016 | # (Advanced) Parallel
1017 |
1018 | **Parallel** Nodes and Flows let you run multiple **Async** Nodes and Flows **concurrently**—for example, summarizing multiple texts at once. This can improve performance by overlapping I/O and compute.
1019 |
1020 | > Because of Python’s GIL, parallel nodes and flows can’t truly parallelize CPU-bound tasks (e.g., heavy numerical computations). However, they excel at overlapping I/O-bound work—like LLM calls, database queries, API requests, or file I/O.
1021 | {: .warning }
1022 |
1023 | > - **Ensure Tasks Are Independent**: If each item depends on the output of a previous item, **do not** parallelize.
1024 | >
1025 | > - **Beware of Rate Limits**: Parallel calls can **quickly** trigger rate limits on LLM services. You may need a **throttling** mechanism (e.g., semaphores or sleep intervals).
1026 | >
1027 | > - **Consider Single-Node Batch APIs**: Some LLMs offer a **batch inference** API where you can send multiple prompts in a single call. This is more complex to implement but can be more efficient than launching many parallel requests and mitigates rate limits.
1028 | {: .best-practice }
1029 |
1030 | ## AsyncParallelBatchNode
1031 |
1032 | Like **AsyncBatchNode**, but runs `exec_async()` in **parallel**:
1033 |
1034 | ```python
1035 | class ParallelSummaries(AsyncParallelBatchNode):
1036 | async def prep_async(self, shared):
1037 | # e.g., multiple texts
1038 | return shared["texts"]
1039 |
1040 | async def exec_async(self, text):
1041 | prompt = f"Summarize: {text}"
1042 | return await call_llm_async(prompt)
1043 |
1044 | async def post_async(self, shared, prep_res, exec_res_list):
1045 | shared["summary"] = "\n\n".join(exec_res_list)
1046 | return "default"
1047 |
1048 | node = ParallelSummaries()
1049 | flow = AsyncFlow(start=node)
1050 | ```
1051 |
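If you need the throttling mentioned in the best practices above, one sketch is to cap concurrency with an `asyncio.Semaphore` inside `exec_async()` (the limit of 5 below is arbitrary):

```python
import asyncio

sem = asyncio.Semaphore(5)  # at most 5 LLM calls in flight at once

class ThrottledSummaries(AsyncParallelBatchNode):
    async def prep_async(self, shared):
        return shared["texts"]

    async def exec_async(self, text):
        async with sem:  # wait for a free slot before calling the LLM
            return await call_llm_async(f"Summarize: {text}")

    async def post_async(self, shared, prep_res, exec_res_list):
        shared["summary"] = "\n\n".join(exec_res_list)
        return "default"
```
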
1052 | ## AsyncParallelBatchFlow
1053 |
1054 | Parallel version of **BatchFlow**. Each iteration of the sub-flow runs **concurrently** using different parameters:
1055 |
1056 | ```python
1057 | class SummarizeMultipleFiles(AsyncParallelBatchFlow):
1058 | async def prep_async(self, shared):
1059 | return [{"filename": f} for f in shared["files"]]
1060 |
1061 | sub_flow = AsyncFlow(start=LoadAndSummarizeFile())
1062 | parallel_flow = SummarizeMultipleFiles(start=sub_flow)
1063 | await parallel_flow.run_async(shared)
1064 | ```
1065 |
1066 | ================================================
1067 | File: docs/design_pattern/agent.md
1068 | ================================================
1069 | ---
1070 | layout: default
1071 | title: "Agent"
1072 | parent: "Design Pattern"
1073 | nav_order: 1
1074 | ---
1075 |
1076 | # Agent
1077 |
1078 | Agent is a powerful design pattern in which nodes can take dynamic actions based on the context.
1079 |
1080 |
1081 |
1082 |
1083 |
1084 | ## Implement Agent with Graph
1085 |
1086 | 1. **Context and Action:** Implement nodes that supply context and perform actions.
1087 | 2. **Branching:** Use branching to connect each action node to the agent node. Use actions to let the agent direct the [flow](../core_abstraction/flow.md) between nodes, and potentially loop back for multi-step reasoning.
1088 | 3. **Agent Node:** Provide a prompt to decide action—for example:
1089 |
1090 | ```python
1091 | f"""
1092 | ### CONTEXT
1093 | Task: {task_description}
1094 | Previous Actions: {previous_actions}
1095 | Current State: {current_state}
1096 |
1097 | ### ACTION SPACE
1098 | [1] search
1099 | Description: Use web search to get results
1100 | Parameters:
1101 | - query (str): What to search for
1102 |
1103 | [2] answer
1104 | Description: Conclude based on the results
1105 | Parameters:
1106 | - result (str): Final answer to provide
1107 |
1108 | ### NEXT ACTION
1109 | Decide the next action based on the current context and available action space.
1110 | Return your response in the following format:
1111 |
1112 | ```yaml
1113 | thinking: |
1114 |     <your step-by-step reasoning>
1115 | action: <action name>
1116 | parameters:
1117 |     <parameter name>: <parameter value>
1118 | ```"""
1119 | ```
1120 |
1121 | The core of building **high-performance** and **reliable** agents boils down to:
1122 |
1123 | 1. **Context Management:** Provide *relevant, minimal context.* For example, rather than including an entire chat history, retrieve the most relevant via [RAG](./rag.md). Even with larger context windows, LLMs still fall victim to ["lost in the middle"](https://arxiv.org/abs/2307.03172), overlooking mid-prompt content.
1124 |
1125 | 2. **Action Space:** Provide *a well-structured and unambiguous* set of actions—avoiding overlap like separate `read_databases` or `read_csvs`. Instead, import CSVs into the database.
1126 |
1127 | ## Example Good Action Design
1128 |
1129 | - **Incremental:** Feed content in manageable chunks (500 lines or 1 page) instead of all at once.
1130 |
1131 | - **Overview-zoom-in:** First provide high-level structure (table of contents, summary), then allow drilling into details (raw texts).
1132 |
1133 | - **Parameterized/Programmable:** Instead of fixed actions, enable parameterized (columns to select) or programmable (SQL queries) actions, for example, to read CSV files.
1134 |
1135 | - **Backtracking:** Let the agent undo the last step instead of restarting entirely, preserving progress when encountering errors or dead ends.
1136 |
1137 | ## Example: Search Agent
1138 |
1139 | This agent:
1140 | 1. Decides whether to search or answer
1141 | 2. If it searches, loops back to decide whether more searching is needed
1142 | 3. Answers once enough context has been gathered
1143 |
1144 | ```python
1145 | class DecideAction(Node):
1146 | def prep(self, shared):
1147 | context = shared.get("context", "No previous search")
1148 | query = shared["query"]
1149 | return query, context
1150 |
1151 | def exec(self, inputs):
1152 | query, context = inputs
1153 | prompt = f"""
1154 | Given input: {query}
1155 | Previous search results: {context}
1156 | Should I: 1) Search web for more info 2) Answer with current knowledge
1157 | Output in yaml:
1158 | ```yaml
1159 | action: search/answer
1160 | reason: why this action
1161 | search_term: search phrase if action is search
1162 | ```"""
1163 | resp = call_llm(prompt)
1164 | yaml_str = resp.split("```yaml")[1].split("```")[0].strip()
1165 | result = yaml.safe_load(yaml_str)
1166 |
1167 | assert isinstance(result, dict)
1168 | assert "action" in result
1169 | assert "reason" in result
1170 | assert result["action"] in ["search", "answer"]
1171 | if result["action"] == "search":
1172 | assert "search_term" in result
1173 |
1174 | return result
1175 |
1176 | def post(self, shared, prep_res, exec_res):
1177 | if exec_res["action"] == "search":
1178 | shared["search_term"] = exec_res["search_term"]
1179 | return exec_res["action"]
1180 |
1181 | class SearchWeb(Node):
1182 | def prep(self, shared):
1183 | return shared["search_term"]
1184 |
1185 | def exec(self, search_term):
1186 | return search_web(search_term)
1187 |
1188 | def post(self, shared, prep_res, exec_res):
1189 | prev_searches = shared.get("context", [])
1190 | shared["context"] = prev_searches + [
1191 | {"term": shared["search_term"], "result": exec_res}
1192 | ]
1193 | return "decide"
1194 |
1195 | class DirectAnswer(Node):
1196 | def prep(self, shared):
1197 | return shared["query"], shared.get("context", "")
1198 |
1199 | def exec(self, inputs):
1200 | query, context = inputs
1201 | return call_llm(f"Context: {context}\nAnswer: {query}")
1202 |
1203 | def post(self, shared, prep_res, exec_res):
1204 | print(f"Answer: {exec_res}")
1205 | shared["answer"] = exec_res
1206 |
1207 | # Connect nodes
1208 | decide = DecideAction()
1209 | search = SearchWeb()
1210 | answer = DirectAnswer()
1211 |
1212 | decide - "search" >> search
1213 | decide - "answer" >> answer
1214 | search - "decide" >> decide # Loop back
1215 |
1216 | flow = Flow(start=decide)
1217 | flow.run({"query": "Who won the Nobel Prize in Physics 2024?"})
1218 | ```
1219 |
1220 | ================================================
1221 | File: docs/design_pattern/mapreduce.md
1222 | ================================================
1223 | ---
1224 | layout: default
1225 | title: "Map Reduce"
1226 | parent: "Design Pattern"
1227 | nav_order: 4
1228 | ---
1229 |
1230 | # Map Reduce
1231 |
1232 | MapReduce is a design pattern suitable when you have either:
1233 | - Large input data (e.g., multiple files to process), or
1234 | - Large output data (e.g., multiple forms to fill)
1235 |
1236 | and there is a logical way to break the task into smaller, ideally independent parts.
1237 |
1238 |
1239 |
1240 |
1241 |
1242 | You first break down the task using [BatchNode](../core_abstraction/batch.md) in the map phase, followed by aggregation in the reduce phase.
1243 |
1244 | ### Example: Document Summarization
1245 |
1246 | ```python
1247 | class SummarizeAllFiles(BatchNode):
1248 | def prep(self, shared):
1249 | files_dict = shared["files"] # e.g. 10 files
1250 | return list(files_dict.items()) # [("file1.txt", "aaa..."), ("file2.txt", "bbb..."), ...]
1251 |
1252 | def exec(self, one_file):
1253 | filename, file_content = one_file
1254 | summary_text = call_llm(f"Summarize the following file:\n{file_content}")
1255 | return (filename, summary_text)
1256 |
1257 | def post(self, shared, prep_res, exec_res_list):
1258 | shared["file_summaries"] = dict(exec_res_list)
1259 |
1260 | class CombineSummaries(Node):
1261 | def prep(self, shared):
1262 | return shared["file_summaries"]
1263 |
1264 | def exec(self, file_summaries):
1265 | # format as: "File1: summary\nFile2: summary...\n"
1266 | text_list = []
1267 | for fname, summ in file_summaries.items():
1268 | text_list.append(f"{fname} summary:\n{summ}\n")
1269 | big_text = "\n---\n".join(text_list)
1270 |
1271 | return call_llm(f"Combine these file summaries into one final summary:\n{big_text}")
1272 |
1273 | def post(self, shared, prep_res, final_summary):
1274 | shared["all_files_summary"] = final_summary
1275 |
1276 | batch_node = SummarizeAllFiles()
1277 | combine_node = CombineSummaries()
1278 | batch_node >> combine_node
1279 |
1280 | flow = Flow(start=batch_node)
1281 |
1282 | shared = {
1283 | "files": {
1284 | "file1.txt": "Alice was beginning to get very tired of sitting by her sister...",
1285 | "file2.txt": "Some other interesting text ...",
1286 | # ...
1287 | }
1288 | }
1289 | flow.run(shared)
1290 | print("Individual Summaries:", shared["file_summaries"])
1291 | print("\nFinal Summary:\n", shared["all_files_summary"])
1292 | ```
1293 |
1294 | ================================================
1295 | File: docs/design_pattern/rag.md
1296 | ================================================
1297 | ---
1298 | layout: default
1299 | title: "RAG"
1300 | parent: "Design Pattern"
1301 | nav_order: 3
1302 | ---
1303 |
1304 | # RAG (Retrieval Augmented Generation)
1305 |
1306 | For certain LLM tasks like answering questions, providing relevant context is essential. One common architecture is a **two-stage** RAG pipeline:
1307 |
1308 |
1309 |
1310 |
1311 |
1312 | 1. **Offline stage**: Preprocess and index documents ("building the index").
1313 | 2. **Online stage**: Given a question, generate answers by retrieving the most relevant context.
1314 |
1315 | ---
1316 | ## Stage 1: Offline Indexing
1317 |
1318 | We create three Nodes:
1319 | 1. `ChunkDocs` – [chunks](../utility_function/chunking.md) raw text.
1320 | 2. `EmbedDocs` – [embeds](../utility_function/embedding.md) each chunk.
1321 | 3. `StoreIndex` – stores embeddings into a [vector database](../utility_function/vector.md).
1322 |
1323 | ```python
1324 | class ChunkDocs(BatchNode):
1325 | def prep(self, shared):
1326 | # A list of file paths in shared["files"]. We process each file.
1327 | return shared["files"]
1328 |
1329 | def exec(self, filepath):
1330 | # read file content. In real usage, do error handling.
1331 | with open(filepath, "r", encoding="utf-8") as f:
1332 | text = f.read()
1333 | # chunk by 100 chars each
1334 | chunks = []
1335 | size = 100
1336 | for i in range(0, len(text), size):
1337 | chunks.append(text[i : i + size])
1338 | return chunks
1339 |
1340 | def post(self, shared, prep_res, exec_res_list):
1341 | # exec_res_list is a list of chunk-lists, one per file.
1342 | # flatten them all into a single list of chunks.
1343 | all_chunks = []
1344 | for chunk_list in exec_res_list:
1345 | all_chunks.extend(chunk_list)
1346 | shared["all_chunks"] = all_chunks
1347 |
1348 | class EmbedDocs(BatchNode):
1349 | def prep(self, shared):
1350 | return shared["all_chunks"]
1351 |
1352 | def exec(self, chunk):
1353 | return get_embedding(chunk)
1354 |
1355 | def post(self, shared, prep_res, exec_res_list):
1356 | # Store the list of embeddings.
1357 | shared["all_embeds"] = exec_res_list
1358 | print(f"Total embeddings: {len(exec_res_list)}")
1359 |
1360 | class StoreIndex(Node):
1361 | def prep(self, shared):
1362 | # We'll read all embeds from shared.
1363 | return shared["all_embeds"]
1364 |
1365 | def exec(self, all_embeds):
1366 | # Create a vector index (faiss or other DB in real usage).
1367 | index = create_index(all_embeds)
1368 | return index
1369 |
1370 | def post(self, shared, prep_res, index):
1371 | shared["index"] = index
1372 |
1373 | # Wire them in sequence
1374 | chunk_node = ChunkDocs()
1375 | embed_node = EmbedDocs()
1376 | store_node = StoreIndex()
1377 |
1378 | chunk_node >> embed_node >> store_node
1379 |
1380 | OfflineFlow = Flow(start=chunk_node)
1381 | ```
1382 |
1383 | Usage example:
1384 |
1385 | ```python
1386 | shared = {
1387 | "files": ["doc1.txt", "doc2.txt"], # any text files
1388 | }
1389 | OfflineFlow.run(shared)
1390 | ```
1391 |
1392 | ---
1393 | ## Stage 2: Online Query & Answer
1394 |
1395 | We have 3 nodes:
1396 | 1. `EmbedQuery` – embeds the user’s question.
1397 | 2. `RetrieveDocs` – retrieves the most relevant chunk from the index.
1398 | 3. `GenerateAnswer` – calls the LLM with the question + chunk to produce the final answer.
1399 |
1400 | ```python
1401 | class EmbedQuery(Node):
1402 | def prep(self, shared):
1403 | return shared["question"]
1404 |
1405 | def exec(self, question):
1406 | return get_embedding(question)
1407 |
1408 | def post(self, shared, prep_res, q_emb):
1409 | shared["q_emb"] = q_emb
1410 |
1411 | class RetrieveDocs(Node):
1412 | def prep(self, shared):
1413 | # We'll need the query embedding, plus the offline index/chunks
1414 | return shared["q_emb"], shared["index"], shared["all_chunks"]
1415 |
1416 | def exec(self, inputs):
1417 | q_emb, index, chunks = inputs
1418 | I, D = search_index(index, q_emb, top_k=1)
1419 | best_id = I[0][0]
1420 | relevant_chunk = chunks[best_id]
1421 | return relevant_chunk
1422 |
1423 | def post(self, shared, prep_res, relevant_chunk):
1424 | shared["retrieved_chunk"] = relevant_chunk
1425 | print("Retrieved chunk:", relevant_chunk[:60], "...")
1426 |
1427 | class GenerateAnswer(Node):
1428 | def prep(self, shared):
1429 | return shared["question"], shared["retrieved_chunk"]
1430 |
1431 | def exec(self, inputs):
1432 | question, chunk = inputs
1433 | prompt = f"Question: {question}\nContext: {chunk}\nAnswer:"
1434 | return call_llm(prompt)
1435 |
1436 | def post(self, shared, prep_res, answer):
1437 | shared["answer"] = answer
1438 | print("Answer:", answer)
1439 |
1440 | embed_qnode = EmbedQuery()
1441 | retrieve_node = RetrieveDocs()
1442 | generate_node = GenerateAnswer()
1443 |
1444 | embed_qnode >> retrieve_node >> generate_node
1445 | OnlineFlow = Flow(start=embed_qnode)
1446 | ```
1447 |
1448 | Usage example:
1449 |
1450 | ```python
1451 | # Suppose we already ran OfflineFlow and have:
1452 | # shared["all_chunks"], shared["index"], etc.
1453 | shared["question"] = "Why do people like cats?"
1454 |
1455 | OnlineFlow.run(shared)
1456 | # final answer in shared["answer"]
1457 | ```
1458 |
1459 | ================================================
1460 | File: docs/design_pattern/structure.md
1461 | ================================================
1462 | ---
1463 | layout: default
1464 | title: "Structured Output"
1465 | parent: "Design Pattern"
1466 | nav_order: 5
1467 | ---
1468 |
1469 | # Structured Output
1470 |
1471 | In many use cases, you may want the LLM to output a specific structure, such as a list or a dictionary with predefined keys.
1472 |
1473 | There are several approaches to achieve a structured output:
1474 | - **Prompting** the LLM to strictly return a defined structure.
1475 | - Using LLMs that natively support **schema enforcement**.
1476 | - **Post-processing** the LLM's response to extract structured content.
1477 |
1478 | In practice, **Prompting** is simple and reliable for modern LLMs.
1479 |
1480 | ### Example Use Cases
1481 |
1482 | - Extracting Key Information
1483 |
1484 | ```yaml
1485 | product:
1486 | name: Widget Pro
1487 | price: 199.99
1488 | description: |
1489 | A high-quality widget designed for professionals.
1490 | Recommended for advanced users.
1491 | ```
1492 |
1493 | - Summarizing Documents into Bullet Points
1494 |
1495 | ```yaml
1496 | summary:
1497 | - This product is easy to use.
1498 | - It is cost-effective.
1499 | - Suitable for all skill levels.
1500 | ```
1501 |
1502 | - Generating Configuration Files
1503 |
1504 | ```yaml
1505 | server:
1506 | host: 127.0.0.1
1507 | port: 8080
1508 | ssl: true
1509 | ```
1510 |
1511 | ## Prompt Engineering
1512 |
1513 | When prompting the LLM to produce **structured** output:
1514 | 1. **Wrap** the structure in code fences (e.g., `yaml`).
1515 | 2. **Validate** that all required fields exist (and let the `Node`'s retry mechanism handle failures).
1516 |
1517 | ### Example Text Summarization
1518 |
1519 | ```python
1520 | class SummarizeNode(Node):
1521 | def exec(self, prep_res):
1522 | # Suppose `prep_res` is the text to summarize.
1523 | prompt = f"""
1524 | Please summarize the following text as YAML, with exactly 3 bullet points
1525 |
1526 | {prep_res}
1527 |
1528 | Now, output:
1529 | ```yaml
1530 | summary:
1531 | - bullet 1
1532 | - bullet 2
1533 | - bullet 3
1534 | ```"""
1535 | response = call_llm(prompt)
1536 | yaml_str = response.split("```yaml")[1].split("```")[0].strip()
1537 |
1538 | import yaml
1539 | structured_result = yaml.safe_load(yaml_str)
1540 |
1541 | assert "summary" in structured_result
1542 | assert isinstance(structured_result["summary"], list)
1543 |
1544 | return structured_result
1545 | ```
1546 |
1547 | > Besides using `assert` statements, another popular way to validate schemas is [Pydantic](https://github.com/pydantic/pydantic)
1548 | {: .note }
1549 |
1550 | ### Why YAML instead of JSON?
1551 |
1552 | Current LLMs struggle with escaping. YAML is easier for strings because they don't always need quotes.
1553 |
1554 | **In JSON**
1555 |
1556 | ```json
1557 | {
1558 | "dialogue": "Alice said: \"Hello Bob.\\nHow are you?\\nI am good.\""
1559 | }
1560 | ```
1561 |
1562 | - Every double quote inside the string must be escaped with `\"`.
1563 | - Each newline in the dialogue must be represented as `\n`.
1564 |
1565 | **In YAML**
1566 |
1567 | ```yaml
1568 | dialogue: |
1569 | Alice said: "Hello Bob.
1570 | How are you?
1571 | I am good."
1572 | ```
1573 |
1574 | - No need to escape interior quotes—just place the entire text under a block literal (`|`).
1575 | - Newlines are naturally preserved without needing `\n`.
1576 |
1577 | ================================================
1578 | File: docs/design_pattern/workflow.md
1579 | ================================================
1580 | ---
1581 | layout: default
1582 | title: "Workflow"
1583 | parent: "Design Pattern"
1584 | nav_order: 2
1585 | ---
1586 |
1587 | # Workflow
1588 |
1589 | Many real-world tasks are too complex for one LLM call. The solution is **Task Decomposition**: break them into a [chain](../core_abstraction/flow.md) of multiple Nodes.
1590 |
1591 |
1592 |
1593 |
1594 |
1595 | > - You don't want to make each task **too coarse**, because it may be *too complex for one LLM call*.
1596 | > - You don't want to make each task **too granular**, because then *the LLM call doesn't have enough context* and results are *not consistent across nodes*.
1597 | >
1598 | > You usually need multiple *iterations* to find the *sweet spot*. If the task has too many *edge cases*, consider using [Agents](./agent.md).
1599 | {: .best-practice }
1600 |
1601 | ### Example: Article Writing
1602 |
1603 | ```python
1604 | class GenerateOutline(Node):
1605 | def prep(self, shared): return shared["topic"]
1606 | def exec(self, topic): return call_llm(f"Create a detailed outline for an article about {topic}")
1607 | def post(self, shared, prep_res, exec_res): shared["outline"] = exec_res
1608 |
1609 | class WriteSection(Node):
1610 | def prep(self, shared): return shared["outline"]
1611 | def exec(self, outline): return call_llm(f"Write content based on this outline: {outline}")
1612 | def post(self, shared, prep_res, exec_res): shared["draft"] = exec_res
1613 |
1614 | class ReviewAndRefine(Node):
1615 | def prep(self, shared): return shared["draft"]
1616 | def exec(self, draft): return call_llm(f"Review and improve this draft: {draft}")
1617 | def post(self, shared, prep_res, exec_res): shared["final_article"] = exec_res
1618 |
1619 | # Connect nodes
1620 | outline = GenerateOutline()
1621 | write = WriteSection()
1622 | review = ReviewAndRefine()
1623 |
1624 | outline >> write >> review
1625 |
1626 | # Create and run flow
1627 | writing_flow = Flow(start=outline)
1628 | shared = {"topic": "AI Safety"}
1629 | writing_flow.run(shared)
1630 | ```
1631 |
1632 | For *dynamic cases*, consider using [Agents](./agent.md).
1633 |
1634 | ================================================
1635 | File: docs/utility_function/llm.md
1636 | ================================================
1637 | ---
1638 | layout: default
1639 | title: "LLM Wrapper"
1640 | parent: "Utility Function"
1641 | nav_order: 1
1642 | ---
1643 |
1644 | # LLM Wrappers
1645 |
1646 | Check out libraries like [litellm](https://github.com/BerriAI/litellm).
1647 | Here, we provide some minimal example implementations:
1648 |
1649 | 1. OpenAI
1650 | ```python
1651 | def call_llm(prompt):
1652 | from openai import OpenAI
1653 | client = OpenAI(api_key="YOUR_API_KEY_HERE")
1654 | r = client.chat.completions.create(
1655 | model="gpt-4o",
1656 | messages=[{"role": "user", "content": prompt}]
1657 | )
1658 | return r.choices[0].message.content
1659 |
1660 | # Example usage
1661 | call_llm("How are you?")
1662 | ```
1663 | > Store the API key in an environment variable like OPENAI_API_KEY for security.
1664 | {: .best-practice }
1665 |
1666 | 2. Claude (Anthropic)
1667 | ```python
1668 | def call_llm(prompt):
1669 | from anthropic import Anthropic
1670 | client = Anthropic(api_key="YOUR_API_KEY_HERE")
1671 | r = client.messages.create(
1672 |         model="claude-sonnet-4-0", max_tokens=1024,
1673 | messages=[
1674 | {"role": "user", "content": prompt}
1675 | ]
1676 | )
1677 | return r.content[0].text
1678 | ```
1679 |
1680 | 3. Google (Gemini)
1681 | ```python
1682 | def call_llm(prompt):
1683 | from google import genai
1684 | client = genai.Client(api_key='GEMINI_API_KEY')
1685 | response = client.models.generate_content(
1686 | model='gemini-2.5-pro',
1687 | contents=prompt
1688 | )
1689 | return response.text
1690 | ```
1691 |
1692 | 4. Azure (Azure OpenAI)
1693 | ```python
1694 | def call_llm(prompt):
1695 | from openai import AzureOpenAI
1696 | client = AzureOpenAI(
1697 |         azure_endpoint="https://<your-resource-name>.openai.azure.com/",
1698 | api_key="YOUR_API_KEY_HERE",
1699 | api_version="2023-05-15"
1700 | )
1701 | r = client.chat.completions.create(
1702 |         model="<your-deployment-name>",
1703 | messages=[{"role": "user", "content": prompt}]
1704 | )
1705 | return r.choices[0].message.content
1706 | ```
1707 |
1708 | 5. Ollama (Local LLM)
1709 | ```python
1710 | def call_llm(prompt):
1711 | from ollama import chat
1712 | response = chat(
1713 | model="llama2",
1714 | messages=[{"role": "user", "content": prompt}]
1715 | )
1716 | return response.message.content
1717 | ```
1718 |
1719 | ## Improvements
1720 | Feel free to enhance your `call_llm` function as needed. Here are examples:
1721 |
1722 | - Handle chat history:
1723 |
1724 | ```python
1725 | def call_llm(messages):
1726 | from openai import OpenAI
1727 | client = OpenAI(api_key="YOUR_API_KEY_HERE")
1728 | r = client.chat.completions.create(
1729 | model="gpt-4o",
1730 | messages=messages
1731 | )
1732 | return r.choices[0].message.content
1733 | ```
1734 |
1735 | - Add in-memory caching
1736 |
1737 | ```python
1738 | from functools import lru_cache
1739 |
1740 | @lru_cache(maxsize=1000)
1741 | def call_llm(prompt):
1742 | # Your implementation here
1743 | pass
1744 | ```
1745 |
1746 | > ⚠️ Caching conflicts with Node retries: a retried call simply returns the same cached result.
1747 | >
1748 | > To address this, you could use cached results only on the first attempt and bypass the cache on retries, as shown below.
1749 | {: .warning }
1750 |
1751 |
1752 | ```python
1753 | from functools import lru_cache
1754 |
1755 | @lru_cache(maxsize=1000)
1756 | def cached_call(prompt):
1757 |     pass  # your underlying LLM call goes here
1758 |
1759 | def call_llm(prompt, use_cache):
1760 | if use_cache:
1761 | return cached_call(prompt)
1762 | # Call the underlying function directly
1763 | return cached_call.__wrapped__(prompt)
1764 |
1765 | class SummarizeNode(Node):
1766 | def exec(self, text):
1767 | return call_llm(f"Summarize: {text}", self.cur_retry==0)
1768 | ```
1769 |
1770 | - Enable logging:
1771 |
1772 | ```python
1773 | def call_llm(prompt):
1774 | import logging
1775 | logging.info(f"Prompt: {prompt}")
1776 | response = ... # Your implementation here
1777 | logging.info(f"Response: {response}")
1778 | return response
1779 | ```
--------------------------------------------------------------------------------