├── utils ├── __init__.py └── call_llm.py ├── requirements.txt ├── assets └── banner.png ├── main.py ├── flow.py ├── nodes.py ├── README.md ├── .gitignore ├── docs └── design.md ├── .clinerules ├── .cursorrules └── .goosehints /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pocketflow>=0.0.1 -------------------------------------------------------------------------------- /assets/banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/The-Pocket/PocketFlow-Template-Python/main/assets/banner.png -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from flow import create_qa_flow 2 | 3 | # Example main function 4 | # Please replace this with your own main function 5 | def main(): 6 | shared = { 7 | "question": "In one sentence, what's the end of universe?", 8 | "answer": None 9 | } 10 | 11 | qa_flow = create_qa_flow() 12 | qa_flow.run(shared) 13 | print("Question:", shared["question"]) 14 | print("Answer:", shared["answer"]) 15 | 16 | if __name__ == "__main__": 17 | main() 18 | -------------------------------------------------------------------------------- /flow.py: -------------------------------------------------------------------------------- 1 | from pocketflow import Flow 2 | from nodes import GetQuestionNode, AnswerNode 3 | 4 | def create_qa_flow(): 5 | """Create and return a question-answering flow.""" 6 | # Create nodes 7 | get_question_node = GetQuestionNode() 8 | answer_node = AnswerNode() 9 | 10 | # Connect nodes in sequence 11 | get_question_node >> answer_node 12 | 13 | # Create flow starting with input node 14 | return Flow(start=get_question_node) 15 | 16 | qa_flow = create_qa_flow() -------------------------------------------------------------------------------- /utils/call_llm.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import os 3 | 4 | # Learn more about calling the LLM: https://the-pocket.github.io/PocketFlow/utility_function/llm.html 5 | def call_llm(prompt): 6 | client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "your-api-key")) 7 | r = client.chat.completions.create( 8 | model="gpt-4o", 9 | messages=[{"role": "user", "content": prompt}] 10 | ) 11 | return r.choices[0].message.content 12 | 13 | if __name__ == "__main__": 14 | prompt = "What is the meaning of life?" 
15 | print(call_llm(prompt)) 16 | -------------------------------------------------------------------------------- /nodes.py: -------------------------------------------------------------------------------- 1 | from pocketflow import Node 2 | from utils.call_llm import call_llm 3 | 4 | class GetQuestionNode(Node): 5 | def exec(self, _): 6 | # Get question directly from user input 7 | user_question = input("Enter your question: ") 8 | return user_question 9 | 10 | def post(self, shared, prep_res, exec_res): 11 | # Store the user's question 12 | shared["question"] = exec_res 13 | return "default" # Go to the next node 14 | 15 | class AnswerNode(Node): 16 | def prep(self, shared): 17 | # Read question from shared 18 | return shared["question"] 19 | 20 | def exec(self, question): 21 | # Call LLM to get the answer 22 | return call_llm(question) 23 | 24 | def post(self, shared, prep_res, exec_res): 25 | # Store the answer in shared 26 | shared["answer"] = exec_res -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

Pocket Flow Project Template: Agentic Coding 2 | 3 | [banner image: assets/banner.png]
10 | 11 | This is a project template for Agentic Coding with [Pocket Flow](https://github.com/The-Pocket/PocketFlow), a 100-line LLM framework, and your editor of choice. 12 | 13 | - We have included rules files for various AI coding assistants to help you build LLM projects: 14 | - [.cursorrules](.cursorrules) for Cursor AI 15 | - [.clinerules](.clinerules) for Cline 16 | - [.windsurfrules](.windsurfrules) for Windsurf 17 | - [.goosehints](.goosehints) for Goose 18 | - Configuration in [.github](.github) for GitHub Copilot 19 | - [CLAUDE.md](CLAUDE.md) for Claude Code 20 | - [GEMINI.md](GEMINI.md) for Gemini 21 | 22 | - Want to learn how to build LLM projects with Agentic Coding? 23 | 24 | - Check out the [Agentic Coding Guidance](https://the-pocket.github.io/PocketFlow/guide.html) 25 | 26 | - Check out the [YouTube Tutorial](https://www.youtube.com/@ZacharyLLM?sub_confirmation=1) 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | node_modules/ 3 | vendor/ 4 | .pnp/ 5 | .pnp.js 6 | 7 | # Build outputs 8 | dist/ 9 | build/ 10 | out/ 11 | *.pyc 12 | __pycache__/ 13 | 14 | # Environment files 15 | .env 16 | .env.local 17 | .env.*.local 18 | .env.development 19 | .env.test 20 | .env.production 21 | 22 | # IDE - VSCode 23 | .vscode/* 24 | !.vscode/settings.json 25 | !.vscode/tasks.json 26 | !.vscode/launch.json 27 | !.vscode/extensions.json 28 | 29 | # IDE - JetBrains 30 | .idea/ 31 | *.iml 32 | *.iws 33 | *.ipr 34 | 35 | # IDE - Eclipse 36 | .project 37 | .classpath 38 | .settings/ 39 | 40 | # Logs 41 | logs/ 42 | *.log 43 | npm-debug.log* 44 | yarn-debug.log* 45 | yarn-error.log* 46 | 47 | # Operating System 48 | .DS_Store 49 | Thumbs.db 50 | *.swp 51 | *.swo 52 | 53 | # Testing 54 | coverage/ 55 | .nyc_output/ 56 | 57 | # Temporary files 58 | *.tmp 59 | *.temp 60 | .cache/ 61 | 62 | # Compiled files 63 | *.com 64 | *.class 65 | *.dll 66 | *.exe 67 | *.o 68 | *.so 69 | 70 | # Package files 71 | *.7z 72 | *.dmg 73 | *.gz 74 | *.iso 75 | *.jar 76 | *.rar 77 | *.tar 78 | *.zip 79 | 80 | # Database 81 | *.sqlite 82 | *.sqlite3 83 | *.db 84 | 85 | # Optional npm cache directory 86 | .npm 87 | 88 | # Optional eslint cache 89 | .eslintcache 90 | 91 | # Optional REPL history 92 | .node_repl_history -------------------------------------------------------------------------------- /docs/design.md: -------------------------------------------------------------------------------- 1 | # Design Doc: Your Project Name 2 | 3 | > Please DON'T remove notes for AI 4 | 5 | ## Requirements 6 | 7 | > Notes for AI: Keep it simple and clear. 8 | > If the requirements are abstract, write concrete user stories 9 | 10 | 11 | ## Flow Design 12 | 13 | > Notes for AI: 14 | > 1. Consider the design patterns of agent, map-reduce, rag, and workflow. Apply them if they fit. 15 | > 2. Present a concise, high-level description of the workflow. 16 | 17 | ### Applicable Design Pattern: 18 | 19 | 1. Map the file summary into chunks, then reduce these chunks into a final summary. 20 | 2. Agentic file finder 21 | - *Context*: The entire summary of the file 22 | - *Action*: Find the file 23 | 24 | ### Flow high-level Design: 25 | 26 | 1. **First Node**: This node is for ... 27 | 2. **Second Node**: This node is for ... 28 | 3. **Third Node**: This node is for ... 
29 | 30 | ```mermaid 31 | flowchart TD 32 | firstNode[First Node] --> secondNode[Second Node] 33 | secondNode --> thirdNode[Third Node] 34 | ``` 35 | ## Utility Functions 36 | 37 | > Notes for AI: 38 | > 1. Understand the utility function definition thoroughly by reviewing the doc. 39 | > 2. Include only the necessary utility functions, based on nodes in the flow. 40 | 41 | 1. **Call LLM** (`utils/call_llm.py`) 42 | - *Input*: prompt (str) 43 | - *Output*: response (str) 44 | - Generally used by most nodes for LLM tasks 45 | 46 | 2. **Embedding** (`utils/get_embedding.py`) 47 | - *Input*: str 48 | - *Output*: a vector of 3072 floats 49 | - Used by the second node to embed text 50 | 51 | ## Node Design 52 | 53 | ### Shared Store 54 | 55 | > Notes for AI: Try to minimize data redundancy 56 | 57 | The shared store structure is organized as follows: 58 | 59 | ```python 60 | shared = { 61 | "key": "value" 62 | } 63 | ``` 64 | 65 | ### Node Steps 66 | 67 | > Notes for AI: Carefully decide whether to use Batch/Async Node/Flow. 68 | 69 | 1. First Node 70 | - *Purpose*: Provide a short explanation of the node’s function 71 | - *Type*: Decide between Regular, Batch, or Async 72 | - *Steps*: 73 | - *prep*: Read "key" from the shared store 74 | - *exec*: Call the utility function 75 | - *post*: Write "key" to the shared store 76 | 77 | 2. Second Node 78 | ... 79 | 80 | -------------------------------------------------------------------------------- /.clinerules: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: "Agentic Coding" 4 | --- 5 | 6 | # Agentic Coding: Humans Design, Agents code! 7 | 8 | > If you are an AI agent involved in building LLM Systems, read this guide **VERY, VERY** carefully! This is the most important chapter in the entire document. Throughout development, you should always (1) start with a small and simple solution, (2) design at a high level (`docs/design.md`) before implementation, and (3) frequently ask humans for feedback and clarification. 9 | {: .warning } 10 | 11 | ## Agentic Coding Steps 12 | 13 | Agentic Coding should be a collaboration between Human System Design and Agent Implementation: 14 | 15 | | Steps | Human | AI | Comment | 16 | |:-----------------------|:----------:|:---------:|:------------------------------------------------------------------------| 17 | | 1. Requirements | ★★★ High | ★☆☆ Low | Humans understand the requirements and context. | 18 | | 2. Flow | ★★☆ Medium | ★★☆ Medium | Humans specify the high-level design, and the AI fills in the details. | 19 | | 3. Utilities | ★★☆ Medium | ★★☆ Medium | Humans provide available external APIs and integrations, and the AI helps with implementation. | 20 | | 4. Data | ★☆☆ Low | ★★★ High | AI designs the data schema, and humans verify. | 21 | | 5. Node | ★☆☆ Low | ★★★ High | The AI helps design the node based on the flow. | 22 | | 6. Implementation | ★☆☆ Low | ★★★ High | The AI implements the flow based on the design. | 23 | | 7. Optimization | ★★☆ Medium | ★★☆ Medium | Humans evaluate the results, and the AI helps optimize. | 24 | | 8. Reliability | ★☆☆ Low | ★★★ High | The AI writes test cases and addresses corner cases. | 25 | 26 | 1. **Requirements**: Clarify the requirements for your project, and evaluate whether an AI system is a good fit. 
27 | - Understand AI systems' strengths and limitations: 28 | - **Good for**: Routine tasks requiring common sense (filling forms, replying to emails) 29 | - **Good for**: Creative tasks with well-defined inputs (building slides, writing SQL) 30 | - **Not good for**: Ambiguous problems requiring complex decision-making (business strategy, startup planning) 31 | - **Keep It User-Centric:** Explain the "problem" from the user's perspective rather than just listing features. 32 | - **Balance complexity vs. impact**: Aim to deliver the highest value features with minimal complexity early. 33 | 34 | 2. **Flow Design**: Outline at a high level, describe how your AI system orchestrates nodes. 35 | - Identify applicable design patterns (e.g., [Map Reduce](./design_pattern/mapreduce.md), [Agent](./design_pattern/agent.md), [RAG](./design_pattern/rag.md)). 36 | - For each node in the flow, start with a high-level one-line description of what it does. 37 | - If using **Map Reduce**, specify how to map (what to split) and how to reduce (how to combine). 38 | - If using **Agent**, specify what are the inputs (context) and what are the possible actions. 39 | - If using **RAG**, specify what to embed, noting that there's usually both offline (indexing) and online (retrieval) workflows. 40 | - Outline the flow and draw it in a mermaid diagram. For example: 41 | ```mermaid 42 | flowchart LR 43 | start[Start] --> batch[Batch] 44 | batch --> check[Check] 45 | check -->|OK| process 46 | check -->|Error| fix[Fix] 47 | fix --> check 48 | 49 | subgraph process[Process] 50 | step1[Step 1] --> step2[Step 2] 51 | end 52 | 53 | process --> endNode[End] 54 | ``` 55 | - > **If Humans can't specify the flow, AI Agents can't automate it!** Before building an LLM system, thoroughly understand the problem and potential solution by manually solving example inputs to develop intuition. 56 | {: .best-practice } 57 | 58 | 3. **Utilities**: Based on the Flow Design, identify and implement necessary utility functions. 59 | - Think of your AI system as the brain. It needs a body—these *external utility functions*—to interact with the real world: 60 |
61 | 62 | - Reading inputs (e.g., retrieving Slack messages, reading emails) 63 | - Writing outputs (e.g., generating reports, sending emails) 64 | - Using external tools (e.g., calling LLMs, searching the web) 65 | - **NOTE**: *LLM-based tasks* (e.g., summarizing text, analyzing sentiment) are **NOT** utility functions; rather, they are *core functions* internal in the AI system. 66 | - For each utility function, implement it and write a simple test. 67 | - Document their input/output, as well as why they are necessary. For example: 68 | - `name`: `get_embedding` (`utils/get_embedding.py`) 69 | - `input`: `str` 70 | - `output`: a vector of 3072 floats 71 | - `necessity`: Used by the second node to embed text 72 | - Example utility implementation: 73 | ```python 74 | # utils/call_llm.py 75 | from openai import OpenAI 76 | 77 | def call_llm(prompt): 78 | client = OpenAI(api_key="YOUR_API_KEY_HERE") 79 | r = client.chat.completions.create( 80 | model="gpt-4o", 81 | messages=[{"role": "user", "content": prompt}] 82 | ) 83 | return r.choices[0].message.content 84 | 85 | if __name__ == "__main__": 86 | prompt = "What is the meaning of life?" 87 | print(call_llm(prompt)) 88 | ``` 89 | - > **Sometimes, design Utilities before Flow:** For example, for an LLM project to automate a legacy system, the bottleneck will likely be the available interface to that system. Start by designing the hardest utilities for interfacing, and then build the flow around them. 90 | {: .best-practice } 91 | - > **Avoid Exception Handling in Utilities**: If a utility function is called from a Node's `exec()` method, avoid using `try...except` blocks within the utility. Let the Node's built-in retry mechanism handle failures. 92 | {: .warning } 93 | 94 | 4. **Data Design**: Design the shared store that nodes will use to communicate. 95 | - One core design principle for PocketFlow is to use a well-designed [shared store](./core_abstraction/communication.md)—a data contract that all nodes agree upon to retrieve and store data. 96 | - For simple systems, use an in-memory dictionary. 97 | - For more complex systems or when persistence is required, use a database. 98 | - **Don't Repeat Yourself**: Use in-memory references or foreign keys. 99 | - Example shared store design: 100 | ```python 101 | shared = { 102 | "user": { 103 | "id": "user123", 104 | "context": { # Another nested dict 105 | "weather": {"temp": 72, "condition": "sunny"}, 106 | "location": "San Francisco" 107 | } 108 | }, 109 | "results": {} # Empty dict to store outputs 110 | } 111 | ``` 112 | 113 | 5. **Node Design**: Plan how each node will read and write data, and use utility functions. 114 | - For each [Node](./core_abstraction/node.md), describe its type, how it reads and writes data, and which utility function it uses. Keep it specific but high-level without codes. For example: 115 | - `type`: Regular (or Batch, or Async) 116 | - `prep`: Read "text" from the shared store 117 | - `exec`: Call the embedding utility function. **Avoid exception handling here**; let the Node's retry mechanism manage failures. 118 | - `post`: Write "embedding" to the shared store 119 | 120 | 6. **Implementation**: Implement the initial nodes and flows based on the design. 121 | - 🎉 If you've reached this step, humans have finished the design. Now *Agentic Coding* begins! 122 | - **"Keep it simple, stupid!"** Avoid complex features and full-scale type checking. 123 | - **FAIL FAST**! 
Leverage the built-in [Node](./core_abstraction/node.md) retry and fallback mechanisms to handle failures gracefully. This helps you quickly identify weak points in the system. 124 | - Add logging throughout the code to facilitate debugging. 125 | 126 | 7. **Optimization**: 127 | - **Use Intuition**: For a quick initial evaluation, human intuition is often a good start. 128 | - **Redesign Flow (Back to Step 3)**: Consider breaking down tasks further, introducing agentic decisions, or better managing input contexts. 129 | - If your flow design is already solid, move on to micro-optimizations: 130 | - **Prompt Engineering**: Use clear, specific instructions with examples to reduce ambiguity. 131 | - **In-Context Learning**: Provide robust examples for tasks that are difficult to specify with instructions alone. 132 | 133 | - > **You'll likely iterate a lot!** Expect to repeat Steps 3–6 hundreds of times. 134 | > 135 | >
136 | {: .best-practice } 137 | 138 | 8. **Reliability** 139 | - **Node Retries**: Add checks in the node `exec` to ensure outputs meet requirements, and consider increasing `max_retries` and `wait` times. 140 | - **Logging and Visualization**: Maintain logs of all attempts and visualize node results for easier debugging. 141 | - **Self-Evaluation**: Add a separate node (powered by an LLM) to review outputs when results are uncertain. 142 | 143 | ## Example LLM Project File Structure 144 | 145 | ``` 146 | my_project/ 147 | ├── main.py 148 | ├── nodes.py 149 | ├── flow.py 150 | ├── utils/ 151 | │ ├── __init__.py 152 | │ ├── call_llm.py 153 | │ └── search_web.py 154 | ├── requirements.txt 155 | └── docs/ 156 | └── design.md 157 | ``` 158 | 159 | - **`requirements.txt`**: Lists the Python dependencies for the project. 160 | ``` 161 | PyYAML 162 | pocketflow 163 | ``` 164 | 165 | - **`docs/design.md`**: Contains project documentation for each step above. This should be *high-level* and *no-code*. 166 | ~~~ 167 | # Design Doc: Your Project Name 168 | 169 | > Please DON'T remove notes for AI 170 | 171 | ## Requirements 172 | 173 | > Notes for AI: Keep it simple and clear. 174 | > If the requirements are abstract, write concrete user stories 175 | 176 | 177 | ## Flow Design 178 | 179 | > Notes for AI: 180 | > 1. Consider the design patterns of agent, map-reduce, rag, and workflow. Apply them if they fit. 181 | > 2. Present a concise, high-level description of the workflow. 182 | 183 | ### Applicable Design Pattern: 184 | 185 | 1. Map the file summary into chunks, then reduce these chunks into a final summary. 186 | 2. Agentic file finder 187 | - *Context*: The entire summary of the file 188 | - *Action*: Find the file 189 | 190 | ### Flow high-level Design: 191 | 192 | 1. **First Node**: This node is for ... 193 | 2. **Second Node**: This node is for ... 194 | 3. **Third Node**: This node is for ... 195 | 196 | ```mermaid 197 | flowchart TD 198 | firstNode[First Node] --> secondNode[Second Node] 199 | secondNode --> thirdNode[Third Node] 200 | ``` 201 | ## Utility Functions 202 | 203 | > Notes for AI: 204 | > 1. Understand the utility function definition thoroughly by reviewing the doc. 205 | > 2. Include only the necessary utility functions, based on nodes in the flow. 206 | 207 | 1. **Call LLM** (`utils/call_llm.py`) 208 | - *Input*: prompt (str) 209 | - *Output*: response (str) 210 | - Generally used by most nodes for LLM tasks 211 | 212 | 2. **Embedding** (`utils/get_embedding.py`) 213 | - *Input*: str 214 | - *Output*: a vector of 3072 floats 215 | - Used by the second node to embed text 216 | 217 | ## Node Design 218 | 219 | ### Shared Store 220 | 221 | > Notes for AI: Try to minimize data redundancy 222 | 223 | The shared store structure is organized as follows: 224 | 225 | ```python 226 | shared = { 227 | "key": "value" 228 | } 229 | ``` 230 | 231 | ### Node Steps 232 | 233 | > Notes for AI: Carefully decide whether to use Batch/Async Node/Flow. 234 | 235 | 1. First Node 236 | - *Purpose*: Provide a short explanation of the node’s function 237 | - *Type*: Decide between Regular, Batch, or Async 238 | - *Steps*: 239 | - *prep*: Read "key" from the shared store 240 | - *exec*: Call the utility function 241 | - *post*: Write "key" to the shared store 242 | 243 | 2. Second Node 244 | ... 245 | ~~~ 246 | 247 | 248 | - **`utils/`**: Contains all utility functions. 249 | - It's recommended to dedicate one Python file to each API call, for example `call_llm.py` or `search_web.py`. 
250 | - Each file should also include a `main()` function to try that API call 251 | ```python 252 | from google import genai 253 | import os 254 | 255 | def call_llm(prompt: str) -> str: 256 | client = genai.Client( 257 | api_key=os.getenv("GEMINI_API_KEY", ""), 258 | ) 259 | model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash") 260 | response = client.models.generate_content(model=model, contents=[prompt]) 261 | return response.text 262 | 263 | if __name__ == "__main__": 264 | test_prompt = "Hello, how are you?" 265 | 266 | # Make a test call 267 | print("Making call...") 268 | response = call_llm(test_prompt) 269 | print(f"Response: {response}") 270 | ``` 271 | 272 | - **`nodes.py`**: Contains all the node definitions. 273 | ```python 274 | # nodes.py 275 | from pocketflow import Node 276 | from utils.call_llm import call_llm 277 | 278 | class GetQuestionNode(Node): 279 | def exec(self, _): 280 | # Get question directly from user input 281 | user_question = input("Enter your question: ") 282 | return user_question 283 | 284 | def post(self, shared, prep_res, exec_res): 285 | # Store the user's question 286 | shared["question"] = exec_res 287 | return "default" # Go to the next node 288 | 289 | class AnswerNode(Node): 290 | def prep(self, shared): 291 | # Read question from shared 292 | return shared["question"] 293 | 294 | def exec(self, question): 295 | # Call LLM to get the answer 296 | return call_llm(question) 297 | 298 | def post(self, shared, prep_res, exec_res): 299 | # Store the answer in shared 300 | shared["answer"] = exec_res 301 | ``` 302 | - **`flow.py`**: Implements functions that create flows by importing node definitions and connecting them. 303 | ```python 304 | # flow.py 305 | from pocketflow import Flow 306 | from nodes import GetQuestionNode, AnswerNode 307 | 308 | def create_qa_flow(): 309 | """Create and return a question-answering flow.""" 310 | # Create nodes 311 | get_question_node = GetQuestionNode() 312 | answer_node = AnswerNode() 313 | 314 | # Connect nodes in sequence 315 | get_question_node >> answer_node 316 | 317 | # Create flow starting with input node 318 | return Flow(start=get_question_node) 319 | ``` 320 | - **`main.py`**: Serves as the project's entry point. 321 | ```python 322 | # main.py 323 | from flow import create_qa_flow 324 | 325 | # Example main function 326 | # Please replace this with your own main function 327 | def main(): 328 | shared = { 329 | "question": None, # Will be populated by GetQuestionNode from user input 330 | "answer": None # Will be populated by AnswerNode 331 | } 332 | 333 | # Create the flow and run it 334 | qa_flow = create_qa_flow() 335 | qa_flow.run(shared) 336 | print(f"Question: {shared['question']}") 337 | print(f"Answer: {shared['answer']}") 338 | 339 | if __name__ == "__main__": 340 | main() 341 | ``` 342 | 343 | ================================================ 344 | File: docs/index.md 345 | ================================================ 346 | --- 347 | layout: default 348 | title: "Home" 349 | nav_order: 1 350 | --- 351 | 352 | # Pocket Flow 353 | 354 | A [100-line](https://github.com/the-pocket/PocketFlow/blob/main/pocketflow/__init__.py) minimalist LLM framework for *Agents, Task Decomposition, RAG, etc*. 355 | 356 | - **Lightweight**: Just the core graph abstraction in 100 lines. ZERO dependencies, and ZERO vendor lock-in.
357 | - **Expressive**: Everything you love from larger frameworks—([Multi-](./design_pattern/multi_agent.html))[Agents](./design_pattern/agent.html), [Workflow](./design_pattern/workflow.html), [RAG](./design_pattern/rag.html), and more. 358 | - **Agentic-Coding**: Intuitive enough for AI agents to help humans build complex LLM applications. 359 | 360 |
363 | 364 | ## Core Abstraction 365 | 366 | We model the LLM workflow as a **Graph + Shared Store**: 367 | 368 | - [Node](./core_abstraction/node.md) handles simple (LLM) tasks. 369 | - [Flow](./core_abstraction/flow.md) connects nodes through **Actions** (labeled edges). 370 | - [Shared Store](./core_abstraction/communication.md) enables communication between nodes within flows. 371 | - [Batch](./core_abstraction/batch.md) nodes/flows allow for data-intensive tasks. 372 | - [Async](./core_abstraction/async.md) nodes/flows allow waiting for asynchronous tasks. 373 | - [(Advanced) Parallel](./core_abstraction/parallel.md) nodes/flows handle I/O-bound tasks. 374 | 375 |
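To see how these pieces fit together, here is a minimal sketch (it assumes a `call_llm(prompt)` utility like the one in `utils/call_llm.py` above): the node reads from the shared store in `prep()`, computes in `exec()`, writes back in `post()`, and returns an Action label that the Flow follows to the next node.

```python
# Minimal sketch of the Graph + Shared Store model.
# Assumes a call_llm(prompt) utility such as the one in utils/call_llm.py.
from pocketflow import Node, Flow
from utils.call_llm import call_llm

class Summarize(Node):
    def prep(self, shared):
        # Read input from the shared store
        return shared["text"]

    def exec(self, text):
        # Compute only; exec() should not touch the shared store
        return call_llm(f"Summarize in one sentence: {text}")

    def post(self, shared, prep_res, exec_res):
        # Write the result back and return the Action (edge label) to follow
        shared["summary"] = exec_res
        return "default"

summarize = Summarize()
flow = Flow(start=summarize)

shared = {"text": "Pocket Flow models LLM workflows as a graph plus a shared store."}
flow.run(shared)
print(shared["summary"])
```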
378 | 379 | ## Design Pattern 380 | 381 | From there, it’s easy to implement popular design patterns: 382 | 383 | - [Agent](./design_pattern/agent.md) autonomously makes decisions. 384 | - [Workflow](./design_pattern/workflow.md) chains multiple tasks into pipelines. 385 | - [RAG](./design_pattern/rag.md) integrates data retrieval with generation. 386 | - [Map Reduce](./design_pattern/mapreduce.md) splits data tasks into Map and Reduce steps. 387 | - [Structured Output](./design_pattern/structure.md) formats outputs consistently. 388 | - [(Advanced) Multi-Agents](./design_pattern/multi_agent.md) coordinate multiple agents. 389 | 390 |
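As a taste of one of these, the Structured Output pattern usually comes down to asking the LLM for a fixed format (YAML here) and validating it inside `exec()`, so a malformed reply simply triggers the Node's retry. A minimal sketch, assuming the `call_llm` utility above and PyYAML (already listed in the example `requirements.txt`):

```python
# Sketch of the Structured Output pattern: request YAML, then parse and validate it.
# Assumes call_llm(prompt) from utils/call_llm.py and PyYAML installed.
import yaml
from pocketflow import Node
from utils.call_llm import call_llm

class ExtractTask(Node):
    def prep(self, shared):
        return shared["email"]

    def exec(self, email):
        prompt = (
            "Extract the task from the email below as YAML with keys "
            "`task` (one line) and `due_date` (YYYY-MM-DD or null), "
            "wrapped in a ```yaml code fence.\n\n" + email
        )
        resp = call_llm(prompt)
        yaml_str = resp.split("```yaml")[1].split("```")[0].strip()
        result = yaml.safe_load(yaml_str)

        # Validate the structure; an assertion error here triggers the Node's retry
        assert isinstance(result, dict)
        assert "task" in result
        return result

    def post(self, shared, prep_res, exec_res):
        shared["task"] = exec_res
        return "default"
```

The Agent example further down uses the same YAML-and-validate approach when choosing its next action.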
393 | 394 | ## Utility Function 395 | 396 | We **do not** provide built-in utilities. Instead, we offer *examples*—please *implement your own*: 397 | 398 | - [LLM Wrapper](./utility_function/llm.md) 399 | - [Viz and Debug](./utility_function/viz.md) 400 | - [Web Search](./utility_function/websearch.md) 401 | - [Chunking](./utility_function/chunking.md) 402 | - [Embedding](./utility_function/embedding.md) 403 | - [Vector Databases](./utility_function/vector.md) 404 | - [Text-to-Speech](./utility_function/text_to_speech.md) 405 | 406 | **Why not built-in?**: I believe it's a *bad practice* for vendor-specific APIs in a general framework: 407 | - *API Volatility*: Frequent changes lead to heavy maintenance for hardcoded APIs. 408 | - *Flexibility*: You may want to switch vendors, use fine-tuned models, or run them locally. 409 | - *Optimizations*: Prompt caching, batching, and streaming are easier without vendor lock-in. 410 | 411 | ## Ready to build your Apps? 412 | 413 | Check out [Agentic Coding Guidance](./guide.md), the fastest way to develop LLM projects with Pocket Flow! 414 | 415 | ================================================ 416 | File: docs/core_abstraction/async.md 417 | ================================================ 418 | --- 419 | layout: default 420 | title: "(Advanced) Async" 421 | parent: "Core Abstraction" 422 | nav_order: 5 423 | --- 424 | 425 | # (Advanced) Async 426 | 427 | **Async** Nodes implement `prep_async()`, `exec_async()`, `exec_fallback_async()`, and/or `post_async()`. This is useful for: 428 | 429 | 1. **prep_async()**: For *fetching/reading data (files, APIs, DB)* in an I/O-friendly way. 430 | 2. **exec_async()**: Typically used for async LLM calls. 431 | 3. **post_async()**: For *awaiting user feedback*, *coordinating across multi-agents* or any additional async steps after `exec_async()`. 432 | 433 | **Note**: `AsyncNode` must be wrapped in `AsyncFlow`. `AsyncFlow` can also include regular (sync) nodes. 434 | 435 | ### Example 436 | 437 | ```python 438 | class SummarizeThenVerify(AsyncNode): 439 | async def prep_async(self, shared): 440 | # Example: read a file asynchronously 441 | doc_text = await read_file_async(shared["doc_path"]) 442 | return doc_text 443 | 444 | async def exec_async(self, prep_res): 445 | # Example: async LLM call 446 | summary = await call_llm_async(f"Summarize: {prep_res}") 447 | return summary 448 | 449 | async def post_async(self, shared, prep_res, exec_res): 450 | # Example: wait for user feedback 451 | decision = await gather_user_feedback(exec_res) 452 | if decision == "approve": 453 | shared["summary"] = exec_res 454 | return "approve" 455 | return "deny" 456 | 457 | summarize_node = SummarizeThenVerify() 458 | final_node = Finalize() 459 | 460 | # Define transitions 461 | summarize_node - "approve" >> final_node 462 | summarize_node - "deny" >> summarize_node # retry 463 | 464 | flow = AsyncFlow(start=summarize_node) 465 | 466 | async def main(): 467 | shared = {"doc_path": "document.txt"} 468 | await flow.run_async(shared) 469 | print("Final Summary:", shared.get("summary")) 470 | 471 | asyncio.run(main()) 472 | ``` 473 | 474 | ================================================ 475 | File: docs/core_abstraction/batch.md 476 | ================================================ 477 | --- 478 | layout: default 479 | title: "Batch" 480 | parent: "Core Abstraction" 481 | nav_order: 4 482 | --- 483 | 484 | # Batch 485 | 486 | **Batch** makes it easier to handle large inputs in one Node or **rerun** a Flow multiple times. 
Example use cases: 487 | - **Chunk-based** processing (e.g., splitting large texts). 488 | - **Iterative** processing over lists of input items (e.g., user queries, files, URLs). 489 | 490 | ## 1. BatchNode 491 | 492 | A **BatchNode** extends `Node` but changes `prep()` and `exec()`: 493 | 494 | - **`prep(shared)`**: returns an **iterable** (e.g., list, generator). 495 | - **`exec(item)`**: called **once** per item in that iterable. 496 | - **`post(shared, prep_res, exec_res_list)`**: after all items are processed, receives a **list** of results (`exec_res_list`) and returns an **Action**. 497 | 498 | 499 | ### Example: Summarize a Large File 500 | 501 | ```python 502 | class MapSummaries(BatchNode): 503 | def prep(self, shared): 504 | # Suppose we have a big file; chunk it 505 | content = shared["data"] 506 | chunk_size = 10000 507 | chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)] 508 | return chunks 509 | 510 | def exec(self, chunk): 511 | prompt = f"Summarize this chunk in 10 words: {chunk}" 512 | summary = call_llm(prompt) 513 | return summary 514 | 515 | def post(self, shared, prep_res, exec_res_list): 516 | combined = "\n".join(exec_res_list) 517 | shared["summary"] = combined 518 | return "default" 519 | 520 | map_summaries = MapSummaries() 521 | flow = Flow(start=map_summaries) 522 | flow.run(shared) 523 | ``` 524 | 525 | --- 526 | 527 | ## 2. BatchFlow 528 | 529 | A **BatchFlow** runs a **Flow** multiple times, each time with different `params`. Think of it as a loop that replays the Flow for each parameter set. 530 | 531 | ### Example: Summarize Many Files 532 | 533 | ```python 534 | class SummarizeAllFiles(BatchFlow): 535 | def prep(self, shared): 536 | # Return a list of param dicts (one per file) 537 | filenames = list(shared["data"].keys()) # e.g., ["file1.txt", "file2.txt", ...] 538 | return [{"filename": fn} for fn in filenames] 539 | 540 | # Suppose we have a per-file Flow (e.g., load_file >> summarize >> reduce): 541 | summarize_file = SummarizeFile(start=load_file) 542 | 543 | # Wrap that flow into a BatchFlow: 544 | summarize_all_files = SummarizeAllFiles(start=summarize_file) 545 | summarize_all_files.run(shared) 546 | ``` 547 | 548 | ### Under the Hood 549 | 1. `prep(shared)` returns a list of param dicts—e.g., `[{filename: "file1.txt"}, {filename: "file2.txt"}, ...]`. 550 | 2. The **BatchFlow** loops through each dict. For each one: 551 | - It merges the dict with the BatchFlow’s own `params`. 552 | - It calls `flow.run(shared)` using the merged result. 553 | 3. This means the sub-Flow is run **repeatedly**, once for every param dict. 554 | 555 | --- 556 | 557 | ## 3. Nested or Multi-Level Batches 558 | 559 | You can nest a **BatchFlow** in another **BatchFlow**. For instance: 560 | - **Outer** batch: returns a list of diretory param dicts (e.g., `{"directory": "/pathA"}`, `{"directory": "/pathB"}`, ...). 561 | - **Inner** batch: returning a list of per-file param dicts. 562 | 563 | At each level, **BatchFlow** merges its own param dict with the parent’s. By the time you reach the **innermost** node, the final `params` is the merged result of **all** parents in the chain. This way, a nested structure can keep track of the entire context (e.g., directory + file name) at once. 564 | 565 | ```python 566 | 567 | class FileBatchFlow(BatchFlow): 568 | def prep(self, shared): 569 | directory = self.params["directory"] 570 | # e.g., files = ["file1.txt", "file2.txt", ...] 
571 | files = [f for f in os.listdir(directory) if f.endswith(".txt")] 572 | return [{"filename": f} for f in files] 573 | 574 | class DirectoryBatchFlow(BatchFlow): 575 | def prep(self, shared): 576 | directories = [ "/path/to/dirA", "/path/to/dirB"] 577 | return [{"directory": d} for d in directories] 578 | 579 | # MapSummaries have params like {"directory": "/path/to/dirA", "filename": "file1.txt"} 580 | inner_flow = FileBatchFlow(start=MapSummaries()) 581 | outer_flow = DirectoryBatchFlow(start=inner_flow) 582 | ``` 583 | 584 | ================================================ 585 | File: docs/core_abstraction/communication.md 586 | ================================================ 587 | --- 588 | layout: default 589 | title: "Communication" 590 | parent: "Core Abstraction" 591 | nav_order: 3 592 | --- 593 | 594 | # Communication 595 | 596 | Nodes and Flows **communicate** in 2 ways: 597 | 598 | 1. **Shared Store (for almost all the cases)** 599 | 600 | - A global data structure (often an in-mem dict) that all nodes can read ( `prep()`) and write (`post()`). 601 | - Great for data results, large content, or anything multiple nodes need. 602 | - You shall design the data structure and populate it ahead. 603 | 604 | - > **Separation of Concerns:** Use `Shared Store` for almost all cases to separate *Data Schema* from *Compute Logic*! This approach is both flexible and easy to manage, resulting in more maintainable code. `Params` is more a syntax sugar for [Batch](./batch.md). 605 | {: .best-practice } 606 | 607 | 2. **Params (only for [Batch](./batch.md))** 608 | - Each node has a local, ephemeral `params` dict passed in by the **parent Flow**, used as an identifier for tasks. Parameter keys and values shall be **immutable**. 609 | - Good for identifiers like filenames or numeric IDs, in Batch mode. 610 | 611 | If you know memory management, think of the **Shared Store** like a **heap** (shared by all function calls), and **Params** like a **stack** (assigned by the caller). 612 | 613 | --- 614 | 615 | ## 1. Shared Store 616 | 617 | ### Overview 618 | 619 | A shared store is typically an in-mem dictionary, like: 620 | ```python 621 | shared = {"data": {}, "summary": {}, "config": {...}, ...} 622 | ``` 623 | 624 | It can also contain local file handlers, DB connections, or a combination for persistence. We recommend deciding the data structure or DB schema first based on your app requirements. 625 | 626 | ### Example 627 | 628 | ```python 629 | class LoadData(Node): 630 | def post(self, shared, prep_res, exec_res): 631 | # We write data to shared store 632 | shared["data"] = "Some text content" 633 | return None 634 | 635 | class Summarize(Node): 636 | def prep(self, shared): 637 | # We read data from shared store 638 | return shared["data"] 639 | 640 | def exec(self, prep_res): 641 | # Call LLM to summarize 642 | prompt = f"Summarize: {prep_res}" 643 | summary = call_llm(prompt) 644 | return summary 645 | 646 | def post(self, shared, prep_res, exec_res): 647 | # We write summary to shared store 648 | shared["summary"] = exec_res 649 | return "default" 650 | 651 | load_data = LoadData() 652 | summarize = Summarize() 653 | load_data >> summarize 654 | flow = Flow(start=load_data) 655 | 656 | shared = {} 657 | flow.run(shared) 658 | ``` 659 | 660 | Here: 661 | - `LoadData` writes to `shared["data"]`. 662 | - `Summarize` reads from `shared["data"]`, summarizes, and writes to `shared["summary"]`. 663 | 664 | --- 665 | 666 | ## 2. 
Params 667 | 668 | **Params** let you store *per-Node* or *per-Flow* config that doesn't need to live in the shared store. They are: 669 | - **Immutable** during a Node's run cycle (i.e., they don't change mid-`prep->exec->post`). 670 | - **Set** via `set_params()`. 671 | - **Cleared** and updated each time a parent Flow calls it. 672 | 673 | > Only set the uppermost Flow params because others will be overwritten by the parent Flow. 674 | > 675 | > If you need to set child node params, see [Batch](./batch.md). 676 | {: .warning } 677 | 678 | Typically, **Params** are identifiers (e.g., file name, page number). Use them to fetch the task you assigned or write to a specific part of the shared store. 679 | 680 | ### Example 681 | 682 | ```python 683 | # 1) Create a Node that uses params 684 | class SummarizeFile(Node): 685 | def prep(self, shared): 686 | # Access the node's param 687 | filename = self.params["filename"] 688 | return shared["data"].get(filename, "") 689 | 690 | def exec(self, prep_res): 691 | prompt = f"Summarize: {prep_res}" 692 | return call_llm(prompt) 693 | 694 | def post(self, shared, prep_res, exec_res): 695 | filename = self.params["filename"] 696 | shared["summary"][filename] = exec_res 697 | return "default" 698 | 699 | # 2) Set params 700 | node = SummarizeFile() 701 | 702 | # 3) Set Node params directly (for testing) 703 | node.set_params({"filename": "doc1.txt"}) 704 | node.run(shared) 705 | 706 | # 4) Create Flow 707 | flow = Flow(start=node) 708 | 709 | # 5) Set Flow params (overwrites node params) 710 | flow.set_params({"filename": "doc2.txt"}) 711 | flow.run(shared) # The node summarizes doc2, not doc1 712 | ``` 713 | 714 | ================================================ 715 | File: docs/core_abstraction/flow.md 716 | ================================================ 717 | --- 718 | layout: default 719 | title: "Flow" 720 | parent: "Core Abstraction" 721 | nav_order: 2 722 | --- 723 | 724 | # Flow 725 | 726 | A **Flow** orchestrates a graph of Nodes. You can chain Nodes in a sequence or create branching depending on the **Actions** returned from each Node's `post()`. 727 | 728 | ## 1. Action-based Transitions 729 | 730 | Each Node's `post()` returns an **Action** string. By default, if `post()` doesn't return anything, we treat that as `"default"`. 731 | 732 | You define transitions with the syntax: 733 | 734 | 1. **Basic default transition**: `node_a >> node_b` 735 | This means if `node_a.post()` returns `"default"`, go to `node_b`. 736 | (Equivalent to `node_a - "default" >> node_b`) 737 | 738 | 2. **Named action transition**: `node_a - "action_name" >> node_b` 739 | This means if `node_a.post()` returns `"action_name"`, go to `node_b`. 740 | 741 | It's possible to create loops, branching, or multi-step flows. 742 | 743 | ## 2. Creating a Flow 744 | 745 | A **Flow** begins with a **start** node. You call `Flow(start=some_node)` to specify the entry point. When you call `flow.run(shared)`, it executes the start node, looks at its returned Action from `post()`, follows the transition, and continues until there's no next node. 746 | 747 | ### Example: Simple Sequence 748 | 749 | Here's a minimal flow of two nodes in a chain: 750 | 751 | ```python 752 | node_a >> node_b 753 | flow = Flow(start=node_a) 754 | flow.run(shared) 755 | ``` 756 | 757 | - When you run the flow, it executes `node_a`. 758 | - Suppose `node_a.post()` returns `"default"`. 759 | - The flow then sees `"default"` Action is linked to `node_b` and runs `node_b`. 
760 | - `node_b.post()` returns `"default"` but we didn't define `node_b >> something_else`. So the flow ends there. 761 | 762 | ### Example: Branching & Looping 763 | 764 | Here's a simple expense approval flow that demonstrates branching and looping. The `ReviewExpense` node can return three possible Actions: 765 | 766 | - `"approved"`: expense is approved, move to payment processing 767 | - `"needs_revision"`: expense needs changes, send back for revision 768 | - `"rejected"`: expense is denied, finish the process 769 | 770 | We can wire them like this: 771 | 772 | ```python 773 | # Define the flow connections 774 | review - "approved" >> payment # If approved, process payment 775 | review - "needs_revision" >> revise # If needs changes, go to revision 776 | review - "rejected" >> finish # If rejected, finish the process 777 | 778 | revise >> review # After revision, go back for another review 779 | payment >> finish # After payment, finish the process 780 | 781 | flow = Flow(start=review) 782 | ``` 783 | 784 | Let's see how it flows: 785 | 786 | 1. If `review.post()` returns `"approved"`, the expense moves to the `payment` node 787 | 2. If `review.post()` returns `"needs_revision"`, it goes to the `revise` node, which then loops back to `review` 788 | 3. If `review.post()` returns `"rejected"`, it moves to the `finish` node and stops 789 | 790 | ```mermaid 791 | flowchart TD 792 | review[Review Expense] -->|approved| payment[Process Payment] 793 | review -->|needs_revision| revise[Revise Report] 794 | review -->|rejected| finish[Finish Process] 795 | 796 | revise --> review 797 | payment --> finish 798 | ``` 799 | 800 | ### Running Individual Nodes vs. Running a Flow 801 | 802 | - `node.run(shared)`: Just runs that node alone (calls `prep->exec->post()`), returns an Action. 803 | - `flow.run(shared)`: Executes from the start node, follows Actions to the next node, and so on until the flow can't continue. 804 | 805 | > `node.run(shared)` **does not** proceed to the successor. 806 | > This is mainly for debugging or testing a single node. 807 | > 808 | > Always use `flow.run(...)` in production to ensure the full pipeline runs correctly. 809 | {: .warning } 810 | 811 | ## 3. Nested Flows 812 | 813 | A **Flow** can act like a Node, which enables powerful composition patterns. This means you can: 814 | 815 | 1. Use a Flow as a Node within another Flow's transitions. 816 | 2. Combine multiple smaller Flows into a larger Flow for reuse. 817 | 3. Node `params` will be a merging of **all** parents' `params`. 818 | 819 | ### Flow's Node Methods 820 | 821 | A **Flow** is also a **Node**, so it will run `prep()` and `post()`. However: 822 | 823 | - It **won't** run `exec()`, as its main logic is to orchestrate its nodes. 824 | - `post()` always receives `None` for `exec_res` and should instead get the flow execution results from the shared store. 825 | 826 | ### Basic Flow Nesting 827 | 828 | Here's how to connect a flow to another node: 829 | 830 | ```python 831 | # Create a sub-flow 832 | node_a >> node_b 833 | subflow = Flow(start=node_a) 834 | 835 | # Connect it to another node 836 | subflow >> node_c 837 | 838 | # Create the parent flow 839 | parent_flow = Flow(start=subflow) 840 | ``` 841 | 842 | When `parent_flow.run()` executes: 843 | 1. It starts `subflow` 844 | 2. `subflow` runs through its nodes (`node_a->node_b`) 845 | 3. 
After `subflow` completes, execution continues to `node_c` 846 | 847 | ### Example: Order Processing Pipeline 848 | 849 | Here's a practical example that breaks down order processing into nested flows: 850 | 851 | ```python 852 | # Payment processing sub-flow 853 | validate_payment >> process_payment >> payment_confirmation 854 | payment_flow = Flow(start=validate_payment) 855 | 856 | # Inventory sub-flow 857 | check_stock >> reserve_items >> update_inventory 858 | inventory_flow = Flow(start=check_stock) 859 | 860 | # Shipping sub-flow 861 | create_label >> assign_carrier >> schedule_pickup 862 | shipping_flow = Flow(start=create_label) 863 | 864 | # Connect the flows into a main order pipeline 865 | payment_flow >> inventory_flow >> shipping_flow 866 | 867 | # Create the master flow 868 | order_pipeline = Flow(start=payment_flow) 869 | 870 | # Run the entire pipeline 871 | order_pipeline.run(shared_data) 872 | ``` 873 | 874 | This creates a clean separation of concerns while maintaining a clear execution path: 875 | 876 | ```mermaid 877 | flowchart LR 878 | subgraph order_pipeline[Order Pipeline] 879 | subgraph paymentFlow["Payment Flow"] 880 | A[Validate Payment] --> B[Process Payment] --> C[Payment Confirmation] 881 | end 882 | 883 | subgraph inventoryFlow["Inventory Flow"] 884 | D[Check Stock] --> E[Reserve Items] --> F[Update Inventory] 885 | end 886 | 887 | subgraph shippingFlow["Shipping Flow"] 888 | G[Create Label] --> H[Assign Carrier] --> I[Schedule Pickup] 889 | end 890 | 891 | paymentFlow --> inventoryFlow 892 | inventoryFlow --> shippingFlow 893 | end 894 | ``` 895 | 896 | ================================================ 897 | File: docs/core_abstraction/node.md 898 | ================================================ 899 | --- 900 | layout: default 901 | title: "Node" 902 | parent: "Core Abstraction" 903 | nav_order: 1 904 | --- 905 | 906 | # Node 907 | 908 | A **Node** is the smallest building block. Each Node has 3 steps `prep->exec->post`: 909 | 910 |
913 | 914 | 1. `prep(shared)` 915 | - **Read and preprocess data** from `shared` store. 916 | - Examples: *query DB, read files, or serialize data into a string*. 917 | - Return `prep_res`, which is used by `exec()` and `post()`. 918 | 919 | 2. `exec(prep_res)` 920 | - **Execute compute logic**, with optional retries and error handling (below). 921 | - Examples: *(mostly) LLM calls, remote APIs, tool use*. 922 | - ⚠️ This shall be only for compute and **NOT** access `shared`. 923 | - ⚠️ If retries enabled, ensure idempotent implementation. 924 | - ⚠️ Defer exception handling to the Node's built-in retry mechanism. 925 | - Return `exec_res`, which is passed to `post()`. 926 | 927 | 3. `post(shared, prep_res, exec_res)` 928 | - **Postprocess and write data** back to `shared`. 929 | - Examples: *update DB, change states, log results*. 930 | - **Decide the next action** by returning a *string* (`action = "default"` if *None*). 931 | 932 | > **Why 3 steps?** To enforce the principle of *separation of concerns*. The data storage and data processing are operated separately. 933 | > 934 | > All steps are *optional*. E.g., you can only implement `prep` and `post` if you just need to process data. 935 | {: .note } 936 | 937 | ### Fault Tolerance & Retries 938 | 939 | You can **retry** `exec()` if it raises an exception via two parameters when define the Node: 940 | 941 | - `max_retries` (int): Max times to run `exec()`. The default is `1` (**no** retry). 942 | - `wait` (int): The time to wait (in **seconds**) before next retry. By default, `wait=0` (no waiting). 943 | `wait` is helpful when you encounter rate-limits or quota errors from your LLM provider and need to back off. 944 | 945 | ```python 946 | my_node = SummarizeFile(max_retries=3, wait=10) 947 | ``` 948 | 949 | When an exception occurs in `exec()`, the Node automatically retries until: 950 | 951 | - It either succeeds, or 952 | - The Node has retried `max_retries - 1` times already and fails on the last attempt. 953 | 954 | You can get the current retry times (0-based) from `self.cur_retry`. 955 | 956 | ```python 957 | class RetryNode(Node): 958 | def exec(self, prep_res): 959 | print(f"Retry {self.cur_retry} times") 960 | raise Exception("Failed") 961 | ``` 962 | 963 | ### Graceful Fallback 964 | 965 | To **gracefully handle** the exception (after all retries) rather than raising it, override: 966 | 967 | ```python 968 | def exec_fallback(self, prep_res, exc): 969 | raise exc 970 | ``` 971 | 972 | By default, it just re-raises exception. But you can return a fallback result instead, which becomes the `exec_res` passed to `post()`. 973 | 974 | ### Example: Summarize file 975 | 976 | ```python 977 | class SummarizeFile(Node): 978 | def prep(self, shared): 979 | return shared["data"] 980 | 981 | def exec(self, prep_res): 982 | if not prep_res: 983 | return "Empty file content" 984 | prompt = f"Summarize this text in 10 words: {prep_res}" 985 | summary = call_llm(prompt) # might fail 986 | return summary 987 | 988 | def exec_fallback(self, prep_res, exc): 989 | # Provide a simple fallback instead of crashing 990 | return "There was an error processing your request." 
991 | 992 | def post(self, shared, prep_res, exec_res): 993 | shared["summary"] = exec_res 994 | # Return "default" by not returning 995 | 996 | summarize_node = SummarizeFile(max_retries=3) 997 | 998 | # node.run() calls prep->exec->post 999 | # If exec() fails, it retries up to 3 times before calling exec_fallback() 1000 | action_result = summarize_node.run(shared) 1001 | 1002 | print("Action returned:", action_result) # "default" 1003 | print("Summary stored:", shared["summary"]) 1004 | ``` 1005 | 1006 | ================================================ 1007 | File: docs/core_abstraction/parallel.md 1008 | ================================================ 1009 | --- 1010 | layout: default 1011 | title: "(Advanced) Parallel" 1012 | parent: "Core Abstraction" 1013 | nav_order: 6 1014 | --- 1015 | 1016 | # (Advanced) Parallel 1017 | 1018 | **Parallel** Nodes and Flows let you run multiple **Async** Nodes and Flows **concurrently**—for example, summarizing multiple texts at once. This can improve performance by overlapping I/O and compute. 1019 | 1020 | > Because of Python’s GIL, parallel nodes and flows can’t truly parallelize CPU-bound tasks (e.g., heavy numerical computations). However, they excel at overlapping I/O-bound work—like LLM calls, database queries, API requests, or file I/O. 1021 | {: .warning } 1022 | 1023 | > - **Ensure Tasks Are Independent**: If each item depends on the output of a previous item, **do not** parallelize. 1024 | > 1025 | > - **Beware of Rate Limits**: Parallel calls can **quickly** trigger rate limits on LLM services. You may need a **throttling** mechanism (e.g., semaphores or sleep intervals). 1026 | > 1027 | > - **Consider Single-Node Batch APIs**: Some LLMs offer a **batch inference** API where you can send multiple prompts in a single call. This is more complex to implement but can be more efficient than launching many parallel requests and mitigates rate limits. 1028 | {: .best-practice } 1029 | 1030 | ## AsyncParallelBatchNode 1031 | 1032 | Like **AsyncBatchNode**, but run `exec_async()` in **parallel**: 1033 | 1034 | ```python 1035 | class ParallelSummaries(AsyncParallelBatchNode): 1036 | async def prep_async(self, shared): 1037 | # e.g., multiple texts 1038 | return shared["texts"] 1039 | 1040 | async def exec_async(self, text): 1041 | prompt = f"Summarize: {text}" 1042 | return await call_llm_async(prompt) 1043 | 1044 | async def post_async(self, shared, prep_res, exec_res_list): 1045 | shared["summary"] = "\n\n".join(exec_res_list) 1046 | return "default" 1047 | 1048 | node = ParallelSummaries() 1049 | flow = AsyncFlow(start=node) 1050 | ``` 1051 | 1052 | ## AsyncParallelBatchFlow 1053 | 1054 | Parallel version of **BatchFlow**. 
Each iteration of the sub-flow runs **concurrently** using different parameters: 1055 | 1056 | ```python 1057 | class SummarizeMultipleFiles(AsyncParallelBatchFlow): 1058 | async def prep_async(self, shared): 1059 | return [{"filename": f} for f in shared["files"]] 1060 | 1061 | sub_flow = AsyncFlow(start=LoadAndSummarizeFile()) 1062 | parallel_flow = SummarizeMultipleFiles(start=sub_flow) 1063 | await parallel_flow.run_async(shared) 1064 | ``` 1065 | 1066 | ================================================ 1067 | File: docs/design_pattern/agent.md 1068 | ================================================ 1069 | --- 1070 | layout: default 1071 | title: "Agent" 1072 | parent: "Design Pattern" 1073 | nav_order: 1 1074 | --- 1075 | 1076 | # Agent 1077 | 1078 | Agent is a powerful design pattern in which nodes can take dynamic actions based on the context. 1079 | 1080 |
1083 | 1084 | ## Implement Agent with Graph 1085 | 1086 | 1. **Context and Action:** Implement nodes that supply context and perform actions. 1087 | 2. **Branching:** Use branching to connect each action node to an agent node. Use action to allow the agent to direct the [flow](../core_abstraction/flow.md) between nodes—and potentially loop back for multi-step. 1088 | 3. **Agent Node:** Provide a prompt to decide action—for example: 1089 | 1090 | ```python 1091 | f""" 1092 | ### CONTEXT 1093 | Task: {task_description} 1094 | Previous Actions: {previous_actions} 1095 | Current State: {current_state} 1096 | 1097 | ### ACTION SPACE 1098 | [1] search 1099 | Description: Use web search to get results 1100 | Parameters: 1101 | - query (str): What to search for 1102 | 1103 | [2] answer 1104 | Description: Conclude based on the results 1105 | Parameters: 1106 | - result (str): Final answer to provide 1107 | 1108 | ### NEXT ACTION 1109 | Decide the next action based on the current context and available action space. 1110 | Return your response in the following format: 1111 | 1112 | ```yaml 1113 | thinking: | 1114 | 1115 | action: 1116 | parameters: 1117 | : 1118 | ```""" 1119 | ``` 1120 | 1121 | The core of building **high-performance** and **reliable** agents boils down to: 1122 | 1123 | 1. **Context Management:** Provide *relevant, minimal context.* For example, rather than including an entire chat history, retrieve the most relevant via [RAG](./rag.md). Even with larger context windows, LLMs still fall victim to ["lost in the middle"](https://arxiv.org/abs/2307.03172), overlooking mid-prompt content. 1124 | 1125 | 2. **Action Space:** Provide *a well-structured and unambiguous* set of actions—avoiding overlap like separate `read_databases` or `read_csvs`. Instead, import CSVs into the database. 1126 | 1127 | ## Example Good Action Design 1128 | 1129 | - **Incremental:** Feed content in manageable chunks (500 lines or 1 page) instead of all at once. 1130 | 1131 | - **Overview-zoom-in:** First provide high-level structure (table of contents, summary), then allow drilling into details (raw texts). 1132 | 1133 | - **Parameterized/Programmable:** Instead of fixed actions, enable parameterized (columns to select) or programmable (SQL queries) actions, for example, to read CSV files. 1134 | 1135 | - **Backtracking:** Let the agent undo the last step instead of restarting entirely, preserving progress when encountering errors or dead ends. 1136 | 1137 | ## Example: Search Agent 1138 | 1139 | This agent: 1140 | 1. Decides whether to search or answer 1141 | 2. If searches, loops back to decide if more search needed 1142 | 3. 
Answers when enough context gathered 1143 | 1144 | ```python 1145 | class DecideAction(Node): 1146 | def prep(self, shared): 1147 | context = shared.get("context", "No previous search") 1148 | query = shared["query"] 1149 | return query, context 1150 | 1151 | def exec(self, inputs): 1152 | query, context = inputs 1153 | prompt = f""" 1154 | Given input: {query} 1155 | Previous search results: {context} 1156 | Should I: 1) Search web for more info 2) Answer with current knowledge 1157 | Output in yaml: 1158 | ```yaml 1159 | action: search/answer 1160 | reason: why this action 1161 | search_term: search phrase if action is search 1162 | ```""" 1163 | resp = call_llm(prompt) 1164 | yaml_str = resp.split("```yaml")[1].split("```")[0].strip() 1165 | result = yaml.safe_load(yaml_str) 1166 | 1167 | assert isinstance(result, dict) 1168 | assert "action" in result 1169 | assert "reason" in result 1170 | assert result["action"] in ["search", "answer"] 1171 | if result["action"] == "search": 1172 | assert "search_term" in result 1173 | 1174 | return result 1175 | 1176 | def post(self, shared, prep_res, exec_res): 1177 | if exec_res["action"] == "search": 1178 | shared["search_term"] = exec_res["search_term"] 1179 | return exec_res["action"] 1180 | 1181 | class SearchWeb(Node): 1182 | def prep(self, shared): 1183 | return shared["search_term"] 1184 | 1185 | def exec(self, search_term): 1186 | return search_web(search_term) 1187 | 1188 | def post(self, shared, prep_res, exec_res): 1189 | prev_searches = shared.get("context", []) 1190 | shared["context"] = prev_searches + [ 1191 | {"term": shared["search_term"], "result": exec_res} 1192 | ] 1193 | return "decide" 1194 | 1195 | class DirectAnswer(Node): 1196 | def prep(self, shared): 1197 | return shared["query"], shared.get("context", "") 1198 | 1199 | def exec(self, inputs): 1200 | query, context = inputs 1201 | return call_llm(f"Context: {context}\nAnswer: {query}") 1202 | 1203 | def post(self, shared, prep_res, exec_res): 1204 | print(f"Answer: {exec_res}") 1205 | shared["answer"] = exec_res 1206 | 1207 | # Connect nodes 1208 | decide = DecideAction() 1209 | search = SearchWeb() 1210 | answer = DirectAnswer() 1211 | 1212 | decide - "search" >> search 1213 | decide - "answer" >> answer 1214 | search - "decide" >> decide # Loop back 1215 | 1216 | flow = Flow(start=decide) 1217 | flow.run({"query": "Who won the Nobel Prize in Physics 2024?"}) 1218 | ``` 1219 | 1220 | ================================================ 1221 | File: docs/design_pattern/mapreduce.md 1222 | ================================================ 1223 | --- 1224 | layout: default 1225 | title: "Map Reduce" 1226 | parent: "Design Pattern" 1227 | nav_order: 4 1228 | --- 1229 | 1230 | # Map Reduce 1231 | 1232 | MapReduce is a design pattern suitable when you have either: 1233 | - Large input data (e.g., multiple files to process), or 1234 | - Large output data (e.g., multiple forms to fill) 1235 | 1236 | and there is a logical way to break the task into smaller, ideally independent parts. 1237 | 1238 |
1239 | 1240 |
1241 | 1242 | You first break down the task using [BatchNode](../core_abstraction/batch.md) in the map phase, followed by aggregation in the reduce phase. 1243 | 1244 | ### Example: Document Summarization 1245 | 1246 | ```python 1247 | class SummarizeAllFiles(BatchNode): 1248 | def prep(self, shared): 1249 | files_dict = shared["files"] # e.g. 10 files 1250 | return list(files_dict.items()) # [("file1.txt", "aaa..."), ("file2.txt", "bbb..."), ...] 1251 | 1252 | def exec(self, one_file): 1253 | filename, file_content = one_file 1254 | summary_text = call_llm(f"Summarize the following file:\n{file_content}") 1255 | return (filename, summary_text) 1256 | 1257 | def post(self, shared, prep_res, exec_res_list): 1258 | shared["file_summaries"] = dict(exec_res_list) 1259 | 1260 | class CombineSummaries(Node): 1261 | def prep(self, shared): 1262 | return shared["file_summaries"] 1263 | 1264 | def exec(self, file_summaries): 1265 | # format as: "File1: summary\nFile2: summary...\n" 1266 | text_list = [] 1267 | for fname, summ in file_summaries.items(): 1268 | text_list.append(f"{fname} summary:\n{summ}\n") 1269 | big_text = "\n---\n".join(text_list) 1270 | 1271 | return call_llm(f"Combine these file summaries into one final summary:\n{big_text}") 1272 | 1273 | def post(self, shared, prep_res, final_summary): 1274 | shared["all_files_summary"] = final_summary 1275 | 1276 | batch_node = SummarizeAllFiles() 1277 | combine_node = CombineSummaries() 1278 | batch_node >> combine_node 1279 | 1280 | flow = Flow(start=batch_node) 1281 | 1282 | shared = { 1283 | "files": { 1284 | "file1.txt": "Alice was beginning to get very tired of sitting by her sister...", 1285 | "file2.txt": "Some other interesting text ...", 1286 | # ... 1287 | } 1288 | } 1289 | flow.run(shared) 1290 | print("Individual Summaries:", shared["file_summaries"]) 1291 | print("\nFinal Summary:\n", shared["all_files_summary"]) 1292 | ``` 1293 | 1294 | ================================================ 1295 | File: docs/design_pattern/rag.md 1296 | ================================================ 1297 | --- 1298 | layout: default 1299 | title: "RAG" 1300 | parent: "Design Pattern" 1301 | nav_order: 3 1302 | --- 1303 | 1304 | # RAG (Retrieval Augmented Generation) 1305 | 1306 | For certain LLM tasks like answering questions, providing relevant context is essential. One common architecture is a **two-stage** RAG pipeline: 1307 | 1308 |
1309 | 1310 |
1311 | 1312 | 1. **Offline stage**: Preprocess and index documents ("building the index"). 1313 | 2. **Online stage**: Given a question, generate answers by retrieving the most relevant context. 1314 | 1315 | --- 1316 | ## Stage 1: Offline Indexing 1317 | 1318 | We create three Nodes: 1319 | 1. `ChunkDocs` – [chunks](../utility_function/chunking.md) raw text. 1320 | 2. `EmbedDocs` – [embeds](../utility_function/embedding.md) each chunk. 1321 | 3. `StoreIndex` – stores embeddings into a [vector database](../utility_function/vector.md). 1322 | 1323 | ```python 1324 | class ChunkDocs(BatchNode): 1325 | def prep(self, shared): 1326 | # A list of file paths in shared["files"]. We process each file. 1327 | return shared["files"] 1328 | 1329 | def exec(self, filepath): 1330 | # read file content. In real usage, do error handling. 1331 | with open(filepath, "r", encoding="utf-8") as f: 1332 | text = f.read() 1333 | # chunk by 100 chars each 1334 | chunks = [] 1335 | size = 100 1336 | for i in range(0, len(text), size): 1337 | chunks.append(text[i : i + size]) 1338 | return chunks 1339 | 1340 | def post(self, shared, prep_res, exec_res_list): 1341 | # exec_res_list is a list of chunk-lists, one per file. 1342 | # flatten them all into a single list of chunks. 1343 | all_chunks = [] 1344 | for chunk_list in exec_res_list: 1345 | all_chunks.extend(chunk_list) 1346 | shared["all_chunks"] = all_chunks 1347 | 1348 | class EmbedDocs(BatchNode): 1349 | def prep(self, shared): 1350 | return shared["all_chunks"] 1351 | 1352 | def exec(self, chunk): 1353 | return get_embedding(chunk) 1354 | 1355 | def post(self, shared, prep_res, exec_res_list): 1356 | # Store the list of embeddings. 1357 | shared["all_embeds"] = exec_res_list 1358 | print(f"Total embeddings: {len(exec_res_list)}") 1359 | 1360 | class StoreIndex(Node): 1361 | def prep(self, shared): 1362 | # We'll read all embeds from shared. 1363 | return shared["all_embeds"] 1364 | 1365 | def exec(self, all_embeds): 1366 | # Create a vector index (faiss or other DB in real usage). 1367 | index = create_index(all_embeds) 1368 | return index 1369 | 1370 | def post(self, shared, prep_res, index): 1371 | shared["index"] = index 1372 | 1373 | # Wire them in sequence 1374 | chunk_node = ChunkDocs() 1375 | embed_node = EmbedDocs() 1376 | store_node = StoreIndex() 1377 | 1378 | chunk_node >> embed_node >> store_node 1379 | 1380 | OfflineFlow = Flow(start=chunk_node) 1381 | ``` 1382 | 1383 | Usage example: 1384 | 1385 | ```python 1386 | shared = { 1387 | "files": ["doc1.txt", "doc2.txt"], # any text files 1388 | } 1389 | OfflineFlow.run(shared) 1390 | ``` 1391 | 1392 | --- 1393 | ## Stage 2: Online Query & Answer 1394 | 1395 | We have 3 nodes: 1396 | 1. `EmbedQuery` – embeds the user’s question. 1397 | 2. `RetrieveDocs` – retrieves top chunk from the index. 1398 | 3. `GenerateAnswer` – calls the LLM with the question + chunk to produce the final answer. 
1399 | 1400 | ```python 1401 | class EmbedQuery(Node): 1402 | def prep(self, shared): 1403 | return shared["question"] 1404 | 1405 | def exec(self, question): 1406 | return get_embedding(question) 1407 | 1408 | def post(self, shared, prep_res, q_emb): 1409 | shared["q_emb"] = q_emb 1410 | 1411 | class RetrieveDocs(Node): 1412 | def prep(self, shared): 1413 | # We'll need the query embedding, plus the offline index/chunks 1414 | return shared["q_emb"], shared["index"], shared["all_chunks"] 1415 | 1416 | def exec(self, inputs): 1417 | q_emb, index, chunks = inputs 1418 | I, D = search_index(index, q_emb, top_k=1) 1419 | best_id = I[0][0] 1420 | relevant_chunk = chunks[best_id] 1421 | return relevant_chunk 1422 | 1423 | def post(self, shared, prep_res, relevant_chunk): 1424 | shared["retrieved_chunk"] = relevant_chunk 1425 | print("Retrieved chunk:", relevant_chunk[:60], "...") 1426 | 1427 | class GenerateAnswer(Node): 1428 | def prep(self, shared): 1429 | return shared["question"], shared["retrieved_chunk"] 1430 | 1431 | def exec(self, inputs): 1432 | question, chunk = inputs 1433 | prompt = f"Question: {question}\nContext: {chunk}\nAnswer:" 1434 | return call_llm(prompt) 1435 | 1436 | def post(self, shared, prep_res, answer): 1437 | shared["answer"] = answer 1438 | print("Answer:", answer) 1439 | 1440 | embed_qnode = EmbedQuery() 1441 | retrieve_node = RetrieveDocs() 1442 | generate_node = GenerateAnswer() 1443 | 1444 | embed_qnode >> retrieve_node >> generate_node 1445 | OnlineFlow = Flow(start=embed_qnode) 1446 | ``` 1447 | 1448 | Usage example: 1449 | 1450 | ```python 1451 | # Suppose we already ran OfflineFlow and have: 1452 | # shared["all_chunks"], shared["index"], etc. 1453 | shared["question"] = "Why do people like cats?" 1454 | 1455 | OnlineFlow.run(shared) 1456 | # final answer in shared["answer"] 1457 | ``` 1458 | 1459 | ================================================ 1460 | File: docs/design_pattern/structure.md 1461 | ================================================ 1462 | --- 1463 | layout: default 1464 | title: "Structured Output" 1465 | parent: "Design Pattern" 1466 | nav_order: 5 1467 | --- 1468 | 1469 | # Structured Output 1470 | 1471 | In many use cases, you may want the LLM to output a specific structure, such as a list or a dictionary with predefined keys. 1472 | 1473 | There are several approaches to achieve a structured output: 1474 | - **Prompting** the LLM to strictly return a defined structure. 1475 | - Using LLMs that natively support **schema enforcement**. 1476 | - **Post-processing** the LLM's response to extract structured content. 1477 | 1478 | In practice, **Prompting** is simple and reliable for modern LLMs. 1479 | 1480 | ### Example Use Cases 1481 | 1482 | - Extracting Key Information 1483 | 1484 | ```yaml 1485 | product: 1486 | name: Widget Pro 1487 | price: 199.99 1488 | description: | 1489 | A high-quality widget designed for professionals. 1490 | Recommended for advanced users. 1491 | ``` 1492 | 1493 | - Summarizing Documents into Bullet Points 1494 | 1495 | ```yaml 1496 | summary: 1497 | - This product is easy to use. 1498 | - It is cost-effective. 1499 | - Suitable for all skill levels. 1500 | ``` 1501 | 1502 | - Generating Configuration Files 1503 | 1504 | ```yaml 1505 | server: 1506 | host: 127.0.0.1 1507 | port: 8080 1508 | ssl: true 1509 | ``` 1510 | 1511 | ## Prompt Engineering 1512 | 1513 | When prompting the LLM to produce **structured** output: 1514 | 1. **Wrap** the structure in code fences (e.g., `yaml`). 1515 | 2. 
**Validate** that all required fields exist (and let `Node` handles retry). 1516 | 1517 | ### Example Text Summarization 1518 | 1519 | ```python 1520 | class SummarizeNode(Node): 1521 | def exec(self, prep_res): 1522 | # Suppose `prep_res` is the text to summarize. 1523 | prompt = f""" 1524 | Please summarize the following text as YAML, with exactly 3 bullet points 1525 | 1526 | {prep_res} 1527 | 1528 | Now, output: 1529 | ```yaml 1530 | summary: 1531 | - bullet 1 1532 | - bullet 2 1533 | - bullet 3 1534 | ```""" 1535 | response = call_llm(prompt) 1536 | yaml_str = response.split("```yaml")[1].split("```")[0].strip() 1537 | 1538 | import yaml 1539 | structured_result = yaml.safe_load(yaml_str) 1540 | 1541 | assert "summary" in structured_result 1542 | assert isinstance(structured_result["summary"], list) 1543 | 1544 | return structured_result 1545 | ``` 1546 | 1547 | > Besides using `assert` statements, another popular way to validate schemas is [Pydantic](https://github.com/pydantic/pydantic) 1548 | {: .note } 1549 | 1550 | ### Why YAML instead of JSON? 1551 | 1552 | Current LLMs struggle with escaping. YAML is easier with strings since they don't always need quotes. 1553 | 1554 | **In JSON** 1555 | 1556 | ```json 1557 | { 1558 | "dialogue": "Alice said: \"Hello Bob.\\nHow are you?\\nI am good.\"" 1559 | } 1560 | ``` 1561 | 1562 | - Every double quote inside the string must be escaped with `\"`. 1563 | - Each newline in the dialogue must be represented as `\n`. 1564 | 1565 | **In YAML** 1566 | 1567 | ```yaml 1568 | dialogue: | 1569 | Alice said: "Hello Bob. 1570 | How are you? 1571 | I am good." 1572 | ``` 1573 | 1574 | - No need to escape interior quotes—just place the entire text under a block literal (`|`). 1575 | - Newlines are naturally preserved without needing `\n`. 1576 | 1577 | ================================================ 1578 | File: docs/design_pattern/workflow.md 1579 | ================================================ 1580 | --- 1581 | layout: default 1582 | title: "Workflow" 1583 | parent: "Design Pattern" 1584 | nav_order: 2 1585 | --- 1586 | 1587 | # Workflow 1588 | 1589 | Many real-world tasks are too complex for one LLM call. The solution is to **Task Decomposition**: decompose them into a [chain](../core_abstraction/flow.md) of multiple Nodes. 1590 | 1591 |
1592 | 1593 |
1594 | 1595 | > - You don't want to make each task **too coarse**, because it may be *too complex for one LLM call*. 1596 | > - You don't want to make each task **too granular**, because then *the LLM call doesn't have enough context* and results are *not consistent across nodes*. 1597 | > 1598 | > You usually need multiple *iterations* to find the *sweet spot*. If the task has too many *edge cases*, consider using [Agents](./agent.md). 1599 | {: .best-practice } 1600 | 1601 | ### Example: Article Writing 1602 | 1603 | ```python 1604 | class GenerateOutline(Node): 1605 | def prep(self, shared): return shared["topic"] 1606 | def exec(self, topic): return call_llm(f"Create a detailed outline for an article about {topic}") 1607 | def post(self, shared, prep_res, exec_res): shared["outline"] = exec_res 1608 | 1609 | class WriteSection(Node): 1610 | def prep(self, shared): return shared["outline"] 1611 | def exec(self, outline): return call_llm(f"Write content based on this outline: {outline}") 1612 | def post(self, shared, prep_res, exec_res): shared["draft"] = exec_res 1613 | 1614 | class ReviewAndRefine(Node): 1615 | def prep(self, shared): return shared["draft"] 1616 | def exec(self, draft): return call_llm(f"Review and improve this draft: {draft}") 1617 | def post(self, shared, prep_res, exec_res): shared["final_article"] = exec_res 1618 | 1619 | # Connect nodes 1620 | outline = GenerateOutline() 1621 | write = WriteSection() 1622 | review = ReviewAndRefine() 1623 | 1624 | outline >> write >> review 1625 | 1626 | # Create and run flow 1627 | writing_flow = Flow(start=outline) 1628 | shared = {"topic": "AI Safety"} 1629 | writing_flow.run(shared) 1630 | ``` 1631 | 1632 | For *dynamic cases*, consider using [Agents](./agent.md). 1633 | 1634 | ================================================ 1635 | File: docs/utility_function/llm.md 1636 | ================================================ 1637 | --- 1638 | layout: default 1639 | title: "LLM Wrapper" 1640 | parent: "Utility Function" 1641 | nav_order: 1 1642 | --- 1643 | 1644 | # LLM Wrappers 1645 | 1646 | Check out libraries like [litellm](https://github.com/BerriAI/litellm). 1647 | Here, we provide some minimal example implementations: 1648 | 1649 | 1. OpenAI 1650 | ```python 1651 | def call_llm(prompt): 1652 | from openai import OpenAI 1653 | client = OpenAI(api_key="YOUR_API_KEY_HERE") 1654 | r = client.chat.completions.create( 1655 | model="gpt-4o", 1656 | messages=[{"role": "user", "content": prompt}] 1657 | ) 1658 | return r.choices[0].message.content 1659 | 1660 | # Example usage 1661 | call_llm("How are you?") 1662 | ``` 1663 | > Store the API key in an environment variable like OPENAI_API_KEY for security. 1664 | {: .best-practice } 1665 | 1666 | 2. Claude (Anthropic) 1667 | ```python 1668 | def call_llm(prompt): 1669 | from anthropic import Anthropic 1670 | client = Anthropic(api_key="YOUR_API_KEY_HERE") 1671 | r = client.messages.create( 1672 | model="claude-sonnet-4-0", 1673 | messages=[ 1674 | {"role": "user", "content": prompt} 1675 | ] 1676 | ) 1677 | return r.content[0].text 1678 | ``` 1679 | 1680 | 3. Google (Generative AI Studio / PaLM API) 1681 | ```python 1682 | def call_llm(prompt): 1683 | from google import genai 1684 | client = genai.Client(api_key='GEMINI_API_KEY') 1685 | response = client.models.generate_content( 1686 | model='gemini-2.5-pro', 1687 | contents=prompt 1688 | ) 1689 | return response.text 1690 | ``` 1691 | 1692 | 4. 
Azure (Azure OpenAI) 1693 | ```python 1694 | def call_llm(prompt): 1695 | from openai import AzureOpenAI 1696 | client = AzureOpenAI( 1697 | azure_endpoint="https://.openai.azure.com/", 1698 | api_key="YOUR_API_KEY_HERE", 1699 | api_version="2023-05-15" 1700 | ) 1701 | r = client.chat.completions.create( 1702 | model="", 1703 | messages=[{"role": "user", "content": prompt}] 1704 | ) 1705 | return r.choices[0].message.content 1706 | ``` 1707 | 1708 | 5. Ollama (Local LLM) 1709 | ```python 1710 | def call_llm(prompt): 1711 | from ollama import chat 1712 | response = chat( 1713 | model="llama2", 1714 | messages=[{"role": "user", "content": prompt}] 1715 | ) 1716 | return response.message.content 1717 | ``` 1718 | 1719 | ## Improvements 1720 | Feel free to enhance your `call_llm` function as needed. Here are examples: 1721 | 1722 | - Handle chat history: 1723 | 1724 | ```python 1725 | def call_llm(messages): 1726 | from openai import OpenAI 1727 | client = OpenAI(api_key="YOUR_API_KEY_HERE") 1728 | r = client.chat.completions.create( 1729 | model="gpt-4o", 1730 | messages=messages 1731 | ) 1732 | return r.choices[0].message.content 1733 | ``` 1734 | 1735 | - Add in-memory caching 1736 | 1737 | ```python 1738 | from functools import lru_cache 1739 | 1740 | @lru_cache(maxsize=1000) 1741 | def call_llm(prompt): 1742 | # Your implementation here 1743 | pass 1744 | ``` 1745 | 1746 | > ⚠️ Caching conflicts with Node retries, as retries yield the same result. 1747 | > 1748 | > To address this, you could use cached results only if not retried. 1749 | {: .warning } 1750 | 1751 | 1752 | ```python 1753 | from functools import lru_cache 1754 | 1755 | @lru_cache(maxsize=1000) 1756 | def cached_call(prompt): 1757 | pass 1758 | 1759 | def call_llm(prompt, use_cache): 1760 | if use_cache: 1761 | return cached_call(prompt) 1762 | # Call the underlying function directly 1763 | return cached_call.__wrapped__(prompt) 1764 | 1765 | class SummarizeNode(Node): 1766 | def exec(self, text): 1767 | return call_llm(f"Summarize: {text}", self.cur_retry==0) 1768 | ``` 1769 | 1770 | - Enable logging: 1771 | 1772 | ```python 1773 | def call_llm(prompt): 1774 | import logging 1775 | logging.info(f"Prompt: {prompt}") 1776 | response = ... # Your implementation here 1777 | logging.info(f"Response: {response}") 1778 | return response 1779 | ``` -------------------------------------------------------------------------------- /.cursorrules: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: "Agentic Coding" 4 | --- 5 | 6 | # Agentic Coding: Humans Design, Agents code! 7 | 8 | > If you are an AI agent involved in building LLM Systems, read this guide **VERY, VERY** carefully! This is the most important chapter in the entire document. Throughout development, you should always (1) start with a small and simple solution, (2) design at a high level (`docs/design.md`) before implementation, and (3) frequently ask humans for feedback and clarification. 9 | {: .warning } 10 | 11 | ## Agentic Coding Steps 12 | 13 | Agentic Coding should be a collaboration between Human System Design and Agent Implementation: 14 | 15 | | Steps | Human | AI | Comment | 16 | |:-----------------------|:----------:|:---------:|:------------------------------------------------------------------------| 17 | | 1. Requirements | ★★★ High | ★☆☆ Low | Humans understand the requirements and context. | 18 | | 2. 
Flow | ★★☆ Medium | ★★☆ Medium | Humans specify the high-level design, and the AI fills in the details. | 19 | | 3. Utilities | ★★☆ Medium | ★★☆ Medium | Humans provide available external APIs and integrations, and the AI helps with implementation. | 20 | | 4. Data | ★☆☆ Low | ★★★ High | AI designs the data schema, and humans verify. | 21 | | 5. Node | ★☆☆ Low | ★★★ High | The AI helps design the node based on the flow. | 22 | | 6. Implementation | ★☆☆ Low | ★★★ High | The AI implements the flow based on the design. | 23 | | 7. Optimization | ★★☆ Medium | ★★☆ Medium | Humans evaluate the results, and the AI helps optimize. | 24 | | 8. Reliability | ★☆☆ Low | ★★★ High | The AI writes test cases and addresses corner cases. | 25 | 26 | 1. **Requirements**: Clarify the requirements for your project, and evaluate whether an AI system is a good fit. 27 | - Understand AI systems' strengths and limitations: 28 | - **Good for**: Routine tasks requiring common sense (filling forms, replying to emails) 29 | - **Good for**: Creative tasks with well-defined inputs (building slides, writing SQL) 30 | - **Not good for**: Ambiguous problems requiring complex decision-making (business strategy, startup planning) 31 | - **Keep It User-Centric:** Explain the "problem" from the user's perspective rather than just listing features. 32 | - **Balance complexity vs. impact**: Aim to deliver the highest value features with minimal complexity early. 33 | 34 | 2. **Flow Design**: Outline at a high level, describe how your AI system orchestrates nodes. 35 | - Identify applicable design patterns (e.g., [Map Reduce](./design_pattern/mapreduce.md), [Agent](./design_pattern/agent.md), [RAG](./design_pattern/rag.md)). 36 | - For each node in the flow, start with a high-level one-line description of what it does. 37 | - If using **Map Reduce**, specify how to map (what to split) and how to reduce (how to combine). 38 | - If using **Agent**, specify what are the inputs (context) and what are the possible actions. 39 | - If using **RAG**, specify what to embed, noting that there's usually both offline (indexing) and online (retrieval) workflows. 40 | - Outline the flow and draw it in a mermaid diagram. For example: 41 | ```mermaid 42 | flowchart LR 43 | start[Start] --> batch[Batch] 44 | batch --> check[Check] 45 | check -->|OK| process 46 | check -->|Error| fix[Fix] 47 | fix --> check 48 | 49 | subgraph process[Process] 50 | step1[Step 1] --> step2[Step 2] 51 | end 52 | 53 | process --> endNode[End] 54 | ``` 55 | - > **If Humans can't specify the flow, AI Agents can't automate it!** Before building an LLM system, thoroughly understand the problem and potential solution by manually solving example inputs to develop intuition. 56 | {: .best-practice } 57 | 58 | 3. **Utilities**: Based on the Flow Design, identify and implement necessary utility functions. 59 | - Think of your AI system as the brain. It needs a body—these *external utility functions*—to interact with the real world: 60 |
61 | 62 | - Reading inputs (e.g., retrieving Slack messages, reading emails) 63 | - Writing outputs (e.g., generating reports, sending emails) 64 | - Using external tools (e.g., calling LLMs, searching the web) 65 | - **NOTE**: *LLM-based tasks* (e.g., summarizing text, analyzing sentiment) are **NOT** utility functions; rather, they are *core functions* internal in the AI system. 66 | - For each utility function, implement it and write a simple test. 67 | - Document their input/output, as well as why they are necessary. For example: 68 | - `name`: `get_embedding` (`utils/get_embedding.py`) 69 | - `input`: `str` 70 | - `output`: a vector of 3072 floats 71 | - `necessity`: Used by the second node to embed text 72 | - Example utility implementation: 73 | ```python 74 | # utils/call_llm.py 75 | from openai import OpenAI 76 | 77 | def call_llm(prompt): 78 | client = OpenAI(api_key="YOUR_API_KEY_HERE") 79 | r = client.chat.completions.create( 80 | model="gpt-4o", 81 | messages=[{"role": "user", "content": prompt}] 82 | ) 83 | return r.choices[0].message.content 84 | 85 | if __name__ == "__main__": 86 | prompt = "What is the meaning of life?" 87 | print(call_llm(prompt)) 88 | ``` 89 | - > **Sometimes, design Utilities before Flow:** For example, for an LLM project to automate a legacy system, the bottleneck will likely be the available interface to that system. Start by designing the hardest utilities for interfacing, and then build the flow around them. 90 | {: .best-practice } 91 | - > **Avoid Exception Handling in Utilities**: If a utility function is called from a Node's `exec()` method, avoid using `try...except` blocks within the utility. Let the Node's built-in retry mechanism handle failures. 92 | {: .warning } 93 | 94 | 4. **Data Design**: Design the shared store that nodes will use to communicate. 95 | - One core design principle for PocketFlow is to use a well-designed [shared store](./core_abstraction/communication.md)—a data contract that all nodes agree upon to retrieve and store data. 96 | - For simple systems, use an in-memory dictionary. 97 | - For more complex systems or when persistence is required, use a database. 98 | - **Don't Repeat Yourself**: Use in-memory references or foreign keys. 99 | - Example shared store design: 100 | ```python 101 | shared = { 102 | "user": { 103 | "id": "user123", 104 | "context": { # Another nested dict 105 | "weather": {"temp": 72, "condition": "sunny"}, 106 | "location": "San Francisco" 107 | } 108 | }, 109 | "results": {} # Empty dict to store outputs 110 | } 111 | ``` 112 | 113 | 5. **Node Design**: Plan how each node will read and write data, and use utility functions. 114 | - For each [Node](./core_abstraction/node.md), describe its type, how it reads and writes data, and which utility function it uses. Keep it specific but high-level without codes. For example: 115 | - `type`: Regular (or Batch, or Async) 116 | - `prep`: Read "text" from the shared store 117 | - `exec`: Call the embedding utility function. **Avoid exception handling here**; let the Node's retry mechanism manage failures. 118 | - `post`: Write "embedding" to the shared store 119 | 120 | 6. **Implementation**: Implement the initial nodes and flows based on the design. 121 | - 🎉 If you've reached this step, humans have finished the design. Now *Agentic Coding* begins! 122 | - **"Keep it simple, stupid!"** Avoid complex features and full-scale type checking. 123 | - **FAIL FAST**! 
Rely on the built-in [Node](./core_abstraction/node.md) retry and fallback mechanisms instead of defensive error handling, so weak points in the system surface quickly. 124 | - Add logging throughout the code to facilitate debugging. 125 | 126 | 7. **Optimization**: 127 | - **Use Intuition**: For a quick initial evaluation, human intuition is often a good start. 128 | - **Redesign Flow (Back to Step 2: Flow Design)**: Consider breaking down tasks further, introducing agentic decisions, or better managing input contexts. 129 | - If your flow design is already solid, move on to micro-optimizations: 130 | - **Prompt Engineering**: Use clear, specific instructions with examples to reduce ambiguity. 131 | - **In-Context Learning**: Provide robust examples for tasks that are difficult to specify with instructions alone. 132 | 133 | - > **You'll likely iterate a lot!** Expect to repeat Steps 3–6 hundreds of times. 134 | > 135 |
136 | {: .best-practice } 137 | 138 | 8. **Reliability** 139 | - **Node Retries**: Add checks in the node `exec` to ensure outputs meet requirements, and consider increasing `max_retries` and `wait` times. 140 | - **Logging and Visualization**: Maintain logs of all attempts and visualize node results for easier debugging. 141 | - **Self-Evaluation**: Add a separate node (powered by an LLM) to review outputs when results are uncertain. 142 | 143 | ## Example LLM Project File Structure 144 | 145 | ``` 146 | my_project/ 147 | ├── main.py 148 | ├── nodes.py 149 | ├── flow.py 150 | ├── utils/ 151 | │ ├── __init__.py 152 | │ ├── call_llm.py 153 | │ └── search_web.py 154 | ├── requirements.txt 155 | └── docs/ 156 | └── design.md 157 | ``` 158 | 159 | - **`requirements.txt`**: Lists the Python dependencies for the project. 160 | ``` 161 | PyYAML 162 | pocketflow 163 | ``` 164 | 165 | - **`docs/design.md`**: Contains project documentation for each step above. This should be *high-level* and *no-code*. 166 | ~~~ 167 | # Design Doc: Your Project Name 168 | 169 | > Please DON'T remove notes for AI 170 | 171 | ## Requirements 172 | 173 | > Notes for AI: Keep it simple and clear. 174 | > If the requirements are abstract, write concrete user stories 175 | 176 | 177 | ## Flow Design 178 | 179 | > Notes for AI: 180 | > 1. Consider the design patterns of agent, map-reduce, rag, and workflow. Apply them if they fit. 181 | > 2. Present a concise, high-level description of the workflow. 182 | 183 | ### Applicable Design Pattern: 184 | 185 | 1. Map the file summary into chunks, then reduce these chunks into a final summary. 186 | 2. Agentic file finder 187 | - *Context*: The entire summary of the file 188 | - *Action*: Find the file 189 | 190 | ### Flow high-level Design: 191 | 192 | 1. **First Node**: This node is for ... 193 | 2. **Second Node**: This node is for ... 194 | 3. **Third Node**: This node is for ... 195 | 196 | ```mermaid 197 | flowchart TD 198 | firstNode[First Node] --> secondNode[Second Node] 199 | secondNode --> thirdNode[Third Node] 200 | ``` 201 | ## Utility Functions 202 | 203 | > Notes for AI: 204 | > 1. Understand the utility function definition thoroughly by reviewing the doc. 205 | > 2. Include only the necessary utility functions, based on nodes in the flow. 206 | 207 | 1. **Call LLM** (`utils/call_llm.py`) 208 | - *Input*: prompt (str) 209 | - *Output*: response (str) 210 | - Generally used by most nodes for LLM tasks 211 | 212 | 2. **Embedding** (`utils/get_embedding.py`) 213 | - *Input*: str 214 | - *Output*: a vector of 3072 floats 215 | - Used by the second node to embed text 216 | 217 | ## Node Design 218 | 219 | ### Shared Store 220 | 221 | > Notes for AI: Try to minimize data redundancy 222 | 223 | The shared store structure is organized as follows: 224 | 225 | ```python 226 | shared = { 227 | "key": "value" 228 | } 229 | ``` 230 | 231 | ### Node Steps 232 | 233 | > Notes for AI: Carefully decide whether to use Batch/Async Node/Flow. 234 | 235 | 1. First Node 236 | - *Purpose*: Provide a short explanation of the node’s function 237 | - *Type*: Decide between Regular, Batch, or Async 238 | - *Steps*: 239 | - *prep*: Read "key" from the shared store 240 | - *exec*: Call the utility function 241 | - *post*: Write "key" to the shared store 242 | 243 | 2. Second Node 244 | ... 245 | ~~~ 246 | 247 | 248 | - **`utils/`**: Contains all utility functions. 249 | - It's recommended to dedicate one Python file to each API call, for example `call_llm.py` or `search_web.py`. 
250 | - Each file should also include a `main()` function to try that API call 251 | ```python 252 | from google import genai 253 | import os 254 | 255 | def call_llm(prompt: str) -> str: 256 | client = genai.Client( 257 | api_key=os.getenv("GEMINI_API_KEY", ""), 258 | ) 259 | model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash") 260 | response = client.models.generate_content(model=model, contents=[prompt]) 261 | return response.text 262 | 263 | if __name__ == "__main__": 264 | test_prompt = "Hello, how are you?" 265 | 266 | # Simple check that the API call works 267 | print("Making call...") 268 | response = call_llm(test_prompt) 269 | print(f"Response: {response}") 270 | ``` 271 | 272 | - **`nodes.py`**: Contains all the node definitions. 273 | ```python 274 | # nodes.py 275 | from pocketflow import Node 276 | from utils.call_llm import call_llm 277 | 278 | class GetQuestionNode(Node): 279 | def exec(self, _): 280 | # Get question directly from user input 281 | user_question = input("Enter your question: ") 282 | return user_question 283 | 284 | def post(self, shared, prep_res, exec_res): 285 | # Store the user's question 286 | shared["question"] = exec_res 287 | return "default" # Go to the next node 288 | 289 | class AnswerNode(Node): 290 | def prep(self, shared): 291 | # Read question from shared 292 | return shared["question"] 293 | 294 | def exec(self, question): 295 | # Call LLM to get the answer 296 | return call_llm(question) 297 | 298 | def post(self, shared, prep_res, exec_res): 299 | # Store the answer in shared 300 | shared["answer"] = exec_res 301 | ``` 302 | - **`flow.py`**: Implements functions that create flows by importing node definitions and connecting them. 303 | ```python 304 | # flow.py 305 | from pocketflow import Flow 306 | from nodes import GetQuestionNode, AnswerNode 307 | 308 | def create_qa_flow(): 309 | """Create and return a question-answering flow.""" 310 | # Create nodes 311 | get_question_node = GetQuestionNode() 312 | answer_node = AnswerNode() 313 | 314 | # Connect nodes in sequence 315 | get_question_node >> answer_node 316 | 317 | # Create flow starting with input node 318 | return Flow(start=get_question_node) 319 | ``` 320 | - **`main.py`**: Serves as the project's entry point. 321 | ```python 322 | # main.py 323 | from flow import create_qa_flow 324 | 325 | # Example main function 326 | # Please replace this with your own main function 327 | def main(): 328 | shared = { 329 | "question": None, # Will be populated by GetQuestionNode from user input 330 | "answer": None # Will be populated by AnswerNode 331 | } 332 | 333 | # Create the flow and run it 334 | qa_flow = create_qa_flow() 335 | qa_flow.run(shared) 336 | print(f"Question: {shared['question']}") 337 | print(f"Answer: {shared['answer']}") 338 | 339 | if __name__ == "__main__": 340 | main() 341 | ``` 342 | 343 | ================================================ 344 | File: docs/index.md 345 | ================================================ 346 | --- 347 | layout: default 348 | title: "Home" 349 | nav_order: 1 350 | --- 351 | 352 | # Pocket Flow 353 | 354 | A [100-line](https://github.com/the-pocket/PocketFlow/blob/main/pocketflow/__init__.py) minimalist LLM framework for *Agents, Task Decomposition, RAG, etc*. 355 | 356 | - **Lightweight**: Just the core graph abstraction in 100 lines. Zero dependencies, zero vendor lock-in. 
357 | - **Expressive**: Everything you love from larger frameworks—([Multi-](./design_pattern/multi_agent.html))[Agents](./design_pattern/agent.html), [Workflow](./design_pattern/workflow.html), [RAG](./design_pattern/rag.html), and more. 358 | - **Agentic-Coding**: Intuitive enough for AI agents to help humans build complex LLM applications. 359 | 360 |
361 | 362 |
363 | 364 | ## Core Abstraction 365 | 366 | We model the LLM workflow as a **Graph + Shared Store**: 367 | 368 | - [Node](./core_abstraction/node.md) handles simple (LLM) tasks. 369 | - [Flow](./core_abstraction/flow.md) connects nodes through **Actions** (labeled edges). 370 | - [Shared Store](./core_abstraction/communication.md) enables communication between nodes within flows. 371 | - [Batch](./core_abstraction/batch.md) nodes/flows allow for data-intensive tasks. 372 | - [Async](./core_abstraction/async.md) nodes/flows allow waiting for asynchronous tasks. 373 | - [(Advanced) Parallel](./core_abstraction/parallel.md) nodes/flows handle I/O-bound tasks. 374 | 375 |
376 | 377 |
378 | 379 | ## Design Pattern 380 | 381 | From there, it’s easy to implement popular design patterns: 382 | 383 | - [Agent](./design_pattern/agent.md) autonomously makes decisions. 384 | - [Workflow](./design_pattern/workflow.md) chains multiple tasks into pipelines. 385 | - [RAG](./design_pattern/rag.md) integrates data retrieval with generation. 386 | - [Map Reduce](./design_pattern/mapreduce.md) splits data tasks into Map and Reduce steps. 387 | - [Structured Output](./design_pattern/structure.md) formats outputs consistently. 388 | - [(Advanced) Multi-Agents](./design_pattern/multi_agent.md) coordinate multiple agents. 389 | 390 |
391 | 392 |
393 | 394 | ## Utility Function 395 | 396 | We **do not** provide built-in utilities. Instead, we offer *examples*—please *implement your own*: 397 | 398 | - [LLM Wrapper](./utility_function/llm.md) 399 | - [Viz and Debug](./utility_function/viz.md) 400 | - [Web Search](./utility_function/websearch.md) 401 | - [Chunking](./utility_function/chunking.md) 402 | - [Embedding](./utility_function/embedding.md) 403 | - [Vector Databases](./utility_function/vector.md) 404 | - [Text-to-Speech](./utility_function/text_to_speech.md) 405 | 406 | **Why not built-in?**: I believe it's a *bad practice* for vendor-specific APIs in a general framework: 407 | - *API Volatility*: Frequent changes lead to heavy maintenance for hardcoded APIs. 408 | - *Flexibility*: You may want to switch vendors, use fine-tuned models, or run them locally. 409 | - *Optimizations*: Prompt caching, batching, and streaming are easier without vendor lock-in. 410 | 411 | ## Ready to build your Apps? 412 | 413 | Check out [Agentic Coding Guidance](./guide.md), the fastest way to develop LLM projects with Pocket Flow! 414 | 415 | ================================================ 416 | File: docs/core_abstraction/async.md 417 | ================================================ 418 | --- 419 | layout: default 420 | title: "(Advanced) Async" 421 | parent: "Core Abstraction" 422 | nav_order: 5 423 | --- 424 | 425 | # (Advanced) Async 426 | 427 | **Async** Nodes implement `prep_async()`, `exec_async()`, `exec_fallback_async()`, and/or `post_async()`. This is useful for: 428 | 429 | 1. **prep_async()**: For *fetching/reading data (files, APIs, DB)* in an I/O-friendly way. 430 | 2. **exec_async()**: Typically used for async LLM calls. 431 | 3. **post_async()**: For *awaiting user feedback*, *coordinating across multi-agents* or any additional async steps after `exec_async()`. 432 | 433 | **Note**: `AsyncNode` must be wrapped in `AsyncFlow`. `AsyncFlow` can also include regular (sync) nodes. 434 | 435 | ### Example 436 | 437 | ```python 438 | class SummarizeThenVerify(AsyncNode): 439 | async def prep_async(self, shared): 440 | # Example: read a file asynchronously 441 | doc_text = await read_file_async(shared["doc_path"]) 442 | return doc_text 443 | 444 | async def exec_async(self, prep_res): 445 | # Example: async LLM call 446 | summary = await call_llm_async(f"Summarize: {prep_res}") 447 | return summary 448 | 449 | async def post_async(self, shared, prep_res, exec_res): 450 | # Example: wait for user feedback 451 | decision = await gather_user_feedback(exec_res) 452 | if decision == "approve": 453 | shared["summary"] = exec_res 454 | return "approve" 455 | return "deny" 456 | 457 | summarize_node = SummarizeThenVerify() 458 | final_node = Finalize() 459 | 460 | # Define transitions 461 | summarize_node - "approve" >> final_node 462 | summarize_node - "deny" >> summarize_node # retry 463 | 464 | flow = AsyncFlow(start=summarize_node) 465 | 466 | async def main(): 467 | shared = {"doc_path": "document.txt"} 468 | await flow.run_async(shared) 469 | print("Final Summary:", shared.get("summary")) 470 | 471 | asyncio.run(main()) 472 | ``` 473 | 474 | ================================================ 475 | File: docs/core_abstraction/batch.md 476 | ================================================ 477 | --- 478 | layout: default 479 | title: "Batch" 480 | parent: "Core Abstraction" 481 | nav_order: 4 482 | --- 483 | 484 | # Batch 485 | 486 | **Batch** makes it easier to handle large inputs in one Node or **rerun** a Flow multiple times. 
Example use cases: 487 | - **Chunk-based** processing (e.g., splitting large texts). 488 | - **Iterative** processing over lists of input items (e.g., user queries, files, URLs). 489 | 490 | ## 1. BatchNode 491 | 492 | A **BatchNode** extends `Node` but changes `prep()` and `exec()`: 493 | 494 | - **`prep(shared)`**: returns an **iterable** (e.g., list, generator). 495 | - **`exec(item)`**: called **once** per item in that iterable. 496 | - **`post(shared, prep_res, exec_res_list)`**: after all items are processed, receives a **list** of results (`exec_res_list`) and returns an **Action**. 497 | 498 | 499 | ### Example: Summarize a Large File 500 | 501 | ```python 502 | class MapSummaries(BatchNode): 503 | def prep(self, shared): 504 | # Suppose we have a big file; chunk it 505 | content = shared["data"] 506 | chunk_size = 10000 507 | chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)] 508 | return chunks 509 | 510 | def exec(self, chunk): 511 | prompt = f"Summarize this chunk in 10 words: {chunk}" 512 | summary = call_llm(prompt) 513 | return summary 514 | 515 | def post(self, shared, prep_res, exec_res_list): 516 | combined = "\n".join(exec_res_list) 517 | shared["summary"] = combined 518 | return "default" 519 | 520 | map_summaries = MapSummaries() 521 | flow = Flow(start=map_summaries) 522 | flow.run(shared) 523 | ``` 524 | 525 | --- 526 | 527 | ## 2. BatchFlow 528 | 529 | A **BatchFlow** runs a **Flow** multiple times, each time with different `params`. Think of it as a loop that replays the Flow for each parameter set. 530 | 531 | ### Example: Summarize Many Files 532 | 533 | ```python 534 | class SummarizeAllFiles(BatchFlow): 535 | def prep(self, shared): 536 | # Return a list of param dicts (one per file) 537 | filenames = list(shared["data"].keys()) # e.g., ["file1.txt", "file2.txt", ...] 538 | return [{"filename": fn} for fn in filenames] 539 | 540 | # Suppose we have a per-file Flow (e.g., load_file >> summarize >> reduce): 541 | summarize_file = SummarizeFile(start=load_file) 542 | 543 | # Wrap that flow into a BatchFlow: 544 | summarize_all_files = SummarizeAllFiles(start=summarize_file) 545 | summarize_all_files.run(shared) 546 | ``` 547 | 548 | ### Under the Hood 549 | 1. `prep(shared)` returns a list of param dicts—e.g., `[{filename: "file1.txt"}, {filename: "file2.txt"}, ...]`. 550 | 2. The **BatchFlow** loops through each dict. For each one: 551 | - It merges the dict with the BatchFlow’s own `params`. 552 | - It calls `flow.run(shared)` using the merged result. 553 | 3. This means the sub-Flow is run **repeatedly**, once for every param dict. 554 | 555 | --- 556 | 557 | ## 3. Nested or Multi-Level Batches 558 | 559 | You can nest a **BatchFlow** in another **BatchFlow**. For instance: 560 | - **Outer** batch: returns a list of diretory param dicts (e.g., `{"directory": "/pathA"}`, `{"directory": "/pathB"}`, ...). 561 | - **Inner** batch: returning a list of per-file param dicts. 562 | 563 | At each level, **BatchFlow** merges its own param dict with the parent’s. By the time you reach the **innermost** node, the final `params` is the merged result of **all** parents in the chain. This way, a nested structure can keep track of the entire context (e.g., directory + file name) at once. 564 | 565 | ```python 566 | 567 | class FileBatchFlow(BatchFlow): 568 | def prep(self, shared): 569 | directory = self.params["directory"] 570 | # e.g., files = ["file1.txt", "file2.txt", ...] 
571 | files = [f for f in os.listdir(directory) if f.endswith(".txt")] 572 | return [{"filename": f} for f in files] 573 | 574 | class DirectoryBatchFlow(BatchFlow): 575 | def prep(self, shared): 576 | directories = [ "/path/to/dirA", "/path/to/dirB"] 577 | return [{"directory": d} for d in directories] 578 | 579 | # MapSummaries have params like {"directory": "/path/to/dirA", "filename": "file1.txt"} 580 | inner_flow = FileBatchFlow(start=MapSummaries()) 581 | outer_flow = DirectoryBatchFlow(start=inner_flow) 582 | ``` 583 | 584 | ================================================ 585 | File: docs/core_abstraction/communication.md 586 | ================================================ 587 | --- 588 | layout: default 589 | title: "Communication" 590 | parent: "Core Abstraction" 591 | nav_order: 3 592 | --- 593 | 594 | # Communication 595 | 596 | Nodes and Flows **communicate** in 2 ways: 597 | 598 | 1. **Shared Store (for almost all the cases)** 599 | 600 | - A global data structure (often an in-mem dict) that all nodes can read ( `prep()`) and write (`post()`). 601 | - Great for data results, large content, or anything multiple nodes need. 602 | - You shall design the data structure and populate it ahead. 603 | 604 | - > **Separation of Concerns:** Use `Shared Store` for almost all cases to separate *Data Schema* from *Compute Logic*! This approach is both flexible and easy to manage, resulting in more maintainable code. `Params` is more a syntax sugar for [Batch](./batch.md). 605 | {: .best-practice } 606 | 607 | 2. **Params (only for [Batch](./batch.md))** 608 | - Each node has a local, ephemeral `params` dict passed in by the **parent Flow**, used as an identifier for tasks. Parameter keys and values shall be **immutable**. 609 | - Good for identifiers like filenames or numeric IDs, in Batch mode. 610 | 611 | If you know memory management, think of the **Shared Store** like a **heap** (shared by all function calls), and **Params** like a **stack** (assigned by the caller). 612 | 613 | --- 614 | 615 | ## 1. Shared Store 616 | 617 | ### Overview 618 | 619 | A shared store is typically an in-mem dictionary, like: 620 | ```python 621 | shared = {"data": {}, "summary": {}, "config": {...}, ...} 622 | ``` 623 | 624 | It can also contain local file handlers, DB connections, or a combination for persistence. We recommend deciding the data structure or DB schema first based on your app requirements. 625 | 626 | ### Example 627 | 628 | ```python 629 | class LoadData(Node): 630 | def post(self, shared, prep_res, exec_res): 631 | # We write data to shared store 632 | shared["data"] = "Some text content" 633 | return None 634 | 635 | class Summarize(Node): 636 | def prep(self, shared): 637 | # We read data from shared store 638 | return shared["data"] 639 | 640 | def exec(self, prep_res): 641 | # Call LLM to summarize 642 | prompt = f"Summarize: {prep_res}" 643 | summary = call_llm(prompt) 644 | return summary 645 | 646 | def post(self, shared, prep_res, exec_res): 647 | # We write summary to shared store 648 | shared["summary"] = exec_res 649 | return "default" 650 | 651 | load_data = LoadData() 652 | summarize = Summarize() 653 | load_data >> summarize 654 | flow = Flow(start=load_data) 655 | 656 | shared = {} 657 | flow.run(shared) 658 | ``` 659 | 660 | Here: 661 | - `LoadData` writes to `shared["data"]`. 662 | - `Summarize` reads from `shared["data"]`, summarizes, and writes to `shared["summary"]`. 663 | 664 | --- 665 | 666 | ## 2. 
Params 667 | 668 | **Params** let you store *per-Node* or *per-Flow* config that doesn't need to live in the shared store. They are: 669 | - **Immutable** during a Node's run cycle (i.e., they don't change mid-`prep->exec->post`). 670 | - **Set** via `set_params()`. 671 | - **Cleared** and updated each time a parent Flow calls it. 672 | 673 | > Only set the uppermost Flow params because others will be overwritten by the parent Flow. 674 | > 675 | > If you need to set child node params, see [Batch](./batch.md). 676 | {: .warning } 677 | 678 | Typically, **Params** are identifiers (e.g., file name, page number). Use them to fetch the task you assigned or write to a specific part of the shared store. 679 | 680 | ### Example 681 | 682 | ```python 683 | # 1) Create a Node that uses params 684 | class SummarizeFile(Node): 685 | def prep(self, shared): 686 | # Access the node's param 687 | filename = self.params["filename"] 688 | return shared["data"].get(filename, "") 689 | 690 | def exec(self, prep_res): 691 | prompt = f"Summarize: {prep_res}" 692 | return call_llm(prompt) 693 | 694 | def post(self, shared, prep_res, exec_res): 695 | filename = self.params["filename"] 696 | shared["summary"][filename] = exec_res 697 | return "default" 698 | 699 | # 2) Set params 700 | node = SummarizeFile() 701 | 702 | # 3) Set Node params directly (for testing) 703 | node.set_params({"filename": "doc1.txt"}) 704 | node.run(shared) 705 | 706 | # 4) Create Flow 707 | flow = Flow(start=node) 708 | 709 | # 5) Set Flow params (overwrites node params) 710 | flow.set_params({"filename": "doc2.txt"}) 711 | flow.run(shared) # The node summarizes doc2, not doc1 712 | ``` 713 | 714 | ================================================ 715 | File: docs/core_abstraction/flow.md 716 | ================================================ 717 | --- 718 | layout: default 719 | title: "Flow" 720 | parent: "Core Abstraction" 721 | nav_order: 2 722 | --- 723 | 724 | # Flow 725 | 726 | A **Flow** orchestrates a graph of Nodes. You can chain Nodes in a sequence or create branching depending on the **Actions** returned from each Node's `post()`. 727 | 728 | ## 1. Action-based Transitions 729 | 730 | Each Node's `post()` returns an **Action** string. By default, if `post()` doesn't return anything, we treat that as `"default"`. 731 | 732 | You define transitions with the syntax: 733 | 734 | 1. **Basic default transition**: `node_a >> node_b` 735 | This means if `node_a.post()` returns `"default"`, go to `node_b`. 736 | (Equivalent to `node_a - "default" >> node_b`) 737 | 738 | 2. **Named action transition**: `node_a - "action_name" >> node_b` 739 | This means if `node_a.post()` returns `"action_name"`, go to `node_b`. 740 | 741 | It's possible to create loops, branching, or multi-step flows. 742 | 743 | ## 2. Creating a Flow 744 | 745 | A **Flow** begins with a **start** node. You call `Flow(start=some_node)` to specify the entry point. When you call `flow.run(shared)`, it executes the start node, looks at its returned Action from `post()`, follows the transition, and continues until there's no next node. 746 | 747 | ### Example: Simple Sequence 748 | 749 | Here's a minimal flow of two nodes in a chain: 750 | 751 | ```python 752 | node_a >> node_b 753 | flow = Flow(start=node_a) 754 | flow.run(shared) 755 | ``` 756 | 757 | - When you run the flow, it executes `node_a`. 758 | - Suppose `node_a.post()` returns `"default"`. 759 | - The flow then sees `"default"` Action is linked to `node_b` and runs `node_b`. 
760 | - `node_b.post()` returns `"default"` but we didn't define `node_b >> something_else`. So the flow ends there. 761 | 762 | ### Example: Branching & Looping 763 | 764 | Here's a simple expense approval flow that demonstrates branching and looping. The `ReviewExpense` node can return three possible Actions: 765 | 766 | - `"approved"`: expense is approved, move to payment processing 767 | - `"needs_revision"`: expense needs changes, send back for revision 768 | - `"rejected"`: expense is denied, finish the process 769 | 770 | We can wire them like this: 771 | 772 | ```python 773 | # Define the flow connections 774 | review - "approved" >> payment # If approved, process payment 775 | review - "needs_revision" >> revise # If needs changes, go to revision 776 | review - "rejected" >> finish # If rejected, finish the process 777 | 778 | revise >> review # After revision, go back for another review 779 | payment >> finish # After payment, finish the process 780 | 781 | flow = Flow(start=review) 782 | ``` 783 | 784 | Let's see how it flows: 785 | 786 | 1. If `review.post()` returns `"approved"`, the expense moves to the `payment` node 787 | 2. If `review.post()` returns `"needs_revision"`, it goes to the `revise` node, which then loops back to `review` 788 | 3. If `review.post()` returns `"rejected"`, it moves to the `finish` node and stops 789 | 790 | ```mermaid 791 | flowchart TD 792 | review[Review Expense] -->|approved| payment[Process Payment] 793 | review -->|needs_revision| revise[Revise Report] 794 | review -->|rejected| finish[Finish Process] 795 | 796 | revise --> review 797 | payment --> finish 798 | ``` 799 | 800 | ### Running Individual Nodes vs. Running a Flow 801 | 802 | - `node.run(shared)`: Just runs that node alone (calls `prep->exec->post()`), returns an Action. 803 | - `flow.run(shared)`: Executes from the start node, follows Actions to the next node, and so on until the flow can't continue. 804 | 805 | > `node.run(shared)` **does not** proceed to the successor. 806 | > This is mainly for debugging or testing a single node. 807 | > 808 | > Always use `flow.run(...)` in production to ensure the full pipeline runs correctly. 809 | {: .warning } 810 | 811 | ## 3. Nested Flows 812 | 813 | A **Flow** can act like a Node, which enables powerful composition patterns. This means you can: 814 | 815 | 1. Use a Flow as a Node within another Flow's transitions. 816 | 2. Combine multiple smaller Flows into a larger Flow for reuse. 817 | 3. Node `params` will be a merging of **all** parents' `params`. 818 | 819 | ### Flow's Node Methods 820 | 821 | A **Flow** is also a **Node**, so it will run `prep()` and `post()`. However: 822 | 823 | - It **won't** run `exec()`, as its main logic is to orchestrate its nodes. 824 | - `post()` always receives `None` for `exec_res` and should instead get the flow execution results from the shared store. 825 | 826 | ### Basic Flow Nesting 827 | 828 | Here's how to connect a flow to another node: 829 | 830 | ```python 831 | # Create a sub-flow 832 | node_a >> node_b 833 | subflow = Flow(start=node_a) 834 | 835 | # Connect it to another node 836 | subflow >> node_c 837 | 838 | # Create the parent flow 839 | parent_flow = Flow(start=subflow) 840 | ``` 841 | 842 | When `parent_flow.run()` executes: 843 | 1. It starts `subflow` 844 | 2. `subflow` runs through its nodes (`node_a->node_b`) 845 | 3. 
After `subflow` completes, execution continues to `node_c` 846 | 847 | ### Example: Order Processing Pipeline 848 | 849 | Here's a practical example that breaks down order processing into nested flows: 850 | 851 | ```python 852 | # Payment processing sub-flow 853 | validate_payment >> process_payment >> payment_confirmation 854 | payment_flow = Flow(start=validate_payment) 855 | 856 | # Inventory sub-flow 857 | check_stock >> reserve_items >> update_inventory 858 | inventory_flow = Flow(start=check_stock) 859 | 860 | # Shipping sub-flow 861 | create_label >> assign_carrier >> schedule_pickup 862 | shipping_flow = Flow(start=create_label) 863 | 864 | # Connect the flows into a main order pipeline 865 | payment_flow >> inventory_flow >> shipping_flow 866 | 867 | # Create the master flow 868 | order_pipeline = Flow(start=payment_flow) 869 | 870 | # Run the entire pipeline 871 | order_pipeline.run(shared_data) 872 | ``` 873 | 874 | This creates a clean separation of concerns while maintaining a clear execution path: 875 | 876 | ```mermaid 877 | flowchart LR 878 | subgraph order_pipeline[Order Pipeline] 879 | subgraph paymentFlow["Payment Flow"] 880 | A[Validate Payment] --> B[Process Payment] --> C[Payment Confirmation] 881 | end 882 | 883 | subgraph inventoryFlow["Inventory Flow"] 884 | D[Check Stock] --> E[Reserve Items] --> F[Update Inventory] 885 | end 886 | 887 | subgraph shippingFlow["Shipping Flow"] 888 | G[Create Label] --> H[Assign Carrier] --> I[Schedule Pickup] 889 | end 890 | 891 | paymentFlow --> inventoryFlow 892 | inventoryFlow --> shippingFlow 893 | end 894 | ``` 895 | 896 | ================================================ 897 | File: docs/core_abstraction/node.md 898 | ================================================ 899 | --- 900 | layout: default 901 | title: "Node" 902 | parent: "Core Abstraction" 903 | nav_order: 1 904 | --- 905 | 906 | # Node 907 | 908 | A **Node** is the smallest building block. Each Node has 3 steps `prep->exec->post`: 909 | 910 |
911 | 912 |
913 | 914 | 1. `prep(shared)` 915 | - **Read and preprocess data** from `shared` store. 916 | - Examples: *query DB, read files, or serialize data into a string*. 917 | - Return `prep_res`, which is used by `exec()` and `post()`. 918 | 919 | 2. `exec(prep_res)` 920 | - **Execute compute logic**, with optional retries and error handling (below). 921 | - Examples: *(mostly) LLM calls, remote APIs, tool use*. 922 | - ⚠️ This shall be only for compute and **NOT** access `shared`. 923 | - ⚠️ If retries enabled, ensure idempotent implementation. 924 | - ⚠️ Defer exception handling to the Node's built-in retry mechanism. 925 | - Return `exec_res`, which is passed to `post()`. 926 | 927 | 3. `post(shared, prep_res, exec_res)` 928 | - **Postprocess and write data** back to `shared`. 929 | - Examples: *update DB, change states, log results*. 930 | - **Decide the next action** by returning a *string* (`action = "default"` if *None*). 931 | 932 | > **Why 3 steps?** To enforce the principle of *separation of concerns*. The data storage and data processing are operated separately. 933 | > 934 | > All steps are *optional*. E.g., you can only implement `prep` and `post` if you just need to process data. 935 | {: .note } 936 | 937 | ### Fault Tolerance & Retries 938 | 939 | You can **retry** `exec()` if it raises an exception via two parameters when define the Node: 940 | 941 | - `max_retries` (int): Max times to run `exec()`. The default is `1` (**no** retry). 942 | - `wait` (int): The time to wait (in **seconds**) before next retry. By default, `wait=0` (no waiting). 943 | `wait` is helpful when you encounter rate-limits or quota errors from your LLM provider and need to back off. 944 | 945 | ```python 946 | my_node = SummarizeFile(max_retries=3, wait=10) 947 | ``` 948 | 949 | When an exception occurs in `exec()`, the Node automatically retries until: 950 | 951 | - It either succeeds, or 952 | - The Node has retried `max_retries - 1` times already and fails on the last attempt. 953 | 954 | You can get the current retry times (0-based) from `self.cur_retry`. 955 | 956 | ```python 957 | class RetryNode(Node): 958 | def exec(self, prep_res): 959 | print(f"Retry {self.cur_retry} times") 960 | raise Exception("Failed") 961 | ``` 962 | 963 | ### Graceful Fallback 964 | 965 | To **gracefully handle** the exception (after all retries) rather than raising it, override: 966 | 967 | ```python 968 | def exec_fallback(self, prep_res, exc): 969 | raise exc 970 | ``` 971 | 972 | By default, it just re-raises exception. But you can return a fallback result instead, which becomes the `exec_res` passed to `post()`. 973 | 974 | ### Example: Summarize file 975 | 976 | ```python 977 | class SummarizeFile(Node): 978 | def prep(self, shared): 979 | return shared["data"] 980 | 981 | def exec(self, prep_res): 982 | if not prep_res: 983 | return "Empty file content" 984 | prompt = f"Summarize this text in 10 words: {prep_res}" 985 | summary = call_llm(prompt) # might fail 986 | return summary 987 | 988 | def exec_fallback(self, prep_res, exc): 989 | # Provide a simple fallback instead of crashing 990 | return "There was an error processing your request." 
991 | 992 | def post(self, shared, prep_res, exec_res): 993 | shared["summary"] = exec_res 994 | # Return "default" by not returning 995 | 996 | summarize_node = SummarizeFile(max_retries=3) 997 | 998 | # node.run() calls prep->exec->post 999 | # If exec() fails, it retries up to 3 times before calling exec_fallback() 1000 | action_result = summarize_node.run(shared) 1001 | 1002 | print("Action returned:", action_result) # "default" 1003 | print("Summary stored:", shared["summary"]) 1004 | ``` 1005 | 1006 | ================================================ 1007 | File: docs/core_abstraction/parallel.md 1008 | ================================================ 1009 | --- 1010 | layout: default 1011 | title: "(Advanced) Parallel" 1012 | parent: "Core Abstraction" 1013 | nav_order: 6 1014 | --- 1015 | 1016 | # (Advanced) Parallel 1017 | 1018 | **Parallel** Nodes and Flows let you run multiple **Async** Nodes and Flows **concurrently**—for example, summarizing multiple texts at once. This can improve performance by overlapping I/O and compute. 1019 | 1020 | > Because of Python’s GIL, parallel nodes and flows can’t truly parallelize CPU-bound tasks (e.g., heavy numerical computations). However, they excel at overlapping I/O-bound work—like LLM calls, database queries, API requests, or file I/O. 1021 | {: .warning } 1022 | 1023 | > - **Ensure Tasks Are Independent**: If each item depends on the output of a previous item, **do not** parallelize. 1024 | > 1025 | > - **Beware of Rate Limits**: Parallel calls can **quickly** trigger rate limits on LLM services. You may need a **throttling** mechanism (e.g., semaphores or sleep intervals). 1026 | > 1027 | > - **Consider Single-Node Batch APIs**: Some LLMs offer a **batch inference** API where you can send multiple prompts in a single call. This is more complex to implement but can be more efficient than launching many parallel requests and mitigates rate limits. 1028 | {: .best-practice } 1029 | 1030 | ## AsyncParallelBatchNode 1031 | 1032 | Like **AsyncBatchNode**, but run `exec_async()` in **parallel**: 1033 | 1034 | ```python 1035 | class ParallelSummaries(AsyncParallelBatchNode): 1036 | async def prep_async(self, shared): 1037 | # e.g., multiple texts 1038 | return shared["texts"] 1039 | 1040 | async def exec_async(self, text): 1041 | prompt = f"Summarize: {text}" 1042 | return await call_llm_async(prompt) 1043 | 1044 | async def post_async(self, shared, prep_res, exec_res_list): 1045 | shared["summary"] = "\n\n".join(exec_res_list) 1046 | return "default" 1047 | 1048 | node = ParallelSummaries() 1049 | flow = AsyncFlow(start=node) 1050 | ``` 1051 | 1052 | ## AsyncParallelBatchFlow 1053 | 1054 | Parallel version of **BatchFlow**. 
Each iteration of the sub-flow runs **concurrently** using different parameters: 1055 | 1056 | ```python 1057 | class SummarizeMultipleFiles(AsyncParallelBatchFlow): 1058 | async def prep_async(self, shared): 1059 | return [{"filename": f} for f in shared["files"]] 1060 | 1061 | sub_flow = AsyncFlow(start=LoadAndSummarizeFile()) 1062 | parallel_flow = SummarizeMultipleFiles(start=sub_flow) 1063 | await parallel_flow.run_async(shared) 1064 | ``` 1065 | 1066 | ================================================ 1067 | File: docs/design_pattern/agent.md 1068 | ================================================ 1069 | --- 1070 | layout: default 1071 | title: "Agent" 1072 | parent: "Design Pattern" 1073 | nav_order: 1 1074 | --- 1075 | 1076 | # Agent 1077 | 1078 | Agent is a powerful design pattern in which nodes can take dynamic actions based on the context. 1079 | 1080 |
1083 | 1084 | ## Implement Agent with Graph 1085 | 1086 | 1. **Context and Action:** Implement nodes that supply context and perform actions. 1087 | 2. **Branching:** Use branching to connect each action node to an agent node. Use action to allow the agent to direct the [flow](../core_abstraction/flow.md) between nodes—and potentially loop back for multi-step. 1088 | 3. **Agent Node:** Provide a prompt to decide action—for example: 1089 | 1090 | ```python 1091 | f""" 1092 | ### CONTEXT 1093 | Task: {task_description} 1094 | Previous Actions: {previous_actions} 1095 | Current State: {current_state} 1096 | 1097 | ### ACTION SPACE 1098 | [1] search 1099 | Description: Use web search to get results 1100 | Parameters: 1101 | - query (str): What to search for 1102 | 1103 | [2] answer 1104 | Description: Conclude based on the results 1105 | Parameters: 1106 | - result (str): Final answer to provide 1107 | 1108 | ### NEXT ACTION 1109 | Decide the next action based on the current context and available action space. 1110 | Return your response in the following format: 1111 | 1112 | ```yaml 1113 | thinking: | 1114 | 1115 | action: 1116 | parameters: 1117 | : 1118 | ```""" 1119 | ``` 1120 | 1121 | The core of building **high-performance** and **reliable** agents boils down to: 1122 | 1123 | 1. **Context Management:** Provide *relevant, minimal context.* For example, rather than including an entire chat history, retrieve the most relevant via [RAG](./rag.md). Even with larger context windows, LLMs still fall victim to ["lost in the middle"](https://arxiv.org/abs/2307.03172), overlooking mid-prompt content. 1124 | 1125 | 2. **Action Space:** Provide *a well-structured and unambiguous* set of actions—avoiding overlap like separate `read_databases` or `read_csvs`. Instead, import CSVs into the database. 1126 | 1127 | ## Example Good Action Design 1128 | 1129 | - **Incremental:** Feed content in manageable chunks (500 lines or 1 page) instead of all at once. 1130 | 1131 | - **Overview-zoom-in:** First provide high-level structure (table of contents, summary), then allow drilling into details (raw texts). 1132 | 1133 | - **Parameterized/Programmable:** Instead of fixed actions, enable parameterized (columns to select) or programmable (SQL queries) actions, for example, to read CSV files. 1134 | 1135 | - **Backtracking:** Let the agent undo the last step instead of restarting entirely, preserving progress when encountering errors or dead ends. 1136 | 1137 | ## Example: Search Agent 1138 | 1139 | This agent: 1140 | 1. Decides whether to search or answer 1141 | 2. If searches, loops back to decide if more search needed 1142 | 3. 
Answers when enough context gathered 1143 | 1144 | ```python 1145 | class DecideAction(Node): 1146 | def prep(self, shared): 1147 | context = shared.get("context", "No previous search") 1148 | query = shared["query"] 1149 | return query, context 1150 | 1151 | def exec(self, inputs): 1152 | query, context = inputs 1153 | prompt = f""" 1154 | Given input: {query} 1155 | Previous search results: {context} 1156 | Should I: 1) Search web for more info 2) Answer with current knowledge 1157 | Output in yaml: 1158 | ```yaml 1159 | action: search/answer 1160 | reason: why this action 1161 | search_term: search phrase if action is search 1162 | ```""" 1163 | resp = call_llm(prompt) 1164 | yaml_str = resp.split("```yaml")[1].split("```")[0].strip() 1165 | result = yaml.safe_load(yaml_str) 1166 | 1167 | assert isinstance(result, dict) 1168 | assert "action" in result 1169 | assert "reason" in result 1170 | assert result["action"] in ["search", "answer"] 1171 | if result["action"] == "search": 1172 | assert "search_term" in result 1173 | 1174 | return result 1175 | 1176 | def post(self, shared, prep_res, exec_res): 1177 | if exec_res["action"] == "search": 1178 | shared["search_term"] = exec_res["search_term"] 1179 | return exec_res["action"] 1180 | 1181 | class SearchWeb(Node): 1182 | def prep(self, shared): 1183 | return shared["search_term"] 1184 | 1185 | def exec(self, search_term): 1186 | return search_web(search_term) 1187 | 1188 | def post(self, shared, prep_res, exec_res): 1189 | prev_searches = shared.get("context", []) 1190 | shared["context"] = prev_searches + [ 1191 | {"term": shared["search_term"], "result": exec_res} 1192 | ] 1193 | return "decide" 1194 | 1195 | class DirectAnswer(Node): 1196 | def prep(self, shared): 1197 | return shared["query"], shared.get("context", "") 1198 | 1199 | def exec(self, inputs): 1200 | query, context = inputs 1201 | return call_llm(f"Context: {context}\nAnswer: {query}") 1202 | 1203 | def post(self, shared, prep_res, exec_res): 1204 | print(f"Answer: {exec_res}") 1205 | shared["answer"] = exec_res 1206 | 1207 | # Connect nodes 1208 | decide = DecideAction() 1209 | search = SearchWeb() 1210 | answer = DirectAnswer() 1211 | 1212 | decide - "search" >> search 1213 | decide - "answer" >> answer 1214 | search - "decide" >> decide # Loop back 1215 | 1216 | flow = Flow(start=decide) 1217 | flow.run({"query": "Who won the Nobel Prize in Physics 2024?"}) 1218 | ``` 1219 | 1220 | ================================================ 1221 | File: docs/design_pattern/mapreduce.md 1222 | ================================================ 1223 | --- 1224 | layout: default 1225 | title: "Map Reduce" 1226 | parent: "Design Pattern" 1227 | nav_order: 4 1228 | --- 1229 | 1230 | # Map Reduce 1231 | 1232 | MapReduce is a design pattern suitable when you have either: 1233 | - Large input data (e.g., multiple files to process), or 1234 | - Large output data (e.g., multiple forms to fill) 1235 | 1236 | and there is a logical way to break the task into smaller, ideally independent parts. 1237 | 1238 |
1241 | 1242 | You first break down the task using [BatchNode](../core_abstraction/batch.md) in the map phase, followed by aggregation in the reduce phase. 1243 | 1244 | ### Example: Document Summarization 1245 | 1246 | ```python 1247 | class SummarizeAllFiles(BatchNode): 1248 | def prep(self, shared): 1249 | files_dict = shared["files"] # e.g. 10 files 1250 | return list(files_dict.items()) # [("file1.txt", "aaa..."), ("file2.txt", "bbb..."), ...] 1251 | 1252 | def exec(self, one_file): 1253 | filename, file_content = one_file 1254 | summary_text = call_llm(f"Summarize the following file:\n{file_content}") 1255 | return (filename, summary_text) 1256 | 1257 | def post(self, shared, prep_res, exec_res_list): 1258 | shared["file_summaries"] = dict(exec_res_list) 1259 | 1260 | class CombineSummaries(Node): 1261 | def prep(self, shared): 1262 | return shared["file_summaries"] 1263 | 1264 | def exec(self, file_summaries): 1265 | # format as: "File1: summary\nFile2: summary...\n" 1266 | text_list = [] 1267 | for fname, summ in file_summaries.items(): 1268 | text_list.append(f"{fname} summary:\n{summ}\n") 1269 | big_text = "\n---\n".join(text_list) 1270 | 1271 | return call_llm(f"Combine these file summaries into one final summary:\n{big_text}") 1272 | 1273 | def post(self, shared, prep_res, final_summary): 1274 | shared["all_files_summary"] = final_summary 1275 | 1276 | batch_node = SummarizeAllFiles() 1277 | combine_node = CombineSummaries() 1278 | batch_node >> combine_node 1279 | 1280 | flow = Flow(start=batch_node) 1281 | 1282 | shared = { 1283 | "files": { 1284 | "file1.txt": "Alice was beginning to get very tired of sitting by her sister...", 1285 | "file2.txt": "Some other interesting text ...", 1286 | # ... 1287 | } 1288 | } 1289 | flow.run(shared) 1290 | print("Individual Summaries:", shared["file_summaries"]) 1291 | print("\nFinal Summary:\n", shared["all_files_summary"]) 1292 | ``` 1293 | 1294 | ================================================ 1295 | File: docs/design_pattern/rag.md 1296 | ================================================ 1297 | --- 1298 | layout: default 1299 | title: "RAG" 1300 | parent: "Design Pattern" 1301 | nav_order: 3 1302 | --- 1303 | 1304 | # RAG (Retrieval Augmented Generation) 1305 | 1306 | For certain LLM tasks like answering questions, providing relevant context is essential. One common architecture is a **two-stage** RAG pipeline: 1307 | 1308 |
1311 | 1312 | 1. **Offline stage**: Preprocess and index documents ("building the index"). 1313 | 2. **Online stage**: Given a question, generate answers by retrieving the most relevant context. 1314 | 1315 | --- 1316 | ## Stage 1: Offline Indexing 1317 | 1318 | We create three Nodes: 1319 | 1. `ChunkDocs` – [chunks](../utility_function/chunking.md) raw text. 1320 | 2. `EmbedDocs` – [embeds](../utility_function/embedding.md) each chunk. 1321 | 3. `StoreIndex` – stores embeddings into a [vector database](../utility_function/vector.md). 1322 | 1323 | ```python 1324 | class ChunkDocs(BatchNode): 1325 | def prep(self, shared): 1326 | # A list of file paths in shared["files"]. We process each file. 1327 | return shared["files"] 1328 | 1329 | def exec(self, filepath): 1330 | # read file content. In real usage, do error handling. 1331 | with open(filepath, "r", encoding="utf-8") as f: 1332 | text = f.read() 1333 | # chunk by 100 chars each 1334 | chunks = [] 1335 | size = 100 1336 | for i in range(0, len(text), size): 1337 | chunks.append(text[i : i + size]) 1338 | return chunks 1339 | 1340 | def post(self, shared, prep_res, exec_res_list): 1341 | # exec_res_list is a list of chunk-lists, one per file. 1342 | # flatten them all into a single list of chunks. 1343 | all_chunks = [] 1344 | for chunk_list in exec_res_list: 1345 | all_chunks.extend(chunk_list) 1346 | shared["all_chunks"] = all_chunks 1347 | 1348 | class EmbedDocs(BatchNode): 1349 | def prep(self, shared): 1350 | return shared["all_chunks"] 1351 | 1352 | def exec(self, chunk): 1353 | return get_embedding(chunk) 1354 | 1355 | def post(self, shared, prep_res, exec_res_list): 1356 | # Store the list of embeddings. 1357 | shared["all_embeds"] = exec_res_list 1358 | print(f"Total embeddings: {len(exec_res_list)}") 1359 | 1360 | class StoreIndex(Node): 1361 | def prep(self, shared): 1362 | # We'll read all embeds from shared. 1363 | return shared["all_embeds"] 1364 | 1365 | def exec(self, all_embeds): 1366 | # Create a vector index (faiss or other DB in real usage). 1367 | index = create_index(all_embeds) 1368 | return index 1369 | 1370 | def post(self, shared, prep_res, index): 1371 | shared["index"] = index 1372 | 1373 | # Wire them in sequence 1374 | chunk_node = ChunkDocs() 1375 | embed_node = EmbedDocs() 1376 | store_node = StoreIndex() 1377 | 1378 | chunk_node >> embed_node >> store_node 1379 | 1380 | OfflineFlow = Flow(start=chunk_node) 1381 | ``` 1382 | 1383 | Usage example: 1384 | 1385 | ```python 1386 | shared = { 1387 | "files": ["doc1.txt", "doc2.txt"], # any text files 1388 | } 1389 | OfflineFlow.run(shared) 1390 | ``` 1391 | 1392 | --- 1393 | ## Stage 2: Online Query & Answer 1394 | 1395 | We have 3 nodes: 1396 | 1. `EmbedQuery` – embeds the user’s question. 1397 | 2. `RetrieveDocs` – retrieves top chunk from the index. 1398 | 3. `GenerateAnswer` – calls the LLM with the question + chunk to produce the final answer. 
1399 | 1400 | ```python 1401 | class EmbedQuery(Node): 1402 | def prep(self, shared): 1403 | return shared["question"] 1404 | 1405 | def exec(self, question): 1406 | return get_embedding(question) 1407 | 1408 | def post(self, shared, prep_res, q_emb): 1409 | shared["q_emb"] = q_emb 1410 | 1411 | class RetrieveDocs(Node): 1412 | def prep(self, shared): 1413 | # We'll need the query embedding, plus the offline index/chunks 1414 | return shared["q_emb"], shared["index"], shared["all_chunks"] 1415 | 1416 | def exec(self, inputs): 1417 | q_emb, index, chunks = inputs 1418 | I, D = search_index(index, q_emb, top_k=1) 1419 | best_id = I[0][0] 1420 | relevant_chunk = chunks[best_id] 1421 | return relevant_chunk 1422 | 1423 | def post(self, shared, prep_res, relevant_chunk): 1424 | shared["retrieved_chunk"] = relevant_chunk 1425 | print("Retrieved chunk:", relevant_chunk[:60], "...") 1426 | 1427 | class GenerateAnswer(Node): 1428 | def prep(self, shared): 1429 | return shared["question"], shared["retrieved_chunk"] 1430 | 1431 | def exec(self, inputs): 1432 | question, chunk = inputs 1433 | prompt = f"Question: {question}\nContext: {chunk}\nAnswer:" 1434 | return call_llm(prompt) 1435 | 1436 | def post(self, shared, prep_res, answer): 1437 | shared["answer"] = answer 1438 | print("Answer:", answer) 1439 | 1440 | embed_qnode = EmbedQuery() 1441 | retrieve_node = RetrieveDocs() 1442 | generate_node = GenerateAnswer() 1443 | 1444 | embed_qnode >> retrieve_node >> generate_node 1445 | OnlineFlow = Flow(start=embed_qnode) 1446 | ``` 1447 | 1448 | Usage example: 1449 | 1450 | ```python 1451 | # Suppose we already ran OfflineFlow and have: 1452 | # shared["all_chunks"], shared["index"], etc. 1453 | shared["question"] = "Why do people like cats?" 1454 | 1455 | OnlineFlow.run(shared) 1456 | # final answer in shared["answer"] 1457 | ``` 1458 | 1459 | ================================================ 1460 | File: docs/design_pattern/structure.md 1461 | ================================================ 1462 | --- 1463 | layout: default 1464 | title: "Structured Output" 1465 | parent: "Design Pattern" 1466 | nav_order: 5 1467 | --- 1468 | 1469 | # Structured Output 1470 | 1471 | In many use cases, you may want the LLM to output a specific structure, such as a list or a dictionary with predefined keys. 1472 | 1473 | There are several approaches to achieve a structured output: 1474 | - **Prompting** the LLM to strictly return a defined structure. 1475 | - Using LLMs that natively support **schema enforcement**. 1476 | - **Post-processing** the LLM's response to extract structured content. 1477 | 1478 | In practice, **Prompting** is simple and reliable for modern LLMs. 1479 | 1480 | ### Example Use Cases 1481 | 1482 | - Extracting Key Information 1483 | 1484 | ```yaml 1485 | product: 1486 | name: Widget Pro 1487 | price: 199.99 1488 | description: | 1489 | A high-quality widget designed for professionals. 1490 | Recommended for advanced users. 1491 | ``` 1492 | 1493 | - Summarizing Documents into Bullet Points 1494 | 1495 | ```yaml 1496 | summary: 1497 | - This product is easy to use. 1498 | - It is cost-effective. 1499 | - Suitable for all skill levels. 1500 | ``` 1501 | 1502 | - Generating Configuration Files 1503 | 1504 | ```yaml 1505 | server: 1506 | host: 127.0.0.1 1507 | port: 8080 1508 | ssl: true 1509 | ``` 1510 | 1511 | ## Prompt Engineering 1512 | 1513 | When prompting the LLM to produce **structured** output: 1514 | 1. **Wrap** the structure in code fences (e.g., `yaml`). 1515 | 2. 
**Validate** that all required fields exist (and let `Node` handles retry). 1516 | 1517 | ### Example Text Summarization 1518 | 1519 | ```python 1520 | class SummarizeNode(Node): 1521 | def exec(self, prep_res): 1522 | # Suppose `prep_res` is the text to summarize. 1523 | prompt = f""" 1524 | Please summarize the following text as YAML, with exactly 3 bullet points 1525 | 1526 | {prep_res} 1527 | 1528 | Now, output: 1529 | ```yaml 1530 | summary: 1531 | - bullet 1 1532 | - bullet 2 1533 | - bullet 3 1534 | ```""" 1535 | response = call_llm(prompt) 1536 | yaml_str = response.split("```yaml")[1].split("```")[0].strip() 1537 | 1538 | import yaml 1539 | structured_result = yaml.safe_load(yaml_str) 1540 | 1541 | assert "summary" in structured_result 1542 | assert isinstance(structured_result["summary"], list) 1543 | 1544 | return structured_result 1545 | ``` 1546 | 1547 | > Besides using `assert` statements, another popular way to validate schemas is [Pydantic](https://github.com/pydantic/pydantic) 1548 | {: .note } 1549 | 1550 | ### Why YAML instead of JSON? 1551 | 1552 | Current LLMs struggle with escaping. YAML is easier with strings since they don't always need quotes. 1553 | 1554 | **In JSON** 1555 | 1556 | ```json 1557 | { 1558 | "dialogue": "Alice said: \"Hello Bob.\\nHow are you?\\nI am good.\"" 1559 | } 1560 | ``` 1561 | 1562 | - Every double quote inside the string must be escaped with `\"`. 1563 | - Each newline in the dialogue must be represented as `\n`. 1564 | 1565 | **In YAML** 1566 | 1567 | ```yaml 1568 | dialogue: | 1569 | Alice said: "Hello Bob. 1570 | How are you? 1571 | I am good." 1572 | ``` 1573 | 1574 | - No need to escape interior quotes—just place the entire text under a block literal (`|`). 1575 | - Newlines are naturally preserved without needing `\n`. 1576 | 1577 | ================================================ 1578 | File: docs/design_pattern/workflow.md 1579 | ================================================ 1580 | --- 1581 | layout: default 1582 | title: "Workflow" 1583 | parent: "Design Pattern" 1584 | nav_order: 2 1585 | --- 1586 | 1587 | # Workflow 1588 | 1589 | Many real-world tasks are too complex for one LLM call. The solution is to **Task Decomposition**: decompose them into a [chain](../core_abstraction/flow.md) of multiple Nodes. 1590 | 1591 |
1594 | 1595 | > - You don't want to make each task **too coarse**, because it may be *too complex for one LLM call*. 1596 | > - You don't want to make each task **too granular**, because then *the LLM call doesn't have enough context* and results are *not consistent across nodes*. 1597 | > 1598 | > You usually need multiple *iterations* to find the *sweet spot*. If the task has too many *edge cases*, consider using [Agents](./agent.md). 1599 | {: .best-practice } 1600 | 1601 | ### Example: Article Writing 1602 | 1603 | ```python 1604 | class GenerateOutline(Node): 1605 | def prep(self, shared): return shared["topic"] 1606 | def exec(self, topic): return call_llm(f"Create a detailed outline for an article about {topic}") 1607 | def post(self, shared, prep_res, exec_res): shared["outline"] = exec_res 1608 | 1609 | class WriteSection(Node): 1610 | def prep(self, shared): return shared["outline"] 1611 | def exec(self, outline): return call_llm(f"Write content based on this outline: {outline}") 1612 | def post(self, shared, prep_res, exec_res): shared["draft"] = exec_res 1613 | 1614 | class ReviewAndRefine(Node): 1615 | def prep(self, shared): return shared["draft"] 1616 | def exec(self, draft): return call_llm(f"Review and improve this draft: {draft}") 1617 | def post(self, shared, prep_res, exec_res): shared["final_article"] = exec_res 1618 | 1619 | # Connect nodes 1620 | outline = GenerateOutline() 1621 | write = WriteSection() 1622 | review = ReviewAndRefine() 1623 | 1624 | outline >> write >> review 1625 | 1626 | # Create and run flow 1627 | writing_flow = Flow(start=outline) 1628 | shared = {"topic": "AI Safety"} 1629 | writing_flow.run(shared) 1630 | ``` 1631 | 1632 | For *dynamic cases*, consider using [Agents](./agent.md). 1633 | 1634 | ================================================ 1635 | File: docs/utility_function/llm.md 1636 | ================================================ 1637 | --- 1638 | layout: default 1639 | title: "LLM Wrapper" 1640 | parent: "Utility Function" 1641 | nav_order: 1 1642 | --- 1643 | 1644 | # LLM Wrappers 1645 | 1646 | Check out libraries like [litellm](https://github.com/BerriAI/litellm). 1647 | Here, we provide some minimal example implementations: 1648 | 1649 | 1. OpenAI 1650 | ```python 1651 | def call_llm(prompt): 1652 | from openai import OpenAI 1653 | client = OpenAI(api_key="YOUR_API_KEY_HERE") 1654 | r = client.chat.completions.create( 1655 | model="gpt-4o", 1656 | messages=[{"role": "user", "content": prompt}] 1657 | ) 1658 | return r.choices[0].message.content 1659 | 1660 | # Example usage 1661 | call_llm("How are you?") 1662 | ``` 1663 | > Store the API key in an environment variable like OPENAI_API_KEY for security. 1664 | {: .best-practice } 1665 | 1666 | 2. Claude (Anthropic) 1667 | ```python 1668 | def call_llm(prompt): 1669 | from anthropic import Anthropic 1670 | client = Anthropic(api_key="YOUR_API_KEY_HERE") 1671 | r = client.messages.create( 1672 | model="claude-sonnet-4-0", 1673 | messages=[ 1674 | {"role": "user", "content": prompt} 1675 | ] 1676 | ) 1677 | return r.content[0].text 1678 | ``` 1679 | 1680 | 3. Google (Generative AI Studio / PaLM API) 1681 | ```python 1682 | def call_llm(prompt): 1683 | from google import genai 1684 | client = genai.Client(api_key='GEMINI_API_KEY') 1685 | response = client.models.generate_content( 1686 | model='gemini-2.5-pro', 1687 | contents=prompt 1688 | ) 1689 | return response.text 1690 | ``` 1691 | 1692 | 4. 
Azure (Azure OpenAI) 1693 | ```python 1694 | def call_llm(prompt): 1695 | from openai import AzureOpenAI 1696 | client = AzureOpenAI( 1697 | azure_endpoint="https://.openai.azure.com/", 1698 | api_key="YOUR_API_KEY_HERE", 1699 | api_version="2023-05-15" 1700 | ) 1701 | r = client.chat.completions.create( 1702 | model="", 1703 | messages=[{"role": "user", "content": prompt}] 1704 | ) 1705 | return r.choices[0].message.content 1706 | ``` 1707 | 1708 | 5. Ollama (Local LLM) 1709 | ```python 1710 | def call_llm(prompt): 1711 | from ollama import chat 1712 | response = chat( 1713 | model="llama2", 1714 | messages=[{"role": "user", "content": prompt}] 1715 | ) 1716 | return response.message.content 1717 | ``` 1718 | 1719 | ## Improvements 1720 | Feel free to enhance your `call_llm` function as needed. Here are examples: 1721 | 1722 | - Handle chat history: 1723 | 1724 | ```python 1725 | def call_llm(messages): 1726 | from openai import OpenAI 1727 | client = OpenAI(api_key="YOUR_API_KEY_HERE") 1728 | r = client.chat.completions.create( 1729 | model="gpt-4o", 1730 | messages=messages 1731 | ) 1732 | return r.choices[0].message.content 1733 | ``` 1734 | 1735 | - Add in-memory caching 1736 | 1737 | ```python 1738 | from functools import lru_cache 1739 | 1740 | @lru_cache(maxsize=1000) 1741 | def call_llm(prompt): 1742 | # Your implementation here 1743 | pass 1744 | ``` 1745 | 1746 | > ⚠️ Caching conflicts with Node retries, as retries yield the same result. 1747 | > 1748 | > To address this, you could use cached results only if not retried. 1749 | {: .warning } 1750 | 1751 | 1752 | ```python 1753 | from functools import lru_cache 1754 | 1755 | @lru_cache(maxsize=1000) 1756 | def cached_call(prompt): 1757 | pass 1758 | 1759 | def call_llm(prompt, use_cache): 1760 | if use_cache: 1761 | return cached_call(prompt) 1762 | # Call the underlying function directly 1763 | return cached_call.__wrapped__(prompt) 1764 | 1765 | class SummarizeNode(Node): 1766 | def exec(self, text): 1767 | return call_llm(f"Summarize: {text}", self.cur_retry==0) 1768 | ``` 1769 | 1770 | - Enable logging: 1771 | 1772 | ```python 1773 | def call_llm(prompt): 1774 | import logging 1775 | logging.info(f"Prompt: {prompt}") 1776 | response = ... # Your implementation here 1777 | logging.info(f"Response: {response}") 1778 | return response 1779 | ``` -------------------------------------------------------------------------------- /.goosehints: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: "Agentic Coding" 4 | --- 5 | 6 | # Agentic Coding: Humans Design, Agents code! 7 | 8 | > If you are an AI agent involved in building LLM Systems, read this guide **VERY, VERY** carefully! This is the most important chapter in the entire document. Throughout development, you should always (1) start with a small and simple solution, (2) design at a high level (`docs/design.md`) before implementation, and (3) frequently ask humans for feedback and clarification. 9 | {: .warning } 10 | 11 | ## Agentic Coding Steps 12 | 13 | Agentic Coding should be a collaboration between Human System Design and Agent Implementation: 14 | 15 | | Steps | Human | AI | Comment | 16 | |:-----------------------|:----------:|:---------:|:------------------------------------------------------------------------| 17 | | 1. Requirements | ★★★ High | ★☆☆ Low | Humans understand the requirements and context. | 18 | | 2. 
Flow | ★★☆ Medium | ★★☆ Medium | Humans specify the high-level design, and the AI fills in the details. | 19 | | 3. Utilities | ★★☆ Medium | ★★☆ Medium | Humans provide available external APIs and integrations, and the AI helps with implementation. | 20 | | 4. Data | ★☆☆ Low | ★★★ High | AI designs the data schema, and humans verify. | 21 | | 5. Node | ★☆☆ Low | ★★★ High | The AI helps design the node based on the flow. | 22 | | 6. Implementation | ★☆☆ Low | ★★★ High | The AI implements the flow based on the design. | 23 | | 7. Optimization | ★★☆ Medium | ★★☆ Medium | Humans evaluate the results, and the AI helps optimize. | 24 | | 8. Reliability | ★☆☆ Low | ★★★ High | The AI writes test cases and addresses corner cases. | 25 | 26 | 1. **Requirements**: Clarify the requirements for your project, and evaluate whether an AI system is a good fit. 27 | - Understand AI systems' strengths and limitations: 28 | - **Good for**: Routine tasks requiring common sense (filling forms, replying to emails) 29 | - **Good for**: Creative tasks with well-defined inputs (building slides, writing SQL) 30 | - **Not good for**: Ambiguous problems requiring complex decision-making (business strategy, startup planning) 31 | - **Keep It User-Centric:** Explain the "problem" from the user's perspective rather than just listing features. 32 | - **Balance complexity vs. impact**: Aim to deliver the highest value features with minimal complexity early. 33 | 34 | 2. **Flow Design**: Outline at a high level, describe how your AI system orchestrates nodes. 35 | - Identify applicable design patterns (e.g., [Map Reduce](./design_pattern/mapreduce.md), [Agent](./design_pattern/agent.md), [RAG](./design_pattern/rag.md)). 36 | - For each node in the flow, start with a high-level one-line description of what it does. 37 | - If using **Map Reduce**, specify how to map (what to split) and how to reduce (how to combine). 38 | - If using **Agent**, specify what are the inputs (context) and what are the possible actions. 39 | - If using **RAG**, specify what to embed, noting that there's usually both offline (indexing) and online (retrieval) workflows. 40 | - Outline the flow and draw it in a mermaid diagram. For example: 41 | ```mermaid 42 | flowchart LR 43 | start[Start] --> batch[Batch] 44 | batch --> check[Check] 45 | check -->|OK| process 46 | check -->|Error| fix[Fix] 47 | fix --> check 48 | 49 | subgraph process[Process] 50 | step1[Step 1] --> step2[Step 2] 51 | end 52 | 53 | process --> endNode[End] 54 | ``` 55 | - > **If Humans can't specify the flow, AI Agents can't automate it!** Before building an LLM system, thoroughly understand the problem and potential solution by manually solving example inputs to develop intuition. 56 | {: .best-practice } 57 | 58 | 3. **Utilities**: Based on the Flow Design, identify and implement necessary utility functions. 59 | - Think of your AI system as the brain. It needs a body—these *external utility functions*—to interact with the real world: 60 |
61 | 62 | - Reading inputs (e.g., retrieving Slack messages, reading emails) 63 | - Writing outputs (e.g., generating reports, sending emails) 64 | - Using external tools (e.g., calling LLMs, searching the web) 65 | - **NOTE**: *LLM-based tasks* (e.g., summarizing text, analyzing sentiment) are **NOT** utility functions; rather, they are *core functions* internal in the AI system. 66 | - For each utility function, implement it and write a simple test. 67 | - Document their input/output, as well as why they are necessary. For example: 68 | - `name`: `get_embedding` (`utils/get_embedding.py`) 69 | - `input`: `str` 70 | - `output`: a vector of 3072 floats 71 | - `necessity`: Used by the second node to embed text 72 | - Example utility implementation: 73 | ```python 74 | # utils/call_llm.py 75 | from openai import OpenAI 76 | 77 | def call_llm(prompt): 78 | client = OpenAI(api_key="YOUR_API_KEY_HERE") 79 | r = client.chat.completions.create( 80 | model="gpt-4o", 81 | messages=[{"role": "user", "content": prompt}] 82 | ) 83 | return r.choices[0].message.content 84 | 85 | if __name__ == "__main__": 86 | prompt = "What is the meaning of life?" 87 | print(call_llm(prompt)) 88 | ``` 89 | - > **Sometimes, design Utilities before Flow:** For example, for an LLM project to automate a legacy system, the bottleneck will likely be the available interface to that system. Start by designing the hardest utilities for interfacing, and then build the flow around them. 90 | {: .best-practice } 91 | - > **Avoid Exception Handling in Utilities**: If a utility function is called from a Node's `exec()` method, avoid using `try...except` blocks within the utility. Let the Node's built-in retry mechanism handle failures. 92 | {: .warning } 93 | 94 | 4. **Data Design**: Design the shared store that nodes will use to communicate. 95 | - One core design principle for PocketFlow is to use a well-designed [shared store](./core_abstraction/communication.md)—a data contract that all nodes agree upon to retrieve and store data. 96 | - For simple systems, use an in-memory dictionary. 97 | - For more complex systems or when persistence is required, use a database. 98 | - **Don't Repeat Yourself**: Use in-memory references or foreign keys. 99 | - Example shared store design: 100 | ```python 101 | shared = { 102 | "user": { 103 | "id": "user123", 104 | "context": { # Another nested dict 105 | "weather": {"temp": 72, "condition": "sunny"}, 106 | "location": "San Francisco" 107 | } 108 | }, 109 | "results": {} # Empty dict to store outputs 110 | } 111 | ``` 112 | 113 | 5. **Node Design**: Plan how each node will read and write data, and use utility functions. 114 | - For each [Node](./core_abstraction/node.md), describe its type, how it reads and writes data, and which utility function it uses. Keep it specific but high-level without codes. For example: 115 | - `type`: Regular (or Batch, or Async) 116 | - `prep`: Read "text" from the shared store 117 | - `exec`: Call the embedding utility function. **Avoid exception handling here**; let the Node's retry mechanism manage failures. 118 | - `post`: Write "embedding" to the shared store 119 | 120 | 6. **Implementation**: Implement the initial nodes and flows based on the design. 121 | - 🎉 If you've reached this step, humans have finished the design. Now *Agentic Coding* begins! 122 | - **"Keep it simple, stupid!"** Avoid complex features and full-scale type checking. 123 | - **FAIL FAST**! 
Leverage the built-in [Node](./core_abstraction/node.md) retry and fallback mechanisms to handle failures gracefully. This helps you quickly identify weak points in the system. 124 | - Add logging throughout the code to facilitate debugging. 125 | 126 | 7. **Optimization**: 127 | - **Use Intuition**: For a quick initial evaluation, human intuition is often a good start. 128 | - **Redesign Flow (Back to Step 3)**: Consider breaking down tasks further, introducing agentic decisions, or better managing input contexts. 129 | - If your flow design is already solid, move on to micro-optimizations: 130 | - **Prompt Engineering**: Use clear, specific instructions with examples to reduce ambiguity. 131 | - **In-Context Learning**: Provide robust examples for tasks that are difficult to specify with instructions alone. 132 | 133 | - > **You'll likely iterate a lot!** Expect to repeat Steps 3–6 hundreds of times. 134 | > 135 | >
136 | {: .best-practice } 137 | 138 | 8. **Reliability** 139 | - **Node Retries**: Add checks in the node `exec` to ensure outputs meet requirements, and consider increasing `max_retries` and `wait` times. 140 | - **Logging and Visualization**: Maintain logs of all attempts and visualize node results for easier debugging. 141 | - **Self-Evaluation**: Add a separate node (powered by an LLM) to review outputs when results are uncertain. 142 | 143 | ## Example LLM Project File Structure 144 | 145 | ``` 146 | my_project/ 147 | ├── main.py 148 | ├── nodes.py 149 | ├── flow.py 150 | ├── utils/ 151 | │ ├── __init__.py 152 | │ ├── call_llm.py 153 | │ └── search_web.py 154 | ├── requirements.txt 155 | └── docs/ 156 | └── design.md 157 | ``` 158 | 159 | - **`requirements.txt`**: Lists the Python dependencies for the project. 160 | ``` 161 | PyYAML 162 | pocketflow 163 | ``` 164 | 165 | - **`docs/design.md`**: Contains project documentation for each step above. This should be *high-level* and *no-code*. 166 | ~~~ 167 | # Design Doc: Your Project Name 168 | 169 | > Please DON'T remove notes for AI 170 | 171 | ## Requirements 172 | 173 | > Notes for AI: Keep it simple and clear. 174 | > If the requirements are abstract, write concrete user stories 175 | 176 | 177 | ## Flow Design 178 | 179 | > Notes for AI: 180 | > 1. Consider the design patterns of agent, map-reduce, rag, and workflow. Apply them if they fit. 181 | > 2. Present a concise, high-level description of the workflow. 182 | 183 | ### Applicable Design Pattern: 184 | 185 | 1. Map the file summary into chunks, then reduce these chunks into a final summary. 186 | 2. Agentic file finder 187 | - *Context*: The entire summary of the file 188 | - *Action*: Find the file 189 | 190 | ### Flow high-level Design: 191 | 192 | 1. **First Node**: This node is for ... 193 | 2. **Second Node**: This node is for ... 194 | 3. **Third Node**: This node is for ... 195 | 196 | ```mermaid 197 | flowchart TD 198 | firstNode[First Node] --> secondNode[Second Node] 199 | secondNode --> thirdNode[Third Node] 200 | ``` 201 | ## Utility Functions 202 | 203 | > Notes for AI: 204 | > 1. Understand the utility function definition thoroughly by reviewing the doc. 205 | > 2. Include only the necessary utility functions, based on nodes in the flow. 206 | 207 | 1. **Call LLM** (`utils/call_llm.py`) 208 | - *Input*: prompt (str) 209 | - *Output*: response (str) 210 | - Generally used by most nodes for LLM tasks 211 | 212 | 2. **Embedding** (`utils/get_embedding.py`) 213 | - *Input*: str 214 | - *Output*: a vector of 3072 floats 215 | - Used by the second node to embed text 216 | 217 | ## Node Design 218 | 219 | ### Shared Store 220 | 221 | > Notes for AI: Try to minimize data redundancy 222 | 223 | The shared store structure is organized as follows: 224 | 225 | ```python 226 | shared = { 227 | "key": "value" 228 | } 229 | ``` 230 | 231 | ### Node Steps 232 | 233 | > Notes for AI: Carefully decide whether to use Batch/Async Node/Flow. 234 | 235 | 1. First Node 236 | - *Purpose*: Provide a short explanation of the node’s function 237 | - *Type*: Decide between Regular, Batch, or Async 238 | - *Steps*: 239 | - *prep*: Read "key" from the shared store 240 | - *exec*: Call the utility function 241 | - *post*: Write "key" to the shared store 242 | 243 | 2. Second Node 244 | ... 245 | ~~~ 246 | 247 | 248 | - **`utils/`**: Contains all utility functions. 249 | - It's recommended to dedicate one Python file to each API call, for example `call_llm.py` or `search_web.py`. 
250 | - Each file should also include a `main()` function to try that API call 251 | ```python 252 | from google import genai 253 | import os 254 | 255 | def call_llm(prompt: str) -> str: 256 | client = genai.Client( 257 | api_key=os.getenv("GEMINI_API_KEY", ""), 258 | ) 259 | model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash") 260 | response = client.models.generate_content(model=model, contents=[prompt]) 261 | return response.text 262 | 263 | if __name__ == "__main__": 264 | test_prompt = "Hello, how are you?" 265 | 266 | # First call - should hit the API 267 | print("Making call...") 268 | response1 = call_llm(test_prompt, use_cache=False) 269 | print(f"Response: {response1}") 270 | ``` 271 | 272 | - **`nodes.py`**: Contains all the node definitions. 273 | ```python 274 | # nodes.py 275 | from pocketflow import Node 276 | from utils.call_llm import call_llm 277 | 278 | class GetQuestionNode(Node): 279 | def exec(self, _): 280 | # Get question directly from user input 281 | user_question = input("Enter your question: ") 282 | return user_question 283 | 284 | def post(self, shared, prep_res, exec_res): 285 | # Store the user's question 286 | shared["question"] = exec_res 287 | return "default" # Go to the next node 288 | 289 | class AnswerNode(Node): 290 | def prep(self, shared): 291 | # Read question from shared 292 | return shared["question"] 293 | 294 | def exec(self, question): 295 | # Call LLM to get the answer 296 | return call_llm(question) 297 | 298 | def post(self, shared, prep_res, exec_res): 299 | # Store the answer in shared 300 | shared["answer"] = exec_res 301 | ``` 302 | - **`flow.py`**: Implements functions that create flows by importing node definitions and connecting them. 303 | ```python 304 | # flow.py 305 | from pocketflow import Flow 306 | from nodes import GetQuestionNode, AnswerNode 307 | 308 | def create_qa_flow(): 309 | """Create and return a question-answering flow.""" 310 | # Create nodes 311 | get_question_node = GetQuestionNode() 312 | answer_node = AnswerNode() 313 | 314 | # Connect nodes in sequence 315 | get_question_node >> answer_node 316 | 317 | # Create flow starting with input node 318 | return Flow(start=get_question_node) 319 | ``` 320 | - **`main.py`**: Serves as the project's entry point. 321 | ```python 322 | # main.py 323 | from flow import create_qa_flow 324 | 325 | # Example main function 326 | # Please replace this with your own main function 327 | def main(): 328 | shared = { 329 | "question": None, # Will be populated by GetQuestionNode from user input 330 | "answer": None # Will be populated by AnswerNode 331 | } 332 | 333 | # Create the flow and run it 334 | qa_flow = create_qa_flow() 335 | qa_flow.run(shared) 336 | print(f"Question: {shared['question']}") 337 | print(f"Answer: {shared['answer']}") 338 | 339 | if __name__ == "__main__": 340 | main() 341 | ``` 342 | 343 | ================================================ 344 | File: docs/index.md 345 | ================================================ 346 | --- 347 | layout: default 348 | title: "Home" 349 | nav_order: 1 350 | --- 351 | 352 | # Pocket Flow 353 | 354 | A [100-line](https://github.com/the-pocket/PocketFlow/blob/main/pocketflow/__init__.py) minimalist LLM framework for *Agents, Task Decomposition, RAG, etc*. 355 | 356 | - **Lightweight**: Just the core graph abstraction in 100 lines. ZERO dependencies, and vendor lock-in. 
357 | - **Expressive**: Everything you love from larger frameworks—([Multi-](./design_pattern/multi_agent.html))[Agents](./design_pattern/agent.html), [Workflow](./design_pattern/workflow.html), [RAG](./design_pattern/rag.html), and more. 358 | - **Agentic-Coding**: Intuitive enough for AI agents to help humans build complex LLM applications. 359 | 360 |
363 | 364 | ## Core Abstraction 365 | 366 | We model the LLM workflow as a **Graph + Shared Store**: 367 | 368 | - [Node](./core_abstraction/node.md) handles simple (LLM) tasks. 369 | - [Flow](./core_abstraction/flow.md) connects nodes through **Actions** (labeled edges). 370 | - [Shared Store](./core_abstraction/communication.md) enables communication between nodes within flows. 371 | - [Batch](./core_abstraction/batch.md) nodes/flows allow for data-intensive tasks. 372 | - [Async](./core_abstraction/async.md) nodes/flows allow waiting for asynchronous tasks. 373 | - [(Advanced) Parallel](./core_abstraction/parallel.md) nodes/flows handle I/O-bound tasks. 374 | 375 |
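For a quick feel of how these pieces fit together, here is a minimal sketch mirroring the example in the [Communication](./core_abstraction/communication.md) chapter. It assumes `pocketflow` is installed and that `call_llm` is your own wrapper (see the LLM Wrapper docs):

```python
from pocketflow import Node, Flow
from utils.call_llm import call_llm  # your own wrapper (see LLM Wrapper docs)

class LoadData(Node):
    def post(self, shared, prep_res, exec_res):
        shared["data"] = "Some text content"  # write to the shared store

class Summarize(Node):
    def prep(self, shared):
        return shared["data"]                 # read from the shared store

    def exec(self, prep_res):
        return call_llm(f"Summarize: {prep_res}")

    def post(self, shared, prep_res, exec_res):
        shared["summary"] = exec_res          # write the result back

load_data, summarize = LoadData(), Summarize()
load_data >> summarize                        # "default" Action edge
flow = Flow(start=load_data)

shared = {}
flow.run(shared)                              # shared["summary"] now holds the result
```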
378 | 379 | ## Design Pattern 380 | 381 | From there, it’s easy to implement popular design patterns: 382 | 383 | - [Agent](./design_pattern/agent.md) autonomously makes decisions. 384 | - [Workflow](./design_pattern/workflow.md) chains multiple tasks into pipelines. 385 | - [RAG](./design_pattern/rag.md) integrates data retrieval with generation. 386 | - [Map Reduce](./design_pattern/mapreduce.md) splits data tasks into Map and Reduce steps. 387 | - [Structured Output](./design_pattern/structure.md) formats outputs consistently. 388 | - [(Advanced) Multi-Agents](./design_pattern/multi_agent.md) coordinate multiple agents. 389 | 390 |
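To give a flavor of how these patterns build on Actions, an Agent's branching is just labeled edges between nodes. A sketch, where `decide`, `search`, and `answer` are assumed to be node instances like those in the [Agent](./design_pattern/agent.md) chapter:

```python
decide - "search" >> search   # the agent chose to search
decide - "answer" >> answer   # the agent chose to answer
search - "decide" >> decide   # loop back with the new context

flow = Flow(start=decide)
```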
393 | 394 | ## Utility Function 395 | 396 | We **do not** provide built-in utilities. Instead, we offer *examples*—please *implement your own*: 397 | 398 | - [LLM Wrapper](./utility_function/llm.md) 399 | - [Viz and Debug](./utility_function/viz.md) 400 | - [Web Search](./utility_function/websearch.md) 401 | - [Chunking](./utility_function/chunking.md) 402 | - [Embedding](./utility_function/embedding.md) 403 | - [Vector Databases](./utility_function/vector.md) 404 | - [Text-to-Speech](./utility_function/text_to_speech.md) 405 | 406 | **Why not built-in?**: I believe it's a *bad practice* for vendor-specific APIs in a general framework: 407 | - *API Volatility*: Frequent changes lead to heavy maintenance for hardcoded APIs. 408 | - *Flexibility*: You may want to switch vendors, use fine-tuned models, or run them locally. 409 | - *Optimizations*: Prompt caching, batching, and streaming are easier without vendor lock-in. 410 | 411 | ## Ready to build your Apps? 412 | 413 | Check out [Agentic Coding Guidance](./guide.md), the fastest way to develop LLM projects with Pocket Flow! 414 | 415 | ================================================ 416 | File: docs/core_abstraction/async.md 417 | ================================================ 418 | --- 419 | layout: default 420 | title: "(Advanced) Async" 421 | parent: "Core Abstraction" 422 | nav_order: 5 423 | --- 424 | 425 | # (Advanced) Async 426 | 427 | **Async** Nodes implement `prep_async()`, `exec_async()`, `exec_fallback_async()`, and/or `post_async()`. This is useful for: 428 | 429 | 1. **prep_async()**: For *fetching/reading data (files, APIs, DB)* in an I/O-friendly way. 430 | 2. **exec_async()**: Typically used for async LLM calls. 431 | 3. **post_async()**: For *awaiting user feedback*, *coordinating across multi-agents* or any additional async steps after `exec_async()`. 432 | 433 | **Note**: `AsyncNode` must be wrapped in `AsyncFlow`. `AsyncFlow` can also include regular (sync) nodes. 434 | 435 | ### Example 436 | 437 | ```python 438 | class SummarizeThenVerify(AsyncNode): 439 | async def prep_async(self, shared): 440 | # Example: read a file asynchronously 441 | doc_text = await read_file_async(shared["doc_path"]) 442 | return doc_text 443 | 444 | async def exec_async(self, prep_res): 445 | # Example: async LLM call 446 | summary = await call_llm_async(f"Summarize: {prep_res}") 447 | return summary 448 | 449 | async def post_async(self, shared, prep_res, exec_res): 450 | # Example: wait for user feedback 451 | decision = await gather_user_feedback(exec_res) 452 | if decision == "approve": 453 | shared["summary"] = exec_res 454 | return "approve" 455 | return "deny" 456 | 457 | summarize_node = SummarizeThenVerify() 458 | final_node = Finalize() 459 | 460 | # Define transitions 461 | summarize_node - "approve" >> final_node 462 | summarize_node - "deny" >> summarize_node # retry 463 | 464 | flow = AsyncFlow(start=summarize_node) 465 | 466 | async def main(): 467 | shared = {"doc_path": "document.txt"} 468 | await flow.run_async(shared) 469 | print("Final Summary:", shared.get("summary")) 470 | 471 | asyncio.run(main()) 472 | ``` 473 | 474 | ================================================ 475 | File: docs/core_abstraction/batch.md 476 | ================================================ 477 | --- 478 | layout: default 479 | title: "Batch" 480 | parent: "Core Abstraction" 481 | nav_order: 4 482 | --- 483 | 484 | # Batch 485 | 486 | **Batch** makes it easier to handle large inputs in one Node or **rerun** a Flow multiple times. 
Example use cases: 487 | - **Chunk-based** processing (e.g., splitting large texts). 488 | - **Iterative** processing over lists of input items (e.g., user queries, files, URLs). 489 | 490 | ## 1. BatchNode 491 | 492 | A **BatchNode** extends `Node` but changes `prep()` and `exec()`: 493 | 494 | - **`prep(shared)`**: returns an **iterable** (e.g., list, generator). 495 | - **`exec(item)`**: called **once** per item in that iterable. 496 | - **`post(shared, prep_res, exec_res_list)`**: after all items are processed, receives a **list** of results (`exec_res_list`) and returns an **Action**. 497 | 498 | 499 | ### Example: Summarize a Large File 500 | 501 | ```python 502 | class MapSummaries(BatchNode): 503 | def prep(self, shared): 504 | # Suppose we have a big file; chunk it 505 | content = shared["data"] 506 | chunk_size = 10000 507 | chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)] 508 | return chunks 509 | 510 | def exec(self, chunk): 511 | prompt = f"Summarize this chunk in 10 words: {chunk}" 512 | summary = call_llm(prompt) 513 | return summary 514 | 515 | def post(self, shared, prep_res, exec_res_list): 516 | combined = "\n".join(exec_res_list) 517 | shared["summary"] = combined 518 | return "default" 519 | 520 | map_summaries = MapSummaries() 521 | flow = Flow(start=map_summaries) 522 | flow.run(shared) 523 | ``` 524 | 525 | --- 526 | 527 | ## 2. BatchFlow 528 | 529 | A **BatchFlow** runs a **Flow** multiple times, each time with different `params`. Think of it as a loop that replays the Flow for each parameter set. 530 | 531 | ### Example: Summarize Many Files 532 | 533 | ```python 534 | class SummarizeAllFiles(BatchFlow): 535 | def prep(self, shared): 536 | # Return a list of param dicts (one per file) 537 | filenames = list(shared["data"].keys()) # e.g., ["file1.txt", "file2.txt", ...] 538 | return [{"filename": fn} for fn in filenames] 539 | 540 | # Suppose we have a per-file Flow (e.g., load_file >> summarize >> reduce): 541 | summarize_file = SummarizeFile(start=load_file) 542 | 543 | # Wrap that flow into a BatchFlow: 544 | summarize_all_files = SummarizeAllFiles(start=summarize_file) 545 | summarize_all_files.run(shared) 546 | ``` 547 | 548 | ### Under the Hood 549 | 1. `prep(shared)` returns a list of param dicts—e.g., `[{filename: "file1.txt"}, {filename: "file2.txt"}, ...]`. 550 | 2. The **BatchFlow** loops through each dict. For each one: 551 | - It merges the dict with the BatchFlow’s own `params`. 552 | - It calls `flow.run(shared)` using the merged result. 553 | 3. This means the sub-Flow is run **repeatedly**, once for every param dict. 554 | 555 | --- 556 | 557 | ## 3. Nested or Multi-Level Batches 558 | 559 | You can nest a **BatchFlow** in another **BatchFlow**. For instance: 560 | - **Outer** batch: returns a list of diretory param dicts (e.g., `{"directory": "/pathA"}`, `{"directory": "/pathB"}`, ...). 561 | - **Inner** batch: returning a list of per-file param dicts. 562 | 563 | At each level, **BatchFlow** merges its own param dict with the parent’s. By the time you reach the **innermost** node, the final `params` is the merged result of **all** parents in the chain. This way, a nested structure can keep track of the entire context (e.g., directory + file name) at once. 564 | 565 | ```python 566 | 567 | class FileBatchFlow(BatchFlow): 568 | def prep(self, shared): 569 | directory = self.params["directory"] 570 | # e.g., files = ["file1.txt", "file2.txt", ...] 
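        # (assumes `import os` at the top of the file)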
571 | files = [f for f in os.listdir(directory) if f.endswith(".txt")] 572 | return [{"filename": f} for f in files] 573 | 574 | class DirectoryBatchFlow(BatchFlow): 575 | def prep(self, shared): 576 | directories = [ "/path/to/dirA", "/path/to/dirB"] 577 | return [{"directory": d} for d in directories] 578 | 579 | # MapSummaries have params like {"directory": "/path/to/dirA", "filename": "file1.txt"} 580 | inner_flow = FileBatchFlow(start=MapSummaries()) 581 | outer_flow = DirectoryBatchFlow(start=inner_flow) 582 | ``` 583 | 584 | ================================================ 585 | File: docs/core_abstraction/communication.md 586 | ================================================ 587 | --- 588 | layout: default 589 | title: "Communication" 590 | parent: "Core Abstraction" 591 | nav_order: 3 592 | --- 593 | 594 | # Communication 595 | 596 | Nodes and Flows **communicate** in 2 ways: 597 | 598 | 1. **Shared Store (for almost all the cases)** 599 | 600 | - A global data structure (often an in-mem dict) that all nodes can read ( `prep()`) and write (`post()`). 601 | - Great for data results, large content, or anything multiple nodes need. 602 | - You shall design the data structure and populate it ahead. 603 | 604 | - > **Separation of Concerns:** Use `Shared Store` for almost all cases to separate *Data Schema* from *Compute Logic*! This approach is both flexible and easy to manage, resulting in more maintainable code. `Params` is more a syntax sugar for [Batch](./batch.md). 605 | {: .best-practice } 606 | 607 | 2. **Params (only for [Batch](./batch.md))** 608 | - Each node has a local, ephemeral `params` dict passed in by the **parent Flow**, used as an identifier for tasks. Parameter keys and values shall be **immutable**. 609 | - Good for identifiers like filenames or numeric IDs, in Batch mode. 610 | 611 | If you know memory management, think of the **Shared Store** like a **heap** (shared by all function calls), and **Params** like a **stack** (assigned by the caller). 612 | 613 | --- 614 | 615 | ## 1. Shared Store 616 | 617 | ### Overview 618 | 619 | A shared store is typically an in-mem dictionary, like: 620 | ```python 621 | shared = {"data": {}, "summary": {}, "config": {...}, ...} 622 | ``` 623 | 624 | It can also contain local file handlers, DB connections, or a combination for persistence. We recommend deciding the data structure or DB schema first based on your app requirements. 625 | 626 | ### Example 627 | 628 | ```python 629 | class LoadData(Node): 630 | def post(self, shared, prep_res, exec_res): 631 | # We write data to shared store 632 | shared["data"] = "Some text content" 633 | return None 634 | 635 | class Summarize(Node): 636 | def prep(self, shared): 637 | # We read data from shared store 638 | return shared["data"] 639 | 640 | def exec(self, prep_res): 641 | # Call LLM to summarize 642 | prompt = f"Summarize: {prep_res}" 643 | summary = call_llm(prompt) 644 | return summary 645 | 646 | def post(self, shared, prep_res, exec_res): 647 | # We write summary to shared store 648 | shared["summary"] = exec_res 649 | return "default" 650 | 651 | load_data = LoadData() 652 | summarize = Summarize() 653 | load_data >> summarize 654 | flow = Flow(start=load_data) 655 | 656 | shared = {} 657 | flow.run(shared) 658 | ``` 659 | 660 | Here: 661 | - `LoadData` writes to `shared["data"]`. 662 | - `Summarize` reads from `shared["data"]`, summarizes, and writes to `shared["summary"]`. 663 | 664 | --- 665 | 666 | ## 2. 
Params 667 | 668 | **Params** let you store *per-Node* or *per-Flow* config that doesn't need to live in the shared store. They are: 669 | - **Immutable** during a Node's run cycle (i.e., they don't change mid-`prep->exec->post`). 670 | - **Set** via `set_params()`. 671 | - **Cleared** and updated each time a parent Flow calls it. 672 | 673 | > Only set the uppermost Flow params because others will be overwritten by the parent Flow. 674 | > 675 | > If you need to set child node params, see [Batch](./batch.md). 676 | {: .warning } 677 | 678 | Typically, **Params** are identifiers (e.g., file name, page number). Use them to fetch the task you assigned or write to a specific part of the shared store. 679 | 680 | ### Example 681 | 682 | ```python 683 | # 1) Create a Node that uses params 684 | class SummarizeFile(Node): 685 | def prep(self, shared): 686 | # Access the node's param 687 | filename = self.params["filename"] 688 | return shared["data"].get(filename, "") 689 | 690 | def exec(self, prep_res): 691 | prompt = f"Summarize: {prep_res}" 692 | return call_llm(prompt) 693 | 694 | def post(self, shared, prep_res, exec_res): 695 | filename = self.params["filename"] 696 | shared["summary"][filename] = exec_res 697 | return "default" 698 | 699 | # 2) Set params 700 | node = SummarizeFile() 701 | 702 | # 3) Set Node params directly (for testing) 703 | node.set_params({"filename": "doc1.txt"}) 704 | node.run(shared) 705 | 706 | # 4) Create Flow 707 | flow = Flow(start=node) 708 | 709 | # 5) Set Flow params (overwrites node params) 710 | flow.set_params({"filename": "doc2.txt"}) 711 | flow.run(shared) # The node summarizes doc2, not doc1 712 | ``` 713 | 714 | ================================================ 715 | File: docs/core_abstraction/flow.md 716 | ================================================ 717 | --- 718 | layout: default 719 | title: "Flow" 720 | parent: "Core Abstraction" 721 | nav_order: 2 722 | --- 723 | 724 | # Flow 725 | 726 | A **Flow** orchestrates a graph of Nodes. You can chain Nodes in a sequence or create branching depending on the **Actions** returned from each Node's `post()`. 727 | 728 | ## 1. Action-based Transitions 729 | 730 | Each Node's `post()` returns an **Action** string. By default, if `post()` doesn't return anything, we treat that as `"default"`. 731 | 732 | You define transitions with the syntax: 733 | 734 | 1. **Basic default transition**: `node_a >> node_b` 735 | This means if `node_a.post()` returns `"default"`, go to `node_b`. 736 | (Equivalent to `node_a - "default" >> node_b`) 737 | 738 | 2. **Named action transition**: `node_a - "action_name" >> node_b` 739 | This means if `node_a.post()` returns `"action_name"`, go to `node_b`. 740 | 741 | It's possible to create loops, branching, or multi-step flows. 742 | 743 | ## 2. Creating a Flow 744 | 745 | A **Flow** begins with a **start** node. You call `Flow(start=some_node)` to specify the entry point. When you call `flow.run(shared)`, it executes the start node, looks at its returned Action from `post()`, follows the transition, and continues until there's no next node. 746 | 747 | ### Example: Simple Sequence 748 | 749 | Here's a minimal flow of two nodes in a chain: 750 | 751 | ```python 752 | node_a >> node_b 753 | flow = Flow(start=node_a) 754 | flow.run(shared) 755 | ``` 756 | 757 | - When you run the flow, it executes `node_a`. 758 | - Suppose `node_a.post()` returns `"default"`. 759 | - The flow then sees `"default"` Action is linked to `node_b` and runs `node_b`. 
760 | - `node_b.post()` returns `"default"` but we didn't define `node_b >> something_else`. So the flow ends there. 761 | 762 | ### Example: Branching & Looping 763 | 764 | Here's a simple expense approval flow that demonstrates branching and looping. The `ReviewExpense` node can return three possible Actions: 765 | 766 | - `"approved"`: expense is approved, move to payment processing 767 | - `"needs_revision"`: expense needs changes, send back for revision 768 | - `"rejected"`: expense is denied, finish the process 769 | 770 | We can wire them like this: 771 | 772 | ```python 773 | # Define the flow connections 774 | review - "approved" >> payment # If approved, process payment 775 | review - "needs_revision" >> revise # If needs changes, go to revision 776 | review - "rejected" >> finish # If rejected, finish the process 777 | 778 | revise >> review # After revision, go back for another review 779 | payment >> finish # After payment, finish the process 780 | 781 | flow = Flow(start=review) 782 | ``` 783 | 784 | Let's see how it flows: 785 | 786 | 1. If `review.post()` returns `"approved"`, the expense moves to the `payment` node 787 | 2. If `review.post()` returns `"needs_revision"`, it goes to the `revise` node, which then loops back to `review` 788 | 3. If `review.post()` returns `"rejected"`, it moves to the `finish` node and stops 789 | 790 | ```mermaid 791 | flowchart TD 792 | review[Review Expense] -->|approved| payment[Process Payment] 793 | review -->|needs_revision| revise[Revise Report] 794 | review -->|rejected| finish[Finish Process] 795 | 796 | revise --> review 797 | payment --> finish 798 | ``` 799 | 800 | ### Running Individual Nodes vs. Running a Flow 801 | 802 | - `node.run(shared)`: Just runs that node alone (calls `prep->exec->post()`), returns an Action. 803 | - `flow.run(shared)`: Executes from the start node, follows Actions to the next node, and so on until the flow can't continue. 804 | 805 | > `node.run(shared)` **does not** proceed to the successor. 806 | > This is mainly for debugging or testing a single node. 807 | > 808 | > Always use `flow.run(...)` in production to ensure the full pipeline runs correctly. 809 | {: .warning } 810 | 811 | ## 3. Nested Flows 812 | 813 | A **Flow** can act like a Node, which enables powerful composition patterns. This means you can: 814 | 815 | 1. Use a Flow as a Node within another Flow's transitions. 816 | 2. Combine multiple smaller Flows into a larger Flow for reuse. 817 | 3. Node `params` will be a merging of **all** parents' `params`. 818 | 819 | ### Flow's Node Methods 820 | 821 | A **Flow** is also a **Node**, so it will run `prep()` and `post()`. However: 822 | 823 | - It **won't** run `exec()`, as its main logic is to orchestrate its nodes. 824 | - `post()` always receives `None` for `exec_res` and should instead get the flow execution results from the shared store. 825 | 826 | ### Basic Flow Nesting 827 | 828 | Here's how to connect a flow to another node: 829 | 830 | ```python 831 | # Create a sub-flow 832 | node_a >> node_b 833 | subflow = Flow(start=node_a) 834 | 835 | # Connect it to another node 836 | subflow >> node_c 837 | 838 | # Create the parent flow 839 | parent_flow = Flow(start=subflow) 840 | ``` 841 | 842 | When `parent_flow.run()` executes: 843 | 1. It starts `subflow` 844 | 2. `subflow` runs through its nodes (`node_a->node_b`) 845 | 3. 
After `subflow` completes, execution continues to `node_c` 846 | 847 | ### Example: Order Processing Pipeline 848 | 849 | Here's a practical example that breaks down order processing into nested flows: 850 | 851 | ```python 852 | # Payment processing sub-flow 853 | validate_payment >> process_payment >> payment_confirmation 854 | payment_flow = Flow(start=validate_payment) 855 | 856 | # Inventory sub-flow 857 | check_stock >> reserve_items >> update_inventory 858 | inventory_flow = Flow(start=check_stock) 859 | 860 | # Shipping sub-flow 861 | create_label >> assign_carrier >> schedule_pickup 862 | shipping_flow = Flow(start=create_label) 863 | 864 | # Connect the flows into a main order pipeline 865 | payment_flow >> inventory_flow >> shipping_flow 866 | 867 | # Create the master flow 868 | order_pipeline = Flow(start=payment_flow) 869 | 870 | # Run the entire pipeline 871 | order_pipeline.run(shared_data) 872 | ``` 873 | 874 | This creates a clean separation of concerns while maintaining a clear execution path: 875 | 876 | ```mermaid 877 | flowchart LR 878 | subgraph order_pipeline[Order Pipeline] 879 | subgraph paymentFlow["Payment Flow"] 880 | A[Validate Payment] --> B[Process Payment] --> C[Payment Confirmation] 881 | end 882 | 883 | subgraph inventoryFlow["Inventory Flow"] 884 | D[Check Stock] --> E[Reserve Items] --> F[Update Inventory] 885 | end 886 | 887 | subgraph shippingFlow["Shipping Flow"] 888 | G[Create Label] --> H[Assign Carrier] --> I[Schedule Pickup] 889 | end 890 | 891 | paymentFlow --> inventoryFlow 892 | inventoryFlow --> shippingFlow 893 | end 894 | ``` 895 | 896 | ================================================ 897 | File: docs/core_abstraction/node.md 898 | ================================================ 899 | --- 900 | layout: default 901 | title: "Node" 902 | parent: "Core Abstraction" 903 | nav_order: 1 904 | --- 905 | 906 | # Node 907 | 908 | A **Node** is the smallest building block. Each Node has 3 steps `prep->exec->post`: 909 | 910 |
913 | 914 | 1. `prep(shared)` 915 | - **Read and preprocess data** from `shared` store. 916 | - Examples: *query DB, read files, or serialize data into a string*. 917 | - Return `prep_res`, which is used by `exec()` and `post()`. 918 | 919 | 2. `exec(prep_res)` 920 | - **Execute compute logic**, with optional retries and error handling (below). 921 | - Examples: *(mostly) LLM calls, remote APIs, tool use*. 922 | - ⚠️ This step should contain compute logic only and must **NOT** access `shared`. 923 | - ⚠️ If retries are enabled, make sure the implementation is idempotent. 924 | - ⚠️ Defer exception handling to the Node's built-in retry mechanism. 925 | - Return `exec_res`, which is passed to `post()`. 926 | 927 | 3. `post(shared, prep_res, exec_res)` 928 | - **Postprocess and write data** back to `shared`. 929 | - Examples: *update DB, change states, log results*. 930 | - **Decide the next action** by returning a *string* (`action = "default"` if *None*). 931 | 932 | > **Why 3 steps?** To enforce the principle of *separation of concerns*: data storage and data processing are handled separately. 933 | > 934 | > All steps are *optional*. E.g., you can implement only `prep` and `post` if you just need to process data. 935 | {: .note } 936 | 937 | ### Fault Tolerance & Retries 938 | 939 | You can **retry** `exec()` if it raises an exception via two parameters when defining the Node: 940 | 941 | - `max_retries` (int): Max times to run `exec()`. The default is `1` (**no** retry). 942 | - `wait` (int): The time to wait (in **seconds**) before the next retry. By default, `wait=0` (no waiting). 943 | `wait` is helpful when you encounter rate limits or quota errors from your LLM provider and need to back off. 944 | 945 | ```python 946 | my_node = SummarizeFile(max_retries=3, wait=10) 947 | ``` 948 | 949 | When an exception occurs in `exec()`, the Node automatically retries until: 950 | 951 | - It either succeeds, or 952 | - The Node has already retried `max_retries - 1` times and fails on the last attempt. 953 | 954 | You can get the current retry count (0-based) from `self.cur_retry`. 955 | 956 | ```python 957 | class RetryNode(Node): 958 | def exec(self, prep_res): 959 | print(f"Retry attempt {self.cur_retry}") 960 | raise Exception("Failed") 961 | ``` 962 | 963 | ### Graceful Fallback 964 | 965 | To **gracefully handle** the exception (after all retries) rather than raising it, override: 966 | 967 | ```python 968 | def exec_fallback(self, prep_res, exc): 969 | raise exc 970 | ``` 971 | 972 | By default, it just re-raises the exception. But you can return a fallback result instead, which becomes the `exec_res` passed to `post()`. 973 | 974 | ### Example: Summarize file 975 | 976 | ```python 977 | class SummarizeFile(Node): 978 | def prep(self, shared): 979 | return shared["data"] 980 | 981 | def exec(self, prep_res): 982 | if not prep_res: 983 | return "Empty file content" 984 | prompt = f"Summarize this text in 10 words: {prep_res}" 985 | summary = call_llm(prompt) # might fail 986 | return summary 987 | 988 | def exec_fallback(self, prep_res, exc): 989 | # Provide a simple fallback instead of crashing 990 | return "There was an error processing your request."
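        # After all retries are exhausted, this fallback string becomes exec_res and is passed to post() below.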
991 | 992 | def post(self, shared, prep_res, exec_res): 993 | shared["summary"] = exec_res 994 | # Return "default" by not returning 995 | 996 | summarize_node = SummarizeFile(max_retries=3) 997 | 998 | # node.run() calls prep->exec->post 999 | # If exec() fails, it retries up to 3 times before calling exec_fallback() 1000 | action_result = summarize_node.run(shared) 1001 | 1002 | print("Action returned:", action_result) # "default" 1003 | print("Summary stored:", shared["summary"]) 1004 | ``` 1005 | 1006 | ================================================ 1007 | File: docs/core_abstraction/parallel.md 1008 | ================================================ 1009 | --- 1010 | layout: default 1011 | title: "(Advanced) Parallel" 1012 | parent: "Core Abstraction" 1013 | nav_order: 6 1014 | --- 1015 | 1016 | # (Advanced) Parallel 1017 | 1018 | **Parallel** Nodes and Flows let you run multiple **Async** Nodes and Flows **concurrently**—for example, summarizing multiple texts at once. This can improve performance by overlapping I/O and compute. 1019 | 1020 | > Because of Python’s GIL, parallel nodes and flows can’t truly parallelize CPU-bound tasks (e.g., heavy numerical computations). However, they excel at overlapping I/O-bound work—like LLM calls, database queries, API requests, or file I/O. 1021 | {: .warning } 1022 | 1023 | > - **Ensure Tasks Are Independent**: If each item depends on the output of a previous item, **do not** parallelize. 1024 | > 1025 | > - **Beware of Rate Limits**: Parallel calls can **quickly** trigger rate limits on LLM services. You may need a **throttling** mechanism (e.g., semaphores or sleep intervals). 1026 | > 1027 | > - **Consider Single-Node Batch APIs**: Some LLMs offer a **batch inference** API where you can send multiple prompts in a single call. This is more complex to implement but can be more efficient than launching many parallel requests and mitigates rate limits. 1028 | {: .best-practice } 1029 | 1030 | ## AsyncParallelBatchNode 1031 | 1032 | Like **AsyncBatchNode**, but run `exec_async()` in **parallel**: 1033 | 1034 | ```python 1035 | class ParallelSummaries(AsyncParallelBatchNode): 1036 | async def prep_async(self, shared): 1037 | # e.g., multiple texts 1038 | return shared["texts"] 1039 | 1040 | async def exec_async(self, text): 1041 | prompt = f"Summarize: {text}" 1042 | return await call_llm_async(prompt) 1043 | 1044 | async def post_async(self, shared, prep_res, exec_res_list): 1045 | shared["summary"] = "\n\n".join(exec_res_list) 1046 | return "default" 1047 | 1048 | node = ParallelSummaries() 1049 | flow = AsyncFlow(start=node) 1050 | ``` 1051 | 1052 | ## AsyncParallelBatchFlow 1053 | 1054 | Parallel version of **BatchFlow**. 
Each iteration of the sub-flow runs **concurrently** using different parameters: 1055 | 1056 | ```python 1057 | class SummarizeMultipleFiles(AsyncParallelBatchFlow): 1058 | async def prep_async(self, shared): 1059 | return [{"filename": f} for f in shared["files"]] 1060 | 1061 | sub_flow = AsyncFlow(start=LoadAndSummarizeFile()) 1062 | parallel_flow = SummarizeMultipleFiles(start=sub_flow) 1063 | await parallel_flow.run_async(shared) 1064 | ``` 1065 | 1066 | ================================================ 1067 | File: docs/design_pattern/agent.md 1068 | ================================================ 1069 | --- 1070 | layout: default 1071 | title: "Agent" 1072 | parent: "Design Pattern" 1073 | nav_order: 1 1074 | --- 1075 | 1076 | # Agent 1077 | 1078 | Agent is a powerful design pattern in which nodes can take dynamic actions based on the context. 1079 | 1080 |
1083 | 1084 | ## Implement Agent with Graph 1085 | 1086 | 1. **Context and Action:** Implement nodes that supply context and perform actions. 1087 | 2. **Branching:** Use branching to connect each action node to an agent node. Use action to allow the agent to direct the [flow](../core_abstraction/flow.md) between nodes—and potentially loop back for multi-step. 1088 | 3. **Agent Node:** Provide a prompt to decide action—for example: 1089 | 1090 | ```python 1091 | f""" 1092 | ### CONTEXT 1093 | Task: {task_description} 1094 | Previous Actions: {previous_actions} 1095 | Current State: {current_state} 1096 | 1097 | ### ACTION SPACE 1098 | [1] search 1099 | Description: Use web search to get results 1100 | Parameters: 1101 | - query (str): What to search for 1102 | 1103 | [2] answer 1104 | Description: Conclude based on the results 1105 | Parameters: 1106 | - result (str): Final answer to provide 1107 | 1108 | ### NEXT ACTION 1109 | Decide the next action based on the current context and available action space. 1110 | Return your response in the following format: 1111 | 1112 | ```yaml 1113 | thinking: | 1114 | 1115 | action: 1116 | parameters: 1117 | : 1118 | ```""" 1119 | ``` 1120 | 1121 | The core of building **high-performance** and **reliable** agents boils down to: 1122 | 1123 | 1. **Context Management:** Provide *relevant, minimal context.* For example, rather than including an entire chat history, retrieve the most relevant via [RAG](./rag.md). Even with larger context windows, LLMs still fall victim to ["lost in the middle"](https://arxiv.org/abs/2307.03172), overlooking mid-prompt content. 1124 | 1125 | 2. **Action Space:** Provide *a well-structured and unambiguous* set of actions—avoiding overlap like separate `read_databases` or `read_csvs`. Instead, import CSVs into the database. 1126 | 1127 | ## Example Good Action Design 1128 | 1129 | - **Incremental:** Feed content in manageable chunks (500 lines or 1 page) instead of all at once. 1130 | 1131 | - **Overview-zoom-in:** First provide high-level structure (table of contents, summary), then allow drilling into details (raw texts). 1132 | 1133 | - **Parameterized/Programmable:** Instead of fixed actions, enable parameterized (columns to select) or programmable (SQL queries) actions, for example, to read CSV files. 1134 | 1135 | - **Backtracking:** Let the agent undo the last step instead of restarting entirely, preserving progress when encountering errors or dead ends. 1136 | 1137 | ## Example: Search Agent 1138 | 1139 | This agent: 1140 | 1. Decides whether to search or answer 1141 | 2. If searches, loops back to decide if more search needed 1142 | 3. 
Answers when enough context gathered 1143 | 1144 | ```python 1145 | class DecideAction(Node): 1146 | def prep(self, shared): 1147 | context = shared.get("context", "No previous search") 1148 | query = shared["query"] 1149 | return query, context 1150 | 1151 | def exec(self, inputs): 1152 | query, context = inputs 1153 | prompt = f""" 1154 | Given input: {query} 1155 | Previous search results: {context} 1156 | Should I: 1) Search web for more info 2) Answer with current knowledge 1157 | Output in yaml: 1158 | ```yaml 1159 | action: search/answer 1160 | reason: why this action 1161 | search_term: search phrase if action is search 1162 | ```""" 1163 | resp = call_llm(prompt) 1164 | yaml_str = resp.split("```yaml")[1].split("```")[0].strip() 1165 | result = yaml.safe_load(yaml_str) 1166 | 1167 | assert isinstance(result, dict) 1168 | assert "action" in result 1169 | assert "reason" in result 1170 | assert result["action"] in ["search", "answer"] 1171 | if result["action"] == "search": 1172 | assert "search_term" in result 1173 | 1174 | return result 1175 | 1176 | def post(self, shared, prep_res, exec_res): 1177 | if exec_res["action"] == "search": 1178 | shared["search_term"] = exec_res["search_term"] 1179 | return exec_res["action"] 1180 | 1181 | class SearchWeb(Node): 1182 | def prep(self, shared): 1183 | return shared["search_term"] 1184 | 1185 | def exec(self, search_term): 1186 | return search_web(search_term) 1187 | 1188 | def post(self, shared, prep_res, exec_res): 1189 | prev_searches = shared.get("context", []) 1190 | shared["context"] = prev_searches + [ 1191 | {"term": shared["search_term"], "result": exec_res} 1192 | ] 1193 | return "decide" 1194 | 1195 | class DirectAnswer(Node): 1196 | def prep(self, shared): 1197 | return shared["query"], shared.get("context", "") 1198 | 1199 | def exec(self, inputs): 1200 | query, context = inputs 1201 | return call_llm(f"Context: {context}\nAnswer: {query}") 1202 | 1203 | def post(self, shared, prep_res, exec_res): 1204 | print(f"Answer: {exec_res}") 1205 | shared["answer"] = exec_res 1206 | 1207 | # Connect nodes 1208 | decide = DecideAction() 1209 | search = SearchWeb() 1210 | answer = DirectAnswer() 1211 | 1212 | decide - "search" >> search 1213 | decide - "answer" >> answer 1214 | search - "decide" >> decide # Loop back 1215 | 1216 | flow = Flow(start=decide) 1217 | flow.run({"query": "Who won the Nobel Prize in Physics 2024?"}) 1218 | ``` 1219 | 1220 | ================================================ 1221 | File: docs/design_pattern/mapreduce.md 1222 | ================================================ 1223 | --- 1224 | layout: default 1225 | title: "Map Reduce" 1226 | parent: "Design Pattern" 1227 | nav_order: 4 1228 | --- 1229 | 1230 | # Map Reduce 1231 | 1232 | MapReduce is a design pattern suitable when you have either: 1233 | - Large input data (e.g., multiple files to process), or 1234 | - Large output data (e.g., multiple forms to fill) 1235 | 1236 | and there is a logical way to break the task into smaller, ideally independent parts. 1237 | 1238 |
1241 | 1242 | You first break down the task using [BatchNode](../core_abstraction/batch.md) in the map phase, followed by aggregation in the reduce phase. 1243 | 1244 | ### Example: Document Summarization 1245 | 1246 | ```python 1247 | class SummarizeAllFiles(BatchNode): 1248 | def prep(self, shared): 1249 | files_dict = shared["files"] # e.g. 10 files 1250 | return list(files_dict.items()) # [("file1.txt", "aaa..."), ("file2.txt", "bbb..."), ...] 1251 | 1252 | def exec(self, one_file): 1253 | filename, file_content = one_file 1254 | summary_text = call_llm(f"Summarize the following file:\n{file_content}") 1255 | return (filename, summary_text) 1256 | 1257 | def post(self, shared, prep_res, exec_res_list): 1258 | shared["file_summaries"] = dict(exec_res_list) 1259 | 1260 | class CombineSummaries(Node): 1261 | def prep(self, shared): 1262 | return shared["file_summaries"] 1263 | 1264 | def exec(self, file_summaries): 1265 | # format as: "File1: summary\nFile2: summary...\n" 1266 | text_list = [] 1267 | for fname, summ in file_summaries.items(): 1268 | text_list.append(f"{fname} summary:\n{summ}\n") 1269 | big_text = "\n---\n".join(text_list) 1270 | 1271 | return call_llm(f"Combine these file summaries into one final summary:\n{big_text}") 1272 | 1273 | def post(self, shared, prep_res, final_summary): 1274 | shared["all_files_summary"] = final_summary 1275 | 1276 | batch_node = SummarizeAllFiles() 1277 | combine_node = CombineSummaries() 1278 | batch_node >> combine_node 1279 | 1280 | flow = Flow(start=batch_node) 1281 | 1282 | shared = { 1283 | "files": { 1284 | "file1.txt": "Alice was beginning to get very tired of sitting by her sister...", 1285 | "file2.txt": "Some other interesting text ...", 1286 | # ... 1287 | } 1288 | } 1289 | flow.run(shared) 1290 | print("Individual Summaries:", shared["file_summaries"]) 1291 | print("\nFinal Summary:\n", shared["all_files_summary"]) 1292 | ``` 1293 | 1294 | ================================================ 1295 | File: docs/design_pattern/rag.md 1296 | ================================================ 1297 | --- 1298 | layout: default 1299 | title: "RAG" 1300 | parent: "Design Pattern" 1301 | nav_order: 3 1302 | --- 1303 | 1304 | # RAG (Retrieval Augmented Generation) 1305 | 1306 | For certain LLM tasks like answering questions, providing relevant context is essential. One common architecture is a **two-stage** RAG pipeline: 1307 | 1308 |
1311 | 1312 | 1. **Offline stage**: Preprocess and index documents ("building the index"). 1313 | 2. **Online stage**: Given a question, generate answers by retrieving the most relevant context. 1314 | 1315 | --- 1316 | ## Stage 1: Offline Indexing 1317 | 1318 | We create three Nodes: 1319 | 1. `ChunkDocs` – [chunks](../utility_function/chunking.md) raw text. 1320 | 2. `EmbedDocs` – [embeds](../utility_function/embedding.md) each chunk. 1321 | 3. `StoreIndex` – stores embeddings into a [vector database](../utility_function/vector.md). 1322 | 1323 | ```python 1324 | class ChunkDocs(BatchNode): 1325 | def prep(self, shared): 1326 | # A list of file paths in shared["files"]. We process each file. 1327 | return shared["files"] 1328 | 1329 | def exec(self, filepath): 1330 | # read file content. In real usage, do error handling. 1331 | with open(filepath, "r", encoding="utf-8") as f: 1332 | text = f.read() 1333 | # chunk by 100 chars each 1334 | chunks = [] 1335 | size = 100 1336 | for i in range(0, len(text), size): 1337 | chunks.append(text[i : i + size]) 1338 | return chunks 1339 | 1340 | def post(self, shared, prep_res, exec_res_list): 1341 | # exec_res_list is a list of chunk-lists, one per file. 1342 | # flatten them all into a single list of chunks. 1343 | all_chunks = [] 1344 | for chunk_list in exec_res_list: 1345 | all_chunks.extend(chunk_list) 1346 | shared["all_chunks"] = all_chunks 1347 | 1348 | class EmbedDocs(BatchNode): 1349 | def prep(self, shared): 1350 | return shared["all_chunks"] 1351 | 1352 | def exec(self, chunk): 1353 | return get_embedding(chunk) 1354 | 1355 | def post(self, shared, prep_res, exec_res_list): 1356 | # Store the list of embeddings. 1357 | shared["all_embeds"] = exec_res_list 1358 | print(f"Total embeddings: {len(exec_res_list)}") 1359 | 1360 | class StoreIndex(Node): 1361 | def prep(self, shared): 1362 | # We'll read all embeds from shared. 1363 | return shared["all_embeds"] 1364 | 1365 | def exec(self, all_embeds): 1366 | # Create a vector index (faiss or other DB in real usage). 1367 | index = create_index(all_embeds) 1368 | return index 1369 | 1370 | def post(self, shared, prep_res, index): 1371 | shared["index"] = index 1372 | 1373 | # Wire them in sequence 1374 | chunk_node = ChunkDocs() 1375 | embed_node = EmbedDocs() 1376 | store_node = StoreIndex() 1377 | 1378 | chunk_node >> embed_node >> store_node 1379 | 1380 | OfflineFlow = Flow(start=chunk_node) 1381 | ``` 1382 | 1383 | Usage example: 1384 | 1385 | ```python 1386 | shared = { 1387 | "files": ["doc1.txt", "doc2.txt"], # any text files 1388 | } 1389 | OfflineFlow.run(shared) 1390 | ``` 1391 | 1392 | --- 1393 | ## Stage 2: Online Query & Answer 1394 | 1395 | We have 3 nodes: 1396 | 1. `EmbedQuery` – embeds the user’s question. 1397 | 2. `RetrieveDocs` – retrieves top chunk from the index. 1398 | 3. `GenerateAnswer` – calls the LLM with the question + chunk to produce the final answer. 
1399 | 1400 | ```python 1401 | class EmbedQuery(Node): 1402 | def prep(self, shared): 1403 | return shared["question"] 1404 | 1405 | def exec(self, question): 1406 | return get_embedding(question) 1407 | 1408 | def post(self, shared, prep_res, q_emb): 1409 | shared["q_emb"] = q_emb 1410 | 1411 | class RetrieveDocs(Node): 1412 | def prep(self, shared): 1413 | # We'll need the query embedding, plus the offline index/chunks 1414 | return shared["q_emb"], shared["index"], shared["all_chunks"] 1415 | 1416 | def exec(self, inputs): 1417 | q_emb, index, chunks = inputs 1418 | I, D = search_index(index, q_emb, top_k=1) 1419 | best_id = I[0][0] 1420 | relevant_chunk = chunks[best_id] 1421 | return relevant_chunk 1422 | 1423 | def post(self, shared, prep_res, relevant_chunk): 1424 | shared["retrieved_chunk"] = relevant_chunk 1425 | print("Retrieved chunk:", relevant_chunk[:60], "...") 1426 | 1427 | class GenerateAnswer(Node): 1428 | def prep(self, shared): 1429 | return shared["question"], shared["retrieved_chunk"] 1430 | 1431 | def exec(self, inputs): 1432 | question, chunk = inputs 1433 | prompt = f"Question: {question}\nContext: {chunk}\nAnswer:" 1434 | return call_llm(prompt) 1435 | 1436 | def post(self, shared, prep_res, answer): 1437 | shared["answer"] = answer 1438 | print("Answer:", answer) 1439 | 1440 | embed_qnode = EmbedQuery() 1441 | retrieve_node = RetrieveDocs() 1442 | generate_node = GenerateAnswer() 1443 | 1444 | embed_qnode >> retrieve_node >> generate_node 1445 | OnlineFlow = Flow(start=embed_qnode) 1446 | ``` 1447 | 1448 | Usage example: 1449 | 1450 | ```python 1451 | # Suppose we already ran OfflineFlow and have: 1452 | # shared["all_chunks"], shared["index"], etc. 1453 | shared["question"] = "Why do people like cats?" 1454 | 1455 | OnlineFlow.run(shared) 1456 | # final answer in shared["answer"] 1457 | ``` 1458 | 1459 | ================================================ 1460 | File: docs/design_pattern/structure.md 1461 | ================================================ 1462 | --- 1463 | layout: default 1464 | title: "Structured Output" 1465 | parent: "Design Pattern" 1466 | nav_order: 5 1467 | --- 1468 | 1469 | # Structured Output 1470 | 1471 | In many use cases, you may want the LLM to output a specific structure, such as a list or a dictionary with predefined keys. 1472 | 1473 | There are several approaches to achieve a structured output: 1474 | - **Prompting** the LLM to strictly return a defined structure. 1475 | - Using LLMs that natively support **schema enforcement**. 1476 | - **Post-processing** the LLM's response to extract structured content. 1477 | 1478 | In practice, **Prompting** is simple and reliable for modern LLMs. 1479 | 1480 | ### Example Use Cases 1481 | 1482 | - Extracting Key Information 1483 | 1484 | ```yaml 1485 | product: 1486 | name: Widget Pro 1487 | price: 199.99 1488 | description: | 1489 | A high-quality widget designed for professionals. 1490 | Recommended for advanced users. 1491 | ``` 1492 | 1493 | - Summarizing Documents into Bullet Points 1494 | 1495 | ```yaml 1496 | summary: 1497 | - This product is easy to use. 1498 | - It is cost-effective. 1499 | - Suitable for all skill levels. 1500 | ``` 1501 | 1502 | - Generating Configuration Files 1503 | 1504 | ```yaml 1505 | server: 1506 | host: 127.0.0.1 1507 | port: 8080 1508 | ssl: true 1509 | ``` 1510 | 1511 | ## Prompt Engineering 1512 | 1513 | When prompting the LLM to produce **structured** output: 1514 | 1. **Wrap** the structure in code fences (e.g., `yaml`). 1515 | 2. 
**Validate** that all required fields exist (and let the `Node` handle retries). 1516 | 1517 | ### Example Text Summarization 1518 | 1519 | ```python 1520 | class SummarizeNode(Node): 1521 | def exec(self, prep_res): 1522 | # Suppose `prep_res` is the text to summarize. 1523 | prompt = f""" 1524 | Please summarize the following text as YAML, with exactly 3 bullet points 1525 | 1526 | {prep_res} 1527 | 1528 | Now, output: 1529 | ```yaml 1530 | summary: 1531 | - bullet 1 1532 | - bullet 2 1533 | - bullet 3 1534 | ```""" 1535 | response = call_llm(prompt) 1536 | yaml_str = response.split("```yaml")[1].split("```")[0].strip() 1537 | 1538 | import yaml 1539 | structured_result = yaml.safe_load(yaml_str) 1540 | 1541 | assert "summary" in structured_result 1542 | assert isinstance(structured_result["summary"], list) 1543 | 1544 | return structured_result 1545 | ``` 1546 | 1547 | > Besides using `assert` statements, another popular way to validate schemas is [Pydantic](https://github.com/pydantic/pydantic). 1548 | {: .note } 1549 | 1550 | ### Why YAML instead of JSON? 1551 | 1552 | Current LLMs struggle with escaping. YAML is easier for strings since they don't always need quotes. 1553 | 1554 | **In JSON** 1555 | 1556 | ```json 1557 | { 1558 | "dialogue": "Alice said: \"Hello Bob.\\nHow are you?\\nI am good.\"" 1559 | } 1560 | ``` 1561 | 1562 | - Every double quote inside the string must be escaped with `\"`. 1563 | - Each newline in the dialogue must be represented as `\n`. 1564 | 1565 | **In YAML** 1566 | 1567 | ```yaml 1568 | dialogue: | 1569 | Alice said: "Hello Bob. 1570 | How are you? 1571 | I am good." 1572 | ``` 1573 | 1574 | - No need to escape interior quotes—just place the entire text under a block literal (`|`). 1575 | - Newlines are naturally preserved without needing `\n`. 1576 | 1577 | ================================================ 1578 | File: docs/design_pattern/workflow.md 1579 | ================================================ 1580 | --- 1581 | layout: default 1582 | title: "Workflow" 1583 | parent: "Design Pattern" 1584 | nav_order: 2 1585 | --- 1586 | 1587 | # Workflow 1588 | 1589 | Many real-world tasks are too complex for one LLM call. The solution is **Task Decomposition**: decompose them into a [chain](../core_abstraction/flow.md) of multiple Nodes. 1590 | 1591 |
1594 | 1595 | > - You don't want to make each task **too coarse**, because it may be *too complex for one LLM call*. 1596 | > - You don't want to make each task **too granular**, because then *the LLM call doesn't have enough context* and results are *not consistent across nodes*. 1597 | > 1598 | > You usually need multiple *iterations* to find the *sweet spot*. If the task has too many *edge cases*, consider using [Agents](./agent.md). 1599 | {: .best-practice } 1600 | 1601 | ### Example: Article Writing 1602 | 1603 | ```python 1604 | class GenerateOutline(Node): 1605 | def prep(self, shared): return shared["topic"] 1606 | def exec(self, topic): return call_llm(f"Create a detailed outline for an article about {topic}") 1607 | def post(self, shared, prep_res, exec_res): shared["outline"] = exec_res 1608 | 1609 | class WriteSection(Node): 1610 | def prep(self, shared): return shared["outline"] 1611 | def exec(self, outline): return call_llm(f"Write content based on this outline: {outline}") 1612 | def post(self, shared, prep_res, exec_res): shared["draft"] = exec_res 1613 | 1614 | class ReviewAndRefine(Node): 1615 | def prep(self, shared): return shared["draft"] 1616 | def exec(self, draft): return call_llm(f"Review and improve this draft: {draft}") 1617 | def post(self, shared, prep_res, exec_res): shared["final_article"] = exec_res 1618 | 1619 | # Connect nodes 1620 | outline = GenerateOutline() 1621 | write = WriteSection() 1622 | review = ReviewAndRefine() 1623 | 1624 | outline >> write >> review 1625 | 1626 | # Create and run flow 1627 | writing_flow = Flow(start=outline) 1628 | shared = {"topic": "AI Safety"} 1629 | writing_flow.run(shared) 1630 | ``` 1631 | 1632 | For *dynamic cases*, consider using [Agents](./agent.md). 1633 | 1634 | ================================================ 1635 | File: docs/utility_function/llm.md 1636 | ================================================ 1637 | --- 1638 | layout: default 1639 | title: "LLM Wrapper" 1640 | parent: "Utility Function" 1641 | nav_order: 1 1642 | --- 1643 | 1644 | # LLM Wrappers 1645 | 1646 | Check out libraries like [litellm](https://github.com/BerriAI/litellm). 1647 | Here, we provide some minimal example implementations: 1648 | 1649 | 1. OpenAI 1650 | ```python 1651 | def call_llm(prompt): 1652 | from openai import OpenAI 1653 | client = OpenAI(api_key="YOUR_API_KEY_HERE") 1654 | r = client.chat.completions.create( 1655 | model="gpt-4o", 1656 | messages=[{"role": "user", "content": prompt}] 1657 | ) 1658 | return r.choices[0].message.content 1659 | 1660 | # Example usage 1661 | call_llm("How are you?") 1662 | ``` 1663 | > Store the API key in an environment variable like OPENAI_API_KEY for security. 1664 | {: .best-practice } 1665 | 1666 | 2. Claude (Anthropic) 1667 | ```python 1668 | def call_llm(prompt): 1669 | from anthropic import Anthropic 1670 | client = Anthropic(api_key="YOUR_API_KEY_HERE") 1671 | r = client.messages.create( 1672 | model="claude-sonnet-4-0", 1673 | messages=[ 1674 | {"role": "user", "content": prompt} 1675 | ] 1676 | ) 1677 | return r.content[0].text 1678 | ``` 1679 | 1680 | 3. Google (Generative AI Studio / PaLM API) 1681 | ```python 1682 | def call_llm(prompt): 1683 | from google import genai 1684 | client = genai.Client(api_key='GEMINI_API_KEY') 1685 | response = client.models.generate_content( 1686 | model='gemini-2.5-pro', 1687 | contents=prompt 1688 | ) 1689 | return response.text 1690 | ``` 1691 | 1692 | 4. 
Azure (Azure OpenAI) 1693 | ```python 1694 | def call_llm(prompt): 1695 | from openai import AzureOpenAI 1696 | client = AzureOpenAI( 1697 | azure_endpoint="https://.openai.azure.com/", 1698 | api_key="YOUR_API_KEY_HERE", 1699 | api_version="2023-05-15" 1700 | ) 1701 | r = client.chat.completions.create( 1702 | model="", 1703 | messages=[{"role": "user", "content": prompt}] 1704 | ) 1705 | return r.choices[0].message.content 1706 | ``` 1707 | 1708 | 5. Ollama (Local LLM) 1709 | ```python 1710 | def call_llm(prompt): 1711 | from ollama import chat 1712 | response = chat( 1713 | model="llama2", 1714 | messages=[{"role": "user", "content": prompt}] 1715 | ) 1716 | return response.message.content 1717 | ``` 1718 | 1719 | ## Improvements 1720 | Feel free to enhance your `call_llm` function as needed. Here are examples: 1721 | 1722 | - Handle chat history: 1723 | 1724 | ```python 1725 | def call_llm(messages): 1726 | from openai import OpenAI 1727 | client = OpenAI(api_key="YOUR_API_KEY_HERE") 1728 | r = client.chat.completions.create( 1729 | model="gpt-4o", 1730 | messages=messages 1731 | ) 1732 | return r.choices[0].message.content 1733 | ``` 1734 | 1735 | - Add in-memory caching 1736 | 1737 | ```python 1738 | from functools import lru_cache 1739 | 1740 | @lru_cache(maxsize=1000) 1741 | def call_llm(prompt): 1742 | # Your implementation here 1743 | pass 1744 | ``` 1745 | 1746 | > ⚠️ Caching conflicts with Node retries, as retries yield the same result. 1747 | > 1748 | > To address this, you could use cached results only if not retried. 1749 | {: .warning } 1750 | 1751 | 1752 | ```python 1753 | from functools import lru_cache 1754 | 1755 | @lru_cache(maxsize=1000) 1756 | def cached_call(prompt): 1757 | pass 1758 | 1759 | def call_llm(prompt, use_cache): 1760 | if use_cache: 1761 | return cached_call(prompt) 1762 | # Call the underlying function directly 1763 | return cached_call.__wrapped__(prompt) 1764 | 1765 | class SummarizeNode(Node): 1766 | def exec(self, text): 1767 | return call_llm(f"Summarize: {text}", self.cur_retry==0) 1768 | ``` 1769 | 1770 | - Enable logging: 1771 | 1772 | ```python 1773 | def call_llm(prompt): 1774 | import logging 1775 | logging.info(f"Prompt: {prompt}") 1776 | response = ... # Your implementation here 1777 | logging.info(f"Response: {response}") 1778 | return response 1779 | ``` --------------------------------------------------------------------------------