4 |     <meta charset="UTF-8" />
5 |     <link rel="icon" type="image/svg+xml" href="/vite.svg" />
6 |     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7 |     <title>Vite + React + TS</title>
8 |   </head>
9 |   <body>
10 |     <div id="root"></div>
11 |     <script type="module" src="/src/main.tsx"></script>
12 |   </body>
13 | </html>
14 |
--------------------------------------------------------------------------------
/project/tsconfig.node.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ES2022",
4 | "lib": ["ES2023"],
5 | "module": "ESNext",
6 | "skipLibCheck": true,
7 |
8 | /* Bundler mode */
9 | "moduleResolution": "bundler",
10 | "allowImportingTsExtensions": true,
11 | "isolatedModules": true,
12 | "moduleDetection": "force",
13 | "noEmit": true,
14 |
15 | /* Linting */
16 | "strict": true,
17 | "noUnusedLocals": true,
18 | "noUnusedParameters": true,
19 | "noFallthroughCasesInSwitch": true
20 | },
21 | "include": ["vite.config.ts"]
22 | }
23 |
--------------------------------------------------------------------------------
/project/tsconfig.app.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ES2020",
4 | "useDefineForClassFields": true,
5 | "lib": ["ES2020", "DOM", "DOM.Iterable"],
6 | "module": "ESNext",
7 | "skipLibCheck": true,
8 |
9 | /* Bundler mode */
10 | "moduleResolution": "bundler",
11 | "allowImportingTsExtensions": true,
12 | "isolatedModules": true,
13 | "moduleDetection": "force",
14 | "noEmit": true,
15 | "jsx": "react-jsx",
16 |
17 | /* Linting */
18 | "strict": true,
19 | "noUnusedLocals": true,
20 | "noUnusedParameters": true,
21 | "noFallthroughCasesInSwitch": true
22 | },
23 | "include": ["src"]
24 | }
25 |
--------------------------------------------------------------------------------
/project/eslint.config.js:
--------------------------------------------------------------------------------
1 | import js from '@eslint/js';
2 | import globals from 'globals';
3 | import reactHooks from 'eslint-plugin-react-hooks';
4 | import reactRefresh from 'eslint-plugin-react-refresh';
5 | import tseslint from 'typescript-eslint';
6 |
7 | export default tseslint.config(
8 | { ignores: ['dist'] },
9 | {
10 | extends: [js.configs.recommended, ...tseslint.configs.recommended],
11 | files: ['**/*.{ts,tsx}'],
12 | languageOptions: {
13 | ecmaVersion: 2020,
14 | globals: globals.browser,
15 | },
16 | plugins: {
17 | 'react-hooks': reactHooks,
18 | 'react-refresh': reactRefresh,
19 | },
20 | rules: {
21 | ...reactHooks.configs.recommended.rules,
22 | 'react-refresh/only-export-components': [
23 | 'warn',
24 | { allowConstantExport: true },
25 | ],
26 | },
27 | }
28 | );
29 |
--------------------------------------------------------------------------------
/project/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "vite-react-typescript-starter",
3 | "private": true,
4 | "version": "0.0.0",
5 | "type": "module",
6 | "scripts": {
7 | "dev": "vite",
8 | "build": "vite build",
9 | "lint": "eslint .",
10 | "preview": "vite preview"
11 | },
12 | "dependencies": {
13 | "lucide-react": "^0.344.0",
14 | "react": "^18.3.1",
15 | "react-dom": "^18.3.1"
16 | },
17 | "devDependencies": {
18 | "@eslint/js": "^9.9.1",
19 | "@types/react": "^18.3.5",
20 | "@types/react-dom": "^18.3.0",
21 | "@vitejs/plugin-react": "^4.3.1",
22 | "autoprefixer": "^10.4.18",
23 | "eslint": "^9.9.1",
24 | "eslint-plugin-react-hooks": "^5.1.0-rc.0",
25 | "eslint-plugin-react-refresh": "^0.4.11",
26 | "globals": "^15.9.0",
27 | "postcss": "^8.4.35",
28 | "tailwindcss": "^3.4.1",
29 | "typescript": "^5.5.3",
30 | "typescript-eslint": "^8.3.0",
31 | "vite": "^5.4.2"
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Zachary Huang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Dependencies
2 | node_modules/
3 | vendor/
4 | .pnp/
5 | .pnp.js
6 |
7 | # Build outputs
8 | dist/
9 | build/
10 | out/
11 | *.pyc
12 | __pycache__/
13 |
14 | # Environment files
15 | .env
16 | .env.local
17 | .env.*.local
18 | .env.development
19 | .env.test
20 | .env.production
21 |
22 | # IDE - VSCode
23 | .vscode/*
24 | !.vscode/settings.json
25 | !.vscode/tasks.json
26 | !.vscode/launch.json
27 | !.vscode/extensions.json
28 |
29 | # IDE - JetBrains
30 | .idea/
31 | *.iml
32 | *.iws
33 | *.ipr
34 |
35 | # IDE - Eclipse
36 | .project
37 | .classpath
38 | .settings/
39 |
40 | # Logs
41 | logs/
42 | *.log
43 | npm-debug.log*
44 | yarn-debug.log*
45 | yarn-error.log*
46 |
47 | # Operating System
48 | .DS_Store
49 | Thumbs.db
50 | *.swp
51 | *.swo
52 |
53 | # Testing
54 | coverage/
55 | .nyc_output/
56 |
57 | # Temporary files
58 | *.tmp
59 | *.temp
60 | .cache/
61 |
62 | # Compiled files
63 | *.com
64 | *.class
65 | *.dll
66 | *.exe
67 | *.o
68 | *.so
69 |
70 | # Package files
71 | *.7z
72 | *.dmg
73 | *.gz
74 | *.iso
75 | *.jar
76 | *.rar
77 | *.tar
78 | *.zip
79 |
80 | # Database
81 | *.sqlite
82 | *.sqlite3
83 | *.db
84 |
85 | # Optional npm cache directory
86 | .npm
87 |
88 | # Optional eslint cache
89 | .eslintcache
90 |
91 | # Optional REPL history
92 | .node_repl_history
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import logging
4 | from flow import coding_agent_flow
5 |
6 | # Set up logging
7 | logging.basicConfig(
8 | level=logging.INFO,
9 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
10 | handlers=[
11 | logging.StreamHandler(),
12 | logging.FileHandler('coding_agent.log')
13 | ]
14 | )
15 |
16 | logger = logging.getLogger('main')
17 |
18 | def main():
19 | """
20 | Run the coding agent to help with code operations
21 | """
22 | # Parse command-line arguments
23 | parser = argparse.ArgumentParser(description='Coding Agent - AI-powered coding assistant')
24 | parser.add_argument('--query', '-q', type=str, help='User query to process', required=False)
25 | parser.add_argument('--working-dir', '-d', type=str, default=os.path.join(os.getcwd(), "project"),
26 |                         help='Working directory for file operations (default: ./project under the current directory)')
27 | args = parser.parse_args()
28 |
29 | # If no query provided via command line, ask for it
30 | user_query = args.query
31 | if not user_query:
32 | user_query = input("What would you like me to help you with? ")
33 |
34 | # Initialize shared memory
35 | shared = {
36 | "user_query": user_query,
37 | "working_dir": args.working_dir,
38 | "history": [],
39 | "response": None
40 | }
41 |
42 | logger.info(f"Working directory: {args.working_dir}")
43 |
44 | # Run the flow
45 | coding_agent_flow.run(shared)
46 |
47 | if __name__ == "__main__":
48 | main()
--------------------------------------------------------------------------------
/utils/delete_file.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import Tuple
3 |
4 | def delete_file(target_file: str) -> Tuple[str, bool]:
5 | """
6 | Remove a file from the file system.
7 |
8 | Args:
9 | target_file: Path to the file to delete
10 |
11 | Returns:
12 | Tuple of (result message, success status)
13 | """
14 | try:
15 | if not os.path.exists(target_file):
16 | return f"File {target_file} does not exist", False
17 |
18 | os.remove(target_file)
19 | return f"Successfully deleted {target_file}", True
20 |
21 | except Exception as e:
22 | return f"Error deleting file: {str(e)}", False
23 |
24 |
25 | if __name__ == "__main__":
26 | # Test delete_file with a temporary file
27 | temp_file = "temp_delete_test.txt"
28 |
29 | # First create a test file
30 | try:
31 | with open(temp_file, 'w') as f:
32 | f.write("This is a test file for deletion testing.")
33 | print(f"Created test file: {temp_file}")
34 | except Exception as e:
35 | print(f"Error creating test file: {str(e)}")
36 | exit(1)
37 |
38 | # Test if file exists
39 | if os.path.exists(temp_file):
40 | print(f"Test file exists: {temp_file}")
41 | else:
42 |         print("Error: Test file does not exist")
43 | exit(1)
44 |
45 | # Test deleting the file
46 | delete_result, delete_success = delete_file(temp_file)
47 | print(f"Delete result: {delete_result}, success: {delete_success}")
48 |
49 | # Verify the file was deleted
50 | if not os.path.exists(temp_file):
51 | print("File was successfully deleted")
52 | else:
53 | print("Error: File was not deleted")
54 |
55 | # Test deleting a non-existent file
56 | delete_result, delete_success = delete_file("non_existent_file.txt")
57 | print(f"\nDelete non-existent file result: {delete_result}, success: {delete_success}")
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Build Cursor with Cursor
2 |
3 | Cursor is cool! But what if we use it to build an open-source, customizable AI coding agent? We’ll develop a “Cursor Agent” that writes, reviews, and refactors code, right inside Cursor. It’s meta, it’s moddable, and it’s powered by Cursor itself. Let’s build Cursor…with Cursor.
4 |
5 |
12 |
13 | - To install:
14 | ```bash
15 | pip install -r requirements.txt
16 | ```
17 |
18 | - To run the agent:
19 | ```bash
20 | python main.py --query "For Trusted by industry leaders, add two more boxes." --working-dir ./project
21 | ```
22 |
23 | - **How does it work?** The best starting points are the [design doc](docs/design.md) and the [flow code](flow.py).
24 |
25 | - **Note**: This project hasn't been pressure-tested or optimized. We intentionally keep it simple for learning.
26 |
27 | ## I built this Cursor Agent using Cursor itself!
28 |
29 | - I built it using [**Agentic Coding**](https://the-pocket.github.io/PocketFlow/guide.html), the fastest development paradigm, where humans simply [design](docs/design.md) and agents [code](flow.py).
30 |
31 | - The secret weapon is [Pocket Flow](https://github.com/The-Pocket/PocketFlow), a 100-line LLM framework that lets agents (e.g., Cursor AI) build for you.
32 |
33 | - Step-by-step YouTube development tutorial:
34 |
35 |
36 |
--------------------------------------------------------------------------------
/project/src/App.tsx:
--------------------------------------------------------------------------------
144 |             {[
145 | {
146 | quote: "SaasFlow has transformed how we manage our operations. The automation features alone have saved us countless hours.",
147 | author: "Sarah Johnson",
148 | role: "CEO at TechCorp",
149 | image: "https://images.unsplash.com/photo-1494790108377-be9c29b29330?auto=format&fit=crop&w=150&q=80"
150 | },
151 | {
152 | quote: "The analytics capabilities are incredible. We've gained insights that have helped us make better business decisions.",
153 | author: "Michael Chen",
154 | role: "CTO at InnovateLabs",
155 | image: "https://images.unsplash.com/photo-1472099645785-5658abf4ff4e?auto=format&fit=crop&w=150&q=80"
156 | }
157 | ].map((testimonial, index) => (
158 |               <div key={index}>
159 |                 <p>{testimonial.quote}</p>
160 |
161 |                 <div>
162 |                   {/* ... avatar image markup (original lines 162-165) lost in extraction ... */}
166 |                   <div>
167 |                     <p>{testimonial.author}</p>
168 |                     <p>{testimonial.role}</p>
169 |                   </div>
170 |                 </div>
171 |               </div>
172 | ))}
173 |
174 |
175 |
176 |
177 | {/* Footer */}
178 |
215 |
216 | );
217 | }
218 |
219 | export default App;
--------------------------------------------------------------------------------
/docs/design.md:
--------------------------------------------------------------------------------
1 | # Design Doc: Coding Agent
2 |
3 | > Please DON'T remove notes for AI
4 |
5 | ## Requirements
6 |
7 | > Notes for AI: Keep it simple and clear.
8 | > If the requirements are abstract, write concrete user stories
9 |
10 | Implement a Coding Agent, based on the following cursor instruction:
11 |
12 | ```
13 | API Parameters and Tool Usage
14 | ===========================
15 |
16 | 1. File Operations:
17 | - read_file:
18 | * target_file: Path to the file (relative or absolute)
19 | * explanation: One sentence explaining the purpose
20 | * (Note: Will automatically read the entire file)
21 |
22 | - edit_file:
23 | * target_file: Path to the file to modify
24 | * instructions: Clear, single-sentence description of the edit
25 | * code_edit: The code changes with context, following these rules:
26 | - Use "// ... existing code ..." to represent unchanged code between edits
27 | - Include sufficient context around the changes to resolve ambiguity
28 | - Minimize repeating unchanged code
29 | - Never omit code without using the "// ... existing code ..." marker
30 | - No need to specify line numbers - the context helps locate the changes
31 | Example:
32 | ```
33 | // ... existing code ...
34 | function newEdit() {
35 | // new code here
36 | }
37 | // ... existing code ...
38 | ```
39 |
40 | - delete_file:
41 | * target_file: Path to the file to delete
42 | * explanation: Purpose of the deletion
43 |
44 | 2. Search Operations:
45 | - grep_search:
46 | * query: Exact text or regex pattern to find
47 | * case_sensitive: Optional boolean
48 | * include_pattern: Optional file type filter (e.g. "*.ts")
49 | * exclude_pattern: Optional files to exclude
50 | * explanation: Purpose of the search
51 | Note: Results capped at 50 matches
52 |
53 | 3. Directory Operations:
54 | - list_dir:
55 | * relative_workspace_path: Path to list contents of
56 | * explanation: Purpose of listing
57 |
58 | Important Notes:
59 | - All file paths can be relative
60 | - Explanations should be clear and concise
61 | - Tool calls must include all required parameters
62 | - Optional parameters should only be included when necessary
63 | - Use exact values provided by the user when available
64 | - File search results are limited to 10 results
65 | ```
66 |
67 | In addition, we want a code edit agent that, given the context, updates the file.
68 |
69 | Note: For educational purposes, the instruction is a simplification of Cursor. Specifically:
70 | 1. For read_file, Cursor AI reads in chunks specified by line numbers, 250 lines at maximum.
71 |    Reading in chunks is good practice for avoiding very large files.
72 | However, here we read the entire file directly.
73 | 2. For search, Cursor AI also supports codebase_search (embedding) and file_search (fuzzy file name).
74 | Here, we only consider grep_search.
75 | 3. Cursor AI also supports run_terminal_cmd, web_search, diff_history.
76 | Here, we exclude these actions.
77 |
78 | ## Flow Design
79 |
80 | > Notes for AI:
81 | > 1. Consider the design patterns of agent, map-reduce, rag, and workflow. Apply them if they fit.
82 | > 2. Present a concise, high-level description of the workflow.
83 |
84 | ### Applicable Design Pattern
85 |
86 | 1. Main Decision Agent
87 | - **Context**: User input, system context, and previous action results
88 | - **Action Space**:
89 | - `read_file`: {target_file, explanation}
90 | - `edit_file`: {target_file, instructions, code_edit}
91 | - `delete_file`: {target_file, explanation}
92 | - `grep_search`: {query, case_sensitive, include_pattern, exclude_pattern, explanation}
93 | - `list_dir`: {relative_workspace_path, explanation}
94 | - `finish`: Return final response to user
95 | - **Flow**:
96 | 1. Parse user request and examine current state
97 | 2. Match request to available tools
98 | 3. Select tool and prepare parameters
99 | 4. Run tool or call Edit File Agent
100 | 5. Analyze results and decide next step (another tool or finish)
101 | 6. When complete, format final response
102 |
103 | 2. Edit File Agent
104 | - **Context**: File path, content, and edit instructions
105 | - **Internal Flow**:
106 | 1. **Read File Action**:
107 | - Reads target file to understand full context
108 | - Parameters: {target_file, explanation="Reading for edit analysis"}
109 | - Provides complete code structure for analysis
110 |
111 | 2. **Analyze and Plan Changes Node**:
112 | - Reviews edit instructions from Main Agent
113 | - Outputs a list of specific edits in format:
114 | ```
115 | [
116 | {
117 | start_line: int, // First line to replace (1-indexed)
118 | end_line: int, // Last line to replace (1-indexed)
119 | replacement: str // New code
120 | },
121 | ...
122 | ]
123 | ```
124 |
125 | 3. **Apply Changes Batch Node**:
126 | - Processes each edit in the plan
127 | - Sorts edits in **descending order by start_line** (from bottom to top of file)
128 | - This ensures that line numbers remain valid for all edits since changes to later lines won't affect the position of earlier lines
129 |       - Applies edits in correct order to handle overlapping changes (see the sketch below)
130 |
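To make the bottom-up strategy concrete, here is a minimal illustrative sketch (not the actual node code):

```python
def apply_edits(lines: list[str], edits: list[dict]) -> list[str]:
    # Apply bottom-up: sorting by start_line in descending order lets an
    # edit change the file's length without invalidating the line numbers
    # of edits that target earlier (higher-up) lines.
    for edit in sorted(edits, key=lambda e: e["start_line"], reverse=True):
        start, end = edit["start_line"] - 1, edit["end_line"]  # 1-indexed, inclusive
        lines[start:end] = edit["replacement"].splitlines()
    return lines
```
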
131 | ### Flow High-level Design
132 |
133 | ```mermaid
134 | flowchart TD
135 | userRequest[User Request] --> mainAgent[Main Decision Agent]
136 |
137 | mainAgent -->|read_file| readFile[Read File Action]
138 | mainAgent -->|edit_file| editAgent[Edit File Agent]
139 | mainAgent -->|delete_file| deleteFile[Delete File Action]
140 | mainAgent -->|grep_search| grepSearch[Grep Search Action]
141 | mainAgent -->|list_dir| listDir[List Directory Action with Tree Viz]
142 |
143 | readFile --> mainAgent
144 | editAgent --> mainAgent
145 | deleteFile --> mainAgent
146 | grepSearch --> mainAgent
147 | listDir --> mainAgent
148 |
149 | mainAgent -->|done| formatResponse[Format Response]
150 | formatResponse --> userResponse[Response to User]
151 |
152 | %% Edit File Agent subflow
153 | subgraph editAgent[Edit File Agent]
154 | readTarget[Read File Action] --> analyzeAndPlan[Analyze and Plan Changes]
155 | analyzeAndPlan --> applyChanges[Apply Changes Batch]
156 | end
157 | ```
158 |
159 | ## Utility Functions
160 |
161 | > Notes for AI:
162 | > 1. Understand the utility function definition thoroughly by reviewing the doc.
163 | > 2. Include only the necessary utility functions, based on nodes in the flow.
164 |
165 | **IMPORTANT**: All file and directory paths in utility functions should be interpreted relative to the `working_dir` provided in the shared memory. Utilities should construct absolute paths by joining `working_dir` with the relative paths they receive as parameters.
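
In code, this convention is a single join, exactly as the action nodes in `flow.py` do:

```python
full_path = os.path.join(working_dir, target_file) if working_dir else target_file
```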
166 |
167 | 1. **Call LLM** (`utils/call_llm.py`)
168 | - Makes API calls to language model services
169 | - Input: prompt/messages
170 | - Output: LLM response text
171 |
172 | 2. **File Operations**
173 | - **Read File** (`utils/read_file.py`)
174 | - Reads content from specified files
175 | - Input: target_file
176 | - Output: file content, success status
177 |
178 | - **Insert File** (`utils/insert_file.py`)
179 | - Writes or inserts content to a target file
180 | - Input: target_file, content, line_number
181 | - Output: result message, success status
182 |
183 | - **Remove File** (`utils/remove_file.py`)
184 | - Removes content from a file based on line numbers
185 | - Input: target_file, start_line (optional), end_line (optional)
186 | - Output: result message, success status
187 |
188 | - **Delete File** (`utils/delete_file.py`)
189 | - Deletes a file from the file system
190 | - Input: target_file
191 | - Output: result message, success status
192 |
193 | - **Replace File** (`utils/replace_file.py`)
194 | - Replaces content in a file based on line numbers
195 | - Input: target_file, start_line, end_line, new_content
196 | - Output: result message, success status
197 |
198 | 3. **Search Operations** (`utils/search_ops.py`)
199 | - **Grep Search**
200 | - Searches through files for specific patterns using ripgrep-like functionality
201 | - Input: query, case_sensitive (optional), include_pattern (optional), exclude_pattern (optional), working_dir (optional)
202 | - Output: list of matches (file path, line number, content), success status
203 |
204 | 4. **Directory Operations** (`utils/dir_ops.py`)
205 | - **List Directory**
206 | - Lists contents of a directory with a tree visualization
207 | - Input: relative_workspace_path
208 | - Output: success status, tree visualization string
209 |
210 | With these utility functions, we can implement the nodes defined in our flow design to create a robust coding agent that can read, modify, search, and navigate through codebase files.
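
As one concrete illustration of the shared `(message, success)` return convention, here is a minimal sketch of what `insert_file` might look like (illustrative only; the real implementation may differ):

```python
import os
from typing import Optional, Tuple

def insert_file(target_file: str, content: str,
                line_number: Optional[int] = None) -> Tuple[str, bool]:
    """Insert content before the given 1-based line, or append if None."""
    try:
        lines = []
        if os.path.exists(target_file):
            with open(target_file, "r", encoding="utf-8") as f:
                lines = f.readlines()
        new_lines = content.splitlines(keepends=True)
        if new_lines and not new_lines[-1].endswith("\n"):
            new_lines[-1] += "\n"
        idx = len(lines) if line_number is None else max(0, line_number - 1)
        lines[idx:idx] = new_lines
        with open(target_file, "w", encoding="utf-8") as f:
            f.writelines(lines)
        return f"Successfully inserted content into {target_file}", True
    except Exception as e:
        return f"Error inserting content: {e}", False
```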
211 |
212 | ## Node Design
213 |
214 | ### Shared Memory
215 |
216 | The shared memory structure, kept deliberately simple:
217 |
218 | ```python
219 | shared = {
220 | # User's original query
221 | "user_query": str,
222 |
223 | # Current working directory - all file operations are relative to this path
224 | "working_dir": str, # IMPORTANT: All file paths in operations are interpreted relative to this directory
225 |
226 | # Action history - stores all actions and their results
227 | "history": [
228 | {
229 | "tool": str, # Tool name (e.g., "read_file")
230 | "reason": str, # Brief explanation of why this tool was called
231 | "params": dict, # Parameters used for the tool
232 | "result": any, # Result returned by the tool
233 | "timestamp": str # When the action was performed
234 | }
235 | ],
236 |
237 | # For edit operations (only used during edits)
238 | "edit_operations": [
239 | {
240 | "start_line": int,
241 | "end_line": int,
242 | "replacement": str
243 | }
244 | ],
245 |
246 | # Final response to return to user
247 | "response": str
248 | }
249 | ```
250 |
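For example, after a successful read_file action, the most recent history entry might look like this (illustrative values):

```python
{
    "tool": "read_file",
    "reason": "Need to inspect the testimonials section",
    "params": {"target_file": "src/App.tsx"},
    "result": {"success": True, "content": "1: import React ..."},
    "timestamp": "2024-01-01T12:00:00",
}
```
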
251 | ### Node Steps
252 |
253 | 1. Main Decision Agent Node
254 | - **Purpose**: Interprets user requests and decides which tool to use
255 | - **Type**: Regular Node
256 | - **Steps**:
257 | - **prep**:
258 | - Read `shared["user_query"]` and `shared["history"]`
259 | - Return user query and relevant history
260 | - **exec**:
261 | - Call LLM to decide which tool to use and prepare parameters
262 | - Return tool name, reason for using it, and parameters
263 | - **post**:
264 | - Add new action to `shared["history"]` with tool, reason, and parameters
265 | - Return action string for the selected tool
266 |
267 | 2. Read File Action Node
268 | - **Purpose**: Reads specified file content
269 | - **Type**: Regular Node
270 | - **Steps**:
271 | - **prep**:
272 |       - Get file path from the last history entry: `shared["history"][-1]["params"]`
273 | - Ensure path is interpreted relative to `shared["working_dir"]`
274 | - Return file path
275 | - **exec**:
276 | - Call read_file utility with the path
277 | - Return file content
278 | - **post**:
279 | - Update last history entry with result
280 | - Return "decide_next"
281 |
282 | 3. Grep Search Action Node
283 | - **Purpose**: Searches for patterns in files
284 | - **Type**: Regular Node
285 | - **Steps**:
286 | - **prep**:
287 |       - Get search parameters from the last history entry: `shared["history"][-1]["params"]`
288 | - Ensure any path patterns are interpreted relative to `shared["working_dir"]`
289 | - Return search parameters
290 | - **exec**:
291 | - Call grep_search utility
292 | - Return search results
293 | - **post**:
294 | - Update last history entry with results
295 | - Return "decide_next"
296 |
297 | 4. List Directory Action Node
298 | - **Purpose**: Lists directory contents with tree visualization
299 | - **Type**: Regular Node
300 | - **Steps**:
301 | - **prep**:
302 |       - Get directory path from the last history entry: `shared["history"][-1]["params"]`
303 | - Ensure path is interpreted relative to `shared["working_dir"]`
304 | - Return path
305 | - **exec**:
306 | - Call list_dir utility which now returns (success, tree_str)
307 | - Return success status and tree visualization string
308 | - **post**:
309 | - Update last history entry with the result:
310 | ```python
311 | history_entry = shared["history"][-1]
312 | success, tree_str = exec_res
313 | history_entry["result"] = {
314 | "success": success,
315 | "tree_visualization": tree_str
316 | }
317 | ```
318 | - Return "decide_next"
319 |
320 | 5. Delete File Action Node
321 | - **Purpose**: Deletes a file
322 | - **Type**: Regular Node
323 | - **Steps**:
324 | - **prep**:
325 |       - Get file path from the last history entry: `shared["history"][-1]["params"]`
326 | - Ensure path is interpreted relative to `shared["working_dir"]`
327 | - Return file path
328 | - **exec**:
329 | - Call delete_file utility
330 | - Return success status
331 | - **post**:
332 | - Update last history entry with result
333 | - Return "decide_next"
334 |
335 | 6. Read Target File Node (Edit Agent)
336 | - **Purpose**: Reads file for editing (first step in edit process)
337 | - **Type**: Regular Node
338 | - **Steps**:
339 | - **prep**:
340 |       - Get file path from the last history entry (the edit_file action): `shared["history"][-1]["params"]`
341 | - Ensure path is interpreted relative to `shared["working_dir"]`
342 | - Return file path
343 | - **exec**:
344 | - Call read_file utility to read entire file
345 | - Return file content
346 | - **post**:
347 | - Store file content in the history entry
348 | - Return "analyze_plan"
349 |
350 | 7. Analyze and Plan Changes Node (Edit Agent)
351 | - **Purpose**: Plans specific edit operations
352 | - **Type**: Regular Node
353 | - **Steps**:
354 | - **prep**:
355 | - Get file content from history
356 | - Get edit instructions and code_edit from history params
357 | - Return file content, instructions, and code_edit
358 | - **exec**:
359 | - Call LLM to analyze and create edit plan
360 | - Return structured list of edits
361 | - **post**:
362 | - Store edits in `shared["edit_operations"]`
363 | - Return "apply_changes"
364 |
365 | 8. Apply Changes Batch Node (Edit Agent)
366 | - **Purpose**: Applies edits to file
367 | - **Type**: BatchNode
368 | - **Steps**:
369 | - **prep**:
370 | - Read `shared["edit_operations"]`
371 | - Sort in descending order by start_line
372 | - Return sorted edit operations
373 | - **exec**:
374 | - For each edit operation, call replace_file utility with:
375 | - target_file (from history)
376 | - start_line and end_line (from edit operation)
377 | - replacement (from edit operation)
378 | - Return success status for each operation
379 | - **post**:
380 | - Update edit result in history
381 | - Clear `shared["edit_operations"]` after processing
382 | - Return "decide_next"
383 |
384 | 9. Format Response Node
385 | - **Purpose**: Creates response for user
386 | - **Type**: Regular Node
387 | - **Steps**:
388 | - **prep**:
389 | - Read `shared["history"]`
390 | - Return history
391 | - **exec**:
392 | - Call LLM to generate response
393 | - Return formatted response
394 | - **post**:
395 | - Store response in `shared["response"]`
396 | - Return "done"
--------------------------------------------------------------------------------
/blog.md:
--------------------------------------------------------------------------------
1 | # Building Cursor with Cursor: A Step-by-Step Guide to Creating Your Own AI Coding Agent
2 |
3 | 
4 |
5 | Have you ever wished you could customize your AI coding assistant to work exactly the way you want? What if you could build your own version of Cursor—an AI-powered code editor—using Cursor itself? That's exactly what we're doing in this tutorial: creating a customizable, open-source AI coding agent that operates right within Cursor.
6 |
7 | In this step-by-step guide, we'll dive deep into the code to show you how to build a powerful AI assistant that can:
8 |
9 | - Navigate and understand codebases
10 | - Implement code changes based on natural language instructions
11 | - Make intelligent decisions about which files to inspect or modify
12 | - Learn from its own history of operations
13 |
14 | Let's dive in!
15 |
16 | ## Table of Contents
17 |
18 | 1. [Understanding the Architecture](#1-understanding-the-architecture)
19 | 2. [Setting Up Your Environment](#2-setting-up-your-environment)
20 | 3. [The Core: Building with Pocket Flow](#3-the-core-building-with-pocket-flow)
21 | 4. [Implementing Decision Making](#4-implementing-decision-making)
22 | 5. [File Operations: Reading and Writing Code](#5-file-operations-reading-and-writing-code)
23 | 6. [Code Analysis and Planning](#6-code-analysis-and-planning)
24 | 7. [Applying Code Changes](#7-applying-code-changes)
25 | 8. [Running Your Agent](#8-running-your-agent)
26 | 9. [Advanced: Customizing Your Agent](#9-advanced-customizing-your-agent)
27 | 10. [Conclusion and Next Steps](#10-conclusion-and-next-steps)
28 |
29 |
30 | ## 1. Understanding the Architecture
31 |
32 | Before we write a single line of code, let's understand the architecture of our Cursor Agent. The system is built on a flow-based architecture using [Pocket Flow](https://github.com/The-Pocket/PocketFlow), a minimalist 100-line LLM framework that enables agentic development.
33 |
34 | Here's a high-level overview of our architecture:
35 |
36 | ```mermaid
37 | flowchart TD
38 | A[MainDecisionAgent] -->|read_file| B[ReadFileAction]
39 | A -->|grep_search| C[GrepSearchAction]
40 | A -->|list_dir| D[ListDirAction]
41 | A -->|edit_file| E[EditFileNode]
42 | A -->|delete_file| F[DeleteFileAction]
43 | A -->|finish| G[FormatResponseNode]
44 |
45 | E --> H[AnalyzeAndPlanNode]
46 | H --> I[ApplyChangesNode]
47 | I --> A
48 | ```
49 |
50 | This architecture separates concerns into distinct nodes:
51 | - Decision making (what operation to perform next)
52 | - File operations (reading, writing, and searching)
53 | - Code analysis (understanding and planning changes)
54 | - Code modification (safely applying changes)
55 |
56 |
57 | ## 2. Setting Up Your Environment
58 |
59 | Let's get our environment ready:
60 |
61 | ```bash
62 | # Clone the repository
63 | git clone https://github.com/The-Pocket/Tutorial-Cursor
64 | cd Tutorial-Cursor
65 |
66 | # Install dependencies
67 | pip install -r requirements.txt
68 | ```
69 |
70 |
71 | ## 3. The Core: Building with Pocket Flow
72 |
73 | Our agent is built on the Pocket Flow framework, which provides three core abstractions:
74 |
75 | 1. **Nodes**: Individual units of computation that perform specific tasks
76 | 2. **Flows**: Directed graphs of nodes that define the program's execution path
77 | 3. **Shared Store**: A dictionary that all nodes can access to share data
78 |
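To make these three abstractions concrete, here is a minimal one-node flow (a hypothetical `GreetNode`, using the same `prep`/`exec`/`post` interface the agent's nodes follow):

```python
from pocketflow import Flow, Node

class GreetNode(Node):
    def prep(self, shared):
        # Gather this node's inputs from the shared store
        return shared.get("name", "world")

    def exec(self, name):
        # Pure computation: no shared-store access here
        return f"Hello, {name}!"

    def post(self, shared, prep_res, exec_res):
        # Write results back; returning an action string here is how a
        # node selects the next node in a larger flow
        shared["greeting"] = exec_res

shared = {"name": "Cursor"}
Flow(start=GreetNode()).run(shared)
print(shared["greeting"])  # Hello, Cursor!
```
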
79 | Let's look at the core imports and setup:
80 |
81 | ```python
82 | # flow.py
83 | from pocketflow import Node, Flow, BatchNode
84 | import os
85 | import yaml
86 | import logging
87 | from datetime import datetime
88 | from typing import List, Dict, Any, Tuple
89 |
90 | # Import utility functions
91 | from utils.call_llm import call_llm
92 | from utils.read_file import read_file
93 | from utils.delete_file import delete_file
94 | from utils.replace_file import replace_file
95 | from utils.search_ops import grep_search
96 | from utils.dir_ops import list_dir
97 | ```
98 |
99 | This imports the core classes from Pocket Flow and our custom utility functions that handle file operations and LLM calls.
100 |
101 |
102 | ## 4. Implementing Decision Making
103 |
104 | At the heart of our agent is the `MainDecisionAgent`, which determines what action to take based on the user's request and the current state of the system.
105 |
106 | Here's how it's implemented:
107 |
108 | ```python
109 | class MainDecisionAgent(Node):
110 | def prep(self, shared: Dict[str, Any]) -> Tuple[str, List[Dict[str, Any]]]:
111 | # Get user query and history
112 | user_query = shared.get("user_query", "")
113 | history = shared.get("history", [])
114 |
115 | return user_query, history
116 |
117 | def exec(self, inputs: Tuple[str, List[Dict[str, Any]]]) -> Dict[str, Any]:
118 | user_query, history = inputs
119 |
120 | # Format history for context
121 | history_str = format_history_summary(history)
122 |
123 | # Create prompt for the LLM
124 | prompt = f"""You are a coding assistant that helps modify and navigate code. Given the following request,
125 | decide which tool to use from the available options.
126 |
127 | User request: {user_query}
128 |
129 | Here are the actions you performed:
130 | {history_str}
131 |
132 | Available tools:
133 | 1. read_file: Read content from a file
134 | - Parameters: target_file (path)
135 |
136 | 2. edit_file: Make changes to a file
137 | - Parameters: target_file (path), instructions, code_edit
138 |
139 | [... more tool descriptions ...]
140 |
141 | Respond with a YAML object containing:
142 | ```yaml
143 | tool: one of: read_file, edit_file, delete_file, grep_search, list_dir, finish
144 | reason: |
145 | detailed explanation of why you chose this tool and what you intend to do
146 | params:
147 | # parameters specific to the chosen tool
148 | ```"""
149 |
150 | # Call LLM to decide action
151 | response = call_llm(prompt)
152 |
153 | # Parse YAML response
154 | yaml_content = extract_yaml_from_response(response)
155 | decision = yaml.safe_load(yaml_content)
156 |
157 | # Validate the required fields
158 | assert "tool" in decision, "Tool name is missing"
159 | assert "reason" in decision, "Reason is missing"
160 |
161 | return decision
162 |
163 | def post(self, shared: Dict[str, Any], prep_res: Any, exec_res: Dict[str, Any]) -> str:
164 | # Add the decision to history
165 | shared.setdefault("history", []).append({
166 | "tool": exec_res["tool"],
167 | "reason": exec_res["reason"],
168 | "params": exec_res.get("params", {}),
169 | "timestamp": datetime.now().isoformat()
170 | })
171 |
172 | # Return the name of the tool to determine which node to execute next
173 | return exec_res["tool"]
174 | ```
175 |
176 | This node:
177 | 1. Gathers the user's query and the history of previous actions
178 | 2. Formats a prompt for the LLM with all available tools
179 | 3. Calls the LLM to decide what action to take
180 | 4. Parses the response with `extract_yaml_from_response` (sketched below) and validates it
181 | 5. Adds the decision to the history
182 | 6. Returns the name of the selected tool, which determines the next node to execute
183 |
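The helper `extract_yaml_from_response` is not shown above; a minimal sketch, mirroring the inline fence-parsing that `flow.py` actually performs, looks like this:

```python
def extract_yaml_from_response(response: str) -> str:
    # Prefer fenced yaml/yml code blocks, fall back to a generic code
    # fence, and finally to the raw response text
    for fence in ("```yaml", "```yml"):
        if fence in response:
            return response.split(fence)[1].split("```")[0].strip()
    if "```" in response:
        return response.split("```")[1].strip()
    return response.strip()
```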
184 |
185 | ## 5. File Operations: Reading and Writing Code
186 |
187 | Let's look at how our agent reads files, which is a fundamental operation:
188 |
189 | ```python
190 | class ReadFileAction(Node):
191 | def prep(self, shared: Dict[str, Any]) -> str:
192 | # Get parameters from the last history entry
193 | history = shared.get("history", [])
194 | last_action = history[-1]
195 | file_path = last_action["params"].get("target_file")
196 |
197 | # Ensure path is relative to working directory
198 | working_dir = shared.get("working_dir", "")
199 | full_path = os.path.join(working_dir, file_path) if working_dir else file_path
200 |
201 | return full_path
202 |
203 | def exec(self, file_path: str) -> Tuple[str, bool]:
204 | # Call read_file utility which returns a tuple of (content, success)
205 | return read_file(file_path)
206 |
207 | def post(self, shared: Dict[str, Any], prep_res: str, exec_res: Tuple[str, bool]) -> str:
208 | # Unpack the tuple returned by read_file()
209 | content, success = exec_res
210 |
211 | # Update the result in the last history entry
212 | history = shared.get("history", [])
213 | if history:
214 | history[-1]["result"] = {
215 | "success": success,
216 | "content": content
217 | }
218 |
219 | return "decision" # Go back to the decision node
220 | ```
221 |
222 | The `read_file` utility function itself is implemented like this:
223 |
224 | ```python
225 | def read_file(target_file: str) -> Tuple[str, bool]:
226 | """
227 |     Read the entire content of a file.
228 | Prepends 1-based line numbers to each line in the output.
229 |
230 | Returns:
231 | Tuple of (file content with line numbers, success status)
232 | """
233 | try:
234 | if not os.path.exists(target_file):
235 | return f"Error: File {target_file} does not exist", False
236 |
237 | with open(target_file, 'r', encoding='utf-8') as f:
238 | lines = f.readlines()
239 | # Add line numbers to each line
240 | numbered_lines = [f"{i+1}: {line}" for i, line in enumerate(lines)]
241 | return ''.join(numbered_lines), True
242 |
243 | except Exception as e:
244 | return f"Error reading file: {str(e)}", False
245 | ```
246 |
247 | This provides a clean, line-numbered view of the file content that makes it easier for the LLM to reference specific lines in its analysis.
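
For example (hypothetical file and output):

```python
content, success = read_file("project/src/App.tsx")
if success:
    print(content.splitlines()[0])  # e.g. "1: import React from 'react';"
```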
248 |
249 |
250 | ## 6. Code Analysis and Planning
251 |
252 | When the agent needs to modify code, it first analyzes the code and plans the changes using `AnalyzeAndPlanNode`:
253 |
254 | ```python
255 | class AnalyzeAndPlanNode(Node):
256 | def prep(self, shared: Dict[str, Any]) -> Dict[str, Any]:
257 | # Get history
258 | history = shared.get("history", [])
259 | last_action = history[-1]
260 |
261 | # Get file content and edit instructions
262 | file_content = last_action.get("file_content")
263 | instructions = last_action["params"].get("instructions")
264 | code_edit = last_action["params"].get("code_edit")
265 |
266 | return {
267 | "file_content": file_content,
268 | "instructions": instructions,
269 | "code_edit": code_edit
270 | }
271 |
272 |     def exec(self, params: Dict[str, Any]) -> Dict[str, Any]:
273 | file_content = params["file_content"]
274 | instructions = params["instructions"]
275 | code_edit = params["code_edit"]
276 |
277 | # Generate a prompt for the LLM to analyze the edit
278 | prompt = f"""
279 | As a code editing assistant, I need to convert the following code edit instruction
280 | and code edit pattern into specific edit operations (start_line, end_line, replacement).
281 |
282 | FILE CONTENT:
283 | {file_content}
284 |
285 | EDIT INSTRUCTIONS:
286 | {instructions}
287 |
288 | CODE EDIT PATTERN (markers like "// ... existing code ..." indicate unchanged code):
289 | {code_edit}
290 |
291 | Analyze the file content and the edit pattern to determine exactly where changes should be made.
292 | Return a YAML object with your reasoning and an array of edit operations:
293 |
294 | ```yaml
295 | reasoning: |
296 | Explain your thinking process about how you're interpreting the edit pattern.
297 |
298 | operations:
299 | - start_line: 10
300 | end_line: 15
301 | replacement: |
302 | # New code here
303 | ```"""
304 |
305 | # Call LLM to analyze the edit
306 | response = call_llm(prompt)
307 |
308 |         # Parse the response and extract the edit plan
309 |         yaml_content = extract_yaml_from_response(response)
310 |         result = yaml.safe_load(yaml_content)
311 |
312 |         # exec() has no access to the shared store, so return the whole
313 |         # parsed plan; this node's post() (not shown here) stores the
314 |         # reasoning in shared["edit_reasoning"] and the operations in
315 |         # shared["edit_operations"] before routing to the apply step
316 |         return result
317 | ```
318 |
319 | This node:
320 | 1. Extracts the file content, instructions, and code edit pattern from the history
321 | 2. Creates a prompt for the LLM to analyze the edit
322 | 3. Calls the LLM to determine the exact line numbers and replacement text
323 | 4. Parses the response to extract the edit operations
324 | 5. Returns the parsed plan from `exec`
325 | 6. Stores the reasoning and the edit operations in shared memory in its `post` step (not shown above)
326 |
327 |
328 | ## 7. Applying Code Changes
329 |
330 | Once the agent has planned the changes, it applies them using `ApplyChangesNode`:
331 |
332 | ```python
333 | class ApplyChangesNode(BatchNode):
334 | def prep(self, shared: Dict[str, Any]) -> List[Dict[str, Any]]:
335 | # Get edit operations
336 | edit_operations = shared.get("edit_operations", [])
337 |
338 | # Sort edit operations in descending order by start_line
339 | # This ensures that line numbers remain valid as we edit from bottom to top
340 | sorted_ops = sorted(edit_operations, key=lambda op: op["start_line"], reverse=True)
341 |
342 | # Get target file from history
343 | history = shared.get("history", [])
344 | last_action = history[-1]
345 | target_file = last_action["params"].get("target_file")
346 |
347 | # Ensure path is relative to working directory
348 | working_dir = shared.get("working_dir", "")
349 | full_path = os.path.join(working_dir, target_file) if working_dir else target_file
350 |
351 | # Attach file path to each operation
352 | for op in sorted_ops:
353 | op["target_file"] = full_path
354 |
355 | return sorted_ops
356 |
357 | def exec(self, op: Dict[str, Any]) -> Tuple[bool, str]:
358 | # Call replace_file utility to replace content
359 | return replace_file(
360 | target_file=op["target_file"],
361 | start_line=op["start_line"],
362 | end_line=op["end_line"],
363 | content=op["replacement"]
364 | )
365 |
366 | def post(self, shared: Dict[str, Any], prep_res: List[Dict[str, Any]], exec_res_list: List[Tuple[bool, str]]) -> str:
367 | # Check if all operations were successful
368 | all_successful = all(success for success, _ in exec_res_list)
369 |
370 | # Update edit result in history
371 | history = shared.get("history", [])
372 | if history:
373 | history[-1]["result"] = {
374 | "success": all_successful,
375 | "operations": len(exec_res_list),
376 | "details": [{"success": s, "message": m} for s, m in exec_res_list],
377 | "reasoning": shared.get("edit_reasoning", "")
378 | }
379 |
380 | return "decision" # Go back to the decision node
381 | ```
382 |
383 | This node is a `BatchNode`, which allows it to process multiple operations in a single run. It:
384 | 1. Gets the edit operations from shared memory
385 | 2. Sorts them in descending order by start line to ensure edits remain valid (illustrated below)
386 | 3. Attaches the target file path to each operation
387 | 4. Executes each operation using the `replace_file` utility
388 | 5. Updates the history with the results
389 | 6. Returns to the decision node
390 |
391 | The `replace_file` utility works by combining `remove_file` and `insert_file`:
392 |
393 | ```python
394 | def replace_file(target_file: str, start_line: int, end_line: int, content: str) -> Tuple[str, bool]:
395 | try:
396 | # First, remove the specified lines
397 | remove_result, remove_success = remove_file(target_file, start_line, end_line)
398 |
399 | if not remove_success:
400 | return f"Error during remove step: {remove_result}", False
401 |
402 | # Then, insert the new content at the start line
403 | insert_result, insert_success = insert_file(target_file, content, start_line)
404 |
405 | if not insert_success:
406 | return f"Error during insert step: {insert_result}", False
407 |
408 | return f"Successfully replaced lines {start_line} to {end_line}", True
409 |
410 | except Exception as e:
411 | return f"Error replacing content: {str(e)}", False
412 | ```
413 |
414 |
415 | ## 8. Running Your Agent
416 |
417 | Now that we've implemented all the key components, let's put it all together in our `main.py`:
418 |
419 | ```python
420 | import os
421 | import argparse
422 | import logging
423 | from flow import coding_agent_flow
424 |
425 | def main():
426 | # Parse command-line arguments
427 | parser = argparse.ArgumentParser(description='Coding Agent - AI-powered coding assistant')
428 | parser.add_argument('--query', '-q', type=str, help='User query to process', required=False)
429 | parser.add_argument('--working-dir', '-d', type=str, default=os.path.join(os.getcwd(), "project"),
430 | help='Working directory for file operations')
431 | args = parser.parse_args()
432 |
433 | # If no query provided via command line, ask for it
434 | user_query = args.query
435 | if not user_query:
436 | user_query = input("What would you like me to help you with? ")
437 |
438 | # Initialize shared memory
439 | shared = {
440 | "user_query": user_query,
441 | "working_dir": args.working_dir,
442 | "history": [],
443 | "response": None
444 | }
445 |
446 | # Run the flow
447 | coding_agent_flow.run(shared)
448 |
449 | if __name__ == "__main__":
450 | main()
451 | ```
452 |
453 | And finally, let's create the flow in `flow.py`:
454 |
455 | ```python
456 | # Define the nodes
457 | main_decision = MainDecisionAgent()
458 | read_file_action = ReadFileAction()
459 | grep_search_action = GrepSearchAction()
460 | list_dir_action = ListDirAction()
461 | delete_file_action = DeleteFileAction()
462 | edit_file_node = EditFileNode()
463 | analyze_plan_node = AnalyzeAndPlanNode()
464 | apply_changes_node = ApplyChangesNode()
465 | format_response_node = FormatResponseNode()
466 |
467 | # Connect the nodes
468 | main_decision - "read_file" >> read_file_action
469 | main_decision - "grep_search" >> grep_search_action
470 | main_decision - "list_dir" >> list_dir_action
471 | main_decision - "delete_file" >> delete_file_action
472 | main_decision - "edit_file" >> edit_file_node
473 | main_decision - "finish" >> format_response_node
474 |
475 | # Connect action nodes back to main decision
476 | read_file_action - "decision" >> main_decision
477 | grep_search_action - "decision" >> main_decision
478 | list_dir_action - "decision" >> main_decision
479 | delete_file_action - "decision" >> main_decision
480 |
481 | # Connect edit flow
482 | edit_file_node - "analyze" >> analyze_plan_node
483 | analyze_plan_node - "apply" >> apply_changes_node
484 | apply_changes_node - "decision" >> main_decision
485 |
486 | # Create the flow
487 | coding_agent_flow = Flow(start=main_decision)
488 | ```
489 |
490 | Now you can run your agent with:
491 |
492 | ```bash
493 | python main.py --query "List all Python files" --working-dir ./project
494 | ```
495 |
496 |
497 | ## 9. Advanced: Customizing Your Agent
498 |
499 | One of the most powerful aspects of this architecture is how easy it is to customize. Let's explore a few ways you can extend this agent:
500 |
501 | ### 1. Adding New Tools
502 |
503 | To add a new tool, simply:
504 | 1. Create a new action node class
505 | 2. Add it to the `MainDecisionAgent`'s prompt
506 | 3. Connect it to the flow
507 |
508 | For example, to add a "run_tests" tool:
509 |
510 | ```python
511 | class RunTestsAction(Node):
512 | def prep(self, shared):
513 | # Get test directory from parameters
514 | history = shared.get("history", [])
515 | last_action = history[-1]
516 | test_dir = last_action["params"].get("test_dir")
517 | return test_dir
518 |
519 | def exec(self, test_dir):
520 | # Run tests and capture output
521 | import subprocess
522 | result = subprocess.run(
523 | ["pytest", test_dir],
524 | capture_output=True,
525 | text=True
526 | )
527 | return result.stdout, result.returncode == 0
528 |
529 | def post(self, shared, prep_res, exec_res):
530 | # Update history with test results
531 | output, success = exec_res
532 | history = shared.get("history", [])
533 | if history:
534 | history[-1]["result"] = {
535 | "success": success,
536 | "output": output
537 | }
538 | return "decision"
539 |
540 | # Then add to your flow:
541 | run_tests_action = RunTestsAction()
542 | main_decision - "run_tests" >> run_tests_action
543 | run_tests_action - "decision" >> main_decision
544 | ```
545 |
546 | ### 2. Improving Code Analysis
547 |
548 | You can enhance the code analysis capabilities by modifying the prompts in `AnalyzeAndPlanNode`:
549 |
550 | ```python
551 | # Add language-specific hints
552 | language_hints = {
553 | ".py": "This is Python code. Look for function and class definitions.",
554 | ".js": "This is JavaScript code. Look for function declarations and exports.",
555 | # Add more languages as needed
556 | }
557 |
558 | # Update the prompt with language-specific hints
559 | file_ext = os.path.splitext(target_file)[1]
560 | language_hint = language_hints.get(file_ext, "")
561 | prompt += f"\n\nLANGUAGE HINT: {language_hint}"
562 | ```
563 |
564 | ### 3. Adding Memory and Context
565 |
566 | To give your agent more context, you could add a vector database to store and retrieve relevant information:
567 |
568 | ```python
569 | class VectorDBNode(Node):
570 | def prep(self, shared):
571 | # Get text to store
572 | history = shared.get("history", [])
573 | context = ""
574 | for action in history:
575 | if action["tool"] == "read_file" and action.get("result", {}).get("success", False):
576 | content = action["result"]["content"]
577 | context += f"File: {action['params']['target_file']}\n{content}\n\n"
578 | return context
579 |
580 | def exec(self, context):
581 |         # Store in a vector DB (assumes LangChain-style OpenAIEmbeddings and Chroma imports)
582 | embeddings = OpenAIEmbeddings()
583 | vectordb = Chroma.from_texts(
584 | texts=[context],
585 | embedding=embeddings,
586 | persist_directory="./db"
587 | )
588 | return vectordb
589 |
590 | def post(self, shared, prep_res, exec_res):
591 | shared["vectordb"] = exec_res
592 | return "decision"
593 | ```
594 |
595 |
596 | ## 10. Conclusion and Next Steps
597 |
598 | Congratulations! You've built a customizable AI coding agent that can help you navigate and modify code based on natural language instructions. This agent demonstrates the power of agentic development, where AI systems help build better AI systems.
599 |
600 | The possibilities for extending this agent are endless:
601 | - Add support for more programming languages
602 | - Implement code refactoring capabilities
603 | - Create specialized tools for specific frameworks
604 | - Add security checks before making changes
605 | - Implement static analysis to catch potential bugs
606 |
607 | As LLM capabilities continue to improve, agents like this will become even more powerful tools in a developer's arsenal.
608 |
609 | Want to learn more? Subscribe to our [YouTube channel](https://www.youtube.com/@ZacharyLLM?sub_confirmation=1) for a step-by-step video tutorial on building and extending this agent.
610 |
611 | Happy coding!
612 |
--------------------------------------------------------------------------------
/flow.py:
--------------------------------------------------------------------------------
1 | from pocketflow import Node, Flow, BatchNode
2 | import os
3 | import yaml # Add YAML support
4 | import logging
5 | from datetime import datetime
6 | from typing import List, Dict, Any, Tuple
7 |
8 | # Import utility functions
9 | from utils.call_llm import call_llm
10 | from utils.read_file import read_file
11 | from utils.delete_file import delete_file
12 | from utils.replace_file import replace_file
13 | from utils.search_ops import grep_search
14 | from utils.dir_ops import list_dir
15 |
16 | # Set up logging
17 | logging.basicConfig(
18 | level=logging.INFO,
19 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
20 | handlers=[
21 | logging.StreamHandler(),
22 | logging.FileHandler('coding_agent.log')
23 | ]
24 | )
25 | logging.getLogger("httpx").setLevel(logging.WARNING)
26 | logger = logging.getLogger('coding_agent')
27 |
28 | def format_history_summary(history: List[Dict[str, Any]]) -> str:
29 | if not history:
30 | return "No previous actions."
31 |
32 | history_str = "\n"
33 |
34 | for i, action in enumerate(history):
35 |         # Header for each entry (timestamp intentionally omitted)
36 | history_str += f"Action {i+1}:\n"
37 | history_str += f"- Tool: {action['tool']}\n"
38 | history_str += f"- Reason: {action['reason']}\n"
39 |
40 | # Add parameters
41 | params = action.get("params", {})
42 | if params:
43 |             history_str += "- Parameters:\n"
44 | for k, v in params.items():
45 | history_str += f" - {k}: {v}\n"
46 |
47 | # Add detailed result information
48 | result = action.get("result")
49 | if result:
50 | if isinstance(result, dict):
51 | success = result.get("success", False)
52 | history_str += f"- Result: {'Success' if success else 'Failed'}\n"
53 |
54 | # Add tool-specific details
55 | if action['tool'] == 'read_file' and success:
56 | content = result.get("content", "")
57 | # Show full content without truncating
58 | history_str += f"- Content: {content}\n"
59 | elif action['tool'] == 'grep_search' and success:
60 | matches = result.get("matches", [])
61 | history_str += f"- Matches: {len(matches)}\n"
62 | # Show all matches without limiting to first 3
63 | for j, match in enumerate(matches):
64 | history_str += f" {j+1}. {match.get('file')}:{match.get('line')}: {match.get('content')}\n"
65 | elif action['tool'] == 'edit_file' and success:
66 | operations = result.get("operations", 0)
67 | history_str += f"- Operations: {operations}\n"
68 |
69 | # Include the reasoning if available
70 | reasoning = result.get("reasoning", "")
71 | if reasoning:
72 | history_str += f"- Reasoning: {reasoning}\n"
73 | elif action['tool'] == 'list_dir' and success:
74 | # Get the tree visualization string
75 | tree_visualization = result.get("tree_visualization", "")
76 | history_str += "- Directory structure:\n"
77 |
78 | # Properly handle and format the tree visualization
79 | if tree_visualization and isinstance(tree_visualization, str):
80 | # First, ensure we handle any special line ending characters properly
81 | clean_tree = tree_visualization.replace('\r\n', '\n').strip()
82 |
83 | if clean_tree:
84 | # Add each line with proper indentation
85 | for line in clean_tree.split('\n'):
86 | # Ensure the line is properly indented
87 | if line.strip(): # Only include non-empty lines
88 | history_str += f" {line}\n"
89 | else:
90 | history_str += " (No tree structure data)\n"
91 | else:
92 | history_str += " (Empty or inaccessible directory)\n"
93 | logger.debug(f"Tree visualization missing or invalid: {tree_visualization}")
94 | else:
95 | history_str += f"- Result: {result}\n"
96 |
97 | # Add separator between actions
98 | history_str += "\n" if i < len(history) - 1 else ""
99 |
100 | return history_str
101 |
102 | #############################################
103 | # Main Decision Agent Node
104 | #############################################
105 | class MainDecisionAgent(Node):
106 | def prep(self, shared: Dict[str, Any]) -> Tuple[str, List[Dict[str, Any]]]:
107 | # Get user query and history
108 | user_query = shared.get("user_query", "")
109 | history = shared.get("history", [])
110 |
111 | return user_query, history
112 |
113 | def exec(self, inputs: Tuple[str, List[Dict[str, Any]]]) -> Dict[str, Any]:
114 | user_query, history = inputs
115 | logger.info(f"MainDecisionAgent: Analyzing user query: {user_query}")
116 |
117 | # Format history using the utility function with 'basic' detail level
118 | history_str = format_history_summary(history)
119 |
120 | # Create prompt for the LLM using YAML instead of JSON
121 | prompt = f"""You are a coding assistant that helps modify and navigate code. Given the following request,
122 | decide which tool to use from the available options.
123 |
124 | User request: {user_query}
125 |
126 | Here are the actions you performed:
127 | {history_str}
128 |
129 | Available tools:
130 | 1. read_file: Read content from a file
131 | - Parameters: target_file (path)
132 | - Example:
133 | tool: read_file
134 | reason: I need to read the main.py file to understand its structure
135 | params:
136 | target_file: main.py
137 |
138 | 2. edit_file: Make changes to a file
139 | - Parameters: target_file (path), instructions, code_edit
140 | - Code_edit_instructions:
141 | - The code changes with context, following these rules:
142 | - Use "// ... existing code ..." to represent unchanged code between edits
143 | - Include sufficient context around the changes to resolve ambiguity
144 | - Minimize repeating unchanged code
145 | - Never omit code without using the "// ... existing code ..." marker
146 | - No need to specify line numbers - the context helps locate the changes
147 | - Example:
148 | tool: edit_file
149 | reason: I need to add error handling to the file reading function
150 | params:
151 | target_file: utils/read_file.py
152 | instructions: Add try-except block around the file reading operation
153 | code_edit: |
154 | // ... existing file reading code ...
155 | function newEdit() {{
156 | // new code here
157 | }}
158 | // ... existing file reading code ...
159 |
160 | 3. delete_file: Remove a file
161 | - Parameters: target_file (path)
162 | - Example:
163 | tool: delete_file
164 | reason: The temporary file is no longer needed
165 | params:
166 | target_file: temp.txt
167 |
168 | 4. grep_search: Search for patterns in files
169 | - Parameters: query, case_sensitive (optional), include_pattern (optional), exclude_pattern (optional)
170 | - Example:
171 | tool: grep_search
172 | reason: I need to find all occurrences of 'logger' in Python files
173 | params:
174 | query: logger
175 | include_pattern: "*.py"
176 | case_sensitive: false
177 |
178 | 5. list_dir: List contents of a directory
179 | - Parameters: relative_workspace_path
180 | - Example:
181 | tool: list_dir
182 | reason: I need to see all files in the utils directory
183 | params:
184 | relative_workspace_path: utils
185 | - Result: Returns a tree visualization of the directory structure
186 |
187 | 6. finish: End the process and provide final response
188 | - No parameters required
189 | - Example:
190 | tool: finish
191 | reason: I have completed the requested task of finding all logger instances
192 | params: {{}}
193 |
194 | Respond with a YAML object containing:
195 | ```yaml
196 | tool: one of: read_file, edit_file, delete_file, grep_search, list_dir, finish
197 | reason: |
198 | detailed explanation of why you chose this tool and what you intend to do
199 | if you chose finish, explain why no more actions are needed
200 | params:
201 | # parameters specific to the chosen tool
202 | ```
203 |
204 | If you believe no more actions are needed, use "finish" as the tool and explain why in the reason.
205 | """
206 |
207 | # Call LLM to decide action
208 | response = call_llm(prompt)
209 |
210 | # Look for YAML structure in the response
211 | yaml_content = ""
212 | if "```yaml" in response:
213 | yaml_blocks = response.split("```yaml")
214 | if len(yaml_blocks) > 1:
215 | yaml_content = yaml_blocks[1].split("```")[0].strip()
216 | elif "```yml" in response:
217 | yaml_blocks = response.split("```yml")
218 | if len(yaml_blocks) > 1:
219 | yaml_content = yaml_blocks[1].split("```")[0].strip()
220 | elif "```" in response:
221 | # Try to extract from generic code block
222 | yaml_blocks = response.split("```")
223 | if len(yaml_blocks) > 1:
224 | yaml_content = yaml_blocks[1].strip()
225 | else:
226 | # If no code blocks, try to use the entire response
227 | yaml_content = response.strip()
228 |
229 | if yaml_content:
230 | decision = yaml.safe_load(yaml_content)
231 |
232 | # Validate the required fields
233 | assert "tool" in decision, "Tool name is missing"
234 | assert "reason" in decision, "Reason is missing"
235 |
236 | # For tools other than "finish", params must be present
237 | if decision["tool"] != "finish":
238 | assert "params" in decision, "Parameters are missing"
239 | else:
240 | decision["params"] = {}
241 |
242 | return decision
243 | else:
244 | raise ValueError("No YAML object found in response")
245 |
246 | def post(self, shared: Dict[str, Any], prep_res: Any, exec_res: Dict[str, Any]) -> str:
247 | logger.info(f"MainDecisionAgent: Selected tool: {exec_res['tool']}")
248 |
249 | # Initialize history if not present
250 | if "history" not in shared:
251 | shared["history"] = []
252 |
253 | # Add this action to history
254 | shared["history"].append({
255 | "tool": exec_res["tool"],
256 | "reason": exec_res["reason"],
257 | "params": exec_res.get("params", {}),
258 | "result": None, # Will be filled in by action nodes
259 | "timestamp": datetime.now().isoformat()
260 | })
261 |
262 | # Return the action to take
263 | return exec_res["tool"]
264 |
265 | #############################################
266 | # Read File Action Node
267 | #############################################
268 | class ReadFileAction(Node):
269 | def prep(self, shared: Dict[str, Any]) -> str:
270 | # Get parameters from the last history entry
271 | history = shared.get("history", [])
272 | if not history:
273 | raise ValueError("No history found")
274 |
275 | last_action = history[-1]
276 | file_path = last_action["params"].get("target_file")
277 |
278 | if not file_path:
279 | raise ValueError("Missing target_file parameter")
280 |
281 | # Ensure path is relative to working directory
282 | working_dir = shared.get("working_dir", "")
283 | full_path = os.path.join(working_dir, file_path) if working_dir else file_path
284 |
285 | # Use the reason for logging instead of explanation
286 | reason = last_action.get("reason", "No reason provided")
287 | logger.info(f"ReadFileAction: {reason}")
288 |
289 | return full_path
290 |
291 | def exec(self, file_path: str) -> Tuple[str, bool]:
292 | # Call read_file utility which returns a tuple of (content, success)
293 | return read_file(file_path)
294 |
295 | def post(self, shared: Dict[str, Any], prep_res: str, exec_res: Tuple[str, bool]) -> str:
296 | # Unpack the tuple returned by read_file()
297 | content, success = exec_res
298 |
299 | # Update the result in the last history entry
300 | history = shared.get("history", [])
301 | if history:
302 | history[-1]["result"] = {
303 | "success": success,
304 | "content": content
305 | }
306 |
307 | #############################################
308 | # Grep Search Action Node
309 | #############################################
310 | class GrepSearchAction(Node):
311 | def prep(self, shared: Dict[str, Any]) -> Dict[str, Any]:
312 | # Get parameters from the last history entry
313 | history = shared.get("history", [])
314 | if not history:
315 | raise ValueError("No history found")
316 |
317 | last_action = history[-1]
318 | params = last_action["params"]
319 |
320 | if "query" not in params:
321 | raise ValueError("Missing query parameter")
322 |
323 | # Use the reason for logging instead of explanation
324 | reason = last_action.get("reason", "No reason provided")
325 | logger.info(f"GrepSearchAction: {reason}")
326 |
327 | # Ensure paths are relative to working directory
328 | working_dir = shared.get("working_dir", "")
329 |
330 | return {
331 | "query": params["query"],
332 | "case_sensitive": params.get("case_sensitive", False),
333 | "include_pattern": params.get("include_pattern"),
334 | "exclude_pattern": params.get("exclude_pattern"),
335 | "working_dir": working_dir
336 | }
337 |
338 | def exec(self, params: Dict[str, Any]) -> Tuple[bool, List[Dict[str, Any]]]:
339 |         # Pop working_dir so only search parameters remain (empty string means current directory)
340 | working_dir = params.pop("working_dir", "")
341 |
342 | # Call grep_search utility which returns (success, matches)
343 | return grep_search(
344 | query=params["query"],
345 | case_sensitive=params.get("case_sensitive", False),
346 | include_pattern=params.get("include_pattern"),
347 | exclude_pattern=params.get("exclude_pattern"),
348 | working_dir=working_dir
349 | )
350 |
351 | def post(self, shared: Dict[str, Any], prep_res: Dict[str, Any], exec_res: Tuple[bool, List[Dict[str, Any]]]) -> str:
352 |         success, matches = exec_res
353 |
354 | # Update the result in the last history entry
355 | history = shared.get("history", [])
356 | if history:
357 | history[-1]["result"] = {
358 | "success": success,
359 | "matches": matches
360 | }
361 |
362 | #############################################
363 | # List Directory Action Node
364 | #############################################
365 | class ListDirAction(Node):
366 | def prep(self, shared: Dict[str, Any]) -> str:
367 | # Get parameters from the last history entry
368 | history = shared.get("history", [])
369 | if not history:
370 | raise ValueError("No history found")
371 |
372 | last_action = history[-1]
373 | path = last_action["params"].get("relative_workspace_path", ".")
374 |
375 | # Use the reason for logging instead of explanation
376 | reason = last_action.get("reason", "No reason provided")
377 | logger.info(f"ListDirAction: {reason}")
378 |
379 | # Ensure path is relative to working directory
380 | working_dir = shared.get("working_dir", "")
381 | full_path = os.path.join(working_dir, path) if working_dir else path
382 |
383 | return full_path
384 |
385 | def exec(self, path: str) -> Tuple[bool, str]:
386 |         # Call list_dir utility which returns (success, tree_str)
387 | success, tree_str = list_dir(path)
388 |
389 | return success, tree_str
390 |
391 | def post(self, shared: Dict[str, Any], prep_res: str, exec_res: Tuple[bool, str]) -> str:
392 | success, tree_str = exec_res
393 |
394 | # Update the result in the last history entry with the new structure
395 | history = shared.get("history", [])
396 | if history:
397 | history[-1]["result"] = {
398 | "success": success,
399 | "tree_visualization": tree_str
400 | }
401 |
402 | #############################################
403 | # Delete File Action Node
404 | #############################################
405 | class DeleteFileAction(Node):
406 | def prep(self, shared: Dict[str, Any]) -> str:
407 | # Get parameters from the last history entry
408 | history = shared.get("history", [])
409 | if not history:
410 | raise ValueError("No history found")
411 |
412 | last_action = history[-1]
413 | file_path = last_action["params"].get("target_file")
414 |
415 | if not file_path:
416 | raise ValueError("Missing target_file parameter")
417 |
418 | # Use the reason for logging instead of explanation
419 | reason = last_action.get("reason", "No reason provided")
420 | logger.info(f"DeleteFileAction: {reason}")
421 |
422 | # Ensure path is relative to working directory
423 | working_dir = shared.get("working_dir", "")
424 | full_path = os.path.join(working_dir, file_path) if working_dir else file_path
425 |
426 | return full_path
427 |
428 | def exec(self, file_path: str) -> Tuple[bool, str]:
429 | # Call delete_file utility which returns (success, message)
430 | return delete_file(file_path)
431 |
432 | def post(self, shared: Dict[str, Any], prep_res: str, exec_res: Tuple[bool, str]) -> str:
433 | success, message = exec_res
434 |
435 | # Update the result in the last history entry
436 | history = shared.get("history", [])
437 | if history:
438 | history[-1]["result"] = {
439 | "success": success,
440 | "message": message
441 | }
442 |
443 | #############################################
444 | # Read Target File Node (Edit Agent)
445 | #############################################
446 | class ReadTargetFileNode(Node):
447 | def prep(self, shared: Dict[str, Any]) -> str:
448 | # Get parameters from the last history entry
449 | history = shared.get("history", [])
450 | if not history:
451 | raise ValueError("No history found")
452 |
453 | last_action = history[-1]
454 | file_path = last_action["params"].get("target_file")
455 |
456 | if not file_path:
457 | raise ValueError("Missing target_file parameter")
458 |
459 | # Ensure path is relative to working directory
460 | working_dir = shared.get("working_dir", "")
461 | full_path = os.path.join(working_dir, file_path) if working_dir else file_path
462 |
463 | return full_path
464 |
465 | def exec(self, file_path: str) -> Tuple[str, bool]:
466 | # Call read_file utility which returns (content, success)
467 | return read_file(file_path)
468 |
469 | def post(self, shared: Dict[str, Any], prep_res: str, exec_res: Tuple[str, bool]) -> str:
470 | content, success = exec_res
471 | logger.info("ReadTargetFileNode: File read completed for editing")
472 |
473 | # Store file content in the history entry
474 | history = shared.get("history", [])
475 | if history:
476 | history[-1]["file_content"] = content
477 |
478 | #############################################
479 | # Analyze and Plan Changes Node
480 | #############################################
481 | class AnalyzeAndPlanNode(Node):
482 | def prep(self, shared: Dict[str, Any]) -> Dict[str, Any]:
483 | # Get history
484 | history = shared.get("history", [])
485 | if not history:
486 | raise ValueError("No history found")
487 |
488 | last_action = history[-1]
489 | file_content = last_action.get("file_content")
490 | instructions = last_action["params"].get("instructions")
491 | code_edit = last_action["params"].get("code_edit")
492 |
493 | if not file_content:
494 | raise ValueError("File content not found")
495 | if not instructions:
496 | raise ValueError("Missing instructions parameter")
497 | if not code_edit:
498 | raise ValueError("Missing code_edit parameter")
499 |
500 | return {
501 | "file_content": file_content,
502 | "instructions": instructions,
503 | "code_edit": code_edit
504 | }
505 |
506 | def exec(self, params: Dict[str, Any]) -> List[Dict[str, Any]]:
507 | file_content = params["file_content"]
508 | instructions = params["instructions"]
509 | code_edit = params["code_edit"]
510 |
511 | # File content as lines
512 | file_lines = file_content.split('\n')
513 | total_lines = len(file_lines)
514 |
515 | # Generate a prompt for the LLM to analyze the edit using YAML instead of JSON
516 | prompt = f"""
517 | As a code editing assistant, I need to convert the following code edit instruction
518 | and code edit pattern into specific edit operations (start_line, end_line, replacement).
519 |
520 | FILE CONTENT:
521 | {file_content}
522 |
523 | EDIT INSTRUCTIONS:
524 | {instructions}
525 |
526 | CODE EDIT PATTERN (markers like "// ... existing code ..." indicate unchanged code):
527 | {code_edit}
528 |
529 | Analyze the file content and the edit pattern to determine exactly where changes should be made.
530 | Be very careful with start and end lines. They are 1-indexed and inclusive. These will be REPLACED, not APPENDED!
531 | If you want to APPEND content after a line, copy that line as the first line of the replacement.
532 | Return a YAML object with your reasoning and an array of edit operations:
533 |
534 | ```yaml
535 | reasoning: |
536 | First explain your thinking process about how you're interpreting the edit pattern.
537 | Explain how you identified where the edits should be made in the original file.
538 | Describe any assumptions or decisions you made when determining the edit locations.
539 |     You need to be very precise with the start and end lines! Explain why the edit should not start or end one line earlier or later.
540 |
541 | operations:
542 | - start_line: 10
543 | end_line: 15
544 | replacement: |
545 | def process_file(filename):
546 | # New implementation with better error handling
547 | try:
548 | with open(filename, 'r') as f:
549 | return f.read()
550 | except FileNotFoundError:
551 | return None
552 |
553 | - start_line: 25
554 | end_line: 25
555 | replacement: |
556 | logger.info("File processing completed")
557 | ```
558 |
559 | For lines that include "// ... existing code ...", do not include them in the replacement.
560 | Instead, identify the exact lines they represent in the original file and set the line
561 | numbers accordingly. Start_line and end_line are 1-indexed.
562 |
563 | If the instruction indicates content should be appended to the file, set both start_line and end_line
564 | to the maximum line number + 1, which will add the content at the end of the file.
565 | """
566 |
567 | # Call LLM to analyze
568 | response = call_llm(prompt)
569 |
570 | # Look for YAML structure in the response
571 | yaml_content = ""
572 | if "```yaml" in response:
573 | yaml_blocks = response.split("```yaml")
574 | if len(yaml_blocks) > 1:
575 | yaml_content = yaml_blocks[1].split("```")[0].strip()
576 | elif "```yml" in response:
577 | yaml_blocks = response.split("```yml")
578 | if len(yaml_blocks) > 1:
579 | yaml_content = yaml_blocks[1].split("```")[0].strip()
580 | elif "```" in response:
581 | # Try to extract from generic code block
582 | yaml_blocks = response.split("```")
583 | if len(yaml_blocks) > 1:
584 | yaml_content = yaml_blocks[1].strip()
585 |
586 | if yaml_content:
587 | decision = yaml.safe_load(yaml_content)
588 |
589 | # Validate the required fields
590 | assert "reasoning" in decision, "Reasoning is missing"
591 | assert "operations" in decision, "Operations are missing"
592 |
593 | # Ensure operations is a list
594 | if not isinstance(decision["operations"], list):
595 | raise ValueError("Operations are not a list")
596 |
597 | # Validate operations
598 | for op in decision["operations"]:
599 | assert "start_line" in op, "start_line is missing"
600 | assert "end_line" in op, "end_line is missing"
601 | assert "replacement" in op, "replacement is missing"
602 |                 assert 1 <= op["start_line"] <= total_lines + 1, f"start_line out of range: {op['start_line']}"
603 |                 assert 1 <= op["end_line"] <= total_lines + 1, f"end_line out of range: {op['end_line']}"
604 | assert op["start_line"] <= op["end_line"], f"start_line > end_line: {op['start_line']} > {op['end_line']}"
605 |
606 | return decision
607 | else:
608 | raise ValueError("No YAML object found in response")
609 |
610 | def post(self, shared: Dict[str, Any], prep_res: Dict[str, Any], exec_res: Dict[str, Any]) -> str:
611 | # Store reasoning and edit operations in shared
612 | shared["edit_reasoning"] = exec_res.get("reasoning", "")
613 | shared["edit_operations"] = exec_res.get("operations", [])
614 |
615 |
616 |
617 | #############################################
618 | # Apply Changes Batch Node
619 | #############################################
620 | class ApplyChangesNode(BatchNode):
621 | def prep(self, shared: Dict[str, Any]) -> List[Dict[str, Any]]:
622 | # Get edit operations
623 | edit_operations = shared.get("edit_operations", [])
624 | if not edit_operations:
625 | logger.warning("No edit operations found")
626 | return []
627 |
628 | # Sort edit operations in descending order by start_line
629 | # This ensures that line numbers remain valid as we edit from bottom to top
630 | sorted_ops = sorted(edit_operations, key=lambda op: op["start_line"], reverse=True)
631 |
632 | # Get target file from history
633 | history = shared.get("history", [])
634 | if not history:
635 | raise ValueError("No history found")
636 |
637 | last_action = history[-1]
638 | target_file = last_action["params"].get("target_file")
639 |
640 | if not target_file:
641 | raise ValueError("Missing target_file parameter")
642 |
643 | # Ensure path is relative to working directory
644 | working_dir = shared.get("working_dir", "")
645 | full_path = os.path.join(working_dir, target_file) if working_dir else target_file
646 |
647 | # Attach file path to each operation
648 | for op in sorted_ops:
649 | op["target_file"] = full_path
650 |
651 | return sorted_ops
652 |
653 | def exec(self, op: Dict[str, Any]) -> Tuple[bool, str]:
654 | # Call replace_file utility which returns (success, message)
655 | return replace_file(
656 | target_file=op["target_file"],
657 | start_line=op["start_line"],
658 | end_line=op["end_line"],
659 | content=op["replacement"]
660 | )
661 |
662 | def post(self, shared: Dict[str, Any], prep_res: List[Dict[str, Any]], exec_res_list: List[Tuple[bool, str]]) -> str:
663 | # Check if all operations were successful
664 | all_successful = all(success for success, _ in exec_res_list)
665 |
666 | # Format results for history
667 | result_details = [
668 | {"success": success, "message": message}
669 | for success, message in exec_res_list
670 | ]
671 |
672 | # Update edit result in history
673 | history = shared.get("history", [])
674 | if history:
675 | history[-1]["result"] = {
676 | "success": all_successful,
677 | "operations": len(exec_res_list),
678 | "details": result_details,
679 | "reasoning": shared.get("edit_reasoning", "")
680 | }
681 |
682 | # Clear edit operations and reasoning after processing
683 | shared.pop("edit_operations", None)
684 | shared.pop("edit_reasoning", None)
685 |
686 |
687 |
688 | #############################################
689 | # Format Response Node
690 | #############################################
691 | class FormatResponseNode(Node):
692 | def prep(self, shared: Dict[str, Any]) -> List[Dict[str, Any]]:
693 | # Get history
694 | history = shared.get("history", [])
695 |
696 | return history
697 |
698 | def exec(self, history: List[Dict[str, Any]]) -> str:
699 | # If no history, return a generic message
700 | if not history:
701 | return "No actions were performed."
702 |
703 | # Generate a summary of actions for the LLM using the utility function
704 | actions_summary = format_history_summary(history)
705 |
706 | # Prompt for the LLM to generate the final response
707 | prompt = f"""
708 | You are a coding assistant. You have just performed a series of actions based on the
709 | user's request. Summarize what you did in a clear, helpful response.
710 |
711 | Here are the actions you performed:
712 | {actions_summary}
713 |
714 | Generate a comprehensive yet concise response that explains:
715 | 1. What actions were taken
716 | 2. What was found or modified
717 | 3. Any next steps the user might want to take
718 |
719 | IMPORTANT:
720 | - Focus on the outcomes and results, not the specific tools used
721 | - Write as if you are directly speaking to the user
722 | - When providing code examples or structured information, use YAML format enclosed in triple backticks
723 | """
724 |
725 | # Call LLM to generate response
726 | response = call_llm(prompt)
727 |
728 | return response
729 |
730 | def post(self, shared: Dict[str, Any], prep_res: List[Dict[str, Any]], exec_res: str) -> str:
731 | logger.info(f"###### Final Response Generated ######\n{exec_res}\n###### End of Response ######")
732 |
733 | # Store response in shared
734 | shared["response"] = exec_res
735 |
736 | return "done"
737 |
738 | #############################################
739 | # Edit Agent Flow
740 | #############################################
741 | def create_edit_agent() -> Flow:
742 | # Create nodes
743 | read_target = ReadTargetFileNode()
744 | analyze_plan = AnalyzeAndPlanNode()
745 | apply_changes = ApplyChangesNode()
746 |
747 | # Connect nodes using default action (no named actions)
748 | read_target >> analyze_plan
749 | analyze_plan >> apply_changes
750 |
751 | # Create flow
752 | return Flow(start=read_target)
753 |
754 | #############################################
755 | # Main Flow
756 | #############################################
757 | def create_main_flow() -> Flow:
758 | # Create nodes
759 | main_agent = MainDecisionAgent()
760 | read_action = ReadFileAction()
761 | grep_action = GrepSearchAction()
762 | list_dir_action = ListDirAction()
763 | delete_action = DeleteFileAction()
764 | edit_agent = create_edit_agent()
765 | format_response = FormatResponseNode()
766 |
767 | # Connect main agent to action nodes
768 | main_agent - "read_file" >> read_action
769 | main_agent - "grep_search" >> grep_action
770 | main_agent - "list_dir" >> list_dir_action
771 | main_agent - "delete_file" >> delete_action
772 | main_agent - "edit_file" >> edit_agent
773 | main_agent - "finish" >> format_response
774 |
775 | # Connect action nodes back to main agent using default action
776 | read_action >> main_agent
777 | grep_action >> main_agent
778 | list_dir_action >> main_agent
779 | delete_action >> main_agent
780 | edit_agent >> main_agent
781 |
782 | # Create flow
783 | return Flow(start=main_agent)
784 |
785 | # Create the main flow
786 | coding_agent_flow = create_main_flow()
--------------------------------------------------------------------------------
/.cursorrules:
--------------------------------------------------------------------------------
1 | ================================================
2 | File: docs/guide.md
3 | ================================================
4 | ---
5 | layout: default
6 | title: "Build your LLM App"
7 | ---
8 |
9 | # LLM System Design Playbook
10 |
11 | > If you are an AI assistant involved in building LLM Systems, read this guide **VERY, VERY** carefully! This is the most important chapter in the entire document. Throughout development, you should always (1) start with a small and simple solution, (2) design at a high level (`docs/design.md`) before implementation, and (3) frequently ask humans for feedback and clarification.
12 | {: .warning }
13 |
14 | ## System Design Steps
15 |
16 | These system designs should be a collaboration between humans and AI assistants:
17 |
18 | | Stage | Human | AI | Comment |
19 | |:-----------------------|:----------:|:---------:|:------------------------------------------------------------------------|
20 | | 1. Requirements | ★★★ High | ★☆☆ Low | Humans understand the requirements and context. |
21 | | 2. Flow | ★★☆ Medium | ★★☆ Medium | Humans specify the high-level design, and the AI fills in the details. |
22 | | 3. Utilities | ★★☆ Medium | ★★☆ Medium | Humans provide available external APIs and integrations, and the AI helps with implementation. |
23 | | 4. Node | ★☆☆ Low | ★★★ High | The AI helps design the node types and data handling based on the flow. |
24 | | 5. Implementation | ★☆☆ Low | ★★★ High | The AI implements the flow based on the design. |
25 | | 6. Optimization | ★★☆ Medium | ★★☆ Medium | Humans evaluate the results, and the AI helps optimize. |
26 | | 7. Reliability | ★☆☆ Low | ★★★ High | The AI writes test cases and addresses corner cases. |
27 |
28 | 1. **Requirements**: Clarify the requirements for your project, and evaluate whether an AI system is a good fit. AI systems are:
29 | - suitable for routine tasks that require common sense (e.g., filling out forms, replying to emails).
30 | - suitable for creative tasks where all inputs are provided (e.g., building slides, writing SQL).
31 | - **NOT** suitable for tasks that are highly ambiguous and require complex info (e.g., building a startup).
32 | - > **If a human can’t solve it, an LLM can’t automate it!** Before building an LLM system, thoroughly understand the problem by manually solving example inputs to develop intuition.
33 | {: .best-practice }
34 |
35 |
36 | 2. **Flow Design**: Outline at a high level, describe how your AI system orchestrates nodes.
37 | - Identify applicable design patterns (e.g., [Map Reduce](./design_pattern/mapreduce.md), [Agent](./design_pattern/agent.md), [RAG](./design_pattern/rag.md)).
38 | - For each node, provide a high-level purpose description.
39 | - Draw the Flow in mermaid diagram.
40 |
41 | 3. **Utilities**: Based on the Flow Design, identify and implement necessary utility functions.
42 | - Think of your AI system as the brain. It needs a body—these *external utility functions*—to interact with the real world:
43 |
44 |
45 | - Reading inputs (e.g., retrieving Slack messages, reading emails)
46 | - Writing outputs (e.g., generating reports, sending emails)
47 | - Using external tools (e.g., calling LLMs, searching the web)
48 |
49 |     - NOTE: *LLM-based tasks* (e.g., summarizing text, analyzing sentiment) are **NOT** utility functions; rather, they are *core functions* internal to the AI system.
50 | - > **Start small!** Only include the most important ones to begin with!
51 | {: .best-practice }
52 |
53 |
54 | 4. **Node Design**: Plan how each node will read and write data, and use utility functions.
55 |    - Start with the shared data design (see the sketch after this list)
56 | - For simple systems, use an in-memory dictionary.
57 | - For more complex systems or when persistence is required, use a database.
58 | - **Remove Data Redundancy**: Don’t store the same data. Use in-memory references or foreign keys.
59 | - For each node, design its type and data handling:
60 | - `type`: Decide between Regular, Batch, or Async
61 | - `prep`: How the node reads data
62 | - `exec`: Which utility function this node uses
63 | - `post`: How the node writes data
64 |
65 | 5. **Implementation**: Implement the initial nodes and flows based on the design.
66 | - **“Keep it simple, stupid!”** Avoid complex features and full-scale type checking.
67 | - **FAIL FAST**! Avoid `try` logic so you can quickly identify any weak points in the system.
68 | - Add logging throughout the code to facilitate debugging.
69 |
70 | 6. **Optimization**:
71 | - **Use Intuition**: For a quick initial evaluation, human intuition is often a good start.
72 | - **Redesign Flow (Back to Step 3)**: Consider breaking down tasks further, introducing agentic decisions, or better managing input contexts.
73 | - If your flow design is already solid, move on to micro-optimizations:
74 | - **Prompt Engineering**: Use clear, specific instructions with examples to reduce ambiguity.
75 | - **In-Context Learning**: Provide robust examples for tasks that are difficult to specify with instructions alone.
76 |
77 | - > **You’ll likely iterate a lot!** Expect to repeat Steps 3–6 hundreds of times.
78 | >
79 | >
80 | {: .best-practice }
81 |
82 | 7. **Reliability**
83 |    - **Node Retries**: Add checks in the node `exec` to ensure outputs meet requirements, and consider increasing `max_retries` and `wait` times (see the sketch below).
84 | - **Logging and Visualization**: Maintain logs of all attempts and visualize node results for easier debugging.
85 | - **Self-Evaluation**: Add a separate node (powered by an LLM) to review outputs when results are uncertain.
86 |
87 | ## Example LLM Project File Structure
88 |
89 | ```
90 | my_project/
91 | ├── main.py
92 | ├── flow.py
93 | ├── utils/
94 | │ ├── __init__.py
95 | │ ├── call_llm.py
96 | │ └── search_web.py
97 | ├── requirements.txt
98 | └── docs/
99 | └── design.md
100 | ```
101 |
102 | - **`docs/design.md`**: Contains project documentation for each step above. This should be high-level and no-code.
103 | - **`utils/`**: Contains all utility functions.
104 | - It’s recommended to dedicate one Python file to each API call, for example `call_llm.py` or `search_web.py`.
105 |     - Each file should also include a `main()` function to try that API call (see the sketch below).
106 | - **`flow.py`**: Implements the system's flow, starting with node definitions followed by the overall structure.
107 | - **`main.py`**: Serves as the project’s entry point.
108 |
109 | ================================================
110 | File: docs/index.md
111 | ================================================
112 | ---
113 | layout: default
114 | title: "Home"
115 | nav_order: 1
116 | ---
117 |
118 | # Pocket Flow
119 |
120 | A [100-line](https://github.com/the-pocket/PocketFlow/blob/main/pocketflow/__init__.py) minimalist LLM framework for *Agents, Task Decomposition, RAG, etc*.
121 |
122 | We model the LLM workflow as a **Graph + Shared Store**:
123 | - **Nodes** handle simple (LLM) tasks.
124 | - Nodes connect through **Actions** (labeled edges) for *Agents*.
125 | - **Flows** orchestrate a directed graph of Nodes for *Task Decomposition*.
126 | - Nodes within Flows communicate through a **Shared Store**.
127 | - A Flow can be used as a Node (for **Nesting**).
128 | - **Batch** Nodes/Flows for data-intensive tasks.
129 | - **Async** Nodes/Flows allow waits for asynchronous tasks.
130 |
131 |
132 |
133 |
134 |
135 | > If you are an AI assistant reading this document, please read all the non-optional and non-advanced parts. If you are helping a human build an LLM app, please carefully review the [Development Playbook](./guide.md).
136 | {: .warning }
137 |
138 | ## Core Abstraction
139 |
140 | - [Node](./core_abstraction/node.md)
141 | - [Flow](./core_abstraction/flow.md)
142 | - [Communication](./core_abstraction/communication.md)
143 | - [Batch](./core_abstraction/batch.md)
144 | - [(Advanced) Async](./core_abstraction/async.md)
145 | - [(Advanced) Parallel](./core_abstraction/parallel.md)
146 |
147 | ## Utility Function
148 |
149 | - [LLM Wrapper](./utility_function/llm.md)
150 | - [Tool](./utility_function/tool.md)
151 | - [(Optional) Viz and Debug](./utility_function/viz.md)
152 | - Chunking
153 |
154 | > We do not provide built-in utility functions. Example implementations are provided as reference.
155 | {: .warning }
156 |
157 |
158 | ## Design Pattern
159 |
160 | - [Structured Output](./design_pattern/structure.md)
161 | - [Workflow](./design_pattern/workflow.md)
162 | - [Map Reduce](./design_pattern/mapreduce.md)
163 | - [RAG](./design_pattern/rag.md)
164 | - [Agent](./design_pattern/agent.md)
165 | - [(Optional) Chat Memory](./design_pattern/memory.md)
166 | - [(Advanced) Multi-Agents](./design_pattern/multi_agent.md)
167 | - Evaluation
168 |
169 | ## [Develop your LLM Apps](./guide.md)
170 |
171 | ================================================
172 | File: docs/core_abstraction/async.md
173 | ================================================
174 | ---
175 | layout: default
176 | title: "(Advanced) Async"
177 | parent: "Core Abstraction"
178 | nav_order: 5
179 | ---
180 |
181 | # (Advanced) Async
182 |
183 | **Async** Nodes implement `prep_async()`, `exec_async()`, `exec_fallback_async()`, and/or `post_async()`. This is useful for:
184 |
185 | 1. **prep_async()**: For *fetching/reading data (files, APIs, DB)* in an I/O-friendly way.
186 | 2. **exec_async()**: Typically used for async LLM calls.
187 | 3. **post_async()**: For *awaiting user feedback*, *coordinating across multi-agents* or any additional async steps after `exec_async()`.
188 |
189 | **Note**: `AsyncNode` must be wrapped in `AsyncFlow`. `AsyncFlow` can also include regular (sync) nodes.
190 |
191 | ### Example
192 |
193 | ```python
194 | class SummarizeThenVerify(AsyncNode):
195 | async def prep_async(self, shared):
196 | # Example: read a file asynchronously
197 | doc_text = await read_file_async(shared["doc_path"])
198 | return doc_text
199 |
200 | async def exec_async(self, prep_res):
201 | # Example: async LLM call
202 | summary = await call_llm_async(f"Summarize: {prep_res}")
203 | return summary
204 |
205 | async def post_async(self, shared, prep_res, exec_res):
206 | # Example: wait for user feedback
207 | decision = await gather_user_feedback(exec_res)
208 | if decision == "approve":
209 | shared["summary"] = exec_res
210 | return "approve"
211 | return "deny"
212 |
213 | summarize_node = SummarizeThenVerify()
214 | final_node = Finalize()
215 |
216 | # Define transitions
217 | summarize_node - "approve" >> final_node
218 | summarize_node - "deny" >> summarize_node # retry
219 |
220 | flow = AsyncFlow(start=summarize_node)
221 |
222 | async def main():
223 | shared = {"doc_path": "document.txt"}
224 | await flow.run_async(shared)
225 | print("Final Summary:", shared.get("summary"))
226 |
227 | asyncio.run(main())
228 | ```
229 |
230 | ================================================
231 | File: docs/core_abstraction/batch.md
232 | ================================================
233 | ---
234 | layout: default
235 | title: "Batch"
236 | parent: "Core Abstraction"
237 | nav_order: 4
238 | ---
239 |
240 | # Batch
241 |
242 | **Batch** makes it easier to handle large inputs in one Node or **rerun** a Flow multiple times. Example use cases:
243 | - **Chunk-based** processing (e.g., splitting large texts).
244 | - **Iterative** processing over lists of input items (e.g., user queries, files, URLs).
245 |
246 | ## 1. BatchNode
247 |
248 | A **BatchNode** extends `Node` but changes `prep()` and `exec()`:
249 |
250 | - **`prep(shared)`**: returns an **iterable** (e.g., list, generator).
251 | - **`exec(item)`**: called **once** per item in that iterable.
252 | - **`post(shared, prep_res, exec_res_list)`**: after all items are processed, receives a **list** of results (`exec_res_list`) and returns an **Action**.
253 |
254 |
255 | ### Example: Summarize a Large File
256 |
257 | ```python
258 | class MapSummaries(BatchNode):
259 | def prep(self, shared):
260 | # Suppose we have a big file; chunk it
261 | content = shared["data"]
262 | chunk_size = 10000
263 | chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)]
264 | return chunks
265 |
266 | def exec(self, chunk):
267 | prompt = f"Summarize this chunk in 10 words: {chunk}"
268 | summary = call_llm(prompt)
269 | return summary
270 |
271 | def post(self, shared, prep_res, exec_res_list):
272 | combined = "\n".join(exec_res_list)
273 | shared["summary"] = combined
274 | return "default"
275 |
276 | map_summaries = MapSummaries()
277 | flow = Flow(start=map_summaries)
278 | flow.run(shared)
279 | ```
280 |
281 | ---
282 |
283 | ## 2. BatchFlow
284 |
285 | A **BatchFlow** runs a **Flow** multiple times, each time with different `params`. Think of it as a loop that replays the Flow for each parameter set.
286 |
287 |
288 | ### Example: Summarize Many Files
289 |
290 | ```python
291 | class SummarizeAllFiles(BatchFlow):
292 | def prep(self, shared):
293 | # Return a list of param dicts (one per file)
294 | filenames = list(shared["data"].keys()) # e.g., ["file1.txt", "file2.txt", ...]
295 | return [{"filename": fn} for fn in filenames]
296 |
297 | # Suppose we have a per-file Flow (e.g., load_file >> summarize >> reduce):
298 | summarize_file = SummarizeFile(start=load_file)
299 |
300 | # Wrap that flow into a BatchFlow:
301 | summarize_all_files = SummarizeAllFiles(start=summarize_file)
302 | summarize_all_files.run(shared)
303 | ```
304 |
305 | ### Under the Hood
306 | 1. `prep(shared)` returns a list of param dicts—e.g., `[{filename: "file1.txt"}, {filename: "file2.txt"}, ...]`.
307 | 2. The **BatchFlow** loops through each dict. For each one:
308 | - It merges the dict with the BatchFlow’s own `params`.
309 | - It calls `flow.run(shared)` using the merged result.
310 | 3. This means the sub-Flow is run **repeatedly**, once for every param dict (a rough sketch follows below).
311 |
312 | ---
313 |
314 | ## 3. Nested or Multi-Level Batches
315 |
316 | You can nest a **BatchFlow** in another **BatchFlow**. For instance:
317 | - **Outer** batch: returns a list of directory param dicts (e.g., `{"directory": "/pathA"}`, `{"directory": "/pathB"}`, ...).
318 | - **Inner** batch: returns a list of per-file param dicts.
319 |
320 | At each level, **BatchFlow** merges its own param dict with the parent’s. By the time you reach the **innermost** node, the final `params` is the merged result of **all** parents in the chain. This way, a nested structure can keep track of the entire context (e.g., directory + file name) at once.
321 |
322 | ```python
323 |
324 | class FileBatchFlow(BatchFlow):
325 | def prep(self, shared):
326 | directory = self.params["directory"]
327 | # e.g., files = ["file1.txt", "file2.txt", ...]
328 | files = [f for f in os.listdir(directory) if f.endswith(".txt")]
329 | return [{"filename": f} for f in files]
330 |
331 | class DirectoryBatchFlow(BatchFlow):
332 | def prep(self, shared):
333 | directories = [ "/path/to/dirA", "/path/to/dirB"]
334 | return [{"directory": d} for d in directories]
335 |
336 | # MapSummaries have params like {"directory": "/path/to/dirA", "filename": "file1.txt"}
337 | inner_flow = FileBatchFlow(start=MapSummaries())
338 | outer_flow = DirectoryBatchFlow(start=inner_flow)
339 | ```
340 |
341 | ================================================
342 | File: docs/core_abstraction/communication.md
343 | ================================================
344 | ---
345 | layout: default
346 | title: "Communication"
347 | parent: "Core Abstraction"
348 | nav_order: 3
349 | ---
350 |
351 | # Communication
352 |
353 | Nodes and Flows **communicate** in two ways:
354 |
355 | 1. **Shared Store (recommended)**
356 |
357 |    - A global data structure (often an in-mem dict) that all nodes can read and write via `prep()` and `post()`.
358 |    - Great for data results, large content, or anything multiple nodes need.
359 |    - You should design the data structure and populate it ahead of time.
360 |
361 | 2. **Params (only for [Batch](./batch.md))**
362 | - Each node has a local, ephemeral `params` dict passed in by the **parent Flow**, used as an identifier for tasks. Parameter keys and values shall be **immutable**.
363 | - Good for identifiers like filenames or numeric IDs, in Batch mode.
364 |
365 | If you know memory management, think of the **Shared Store** like a **heap** (shared by all function calls), and **Params** like a **stack** (assigned by the caller).
366 |
367 | > Use `Shared Store` for almost all cases. It's flexible and easy to manage. It separates *Data Schema* from *Compute Logic*, making the code easier to maintain. `Params` is mainly syntactic sugar for [Batch](./batch.md).
368 | {: .best-practice }
369 |
370 | ---
371 |
372 | ## 1. Shared Store
373 |
374 | ### Overview
375 |
376 | A shared store is typically an in-mem dictionary, like:
377 | ```python
378 | shared = {"data": {}, "summary": {}, "config": {...}, ...}
379 | ```
380 |
381 | It can also contain local file handles, DB connections, or a combination for persistence. We recommend deciding the data structure or DB schema first based on your app requirements.
382 |
383 | ### Example
384 |
385 | ```python
386 | class LoadData(Node):
387 | def post(self, shared, prep_res, exec_res):
388 | # We write data to shared store
389 | shared["data"] = "Some text content"
390 | return None
391 |
392 | class Summarize(Node):
393 | def prep(self, shared):
394 | # We read data from shared store
395 | return shared["data"]
396 |
397 | def exec(self, prep_res):
398 | # Call LLM to summarize
399 | prompt = f"Summarize: {prep_res}"
400 | summary = call_llm(prompt)
401 | return summary
402 |
403 | def post(self, shared, prep_res, exec_res):
404 | # We write summary to shared store
405 | shared["summary"] = exec_res
406 | return "default"
407 |
408 | load_data = LoadData()
409 | summarize = Summarize()
410 | load_data >> summarize
411 | flow = Flow(start=load_data)
412 |
413 | shared = {}
414 | flow.run(shared)
415 | ```
416 |
417 | Here:
418 | - `LoadData` writes to `shared["data"]`.
419 | - `Summarize` reads from `shared["data"]`, summarizes, and writes to `shared["summary"]`.
420 |
421 | ---
422 |
423 | ## 2. Params
424 |
425 | **Params** let you store *per-Node* or *per-Flow* config that doesn't need to live in the shared store. They are:
426 | - **Immutable** during a Node's run cycle (i.e., they don't change mid-`prep->exec->post`).
427 | - **Set** via `set_params()`.
428 | - **Cleared** and updated each time a parent Flow calls it.
429 |
430 | > Only set the uppermost Flow params because others will be overwritten by the parent Flow.
431 | >
432 | > If you need to set child node params, see [Batch](./batch.md).
433 | {: .warning }
434 |
435 | Typically, **Params** are identifiers (e.g., file name, page number). Use them to fetch the task you assigned or write to a specific part of the shared store.
436 |
437 | ### Example
438 |
439 | ```python
440 | # 1) Create a Node that uses params
441 | class SummarizeFile(Node):
442 | def prep(self, shared):
443 | # Access the node's param
444 | filename = self.params["filename"]
445 | return shared["data"].get(filename, "")
446 |
447 | def exec(self, prep_res):
448 | prompt = f"Summarize: {prep_res}"
449 | return call_llm(prompt)
450 |
451 | def post(self, shared, prep_res, exec_res):
452 | filename = self.params["filename"]
453 | shared["summary"][filename] = exec_res
454 | return "default"
455 |
456 | # 2) Set params
457 | node = SummarizeFile()
458 |
459 | # 3) Set Node params directly (for testing)
460 | node.set_params({"filename": "doc1.txt"})
461 | node.run(shared)
462 |
463 | # 4) Create Flow
464 | flow = Flow(start=node)
465 |
466 | # 5) Set Flow params (overwrites node params)
467 | flow.set_params({"filename": "doc2.txt"})
468 | flow.run(shared) # The node summarizes doc2, not doc1
469 | ```
470 |
471 | ================================================
472 | File: docs/core_abstraction/flow.md
473 | ================================================
474 | ---
475 | layout: default
476 | title: "Flow"
477 | parent: "Core Abstraction"
478 | nav_order: 2
479 | ---
480 |
481 | # Flow
482 |
483 | A **Flow** orchestrates a graph of Nodes. You can chain Nodes in a sequence or create branching depending on the **Actions** returned from each Node's `post()`.
484 |
485 | ## 1. Action-based Transitions
486 |
487 | Each Node's `post()` returns an **Action** string. By default, if `post()` doesn't return anything, we treat that as `"default"`.
488 |
489 | You define transitions with the syntax:
490 |
491 | 1. **Basic default transition**: `node_a >> node_b`
492 | This means if `node_a.post()` returns `"default"`, go to `node_b`.
493 | (Equivalent to `node_a - "default" >> node_b`)
494 |
495 | 2. **Named action transition**: `node_a - "action_name" >> node_b`
496 | This means if `node_a.post()` returns `"action_name"`, go to `node_b`.
497 |
498 | It's possible to create loops, branching, or multi-step flows.
499 |
500 | ## 2. Creating a Flow
501 |
502 | A **Flow** begins with a **start** node. You call `Flow(start=some_node)` to specify the entry point. When you call `flow.run(shared)`, it executes the start node, looks at its returned Action from `post()`, follows the transition, and continues until there's no next node.
503 |
504 | ### Example: Simple Sequence
505 |
506 | Here's a minimal flow of two nodes in a chain:
507 |
508 | ```python
509 | node_a >> node_b
510 | flow = Flow(start=node_a)
511 | flow.run(shared)
512 | ```
513 |
514 | - When you run the flow, it executes `node_a`.
515 | - Suppose `node_a.post()` returns `"default"`.
516 | - The flow then sees `"default"` Action is linked to `node_b` and runs `node_b`.
517 | - `node_b.post()` returns `"default"` but we didn't define `node_b >> something_else`. So the flow ends there.
518 |
519 | ### Example: Branching & Looping
520 |
521 | Here's a simple expense approval flow that demonstrates branching and looping. The `ReviewExpense` node can return three possible Actions:
522 |
523 | - `"approved"`: expense is approved, move to payment processing
524 | - `"needs_revision"`: expense needs changes, send back for revision
525 | - `"rejected"`: expense is denied, finish the process
526 |
527 | We can wire them like this:
528 |
529 | ```python
530 | # Define the flow connections
531 | review - "approved" >> payment # If approved, process payment
532 | review - "needs_revision" >> revise # If needs changes, go to revision
533 | review - "rejected" >> finish # If rejected, finish the process
534 |
535 | revise >> review # After revision, go back for another review
536 | payment >> finish # After payment, finish the process
537 |
538 | flow = Flow(start=review)
539 | ```
540 |
541 | Let's see how it flows:
542 |
543 | 1. If `review.post()` returns `"approved"`, the expense moves to the `payment` node
544 | 2. If `review.post()` returns `"needs_revision"`, it goes to the `revise` node, which then loops back to `review`
545 | 3. If `review.post()` returns `"rejected"`, it moves to the `finish` node and stops
546 |
547 | ```mermaid
548 | flowchart TD
549 | review[Review Expense] -->|approved| payment[Process Payment]
550 | review -->|needs_revision| revise[Revise Report]
551 | review -->|rejected| finish[Finish Process]
552 |
553 | revise --> review
554 | payment --> finish
555 | ```
556 |
557 | ### Running Individual Nodes vs. Running a Flow
558 |
559 | - `node.run(shared)`: Just runs that node alone (calls `prep->exec->post()`), returns an Action.
560 | - `flow.run(shared)`: Executes from the start node, follows Actions to the next node, and so on until the flow can't continue.
561 |
562 | > `node.run(shared)` **does not** proceed to the successor.
563 | > This is mainly for debugging or testing a single node.
564 | >
565 | > Always use `flow.run(...)` in production to ensure the full pipeline runs correctly.
566 | {: .warning }
567 |
568 | ## 3. Nested Flows
569 |
570 | A **Flow** can act like a Node, which enables powerful composition patterns. This means you can:
571 |
572 | 1. Use a Flow as a Node within another Flow's transitions.
573 | 2. Combine multiple smaller Flows into a larger Flow for reuse.
574 | 3. Node `params` will be a merging of **all** parents' `params`.
575 |
576 | ### Flow's Node Methods
577 |
578 | A **Flow** is also a **Node**, so it will run `prep()` and `post()`. However:
579 |
580 | - It **won't** run `exec()`, as its main logic is to orchestrate its nodes.
581 | - `post()` always receives `None` for `exec_res` and should instead get the flow execution results from the shared store (see the sketch below).
582 |
583 | ### Basic Flow Nesting
584 |
585 | Here's how to connect a flow to another node:
586 |
587 | ```python
588 | # Create a sub-flow
589 | node_a >> node_b
590 | subflow = Flow(start=node_a)
591 |
592 | # Connect it to another node
593 | subflow >> node_c
594 |
595 | # Create the parent flow
596 | parent_flow = Flow(start=subflow)
597 | ```
598 |
599 | When `parent_flow.run()` executes:
600 | 1. It starts `subflow`
601 | 2. `subflow` runs through its nodes (`node_a->node_b`)
602 | 3. After `subflow` completes, execution continues to `node_c`
603 |
604 | ### Example: Order Processing Pipeline
605 |
606 | Here's a practical example that breaks down order processing into nested flows:
607 |
608 | ```python
609 | # Payment processing sub-flow
610 | validate_payment >> process_payment >> payment_confirmation
611 | payment_flow = Flow(start=validate_payment)
612 |
613 | # Inventory sub-flow
614 | check_stock >> reserve_items >> update_inventory
615 | inventory_flow = Flow(start=check_stock)
616 |
617 | # Shipping sub-flow
618 | create_label >> assign_carrier >> schedule_pickup
619 | shipping_flow = Flow(start=create_label)
620 |
621 | # Connect the flows into a main order pipeline
622 | payment_flow >> inventory_flow >> shipping_flow
623 |
624 | # Create the master flow
625 | order_pipeline = Flow(start=payment_flow)
626 |
627 | # Run the entire pipeline
628 | order_pipeline.run(shared_data)
629 | ```
630 |
631 | This creates a clean separation of concerns while maintaining a clear execution path:
632 |
633 | ```mermaid
634 | flowchart LR
635 | subgraph order_pipeline[Order Pipeline]
636 | subgraph paymentFlow["Payment Flow"]
637 | A[Validate Payment] --> B[Process Payment] --> C[Payment Confirmation]
638 | end
639 |
640 | subgraph inventoryFlow["Inventory Flow"]
641 | D[Check Stock] --> E[Reserve Items] --> F[Update Inventory]
642 | end
643 |
644 | subgraph shippingFlow["Shipping Flow"]
645 | G[Create Label] --> H[Assign Carrier] --> I[Schedule Pickup]
646 | end
647 |
648 | paymentFlow --> inventoryFlow
649 | inventoryFlow --> shippingFlow
650 | end
651 | ```
652 |
653 | ================================================
654 | File: docs/core_abstraction/node.md
655 | ================================================
656 | ---
657 | layout: default
658 | title: "Node"
659 | parent: "Core Abstraction"
660 | nav_order: 1
661 | ---
662 |
663 | # Node
664 |
665 | A **Node** is the smallest building block. Each Node has 3 steps `prep->exec->post`:
666 |
667 |
668 |
669 |
670 |
671 | 1. `prep(shared)`
672 | - **Read and preprocess data** from `shared` store.
673 | - Examples: *query DB, read files, or serialize data into a string*.
674 | - Return `prep_res`, which is used by `exec()` and `post()`.
675 |
676 | 2. `exec(prep_res)`
677 | - **Execute compute logic**, with optional retries and error handling (below).
678 | - Examples: *(mostly) LLM calls, remote APIs, tool use*.
679 | - ⚠️ This shall be only for compute and **NOT** access `shared`.
680 | - ⚠️ If retries enabled, ensure idempotent implementation.
681 | - Return `exec_res`, which is passed to `post()`.
682 |
683 | 3. `post(shared, prep_res, exec_res)`
684 | - **Postprocess and write data** back to `shared`.
685 | - Examples: *update DB, change states, log results*.
686 | - **Decide the next action** by returning a *string* (`action = "default"` if *None*).
687 |
688 | > **Why 3 steps?** To enforce the principle of *separation of concerns*. The data storage and data processing are operated separately.
689 | >
690 | > All steps are *optional*. E.g., you can only implement `prep` and `post` if you just need to process data.
691 | {: .note }
692 |
693 | ### Fault Tolerance & Retries
694 |
695 | You can **retry** `exec()` if it raises an exception via two parameters when defining the Node:
696 |
697 | - `max_retries` (int): Max times to run `exec()`. The default is `1` (**no** retry).
698 | - `wait` (int): The time to wait (in **seconds**) before next retry. By default, `wait=0` (no waiting).
699 | `wait` is helpful when you encounter rate-limits or quota errors from your LLM provider and need to back off.
700 |
701 | ```python
702 | my_node = SummarizeFile(max_retries=3, wait=10)
703 | ```
704 |
705 | When an exception occurs in `exec()`, the Node automatically retries until:
706 |
707 | - It either succeeds, or
708 | - The Node has retried `max_retries - 1` times already and fails on the last attempt.
709 |
710 | You can get the current retry count (0-based) from `self.cur_retry`.
711 |
712 | ```python
713 | class RetryNode(Node):
714 | def exec(self, prep_res):
715 |         print(f"Retry attempt {self.cur_retry}")
716 | raise Exception("Failed")
717 | ```
718 |
719 | ### Graceful Fallback
720 |
721 | To **gracefully handle** the exception (after all retries) rather than raising it, override:
722 |
723 | ```python
724 | def exec_fallback(self, prep_res, exc):
725 | raise exc
726 | ```
727 |
728 | By default, it just re-raises the exception. But you can return a fallback result instead, which becomes the `exec_res` passed to `post()`.
729 |
730 | ### Example: Summarize file
731 |
732 | ```python
733 | class SummarizeFile(Node):
734 | def prep(self, shared):
735 | return shared["data"]
736 |
737 | def exec(self, prep_res):
738 | if not prep_res:
739 | return "Empty file content"
740 | prompt = f"Summarize this text in 10 words: {prep_res}"
741 | summary = call_llm(prompt) # might fail
742 | return summary
743 |
744 | def exec_fallback(self, prep_res, exc):
745 | # Provide a simple fallback instead of crashing
746 | return "There was an error processing your request."
747 |
748 | def post(self, shared, prep_res, exec_res):
749 | shared["summary"] = exec_res
750 | # Return "default" by not returning
751 |
752 | summarize_node = SummarizeFile(max_retries=3)
753 |
754 | # node.run() calls prep->exec->post
755 | # If exec() fails, it retries up to 3 times before calling exec_fallback()
756 | action_result = summarize_node.run(shared)
757 |
758 | print("Action returned:", action_result) # "default"
759 | print("Summary stored:", shared["summary"])
760 | ```
761 |
762 | ================================================
763 | File: docs/core_abstraction/parallel.md
764 | ================================================
765 | ---
766 | layout: default
767 | title: "(Advanced) Parallel"
768 | parent: "Core Abstraction"
769 | nav_order: 6
770 | ---
771 |
772 | # (Advanced) Parallel
773 |
774 | **Parallel** Nodes and Flows let you run multiple **Async** Nodes and Flows **concurrently**—for example, summarizing multiple texts at once. This can improve performance by overlapping I/O and compute.
775 |
776 | > Because of Python’s GIL, parallel nodes and flows can’t truly parallelize CPU-bound tasks (e.g., heavy numerical computations). However, they excel at overlapping I/O-bound work—like LLM calls, database queries, API requests, or file I/O.
777 | {: .warning }
778 |
779 | > - **Ensure Tasks Are Independent**: If each item depends on the output of a previous item, **do not** parallelize.
780 | >
781 | > - **Beware of Rate Limits**: Parallel calls can **quickly** trigger rate limits on LLM services. You may need a **throttling** mechanism (e.g., semaphores or sleep intervals; see the sketch below).
782 | >
783 | > - **Consider Single-Node Batch APIs**: Some LLMs offer a **batch inference** API where you can send multiple prompts in a single call. This is more complex to implement but can be more efficient than launching many parallel requests and mitigates rate limits.
784 | {: .best-practice }
785 |
786 | ## AsyncParallelBatchNode
787 |
788 | Like **AsyncBatchNode**, but run `exec_async()` in **parallel**:
789 |
790 | ```python
791 | class ParallelSummaries(AsyncParallelBatchNode):
792 | async def prep_async(self, shared):
793 | # e.g., multiple texts
794 | return shared["texts"]
795 |
796 | async def exec_async(self, text):
797 | prompt = f"Summarize: {text}"
798 | return await call_llm_async(prompt)
799 |
800 | async def post_async(self, shared, prep_res, exec_res_list):
801 | shared["summary"] = "\n\n".join(exec_res_list)
802 | return "default"
803 |
804 | node = ParallelSummaries()
805 | flow = AsyncFlow(start=node)
806 | ```
807 |
808 | ## AsyncParallelBatchFlow
809 |
810 | Parallel version of **BatchFlow**. Each iteration of the sub-flow runs **concurrently** using different parameters:
811 |
812 | ```python
813 | class SummarizeMultipleFiles(AsyncParallelBatchFlow):
814 | async def prep_async(self, shared):
815 | return [{"filename": f} for f in shared["files"]]
816 |
817 | sub_flow = AsyncFlow(start=LoadAndSummarizeFile())
818 | parallel_flow = SummarizeMultipleFiles(start=sub_flow)
819 | await parallel_flow.run_async(shared)
820 | ```
821 |
822 | ================================================
823 | File: docs/design_pattern/agent.md
824 | ================================================
825 | ---
826 | layout: default
827 | title: "Agent"
828 | parent: "Design Pattern"
829 | nav_order: 6
830 | ---
831 |
832 | # Agent
833 |
834 | Agent is a powerful design pattern in which a node takes dynamic actions based on the context it receives.
835 | To express an agent, create a Node (the agent) with [branching](../core_abstraction/flow.md) to other nodes (Actions).
836 |
837 | > The core of building **performant** and **reliable** agents boils down to:
838 | >
839 | > 1. **Context Management:** Provide *clear, relevant context* so agents can understand the problem. E.g., rather than dumping an entire chat history or entire files, use a [Workflow](./workflow.md) that filters out irrelevant details and includes only the most relevant information.
840 | >
841 | > 2. **Action Space:** Define *a well-structured, unambiguous, and easy-to-use* set of actions. For instance, avoid creating overlapping actions like `read_databases` and `read_csvs`. Instead, unify data sources (e.g., move CSVs into a database) and design a single action. The action can be parameterized (e.g., string for search) or programmable (e.g., SQL queries).
842 | {: .best-practice }
843 |
844 | ### Example: Search Agent
845 |
846 | This agent:
847 | 1. Decides whether to search or answer
2. If it searches, loops back to decide whether more searching is needed
3. Answers once enough context has been gathered
850 |
851 | ```python
import yaml

class DecideAction(Node):
853 | def prep(self, shared):
854 | context = shared.get("context", "No previous search")
855 | query = shared["query"]
856 | return query, context
857 |
858 | def exec(self, inputs):
859 | query, context = inputs
860 | prompt = f"""
861 | Given input: {query}
862 | Previous search results: {context}
863 | Should I: 1) Search web for more info 2) Answer with current knowledge
864 | Output in yaml:
865 | ```yaml
866 | action: search/answer
867 | reason: why this action
868 | search_term: search phrase if action is search
869 | ```"""
870 | resp = call_llm(prompt)
871 | yaml_str = resp.split("```yaml")[1].split("```")[0].strip()
872 | result = yaml.safe_load(yaml_str)
873 |
874 | assert isinstance(result, dict)
875 | assert "action" in result
876 | assert "reason" in result
877 | assert result["action"] in ["search", "answer"]
878 | if result["action"] == "search":
879 | assert "search_term" in result
880 |
881 | return result
882 |
883 | def post(self, shared, prep_res, exec_res):
884 | if exec_res["action"] == "search":
885 | shared["search_term"] = exec_res["search_term"]
886 | return exec_res["action"]
887 |
888 | class SearchWeb(Node):
889 | def prep(self, shared):
890 | return shared["search_term"]
891 |
892 | def exec(self, search_term):
893 | return search_web(search_term)
894 |
895 | def post(self, shared, prep_res, exec_res):
896 | prev_searches = shared.get("context", [])
897 | shared["context"] = prev_searches + [
898 | {"term": shared["search_term"], "result": exec_res}
899 | ]
900 | return "decide"
901 |
902 | class DirectAnswer(Node):
903 | def prep(self, shared):
904 | return shared["query"], shared.get("context", "")
905 |
906 | def exec(self, inputs):
907 | query, context = inputs
908 | return call_llm(f"Context: {context}\nAnswer: {query}")
909 |
910 | def post(self, shared, prep_res, exec_res):
911 | print(f"Answer: {exec_res}")
912 | shared["answer"] = exec_res
913 |
914 | # Connect nodes
915 | decide = DecideAction()
916 | search = SearchWeb()
917 | answer = DirectAnswer()
918 |
919 | decide - "search" >> search
920 | decide - "answer" >> answer
921 | search - "decide" >> decide # Loop back
922 |
923 | flow = Flow(start=decide)
924 | flow.run({"query": "Who won the Nobel Prize in Physics 2024?"})
925 | ```
926 |
927 | ================================================
928 | File: docs/design_pattern/mapreduce.md
929 | ================================================
930 | ---
931 | layout: default
932 | title: "Map Reduce"
933 | parent: "Design Pattern"
934 | nav_order: 3
935 | ---
936 |
937 | # Map Reduce
938 |
939 | MapReduce is a design pattern suitable when you have either:
940 | - Large input data (e.g., multiple files to process), or
941 | - Large output data (e.g., multiple forms to fill)
942 |
943 | and there is a logical way to break the task into smaller, ideally independent parts.
944 | You first break down the task using [BatchNode](../core_abstraction/batch.md) in the map phase, followed by aggregation in the reduce phase.
945 |
946 | ### Example: Document Summarization
947 |
948 | ```python
949 | class MapSummaries(BatchNode):
950 | def prep(self, shared): return [shared["text"][i:i+10000] for i in range(0, len(shared["text"]), 10000)]
951 | def exec(self, chunk): return call_llm(f"Summarize this chunk: {chunk}")
952 | def post(self, shared, prep_res, exec_res_list): shared["summaries"] = exec_res_list
953 |
954 | class ReduceSummaries(Node):
955 | def prep(self, shared): return shared["summaries"]
956 | def exec(self, summaries): return call_llm(f"Combine these summaries: {summaries}")
957 | def post(self, shared, prep_res, exec_res): shared["final_summary"] = exec_res
958 |
959 | # Connect nodes
960 | map_node = MapSummaries()
961 | reduce_node = ReduceSummaries()
962 | map_node >> reduce_node
963 |
# Create and run flow
summarize_flow = Flow(start=map_node)
shared = {"text": "..."}  # the document to summarize
summarize_flow.run(shared)
print(shared["final_summary"])
967 | ```
968 |
969 | ================================================
970 | File: docs/design_pattern/memory.md
971 | ================================================
972 | ---
973 | layout: default
974 | title: "Chat Memory"
975 | parent: "Design Pattern"
976 | nav_order: 5
977 | ---
978 |
979 | # Chat Memory
980 |
981 | Multi-turn conversations require memory management to maintain context while avoiding overwhelming the LLM.
982 |
983 | ### 1. Naive Approach: Full History
984 |
985 | Sending the full chat history may overwhelm LLMs.
986 |
987 | ```python
988 | class ChatNode(Node):
989 | def prep(self, shared):
990 | if "history" not in shared:
991 | shared["history"] = []
992 | user_input = input("You: ")
993 | return shared["history"], user_input
994 |
995 | def exec(self, inputs):
996 | history, user_input = inputs
997 | messages = [{"role": "system", "content": "You are a helpful assistant"}]
998 | for h in history:
999 | messages.append(h)
1000 | messages.append({"role": "user", "content": user_input})
1001 | response = call_llm(messages)
1002 | return response
1003 |
1004 | def post(self, shared, prep_res, exec_res):
1005 | shared["history"].append({"role": "user", "content": prep_res[1]})
1006 | shared["history"].append({"role": "assistant", "content": exec_res})
1007 | return "continue"
1008 |
1009 | chat = ChatNode()
1010 | chat - "continue" >> chat
1011 | flow = Flow(start=chat)
1012 | ```
1013 |
1014 | ### 2. Improved Memory Management
1015 |
1016 | We can:
1. Limit the chat history to the most recent 4 exchanges.
2. Use [vector search](./tool.md) to retrieve relevant exchanges beyond the most recent 4.
1019 |
1020 | ```python
import numpy as np  # used below when appending new embeddings to the index

################################
1022 | # Node A: Retrieve user input & relevant messages
1023 | ################################
1024 | class ChatRetrieve(Node):
    def prep(self, s):
        s.setdefault("history", [])
        s.setdefault("memory_index", None)
        user_input = input("You: ")
        # Pass everything exec needs: exec has no access to the shared store
        return user_input, s["history"], s["memory_index"]

    def exec(self, inputs):
        user_input, history, memory_index = inputs
        emb = get_embedding(user_input)
        relevant = []
        if len(history) > 8 and memory_index:
            idx, _ = search_index(memory_index, emb, top_k=2)
            relevant = [history[int(i)] for i in idx[0]]  # idx has shape (1, top_k)
        return user_input, relevant
1038 |
1039 | def post(self, s, p, r):
1040 | user_input, relevant = r
1041 | s["user_input"] = user_input
1042 | s["relevant"] = relevant
1043 | return "continue"
1044 |
1045 | ################################
1046 | # Node B: Call LLM, update history + index
1047 | ################################
1048 | class ChatReply(Node):
1049 | def prep(self, s):
1050 | user_input = s["user_input"]
1051 | recent = s["history"][-8:]
1052 | relevant = s.get("relevant", [])
1053 | return user_input, recent, relevant
1054 |
1055 | def exec(self, inputs):
1056 | user_input, recent, relevant = inputs
1057 | msgs = [{"role":"system","content":"You are a helpful assistant."}]
1058 | if relevant:
1059 | msgs.append({"role":"system","content":f"Relevant: {relevant}"})
1060 | msgs.extend(recent)
1061 | msgs.append({"role":"user","content":user_input})
1062 | ans = call_llm(msgs)
1063 | return ans
1064 |
1065 | def post(self, s, pre, ans):
1066 | user_input, _, _ = pre
1067 | s["history"].append({"role":"user","content":user_input})
1068 | s["history"].append({"role":"assistant","content":ans})
1069 |
1070 | # Manage memory index
1071 | if len(s["history"]) == 8:
1072 | embs = []
1073 | for i in range(0, 8, 2):
1074 | text = s["history"][i]["content"] + " " + s["history"][i+1]["content"]
1075 | embs.append(get_embedding(text))
1076 | s["memory_index"] = create_index(embs)
1077 | elif len(s["history"]) > 8:
1078 | text = s["history"][-2]["content"] + " " + s["history"][-1]["content"]
1079 | new_emb = np.array([get_embedding(text)]).astype('float32')
1080 | s["memory_index"].add(new_emb)
1081 |
1082 | print(f"Assistant: {ans}")
1083 | return "continue"
1084 |
1085 | ################################
1086 | # Flow wiring
1087 | ################################
1088 | retrieve = ChatRetrieve()
1089 | reply = ChatReply()
1090 | retrieve - "continue" >> reply
1091 | reply - "continue" >> retrieve
1092 |
1093 | flow = Flow(start=retrieve)
1094 | shared = {}
1095 | flow.run(shared)
1096 | ```
1097 |
1098 | ================================================
1099 | File: docs/design_pattern/multi_agent.md
1100 | ================================================
1101 | ---
1102 | layout: default
1103 | title: "(Advanced) Multi-Agents"
1104 | parent: "Design Pattern"
1105 | nav_order: 7
1106 | ---
1107 |
1108 | # (Advanced) Multi-Agents
1109 |
Multiple [Agents](./agent.md) can work together by handling subtasks and communicating their progress.
Communication between agents is typically implemented using message queues in shared storage.
1112 |
> Most of the time, you don't need Multi-Agents. Start with a simple solution first.
1114 | {: .best-practice }
1115 |
1116 | ### Example Agent Communication: Message Queue
1117 |
1118 | Here's a simple example showing how to implement agent communication using `asyncio.Queue`.
1119 | The agent listens for messages, processes them, and continues listening:
1120 |
1121 | ```python
import asyncio

class AgentNode(AsyncNode):
1123 | async def prep_async(self, _):
1124 | message_queue = self.params["messages"]
1125 | message = await message_queue.get()
1126 | print(f"Agent received: {message}")
1127 | return message
1128 |
1129 | # Create node and flow
1130 | agent = AgentNode()
1131 | agent >> agent # connect to self
1132 | flow = AsyncFlow(start=agent)
1133 |
1134 | # Create heartbeat sender
1135 | async def send_system_messages(message_queue):
1136 | counter = 0
1137 | messages = [
1138 | "System status: all systems operational",
1139 | "Memory usage: normal",
1140 | "Network connectivity: stable",
1141 | "Processing load: optimal"
1142 | ]
1143 |
1144 | while True:
1145 | message = f"{messages[counter % len(messages)]} | timestamp_{counter}"
1146 | await message_queue.put(message)
1147 | counter += 1
1148 | await asyncio.sleep(1)
1149 |
1150 | async def main():
1151 | message_queue = asyncio.Queue()
1152 | shared = {}
1153 | flow.set_params({"messages": message_queue})
1154 |
1155 | # Run both coroutines
1156 | await asyncio.gather(
1157 | flow.run_async(shared),
1158 | send_system_messages(message_queue)
1159 | )
1160 |
1161 | asyncio.run(main())
1162 | ```
1163 |
1164 | The output:
1165 |
1166 | ```
1167 | Agent received: System status: all systems operational | timestamp_0
1168 | Agent received: Memory usage: normal | timestamp_1
1169 | Agent received: Network connectivity: stable | timestamp_2
1170 | Agent received: Processing load: optimal | timestamp_3
1171 | ```
1172 |
1173 | ### Interactive Multi-Agent Example: Taboo Game
1174 |
1175 | Here's a more complex example where two agents play the word-guessing game Taboo.
1176 | One agent provides hints while avoiding forbidden words, and another agent tries to guess the target word:
1177 |
1178 | ```python
import asyncio

class AsyncHinter(AsyncNode):
1180 | async def prep_async(self, shared):
1181 | guess = await shared["hinter_queue"].get()
1182 | if guess == "GAME_OVER":
1183 | return None
1184 | return shared["target_word"], shared["forbidden_words"], shared.get("past_guesses", [])
1185 |
1186 | async def exec_async(self, inputs):
1187 | if inputs is None:
1188 | return None
1189 | target, forbidden, past_guesses = inputs
1190 | prompt = f"Generate hint for '{target}'\nForbidden words: {forbidden}"
1191 | if past_guesses:
1192 | prompt += f"\nPrevious wrong guesses: {past_guesses}\nMake hint more specific."
1193 | prompt += "\nUse at most 5 words."
1194 |
1195 | hint = call_llm(prompt)
1196 | print(f"\nHinter: Here's your hint - {hint}")
1197 | return hint
1198 |
1199 | async def post_async(self, shared, prep_res, exec_res):
1200 | if exec_res is None:
1201 | return "end"
1202 | await shared["guesser_queue"].put(exec_res)
1203 | return "continue"
1204 |
1205 | class AsyncGuesser(AsyncNode):
1206 | async def prep_async(self, shared):
1207 | hint = await shared["guesser_queue"].get()
1208 | return hint, shared.get("past_guesses", [])
1209 |
1210 | async def exec_async(self, inputs):
1211 | hint, past_guesses = inputs
1212 | prompt = f"Given hint: {hint}, past wrong guesses: {past_guesses}, make a new guess. Directly reply a single word:"
1213 | guess = call_llm(prompt)
1214 | print(f"Guesser: I guess it's - {guess}")
1215 | return guess
1216 |
1217 | async def post_async(self, shared, prep_res, exec_res):
1218 | if exec_res.lower() == shared["target_word"].lower():
1219 | print("Game Over - Correct guess!")
1220 | await shared["hinter_queue"].put("GAME_OVER")
1221 | return "end"
1222 |
1223 | if "past_guesses" not in shared:
1224 | shared["past_guesses"] = []
1225 | shared["past_guesses"].append(exec_res)
1226 |
1227 | await shared["hinter_queue"].put(exec_res)
1228 | return "continue"
1229 |
1230 | async def main():
1231 | # Set up game
1232 | shared = {
1233 | "target_word": "nostalgia",
1234 | "forbidden_words": ["memory", "past", "remember", "feeling", "longing"],
1235 | "hinter_queue": asyncio.Queue(),
1236 | "guesser_queue": asyncio.Queue()
1237 | }
1238 |
1239 | print("Game starting!")
1240 | print(f"Target word: {shared['target_word']}")
1241 | print(f"Forbidden words: {shared['forbidden_words']}")
1242 |
1243 | # Initialize by sending empty guess to hinter
1244 | await shared["hinter_queue"].put("")
1245 |
1246 | # Create nodes and flows
1247 | hinter = AsyncHinter()
1248 | guesser = AsyncGuesser()
1249 |
1250 | # Set up flows
1251 | hinter_flow = AsyncFlow(start=hinter)
1252 | guesser_flow = AsyncFlow(start=guesser)
1253 |
1254 | # Connect nodes to themselves
1255 | hinter - "continue" >> hinter
1256 | guesser - "continue" >> guesser
1257 |
1258 | # Run both agents concurrently
1259 | await asyncio.gather(
1260 | hinter_flow.run_async(shared),
1261 | guesser_flow.run_async(shared)
1262 | )
1263 |
1264 | asyncio.run(main())
1265 | ```
1266 |
The output:
1268 |
1269 | ```
1270 | Game starting!
1271 | Target word: nostalgia
1272 | Forbidden words: ['memory', 'past', 'remember', 'feeling', 'longing']
1273 |
1274 | Hinter: Here's your hint - Thinking of childhood summer days
1275 | Guesser: I guess it's - popsicle
1276 |
1277 | Hinter: Here's your hint - When childhood cartoons make you emotional
1278 | Guesser: I guess it's - nostalgic
1279 |
1280 | Hinter: Here's your hint - When old songs move you
1281 | Guesser: I guess it's - memories
1282 |
1283 | Hinter: Here's your hint - That warm emotion about childhood
1284 | Guesser: I guess it's - nostalgia
1285 | Game Over - Correct guess!
1286 | ```
1287 |
1288 | ================================================
1289 | File: docs/design_pattern/rag.md
1290 | ================================================
1291 | ---
1292 | layout: default
1293 | title: "RAG"
1294 | parent: "Design Pattern"
1295 | nav_order: 4
1296 | ---
1297 |
1298 | # RAG (Retrieval Augmented Generation)
1299 |
1300 | For certain LLM tasks like answering questions, providing relevant context is essential. One common architecture is a **two-stage** RAG pipeline:
1301 |
1302 |
1303 |
1304 |
1305 |
1306 | 1. **Offline stage**: Preprocess and index documents ("building the index").
1307 | 2. **Online stage**: Given a question, generate answers by retrieving the most relevant context.
1308 |
1309 | ---
1310 | ## Stage 1: Offline Indexing
1311 |
1312 | We create three Nodes:
1313 | 1. `ChunkDocs` – [chunks](../utility_function/chunking.md) raw text.
1314 | 2. `EmbedDocs` – [embeds](../utility_function/embedding.md) each chunk.
1315 | 3. `StoreIndex` – stores embeddings into a [vector database](../utility_function/vector.md).
1316 |
1317 | ```python
1318 | class ChunkDocs(BatchNode):
1319 | def prep(self, shared):
1320 | # A list of file paths in shared["files"]. We process each file.
1321 | return shared["files"]
1322 |
1323 | def exec(self, filepath):
1324 | # read file content. In real usage, do error handling.
1325 | with open(filepath, "r", encoding="utf-8") as f:
1326 | text = f.read()
1327 | # chunk by 100 chars each
1328 | chunks = []
1329 | size = 100
1330 | for i in range(0, len(text), size):
1331 | chunks.append(text[i : i + size])
1332 | return chunks
1333 |
1334 | def post(self, shared, prep_res, exec_res_list):
1335 | # exec_res_list is a list of chunk-lists, one per file.
1336 | # flatten them all into a single list of chunks.
1337 | all_chunks = []
1338 | for chunk_list in exec_res_list:
1339 | all_chunks.extend(chunk_list)
1340 | shared["all_chunks"] = all_chunks
1341 |
1342 | class EmbedDocs(BatchNode):
1343 | def prep(self, shared):
1344 | return shared["all_chunks"]
1345 |
1346 | def exec(self, chunk):
1347 | return get_embedding(chunk)
1348 |
1349 | def post(self, shared, prep_res, exec_res_list):
1350 | # Store the list of embeddings.
1351 | shared["all_embeds"] = exec_res_list
1352 | print(f"Total embeddings: {len(exec_res_list)}")
1353 |
1354 | class StoreIndex(Node):
1355 | def prep(self, shared):
1356 | # We'll read all embeds from shared.
1357 | return shared["all_embeds"]
1358 |
1359 | def exec(self, all_embeds):
1360 | # Create a vector index (faiss or other DB in real usage).
1361 | index = create_index(all_embeds)
1362 | return index
1363 |
1364 | def post(self, shared, prep_res, index):
1365 | shared["index"] = index
1366 |
1367 | # Wire them in sequence
1368 | chunk_node = ChunkDocs()
1369 | embed_node = EmbedDocs()
1370 | store_node = StoreIndex()
1371 |
1372 | chunk_node >> embed_node >> store_node
1373 |
1374 | OfflineFlow = Flow(start=chunk_node)
1375 | ```
1376 |
1377 | Usage example:
1378 |
1379 | ```python
1380 | shared = {
1381 | "files": ["doc1.txt", "doc2.txt"], # any text files
1382 | }
1383 | OfflineFlow.run(shared)
1384 | ```
1385 |
1386 | ---
1387 | ## Stage 2: Online Query & Answer
1388 |
1389 | We have 3 nodes:
1390 | 1. `EmbedQuery` – embeds the user’s question.
2. `RetrieveDocs` – retrieves the most relevant chunk from the index.
1392 | 3. `GenerateAnswer` – calls the LLM with the question + chunk to produce the final answer.
1393 |
1394 | ```python
1395 | class EmbedQuery(Node):
1396 | def prep(self, shared):
1397 | return shared["question"]
1398 |
1399 | def exec(self, question):
1400 | return get_embedding(question)
1401 |
1402 | def post(self, shared, prep_res, q_emb):
1403 | shared["q_emb"] = q_emb
1404 |
1405 | class RetrieveDocs(Node):
1406 | def prep(self, shared):
1407 | # We'll need the query embedding, plus the offline index/chunks
1408 | return shared["q_emb"], shared["index"], shared["all_chunks"]
1409 |
1410 | def exec(self, inputs):
1411 | q_emb, index, chunks = inputs
1412 | I, D = search_index(index, q_emb, top_k=1)
1413 | best_id = I[0][0]
1414 | relevant_chunk = chunks[best_id]
1415 | return relevant_chunk
1416 |
1417 | def post(self, shared, prep_res, relevant_chunk):
1418 | shared["retrieved_chunk"] = relevant_chunk
1419 | print("Retrieved chunk:", relevant_chunk[:60], "...")
1420 |
1421 | class GenerateAnswer(Node):
1422 | def prep(self, shared):
1423 | return shared["question"], shared["retrieved_chunk"]
1424 |
1425 | def exec(self, inputs):
1426 | question, chunk = inputs
1427 | prompt = f"Question: {question}\nContext: {chunk}\nAnswer:"
1428 | return call_llm(prompt)
1429 |
1430 | def post(self, shared, prep_res, answer):
1431 | shared["answer"] = answer
1432 | print("Answer:", answer)
1433 |
1434 | embed_qnode = EmbedQuery()
1435 | retrieve_node = RetrieveDocs()
1436 | generate_node = GenerateAnswer()
1437 |
1438 | embed_qnode >> retrieve_node >> generate_node
1439 | OnlineFlow = Flow(start=embed_qnode)
1440 | ```
1441 |
1442 | Usage example:
1443 |
1444 | ```python
1445 | # Suppose we already ran OfflineFlow and have:
1446 | # shared["all_chunks"], shared["index"], etc.
1447 | shared["question"] = "Why do people like cats?"
1448 |
1449 | OnlineFlow.run(shared)
1450 | # final answer in shared["answer"]
1451 | ```
1452 |
1453 | ================================================
1454 | File: docs/design_pattern/structure.md
1455 | ================================================
1456 | ---
1457 | layout: default
1458 | title: "Structured Output"
1459 | parent: "Design Pattern"
1460 | nav_order: 1
1461 | ---
1462 |
1463 | # Structured Output
1464 |
1465 | In many use cases, you may want the LLM to output a specific structure, such as a list or a dictionary with predefined keys.
1466 |
1467 | There are several approaches to achieve a structured output:
1468 | - **Prompting** the LLM to strictly return a defined structure.
1469 | - Using LLMs that natively support **schema enforcement**.
1470 | - **Post-processing** the LLM's response to extract structured content.
1471 |
1472 | In practice, **Prompting** is simple and reliable for modern LLMs.
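
If your provider supports native **schema enforcement**, you can request structured output directly. A minimal sketch assuming OpenAI's JSON mode (model and key are placeholders):

```python
from openai import OpenAI

client = OpenAI(api_key="YOUR_API_KEY_HERE")
r = client.chat.completions.create(
    model="gpt-4o",
    response_format={"type": "json_object"},  # the response is guaranteed to be valid JSON
    messages=[{"role": "user", "content": "Return a JSON object with keys name and price for Widget Pro."}]
)
print(r.choices[0].message.content)
```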
1473 |
1474 | ### Example Use Cases
1475 |
1476 | - Extracting Key Information
1477 |
1478 | ```yaml
1479 | product:
1480 | name: Widget Pro
1481 | price: 199.99
1482 | description: |
1483 | A high-quality widget designed for professionals.
1484 | Recommended for advanced users.
1485 | ```
1486 |
1487 | - Summarizing Documents into Bullet Points
1488 |
1489 | ```yaml
1490 | summary:
1491 | - This product is easy to use.
1492 | - It is cost-effective.
1493 | - Suitable for all skill levels.
1494 | ```
1495 |
1496 | - Generating Configuration Files
1497 |
1498 | ```yaml
1499 | server:
1500 | host: 127.0.0.1
1501 | port: 8080
1502 | ssl: true
1503 | ```
1504 |
1505 | ## Prompt Engineering
1506 |
1507 | When prompting the LLM to produce **structured** output:
1508 | 1. **Wrap** the structure in code fences (e.g., `yaml`).
2. **Validate** that all required fields exist (and let the `Node` retry mechanism handle failures).
1510 |
### Example: Text Summarization
1512 |
1513 | ```python
1514 | class SummarizeNode(Node):
1515 | def exec(self, prep_res):
1516 | # Suppose `prep_res` is the text to summarize.
1517 | prompt = f"""
1518 | Please summarize the following text as YAML, with exactly 3 bullet points
1519 |
1520 | {prep_res}
1521 |
1522 | Now, output:
1523 | ```yaml
1524 | summary:
1525 | - bullet 1
1526 | - bullet 2
1527 | - bullet 3
1528 | ```"""
1529 | response = call_llm(prompt)
1530 | yaml_str = response.split("```yaml")[1].split("```")[0].strip()
1531 |
1532 | import yaml
1533 | structured_result = yaml.safe_load(yaml_str)
1534 |
1535 | assert "summary" in structured_result
1536 | assert isinstance(structured_result["summary"], list)
1537 |
1538 | return structured_result
1539 | ```
1540 |
> Besides using `assert` statements, another popular way to validate schemas is [Pydantic](https://github.com/pydantic/pydantic); see the sketch below.
1542 | {: .note }
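
A minimal Pydantic v2 sketch of the same validation, assuming `yaml_str` from the example above (`Summary` is a hypothetical model name):

```python
import yaml
from pydantic import BaseModel

class Summary(BaseModel):
    summary: list[str]

# Raises pydantic.ValidationError if the structure doesn't match
structured_result = Summary.model_validate(yaml.safe_load(yaml_str))
```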
1543 |
1544 | ### Why YAML instead of JSON?
1545 |
Current LLMs struggle with escaping. YAML is easier for strings because they don't always need quotes.
1547 |
1548 | **In JSON**
1549 |
1550 | ```json
1551 | {
  "dialogue": "Alice said: \"Hello Bob.\nHow are you?\nI am good.\""
1553 | }
1554 | ```
1555 |
1556 | - Every double quote inside the string must be escaped with `\"`.
1557 | - Each newline in the dialogue must be represented as `\n`.
1558 |
1559 | **In YAML**
1560 |
1561 | ```yaml
1562 | dialogue: |
1563 | Alice said: "Hello Bob.
1564 | How are you?
1565 | I am good."
1566 | ```
1567 |
1568 | - No need to escape interior quotes—just place the entire text under a block literal (`|`).
1569 | - Newlines are naturally preserved without needing `\n`.
1570 |
1571 | ================================================
1572 | File: docs/design_pattern/workflow.md
1573 | ================================================
1574 | ---
1575 | layout: default
1576 | title: "Workflow"
1577 | parent: "Design Pattern"
1578 | nav_order: 2
1579 | ---
1580 |
1581 | # Workflow
1582 |
1583 | Many real-world tasks are too complex for one LLM call. The solution is to decompose them into a [chain](../core_abstraction/flow.md) of multiple Nodes.
1584 |
1585 | > - You don't want to make each task **too coarse**, because it may be *too complex for one LLM call*.
1586 | > - You don't want to make each task **too granular**, because then *the LLM call doesn't have enough context* and results are *not consistent across nodes*.
1587 | >
1588 | > You usually need multiple *iterations* to find the *sweet spot*. If the task has too many *edge cases*, consider using [Agents](./agent.md).
1589 | {: .best-practice }
1590 |
1591 | ### Example: Article Writing
1592 |
1593 | ```python
1594 | class GenerateOutline(Node):
1595 | def prep(self, shared): return shared["topic"]
1596 | def exec(self, topic): return call_llm(f"Create a detailed outline for an article about {topic}")
1597 | def post(self, shared, prep_res, exec_res): shared["outline"] = exec_res
1598 |
1599 | class WriteSection(Node):
1600 | def prep(self, shared): return shared["outline"]
1601 | def exec(self, outline): return call_llm(f"Write content based on this outline: {outline}")
1602 | def post(self, shared, prep_res, exec_res): shared["draft"] = exec_res
1603 |
1604 | class ReviewAndRefine(Node):
1605 | def prep(self, shared): return shared["draft"]
1606 | def exec(self, draft): return call_llm(f"Review and improve this draft: {draft}")
1607 | def post(self, shared, prep_res, exec_res): shared["final_article"] = exec_res
1608 |
1609 | # Connect nodes
1610 | outline = GenerateOutline()
1611 | write = WriteSection()
1612 | review = ReviewAndRefine()
1613 |
1614 | outline >> write >> review
1615 |
1616 | # Create and run flow
1617 | writing_flow = Flow(start=outline)
1618 | shared = {"topic": "AI Safety"}
1619 | writing_flow.run(shared)
1620 | ```
1621 |
1622 | For *dynamic cases*, consider using [Agents](./agent.md).
1623 |
1624 | ================================================
1625 | File: docs/utility_function/llm.md
1626 | ================================================
1627 | ---
1628 | layout: default
1629 | title: "LLM Wrapper"
1630 | parent: "Utility Function"
1631 | nav_order: 1
1632 | ---
1633 |
1634 | # LLM Wrappers
1635 |
We **don't** provide built-in LLM wrappers. Instead, please implement your own, for example by asking an assistant like ChatGPT or Claude. If you ask ChatGPT to "implement a `call_llm` function that takes a prompt and returns the LLM response," you should get something like:
1637 |
1638 | ```python
1639 | def call_llm(prompt):
1640 | from openai import OpenAI
1641 | client = OpenAI(api_key="YOUR_API_KEY_HERE")
1642 | r = client.chat.completions.create(
1643 | model="gpt-4o",
1644 | messages=[{"role": "user", "content": prompt}]
1645 | )
1646 | return r.choices[0].message.content
1647 |
1648 | # Example usage
1649 | call_llm("How are you?")
1650 | ```
1651 |
1652 | > Store the API key in an environment variable like OPENAI_API_KEY for security.
1653 | {: .note }
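
For example, a minimal sketch reading the key from the environment:

```python
import os
from openai import OpenAI

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
```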
1654 |
1655 | ## Improvements
1656 | Feel free to enhance your `call_llm` function as needed. Here are examples:
1657 |
1658 | - Handle chat history:
1659 |
1660 | ```python
1661 | def call_llm(messages):
1662 | from openai import OpenAI
1663 | client = OpenAI(api_key="YOUR_API_KEY_HERE")
1664 | r = client.chat.completions.create(
1665 | model="gpt-4o",
1666 | messages=messages
1667 | )
1668 | return r.choices[0].message.content
1669 | ```
1670 |
- Add in-memory caching:
1672 |
1673 | ```python
1674 | from functools import lru_cache
1675 |
1676 | @lru_cache(maxsize=1000)
1677 | def call_llm(prompt):
1678 | # Your implementation here
1679 | pass
1680 | ```
1681 |
> ⚠️ Caching conflicts with Node retries: a retried call returns the same cached result, so the retry cannot help.
>
> To address this, use the cache only on the first attempt, as shown below.
1685 | {: .warning }
1686 |
1687 |
1688 | ```python
1689 | from functools import lru_cache
1690 |
1691 | @lru_cache(maxsize=1000)
1692 | def cached_call(prompt):
1693 | pass
1694 |
1695 | def call_llm(prompt, use_cache):
1696 | if use_cache:
1697 | return cached_call(prompt)
1698 | # Call the underlying function directly
1699 | return cached_call.__wrapped__(prompt)
1700 |
1701 | class SummarizeNode(Node):
1702 | def exec(self, text):
        return call_llm(f"Summarize: {text}", self.cur_retry == 0)
1704 | ```
1705 |
1706 | - Enable logging:
1707 |
1708 | ```python
1709 | def call_llm(prompt):
1710 | import logging
1711 | logging.info(f"Prompt: {prompt}")
1712 | response = ... # Your implementation here
1713 | logging.info(f"Response: {response}")
1714 | return response
1715 | ```
1716 |
1717 | ## Why Not Provide Built-in LLM Wrappers?
1718 | I believe it is a **bad practice** to provide LLM-specific implementations in a general framework:
1719 | - **LLM APIs change frequently**. Hardcoding them makes maintenance a nightmare.
1720 | - You may need **flexibility** to switch vendors, use fine-tuned models, or deploy local LLMs.
1721 | - You may need **optimizations** like prompt caching, request batching, or response streaming.
1722 |
1723 | ================================================
1724 | File: docs/utility_function/tool.md
1725 | ================================================
1726 | ---
1727 | layout: default
1728 | title: "Tool"
1729 | parent: "Utility Function"
1730 | nav_order: 2
1731 | ---
1732 |
1733 | # Tool
1734 |
1735 | Similar to LLM wrappers, we **don't** provide built-in tools. Here, we recommend some *minimal* (and incomplete) implementations of commonly used tools. These examples can serve as a starting point for your own tooling.
1736 |
1737 | ---
1738 |
1739 | ## 1. Embedding Calls
1740 |
1741 | ```python
1742 | def get_embedding(text):
1743 | from openai import OpenAI
1744 | client = OpenAI(api_key="YOUR_API_KEY_HERE")
1745 | r = client.embeddings.create(
1746 | model="text-embedding-ada-002",
1747 | input=text
1748 | )
1749 | return r.data[0].embedding
1750 |
1751 | get_embedding("What's the meaning of life?")
1752 | ```
1753 |
1754 | ---
1755 |
1756 | ## 2. Vector Database (Faiss)
1757 |
1758 | ```python
1759 | import faiss
1760 | import numpy as np
1761 |
1762 | def create_index(embeddings):
1763 | dim = len(embeddings[0])
1764 | index = faiss.IndexFlatL2(dim)
1765 | index.add(np.array(embeddings).astype('float32'))
1766 | return index
1767 |
1768 | def search_index(index, query_embedding, top_k=5):
1769 | D, I = index.search(
1770 | np.array([query_embedding]).astype('float32'),
1771 | top_k
1772 | )
1773 | return I, D
1774 |
index = create_index(embeddings)      # embeddings: a list of vectors, e.g. from get_embedding
search_index(index, query_embedding)  # returns (indices, distances)
1777 | ```
1778 |
1779 | ---
1780 |
1781 | ## 3. Local Database
1782 |
1783 | ```python
1784 | import sqlite3
1785 |
1786 | def execute_sql(query):
1787 | conn = sqlite3.connect("mydb.db")
1788 | cursor = conn.cursor()
1789 | cursor.execute(query)
1790 | result = cursor.fetchall()
1791 | conn.commit()
1792 | conn.close()
1793 | return result
1794 | ```
1795 |
1796 | > ⚠️ Beware of SQL injection risk
1797 | {: .warning }
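
A minimal mitigation sketch using `sqlite3` placeholders instead of string formatting (`user_name` stands in for untrusted input):

```python
import sqlite3

def get_user(user_name):
    conn = sqlite3.connect("mydb.db")
    cursor = conn.cursor()
    # The `?` placeholder lets sqlite3 bind the value safely
    cursor.execute("SELECT * FROM users WHERE name = ?", (user_name,))
    result = cursor.fetchall()
    conn.close()
    return result
```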
1798 |
1799 | ---
1800 |
1801 | ## 4. Python Function Execution
1802 |
1803 | ```python
1804 | def run_code(code_str):
1805 | env = {}
1806 | exec(code_str, env)
1807 | return env
1808 |
1809 | run_code("print('Hello, world!')")
1810 | ```
1811 |
1812 | > ⚠️ exec() is dangerous with untrusted input
1813 | {: .warning }
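
A partial mitigation sketch is to strip builtins from the execution environment; this is *not* a real sandbox, so untrusted code should still run in an isolated process or container:

```python
def run_code_restricted(code_str):
    # An empty __builtins__ blocks casual access to open, __import__, etc.
    env = {"__builtins__": {}}
    exec(code_str, env)
    return env
```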
1814 |
1815 |
1816 | ---
1817 |
1818 | ## 5. PDF Extraction
1819 |
1820 | If your PDFs are text-based, use PyMuPDF:
1821 |
1822 | ```python
1823 | import fitz # PyMuPDF
1824 |
1825 | def extract_text(pdf_path):
1826 | doc = fitz.open(pdf_path)
1827 | text = ""
1828 | for page in doc:
1829 | text += page.get_text()
1830 | doc.close()
1831 | return text
1832 |
1833 | extract_text("document.pdf")
1834 | ```
1835 |
For image-based PDFs (e.g., scanned), OCR is needed. An easy and fast option is to use an LLM with vision capabilities:
1837 |
1838 | ```python
1839 | from openai import OpenAI
1840 | import base64
1841 |
1842 | def call_llm_vision(prompt, image_data):
1843 | client = OpenAI(api_key="YOUR_API_KEY_HERE")
1844 | img_base64 = base64.b64encode(image_data).decode('utf-8')
1845 |
1846 | response = client.chat.completions.create(
1847 | model="gpt-4o",
1848 | messages=[{
1849 | "role": "user",
1850 | "content": [
1851 | {"type": "text", "text": prompt},
1852 | {"type": "image_url",
1853 | "image_url": {"url": f"data:image/png;base64,{img_base64}"}}
1854 | ]
1855 | }]
1856 | )
1857 |
1858 | return response.choices[0].message.content
1859 |
1860 | pdf_document = fitz.open("document.pdf")
1861 | page_num = 0
1862 | page = pdf_document[page_num]
1863 | pix = page.get_pixmap()
1864 | img_data = pix.tobytes("png")
1865 |
1866 | call_llm_vision("Extract text from this image", img_data)
1867 | ```
1868 |
1869 | ---
1870 |
1871 | ## 6. Web Crawling
1872 |
1873 | ```python
1874 | def crawl_web(url):
1875 | import requests
1876 | from bs4 import BeautifulSoup
1877 | html = requests.get(url).text
1878 | soup = BeautifulSoup(html, "html.parser")
1879 | return soup.title.string, soup.get_text()
1880 | ```
1881 |
1882 | ---
1883 |
1884 | ## 7. Basic Search (SerpAPI example)
1885 |
1886 | ```python
1887 | def search_google(query):
1888 | import requests
1889 | params = {
1890 | "engine": "google",
1891 | "q": query,
1892 | "api_key": "YOUR_API_KEY"
1893 | }
1894 | r = requests.get("https://serpapi.com/search", params=params)
1895 | return r.json()
1896 | ```
1897 |
1898 | ---
1899 |
1900 |
1901 | ## 8. Audio Transcription (OpenAI Whisper)
1902 |
1903 | ```python
def transcribe_audio(file_path):
    from openai import OpenAI
    client = OpenAI(api_key="YOUR_API_KEY_HERE")
    with open(file_path, "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file
        )
    return transcript.text
1909 | ```
1910 |
1911 | ---
1912 |
1913 | ## 9. Text-to-Speech (TTS)
1914 |
1915 | ```python
1916 | def text_to_speech(text):
1917 | import pyttsx3
1918 | engine = pyttsx3.init()
1919 | engine.say(text)
1920 | engine.runAndWait()
1921 | ```
1922 |
1923 | ---
1924 |
1925 | ## 10. Sending Email
1926 |
1927 | ```python
1928 | def send_email(to_address, subject, body, from_address, password):
1929 | import smtplib
1930 | from email.mime.text import MIMEText
1931 |
1932 | msg = MIMEText(body)
1933 | msg["Subject"] = subject
1934 | msg["From"] = from_address
1935 | msg["To"] = to_address
1936 |
1937 | with smtplib.SMTP_SSL("smtp.gmail.com", 465) as server:
1938 | server.login(from_address, password)
1939 | server.sendmail(from_address, [to_address], msg.as_string())
1940 | ```
1941 |
1942 | ================================================
1943 | File: docs/utility_function/viz.md
1944 | ================================================
1945 | ---
1946 | layout: default
1947 | title: "Viz and Debug"
1948 | parent: "Utility Function"
1949 | nav_order: 3
1950 | ---
1951 |
1952 | # Visualization and Debugging
1953 |
Similar to LLM wrappers, we **don't** provide built-in visualization and debugging. Here, we recommend some *minimal* (and incomplete) implementations. These examples can serve as a starting point for your own tooling.
1955 |
1956 | ## 1. Visualization with Mermaid
1957 |
1958 | This code recursively traverses the nested graph, assigns unique IDs to each node, and treats Flow nodes as subgraphs to generate Mermaid syntax for a hierarchical visualization.
1959 |
1960 | {% raw %}
1961 | ```python
1962 | def build_mermaid(start):
1963 | ids, visited, lines = {}, set(), ["graph LR"]
1964 | ctr = 1
1965 | def get_id(n):
1966 | nonlocal ctr
1967 | return ids[n] if n in ids else (ids.setdefault(n, f"N{ctr}"), (ctr := ctr + 1))[0]
1968 | def link(a, b):
1969 | lines.append(f" {a} --> {b}")
1970 | def walk(node, parent=None):
1971 | if node in visited:
1972 | return parent and link(parent, get_id(node))
1973 | visited.add(node)
1974 | if isinstance(node, Flow):
1975 | node.start and parent and link(parent, get_id(node.start))
1976 | lines.append(f"\n subgraph sub_flow_{get_id(node)}[{type(node).__name__}]")
1977 | node.start and walk(node.start)
1978 | for nxt in node.successors.values():
1979 | node.start and walk(nxt, get_id(node.start)) or (parent and link(parent, get_id(nxt))) or walk(nxt)
1980 | lines.append(" end\n")
1981 | else:
1982 | lines.append(f" {(nid := get_id(node))}['{type(node).__name__}']")
1983 | parent and link(parent, nid)
1984 | [walk(nxt, nid) for nxt in node.successors.values()]
1985 | walk(start)
1986 | return "\n".join(lines)
1987 | ```
1988 | {% endraw %}
1989 |
1990 |
1991 | For example, suppose we have a complex Flow for data science:
1992 |
1993 | ```python
1994 | class DataPrepBatchNode(BatchNode):
    def prep(self, shared): return []
1996 | class ValidateDataNode(Node): pass
1997 | class FeatureExtractionNode(Node): pass
1998 | class TrainModelNode(Node): pass
1999 | class EvaluateModelNode(Node): pass
2000 | class ModelFlow(Flow): pass
class DataScienceFlow(Flow): pass
2002 |
2003 | feature_node = FeatureExtractionNode()
2004 | train_node = TrainModelNode()
2005 | evaluate_node = EvaluateModelNode()
2006 | feature_node >> train_node >> evaluate_node
2007 | model_flow = ModelFlow(start=feature_node)
2008 | data_prep_node = DataPrepBatchNode()
2009 | validate_node = ValidateDataNode()
2010 | data_prep_node >> validate_node >> model_flow
2011 | data_science_flow = DataScienceFlow(start=data_prep_node)
2012 | result = build_mermaid(start=data_science_flow)
2013 | ```
2014 |
2015 | The code generates a Mermaid diagram:
2016 |
2017 | ```mermaid
2018 | graph LR
2019 | subgraph sub_flow_N1[DataScienceFlow]
2020 | N2['DataPrepBatchNode']
2021 | N3['ValidateDataNode']
2022 | N2 --> N3
2023 | N3 --> N4
2024 |
2025 | subgraph sub_flow_N5[ModelFlow]
2026 | N4['FeatureExtractionNode']
2027 | N6['TrainModelNode']
2028 | N4 --> N6
2029 | N7['EvaluateModelNode']
2030 | N6 --> N7
2031 | end
2032 |
2033 | end
2034 | ```
2035 |
2036 | ## 2. Call Stack Debugging
2037 |
2038 | It would be useful to print the Node call stacks for debugging. This can be achieved by inspecting the runtime call stack:
2039 |
2040 | ```python
2041 | import inspect
2042 |
2043 | def get_node_call_stack():
2044 | stack = inspect.stack()
2045 | node_names = []
2046 | seen_ids = set()
2047 | for frame_info in stack[1:]:
2048 | local_vars = frame_info.frame.f_locals
2049 | if 'self' in local_vars:
2050 | caller_self = local_vars['self']
2051 | if isinstance(caller_self, BaseNode) and id(caller_self) not in seen_ids:
2052 | seen_ids.add(id(caller_self))
2053 | node_names.append(type(caller_self).__name__)
2054 | return node_names
2055 | ```
2056 |
2057 | For example, suppose we have a complex Flow for data science:
2058 |
2059 | ```python
2060 | class DataPrepBatchNode(BatchNode):
2061 | def prep(self, shared): return []
2062 | class ValidateDataNode(Node): pass
2063 | class FeatureExtractionNode(Node): pass
2064 | class TrainModelNode(Node): pass
2065 | class EvaluateModelNode(Node):
2066 | def prep(self, shared):
2067 | stack = get_node_call_stack()
2068 | print("Call stack:", stack)
2069 | class ModelFlow(Flow): pass
class DataScienceFlow(Flow): pass
2071 |
2072 | feature_node = FeatureExtractionNode()
2073 | train_node = TrainModelNode()
2074 | evaluate_node = EvaluateModelNode()
2075 | feature_node >> train_node >> evaluate_node
2076 | model_flow = ModelFlow(start=feature_node)
2077 | data_prep_node = DataPrepBatchNode()
2078 | validate_node = ValidateDataNode()
2079 | data_prep_node >> validate_node >> model_flow
2080 | data_science_flow = DataScienceFlow(start=data_prep_node)
2081 | data_science_flow.run({})
2082 | ```
2083 |
2084 | The output would be: `Call stack: ['EvaluateModelNode', 'ModelFlow', 'DataScienceFlow']`
--------------------------------------------------------------------------------