├── .aider.conf.yml ├── .claude └── commands │ └── prime.md ├── .env.sample ├── .gitignore ├── .mcp.sample.json ├── README.md ├── ai_docs ├── anthropic_web_search_tool.md ├── claude-code-tutorials.md ├── claude_code_best_practices.md ├── claude_code_tech.md ├── fc_openai_agents.md └── uv-single-file-scripts.md ├── aider_is_programmable_1.sh ├── aider_is_programmable_2.js ├── aider_is_programmable_2.py ├── anthropic_search.py ├── bonus ├── claude_code_inside_openai_agent_sdk_4_bonus.py └── starter_notion_agent.py ├── claude_code_is_programmable_1.sh ├── claude_code_is_programmable_2.js ├── claude_code_is_programmable_2.py ├── claude_code_is_programmable_3.py ├── claude_code_is_programmable_4.py ├── claude_testing_v1.py ├── images ├── programmable-agentic-coding.png └── voice-to-claude-code.png ├── reset.sh ├── tests └── test_claude_testing_v1.py └── voice_to_claude_code.py /.aider.conf.yml: -------------------------------------------------------------------------------- 1 | ########################################################## 2 | # Sample .aider.conf.yml 3 | # This file lists *all* the valid configuration entries. 4 | # Place in your home dir, or at the root of your git repo. 5 | ########################################################## 6 | 7 | # Note: You can only put OpenAI and Anthropic API keys in the yaml 8 | # config file. 
Keys for all APIs can be stored in a .env file 9 | # https://aider.chat/docs/config/dotenv.html 10 | 11 | ########## 12 | # options: 13 | 14 | ## show this help message and exit 15 | #help: xxx 16 | 17 | ############# 18 | # Main model: 19 | 20 | ## Specify the model to use for the main chat 21 | #model: xxx 22 | 23 | ######################## 24 | # API Keys and settings: 25 | 26 | ## Specify the OpenAI API key 27 | #openai-api-key: xxx 28 | 29 | ## Specify the Anthropic API key 30 | #anthropic-api-key: xxx 31 | 32 | ## Specify the api base url 33 | #openai-api-base: xxx 34 | 35 | ## (deprecated, use --set-env OPENAI_API_TYPE=) 36 | #openai-api-type: xxx 37 | 38 | ## (deprecated, use --set-env OPENAI_API_VERSION=) 39 | #openai-api-version: xxx 40 | 41 | ## (deprecated, use --set-env OPENAI_API_DEPLOYMENT_ID=) 42 | #openai-api-deployment-id: xxx 43 | 44 | ## (deprecated, use --set-env OPENAI_ORGANIZATION=) 45 | #openai-organization-id: xxx 46 | 47 | ## Set an environment variable (to control API settings, can be used multiple times) 48 | #set-env: xxx 49 | ## Specify multiple values like this: 50 | #set-env: 51 | # - xxx 52 | # - yyy 53 | # - zzz 54 | 55 | ## Set an API key for a provider (eg: --api-key provider= sets PROVIDER_API_KEY=) 56 | #api-key: xxx 57 | ## Specify multiple values like this: 58 | #api-key: 59 | # - xxx 60 | # - yyy 61 | # - zzz 62 | 63 | ################# 64 | # Model settings: 65 | 66 | ## List known models which match the (partial) MODEL name 67 | #list-models: xxx 68 | 69 | ## Specify a file with aider model settings for unknown models 70 | #model-settings-file: .aider.model.settings.yml 71 | 72 | ## Specify a file with context window and costs for unknown models 73 | #model-metadata-file: .aider.model.metadata.json 74 | 75 | ## Add a model alias (can be used multiple times) 76 | #alias: xxx 77 | ## Specify multiple values like this: 78 | #alias: 79 | # - xxx 80 | # - yyy 81 | # - zzz 82 | 83 | ## Set the reasoning_effort API parameter 
(default: not set) 84 | #reasoning-effort: xxx 85 | 86 | ## Set the thinking token budget for models that support it (default: not set) 87 | #thinking-tokens: xxx 88 | 89 | ## Verify the SSL cert when connecting to models (default: True) 90 | #verify-ssl: true 91 | 92 | ## Timeout in seconds for API calls (default: None) 93 | #timeout: xxx 94 | 95 | ## Specify what edit format the LLM should use (default depends on model) 96 | #edit-format: xxx 97 | 98 | ## Use architect edit format for the main chat 99 | #architect: false 100 | 101 | ## Enable/disable automatic acceptance of architect changes (default: True) 102 | #auto-accept-architect: true 103 | 104 | ## Specify the model to use for commit messages and chat history summarization (default depends on --model) 105 | #weak-model: xxx 106 | 107 | ## Specify the model to use for editor tasks (default depends on --model) 108 | #editor-model: xxx 109 | 110 | ## Specify the edit format for the editor model (default: depends on editor model) 111 | #editor-edit-format: xxx 112 | 113 | ## Only work with models that have meta-data available (default: True) 114 | #show-model-warnings: true 115 | 116 | ## Check if model accepts settings like reasoning_effort/thinking_tokens (default: True) 117 | #check-model-accepts-settings: true 118 | 119 | ## Soft limit on tokens for chat history, after which summarization begins. If unspecified, defaults to the model's max_chat_history_tokens. 120 | #max-chat-history-tokens: xxx 121 | 122 | ################# 123 | # Cache settings: 124 | 125 | ## Enable caching of prompts (default: False) 126 | #cache-prompts: false 127 | 128 | ## Number of times to ping at 5min intervals to keep prompt cache warm (default: 0) 129 | #cache-keepalive-pings: false 130 | 131 | ################### 132 | # Repomap settings: 133 | 134 | ## Suggested number of tokens to use for repo map, use 0 to disable 135 | #map-tokens: xxx 136 | 137 | ## Control how often the repo map is refreshed. 
Options: auto, always, files, manual (default: auto) 138 | #map-refresh: auto 139 | 140 | ## Multiplier for map tokens when no files are specified (default: 2) 141 | #map-multiplier-no-files: true 142 | 143 | ################ 144 | # History Files: 145 | 146 | ## Specify the chat input history file (default: .aider.input.history) 147 | #input-history-file: .aider.input.history 148 | 149 | ## Specify the chat history file (default: .aider.chat.history.md) 150 | #chat-history-file: .aider.chat.history.md 151 | 152 | ## Restore the previous chat history messages (default: False) 153 | #restore-chat-history: false 154 | 155 | ## Log the conversation with the LLM to this file (for example, .aider.llm.history) 156 | #llm-history-file: xxx 157 | 158 | ################## 159 | # Output settings: 160 | 161 | ## Use colors suitable for a dark terminal background (default: False) 162 | #dark-mode: false 163 | 164 | ## Use colors suitable for a light terminal background (default: False) 165 | #light-mode: false 166 | 167 | ## Enable/disable pretty, colorized output (default: True) 168 | #pretty: true 169 | 170 | ## Enable/disable streaming responses (default: True) 171 | #stream: true 172 | 173 | ## Set the color for user input (default: #00cc00) 174 | #user-input-color: "#00cc00" 175 | 176 | ## Set the color for tool output (default: None) 177 | #tool-output-color: "xxx" 178 | 179 | ## Set the color for tool error messages (default: #FF2222) 180 | #tool-error-color: "#FF2222" 181 | 182 | ## Set the color for tool warning messages (default: #FFA500) 183 | #tool-warning-color: "#FFA500" 184 | 185 | ## Set the color for assistant output (default: #0088ff) 186 | #assistant-output-color: "#0088ff" 187 | 188 | ## Set the color for the completion menu (default: terminal's default text color) 189 | #completion-menu-color: "xxx" 190 | 191 | ## Set the background color for the completion menu (default: terminal's default background color) 192 | #completion-menu-bg-color: "xxx" 193 | 
194 | ## Set the color for the current item in the completion menu (default: terminal's default background color) 195 | #completion-menu-current-color: "xxx" 196 | 197 | ## Set the background color for the current item in the completion menu (default: terminal's default text color) 198 | #completion-menu-current-bg-color: "xxx" 199 | 200 | ## Set the markdown code theme (default: default, other options include monokai, solarized-dark, solarized-light, or a Pygments builtin style, see https://pygments.org/styles for available themes) 201 | #code-theme: default 202 | 203 | ## Show diffs when committing changes (default: False) 204 | #show-diffs: false 205 | 206 | ############### 207 | # Git settings: 208 | 209 | ## Enable/disable looking for a git repo (default: True) 210 | #git: true 211 | 212 | ## Enable/disable adding .aider* to .gitignore (default: True) 213 | #gitignore: true 214 | 215 | ## Specify the aider ignore file (default: .aiderignore in git root) 216 | #aiderignore: .aiderignore 217 | 218 | ## Only consider files in the current subtree of the git repository 219 | #subtree-only: false 220 | 221 | ## Enable/disable auto commit of LLM changes (default: True) 222 | auto-commits: false 223 | 224 | ## Enable/disable commits when repo is found dirty (default: True) 225 | # dirty-commits: false 226 | 227 | ## Attribute aider code changes in the git author name (default: True) 228 | # attribute-author: false 229 | 230 | ## Attribute aider commits in the git committer name (default: True) 231 | #attribute-committer: true 232 | 233 | ## Prefix commit messages with 'aider: ' if aider authored the changes (default: False) 234 | #attribute-commit-message-author: false 235 | 236 | ## Prefix all commit messages with 'aider: ' (default: False) 237 | #attribute-commit-message-committer: false 238 | 239 | ## Enable/disable git pre-commit hooks with --no-verify (default: False) 240 | #git-commit-verify: false 241 | 242 | ## Commit all pending changes with a suitable commit 
message, then exit 243 | #commit: false 244 | 245 | ## Specify a custom prompt for generating commit messages 246 | #commit-prompt: xxx 247 | 248 | ## Perform a dry run without modifying files (default: False) 249 | #dry-run: false 250 | 251 | ## Skip the sanity check for the git repository (default: False) 252 | #skip-sanity-check-repo: false 253 | 254 | ## Enable/disable watching files for ai coding comments (default: False) 255 | #watch-files: false 256 | 257 | ######################## 258 | # Fixing and committing: 259 | 260 | ## Lint and fix provided files, or dirty files if none provided 261 | #lint: false 262 | 263 | ## Specify lint commands to run for different languages, eg: "python: flake8 --select=..." (can be used multiple times) 264 | #lint-cmd: xxx 265 | ## Specify multiple values like this: 266 | #lint-cmd: 267 | # - xxx 268 | # - yyy 269 | # - zzz 270 | 271 | ## Enable/disable automatic linting after changes (default: True) 272 | #auto-lint: true 273 | 274 | ## Specify command to run tests 275 | #test-cmd: xxx 276 | 277 | ## Enable/disable automatic testing after changes (default: False) 278 | #auto-test: false 279 | 280 | ## Run tests, fix problems found and then exit 281 | #test: false 282 | 283 | ############ 284 | # Analytics: 285 | 286 | ## Enable/disable analytics for current session (default: random) 287 | #analytics: xxx 288 | 289 | ## Specify a file to log analytics events 290 | #analytics-log: xxx 291 | 292 | ## Permanently disable analytics 293 | #analytics-disable: false 294 | 295 | ############ 296 | # Upgrading: 297 | 298 | ## Check for updates and return status in the exit code 299 | #just-check-update: false 300 | 301 | ## Check for new aider versions on launch 302 | #check-update: true 303 | 304 | ## Show release notes on first run of new version (default: None, ask user) 305 | #show-release-notes: xxx 306 | 307 | ## Install the latest version from the main branch 308 | #install-main-branch: false 309 | 310 | ## Upgrade aider to the 
latest version from PyPI 311 | #upgrade: false 312 | 313 | ## Show the version number and exit 314 | #version: xxx 315 | 316 | ######## 317 | # Modes: 318 | 319 | ## Specify a single message to send the LLM, process reply then exit (disables chat mode) 320 | #message: xxx 321 | 322 | ## Specify a file containing the message to send the LLM, process reply, then exit (disables chat mode) 323 | #message-file: xxx 324 | 325 | ## Run aider in your browser (default: False) 326 | #gui: false 327 | 328 | ## Enable automatic copy/paste of chat between aider and web UI (default: False) 329 | #copy-paste: false 330 | 331 | ## Apply the changes from the given file instead of running the chat (debug) 332 | #apply: xxx 333 | 334 | ## Apply clipboard contents as edits using the main model's editor format 335 | #apply-clipboard-edits: false 336 | 337 | ## Do all startup activities then exit before accepting user input (debug) 338 | #exit: false 339 | 340 | ## Print the repo map and exit (debug) 341 | #show-repo-map: false 342 | 343 | ## Print the system prompts and exit (debug) 344 | #show-prompts: false 345 | 346 | ################# 347 | # Voice settings: 348 | 349 | ## Audio format for voice recording (default: wav). 
webm and mp3 require ffmpeg 350 | #voice-format: wav 351 | 352 | ## Specify the language for voice using ISO 639-1 code (default: auto) 353 | #voice-language: en 354 | 355 | ## Specify the input device name for voice recording 356 | #voice-input-device: xxx 357 | 358 | ################# 359 | # Other settings: 360 | 361 | ## specify a file to edit (can be used multiple times) 362 | #file: xxx 363 | ## Specify multiple values like this: 364 | #file: 365 | # - xxx 366 | # - yyy 367 | # - zzz 368 | 369 | ## specify a read-only file (can be used multiple times) 370 | #read: xxx 371 | ## Specify multiple values like this: 372 | #read: 373 | # - xxx 374 | # - yyy 375 | # - zzz 376 | 377 | ## Use VI editing mode in the terminal (default: False) 378 | #vim: false 379 | 380 | ## Specify the language to use in the chat (default: None, uses system settings) 381 | #chat-language: xxx 382 | 383 | ## Always say yes to every confirmation 384 | yes-always: true 385 | 386 | ## Enable verbose output 387 | # verbose: false 388 | 389 | ## Load and execute /commands from a file on launch 390 | #load: xxx 391 | 392 | ## Specify the encoding for input and output (default: utf-8) 393 | #encoding: utf-8 394 | 395 | ## Line endings to use when writing files (default: platform) 396 | #line-endings: platform 397 | 398 | ## Specify the config file (default: search for .aider.conf.yml in git root, cwd or home directory) 399 | #config: xxx 400 | 401 | ## Specify the .env file to load (default: .env in git root) 402 | #env-file: .env 403 | 404 | ## Enable/disable suggesting shell commands (default: True) 405 | suggest-shell-commands: false 406 | 407 | ## Enable/disable fancy input with history and completion (default: True) 408 | #fancy-input: true 409 | 410 | ## Enable/disable multi-line input mode with Meta-Enter to submit (default: False) 411 | #multiline: false 412 | 413 | ## Enable/disable terminal bell notifications when LLM responses are ready (default: False) 414 | #notifications: false 
415 | 416 | ## Specify a command to run for notifications instead of the terminal bell. If not specified, a default command for your OS may be used. 417 | #notifications-command: xxx 418 | 419 | ## Enable/disable detection and offering to add URLs to chat (default: True) 420 | detect-urls: false 421 | 422 | ## Specify which editor to use for the /editor command 423 | #editor: xxx 424 | 425 | ############################ 426 | # Deprecated model settings: 427 | 428 | ## Use claude-3-opus-20240229 model for the main chat (deprecated, use --model) 429 | #opus: false 430 | 431 | ## Use anthropic/claude-3-7-sonnet-20250219 model for the main chat (deprecated, use --model) 432 | #sonnet: false 433 | 434 | ## Use claude-3-5-haiku-20241022 model for the main chat (deprecated, use --model) 435 | #haiku: false 436 | 437 | ## Use gpt-4-0613 model for the main chat (deprecated, use --model) 438 | #4: false 439 | 440 | ## Use gpt-4o model for the main chat (deprecated, use --model) 441 | #4o: false 442 | 443 | ## Use gpt-4o-mini model for the main chat (deprecated, use --model) 444 | #mini: false 445 | 446 | ## Use gpt-4-1106-preview model for the main chat (deprecated, use --model) 447 | #4-turbo: false 448 | 449 | ## Use gpt-3.5-turbo model for the main chat (deprecated, use --model) 450 | #35turbo: false 451 | 452 | ## Use deepseek/deepseek-chat model for the main chat (deprecated, use --model) 453 | #deepseek: false 454 | 455 | ## Use o1-mini model for the main chat (deprecated, use --model) 456 | #o1-mini: false 457 | 458 | ## Use o1-preview model for the main chat (deprecated, use --model) 459 | #o1-preview: false 460 | -------------------------------------------------------------------------------- /.claude/commands/prime.md: -------------------------------------------------------------------------------- 1 | # Context Prime 2 | > Follow the instructions precisely. If it wasn't specified, don't do it. 3 | 4 | ## RUN the following commands: 5 | 6 | `eza . 
--tree --git-ignore` 7 | 8 | ## PARALLEL READ the following files: 9 | 10 | README.md 11 | aider_is_*.py 12 | claude_code_*.py -------------------------------------------------------------------------------- /.env.sample: -------------------------------------------------------------------------------- 1 | # API Keys for Various Services 2 | # Replace these values with your actual API keys 3 | 4 | # Notion API key for internal integration 5 | NOTION_INTERNAL_INTEGRATION_SECRET=your_notion_integration_secret 6 | 7 | # Anthropic API key for Claude models 8 | ANTHROPIC_API_KEY=your_anthropic_api_key 9 | 10 | # OpenAI API key for GPT models 11 | OPENAI_API_KEY=your_openai_api_key -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Nuxt dev/build outputs 2 | .output 3 | .data 4 | .nuxt 5 | .nitro 6 | .cache 7 | dist 8 | 9 | # Node dependencies 10 | node_modules 11 | 12 | # Logs 13 | logs 14 | *.log 15 | 16 | # Misc 17 | .DS_Store 18 | .fleet 19 | .idea 20 | 21 | # Local env files 22 | .env 23 | .env 24 | !.env.example 25 | .aider* 26 | !.aider.conf.yml 27 | 28 | package-lock.json 29 | 30 | hello.js 31 | .mcp.json 32 | 33 | specs/ 34 | 35 | **/.claude/settings.local.json 36 | 37 | output/ -------------------------------------------------------------------------------- /.mcp.sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "mcpServers": { 3 | "notionApi": { 4 | "command": "npx", 5 | "args": ["-y", "@notionhq/notion-mcp-server"], 6 | "env": { 7 | "OPENAPI_MCP_HEADERS": "{\"Authorization\": \"Bearer ntn_****\", \"Notion-Version\": \"2022-06-28\" }" 8 | } 9 | } 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Claude Code is Programmable 2 | 3 | 
This repository demonstrates how to use Claude Code programmatically, showcasing examples in different programming languages. Watch [this video](https://youtu.be/2TIXl2rlA6Q) to understand why this is so important for next generation engineering. Check out the [voice to Claude Code](https://youtu.be/LvkZuY7rJOM) video to see how to use the `voice_to_claude_code.py` script. 4 | 5 | Voice to Claude Code 6 | 7 | Claude Code is Programmable 8 | 9 | ## Quick Start 10 | 11 | First off - run these right away to understand how important this is: 12 | 13 | ```bash 14 | # Claude Code example (with only Write and Edit tools allowed) 15 | claude -p "make a hello.js script that prints hello" --allowedTools "Write" "Edit" 16 | 17 | # Aider equivalent example 18 | aider --message "make a hello.js script that prints hello" hello.js 19 | ``` 20 | 21 | Here's the big trick - with Claude Code, you can call ANY TOOL IN ANY ORDER IN NATURAL LANGUAGE. 22 | 23 | Check out the other examples in the repo to understand how to scale your impact with this feature. 24 | 25 | Watch [this video](https://youtu.be/2TIXl2rlA6Q) to internalize how important this is for next generation engineering. View the brief anthropic documentation [here](https://docs.anthropic.com/en/docs/agents-and-tools/claude-code/tutorials#use-claude-as-a-unix-style-utility) and a more comprehensive write up on [agentic coding here](https://www.anthropic.com/engineering/claude-code-best-practices). The Claude Code team is doing incredible work. 26 | 27 | You can also use [Aider](https://aider.chat/) as a programmable ai coding tool to do similar things although it's strictly limited to AI Coding (which is still incredibly useful). Check out the documentation [here](https://aider.chat/docs/scripting.html). 28 | 29 | ## Setup 30 | 31 | ### Configuration Files 32 | 33 | 1. 
**MCP (Model Context Protocol) Configuration** 34 | - Copy the sample configuration file to create your own: 35 | ```bash 36 | cp .mcp.sample.json .mcp.json 37 | ``` 38 | - Edit `.mcp.json` to add your Notion API key in the `OPENAPI_MCP_HEADERS` section: 39 | ```json 40 | { 41 | "mcpServers": { 42 | "notionApi": { 43 | "command": "npx", 44 | "args": ["-y", "@notionhq/notion-mcp-server"], 45 | "env": { 46 | "OPENAPI_MCP_HEADERS": "{\"Authorization\": \"Bearer YOUR_NOTION_API_KEY\", \"Notion-Version\": \"2022-06-28\" }" 47 | } 48 | } 49 | } 50 | } 51 | ``` 52 | 53 | 2. **Environment Variables** 54 | - Copy the sample environment file: 55 | ```bash 56 | cp .env.sample .env 57 | ``` 58 | - Add the following API keys to your `.env` file: 59 | ``` 60 | NOTION_INTERNAL_INTEGRATION_SECRET=your_notion_integration_secret 61 | ANTHROPIC_API_KEY=your_anthropic_api_key 62 | OPENAI_API_KEY=your_openai_api_key 63 | ``` 64 | - Note: The voice_to_claude_code.py script specifically requires both ANTHROPIC_API_KEY and OPENAI_API_KEY to be set. 65 | - The anthropic_search.py script requires ANTHROPIC_API_KEY to be set. 66 | 67 | ## File Descriptions 68 | 69 | ### Shell Scripts 70 | - `claude_code_is_programmable_1.sh`: Simple shell script that uses Claude Code's CLI to generate a basic "hello.js" script with limited allowed tools. 71 | ```bash 72 | sh claude_code_is_programmable_1.sh 73 | ``` 74 | - `aider_is_programmable_1.sh`: Similar script using Aider to create a "hello.js" file. 75 | ```bash 76 | sh aider_is_programmable_1.sh 77 | ``` 78 | - `reset.sh`: Utility script to clean up branches and directories created by the demo scripts. 79 | ```bash 80 | sh reset.sh 81 | ``` 82 | 83 | ### Python Files 84 | - `claude_code_is_programmable_2.py`: Python script that executes Claude Code to create a TypeScript CLI todo app, with permissions for Edit, Replace, Bash, and Create tools. 
85 | ```bash 86 | uv run claude_code_is_programmable_2.py 87 | ``` 88 | - `claude_code_is_programmable_3.py`: Advanced Python script integrating Claude Code with Notion API for todo management, including rich console output and streaming results. Requires a Notion page name as an argument. 89 | ```bash 90 | uv run claude_code_is_programmable_3.py "My Notion Page" 91 | ``` 92 | - `aider_is_programmable_2.py`: Python script that uses Aider to create a TypeScript todo application with git operations. 93 | ```bash 94 | uv run aider_is_programmable_2.py 95 | ``` 96 | - `anthropic_search.py`: A self-contained Python script for searching the web using Anthropic's Claude AI with web search capabilities. 97 | ```bash 98 | ./anthropic_search.py "your search query" 99 | ``` 100 | 101 | ### JavaScript Files 102 | - `claude_code_is_programmable_2.js`: JavaScript version of the Claude Code script that creates a TypeScript todo app, with permissions for Edit, Replace, Bash, and Create tools. 103 | ```bash 104 | bun claude_code_is_programmable_2.js 105 | ``` 106 | - `aider_is_programmable_2.js`: JavaScript version of the Aider script for creating a TypeScript todo app with git operations. 107 | ```bash 108 | bun aider_is_programmable_2.js 109 | ``` 110 | 111 | ### Voice to Claude Code 112 | - `voice_to_claude_code.py`: A voice-enabled Claude Code assistant that allows you to interact with Claude Code using speech commands. Combines RealtimeSTT for speech recognition and OpenAI TTS for speech output. 
113 | ```bash 114 | uv run voice_to_claude_code.py 115 | 116 | # With a specific conversation ID 117 | uv run voice_to_claude_code.py --id "my-chat-id" 118 | 119 | # With an initial prompt 120 | uv run voice_to_claude_code.py --prompt "create a hello world script" 121 | 122 | # With both ID and prompt 123 | uv run voice_to_claude_code.py --id "my-chat-id" --prompt "create a hello world script" 124 | ``` 125 | 126 | ### Bonus Directory 127 | - `starter_notion_agent.py`: A starter template for creating a Notion agent using the OpenAI Agent SDK. 128 | ```bash 129 | uv run bonus/starter_notion_agent.py 130 | ``` 131 | - `claude_code_inside_openai_agent_sdk_4_bonus.py`: An advanced implementation that integrates Claude Code within the OpenAI Agent SDK. Requires a Notion page name as an argument. 132 | ```bash 133 | uv run bonus/claude_code_inside_openai_agent_sdk_4_bonus.py "My Notion Page" 134 | ``` 135 | 136 | ## Core Tools Available in Claude Code 137 | 138 | - Task: Launch an agent to perform complex tasks 139 | - Bash: Execute bash commands in a shell 140 | - Batch: Run multiple tools in parallel 141 | - Glob: Find files matching patterns 142 | - Grep: Search file contents with regex 143 | - LS: List directory contents 144 | - Read: Read file contents 145 | - Edit: Make targeted edits to files 146 | - Write: Create or overwrite files 147 | - NotebookRead/Edit: Work with Jupyter notebooks 148 | - WebFetch: Get content from websites 149 | 150 | ## Claude Code response formats 151 | 152 | ```sh 153 | claude -p 'hello, run git ls-files, how many files are in the current directory' --output-format text > test.txt 154 | claude -p 'hello, run git ls-files, how many files are in the current directory' --output-format json > test.json 155 | claude -p --continue 'hello, run git ls-files, how many files are in the current directory' --output-format stream-json > test.stream.json 156 | ``` 157 | 158 | ## Anthropic Web Search Tool 159 | > See the 
[anthropic_search.py](anthropic_search.py) file for more details. 160 | 161 | A command-line utility for searching the web using Anthropic's Claude AI with their web search tool capability. 162 | 163 | ### Prerequisites 164 | 165 | - Python 3.8+ 166 | - UV package manager (`pip install uv`) 167 | - Anthropic API key 168 | 169 | ### Setup 170 | 171 | Make the script executable: 172 | ``` 173 | chmod +x anthropic_search.py 174 | ``` 175 | 176 | ### Usage 177 | 178 | Basic search: 179 | ``` 180 | ./anthropic_search.py "your search query" 181 | ``` 182 | 183 | With domain filtering (only include results from these domains): 184 | ``` 185 | ./anthropic_search.py "javascript best practices" --domains "developer.mozilla.org,javascript.info" 186 | ``` 187 | 188 | Block specific domains: 189 | ``` 190 | ./anthropic_search.py "climate change" --blocked "unreliablesource.com,fakenews.org" 191 | ``` 192 | 193 | With location context: 194 | ``` 195 | ./anthropic_search.py "local restaurants" --location "US,California,San Francisco" --timezone "America/Los_Angeles" 196 | ``` 197 | 198 | Increase maximum searches: 199 | ``` 200 | ./anthropic_search.py "complex research topic" --max-uses 5 201 | ``` 202 | 203 | Use a different Claude model: 204 | ``` 205 | ./anthropic_search.py "your query" --model "claude-3-5-sonnet-latest" 206 | ``` 207 | 208 | ### Output 209 | 210 | The script produces: 211 | 1. The search query used 212 | 2. Claude's response with inline citations marked as [1], [2], etc. 213 | 3. A list of sources at the end, numbered to match the citations 214 | 4. 
Usage information showing how many web searches were performed 215 | 216 | ### Notes 217 | 218 | - Web search is available on Claude 3.7 Sonnet, Claude 3.5 Sonnet, and Claude 3.5 Haiku 219 | - Each search counts as one use, regardless of the number of results returned 220 | - Searches cost $10 per 1,000 searches, plus standard token costs for search-generated content 221 | - Domain filtering doesn't need https:// prefixes and automatically includes subdomains 222 | 223 | Built with ❤️ by [IndyDevDan](https://www.youtube.com/@indydevdan) with [Claude Code](https://docs.anthropic.com/en/docs/agents-and-tools/claude-code/overview), and [Principled AI Coding](https://agenticengineer.com/principled-ai-coding) 224 | 225 | -------------------------------------------------------------------------------- /ai_docs/anthropic_web_search_tool.md: -------------------------------------------------------------------------------- 1 | # Web Search Tool 2 | 3 | The web search tool gives Claude direct access to real-time web content, allowing it to answer questions with up-to-date information beyond its knowledge cutoff. Claude automatically cites sources from search results as part of its answer. 4 | 5 | ## Supported Models 6 | 7 | Web search is available on: 8 | 9 | - Claude 3.7 Sonnet (`claude-3-7-sonnet-20250219`) 10 | - Claude 3.5 Sonnet (new) (`claude-3-5-sonnet-latest`) 11 | - Claude 3.5 Haiku (`claude-3-5-haiku-latest`) 12 | 13 | ## How Web Search Works 14 | 15 | When you add the web search tool to your API request: 16 | 17 | 1. Claude decides when to search based on the prompt. 18 | 2. The API executes the searches and provides Claude with the results. This process may repeat multiple times throughout a single request. 19 | 3. At the end of its turn, Claude provides a final response with cited sources. 20 | 21 | ## How to Use Web Search 22 | 23 | Your organization's administrator must enable web search in [Console](https://console.anthropic.com/settings/privacy). 
24 | 25 | Provide the web search tool in your API request: 26 | 27 | ```bash 28 | curl https://api.anthropic.com/v1/messages \ 29 | --header "x-api-key: $ANTHROPIC_API_KEY" \ 30 | --header "anthropic-version: 2023-06-01" \ 31 | --header "content-type: application/json" \ 32 | --data '{ 33 | "model": "claude-3-7-sonnet-latest", 34 | "max_tokens": 1024, 35 | "messages": [ 36 | { 37 | "role": "user", 38 | "content": "How do I update a web app to TypeScript 5.5?" 39 | } 40 | ], 41 | "tools": [{ 42 | "type": "web_search_20250305", 43 | "name": "web_search", 44 | "max_uses": 5 45 | }] 46 | }' 47 | ``` 48 | 49 | ### Tool Definition 50 | 51 | The web search tool supports the following parameters: 52 | 53 | ```json 54 | { 55 | "type": "web_search_20250305", 56 | "name": "web_search", 57 | 58 | // Optional: Limit the number of searches per request 59 | "max_uses": 5, 60 | 61 | // Optional: Only include results from these domains 62 | "allowed_domains": ["example.com", "trusteddomain.org"], 63 | 64 | // Optional: Never include results from these domains 65 | "blocked_domains": ["untrustedsource.com"], 66 | 67 | // Optional: Localize search results 68 | "user_location": { 69 | "type": "approximate", 70 | "city": "San Francisco", 71 | "region": "California", 72 | "country": "US", 73 | "timezone": "America/Los_Angeles" 74 | } 75 | } 76 | ``` 77 | 78 | #### Max Uses 79 | 80 | The `max_uses` parameter limits the number of searches performed. If Claude attempts more searches than allowed, the `web_search_tool_result` will be an error with the `max_uses_exceeded` error code. 
81 | 82 | #### Domain Filtering 83 | 84 | When using domain filters: 85 | 86 | - Domains should not include the HTTP/HTTPS scheme (use `example.com` instead of `https://example.com`) 87 | - Subdomains are automatically included (`example.com` covers `docs.example.com`) 88 | - Subpaths are supported (`example.com/blog`) 89 | - You can use either `allowed_domains` or `blocked_domains`, but not both in the same request. 90 | 91 | #### Localization 92 | 93 | The `user_location` parameter allows you to localize search results based on a user's location. 94 | 95 | - `type`: The type of location (must be `approximate`) 96 | - `city`: The city name 97 | - `region`: The region or state 98 | - `country`: The country 99 | - `timezone`: The [IANA timezone ID](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones). 100 | 101 | ### Response Structure 102 | 103 | Here's an example response structure: 104 | 105 | ```json 106 | { 107 | "role": "assistant", 108 | "content": [ 109 | // 1. Claude's decision to search 110 | { 111 | "type": "text", 112 | "text": "I'll search for when Claude Shannon was born." 113 | }, 114 | // 2. The search query used 115 | { 116 | "type": "server_tool_use", 117 | "id": "srvtoolu_01WYG3ziw53XMcoyKL4XcZmE", 118 | "name": "web_search", 119 | "input": { 120 | "query": "claude shannon birth date" 121 | } 122 | }, 123 | // 3. Search results 124 | { 125 | "type": "web_search_tool_result", 126 | "tool_use_id": "srvtoolu_01WYG3ziw53XMcoyKL4XcZmE", 127 | "content": [ 128 | { 129 | "type": "web_search_result", 130 | "url": "https://en.wikipedia.org/wiki/Claude_Shannon", 131 | "title": "Claude Shannon - Wikipedia", 132 | "encrypted_content": "EqgfCioIARgBIiQ3YTAwMjY1Mi1mZjM5LTQ1NGUtODgxNC1kNjNjNTk1ZWI3Y...", 133 | "page_age": "April 30, 2025" 134 | } 135 | ] 136 | }, 137 | { 138 | "text": "Based on the search results, ", 139 | "type": "text" 140 | }, 141 | // 4. 
Claude's response with citations 142 | { 143 | "text": "Claude Shannon was born on April 30, 1916, in Petoskey, Michigan", 144 | "type": "text", 145 | "citations": [ 146 | { 147 | "type": "web_search_result_location", 148 | "url": "https://en.wikipedia.org/wiki/Claude_Shannon", 149 | "title": "Claude Shannon - Wikipedia", 150 | "encrypted_index": "Eo8BCioIAhgBIiQyYjQ0OWJmZi1lNm..", 151 | "cited_text": "Claude Elwood Shannon (April 30, 1916 – February 24, 2001) was an American mathematician, electrical engineer, computer scientist, cryptographer and i..." 152 | } 153 | ] 154 | } 155 | ], 156 | "id": "msg_a930390d3a", 157 | "usage": { 158 | "input_tokens": 6039, 159 | "output_tokens": 931, 160 | "server_tool_use": { 161 | "web_search_requests": 1 162 | } 163 | }, 164 | "stop_reason": "end_turn" 165 | } 166 | ``` 167 | 168 | #### Search Results 169 | 170 | Search results include: 171 | 172 | - `url`: The URL of the source page 173 | - `title`: The title of the source page 174 | - `page_age`: When the site was last updated 175 | - `encrypted_content`: Encrypted content that must be passed back in multi-turn conversations for citations 176 | 177 | #### Citations 178 | 179 | Citations are always enabled for web search, and each `web_search_result_location` includes: 180 | 181 | - `url`: The URL of the cited source 182 | - `title`: The title of the cited source 183 | - `encrypted_index`: A reference that must be passed back for multi-turn conversations. 184 | - `cited_text`: Up to 150 characters of the cited content 185 | 186 | The web search citation fields `cited_text`, `title`, and `url` do not count towards input or output token usage. 
187 | 188 | #### Errors 189 | 190 | If an error occurs during web search, you'll receive a response that takes the following form: 191 | 192 | ```json 193 | { 194 | "type": "web_search_tool_result", 195 | "tool_use_id": "servertoolu_a93jad", 196 | "content": { 197 | "type": "web_search_tool_result_error", 198 | "error_code": "max_uses_exceeded" 199 | } 200 | } 201 | ``` 202 | 203 | These are the possible error codes: 204 | 205 | - `too_many_requests`: Rate limit exceeded 206 | - `invalid_input`: Invalid search query parameter 207 | - `max_uses_exceeded`: Maximum web search tool uses exceeded 208 | - `query_too_long`: Query exceeds maximum length 209 | - `unavailable`: An internal error occurred 210 | 211 | #### `pause_turn` Stop Reason 212 | 213 | The response may include a `pause_turn` stop reason, which indicates that the API paused a long-running turn. You may provide the response back as-is in a subsequent request to let Claude continue its turn, or modify the content if you wish to interrupt the conversation. 214 | 215 | ## Prompt Caching 216 | 217 | Web search works with [prompt caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching). To enable prompt caching, add at least one `cache_control` breakpoint in your request. The system will automatically cache up until the last `web_search_tool_result` block when executing the tool. 218 | 219 | For multi-turn conversations, set a `cache_control` breakpoint on or after the last `web_search_tool_result` block to reuse cached content. 220 | 221 | ## Streaming 222 | 223 | With streaming enabled, you'll receive search events as part of the stream. There will be a pause while the search executes. 224 | 225 | ## Batch Requests 226 | 227 | You can include the web search tool in the [Messages Batches API](https://docs.anthropic.com/en/docs/build-with-claude/batch-processing). Web search tool calls through the Messages Batches API are priced the same as those in regular Messages API requests. 
228 | 229 | ## Usage and Pricing 230 | 231 | Web search usage is charged in addition to token usage: 232 | 233 | ```json 234 | "usage": { 235 | "input_tokens": 105, 236 | "output_tokens": 6039, 237 | "cache_read_input_tokens": 7123, 238 | "cache_creation_input_tokens": 7345, 239 | "server_tool_use": { 240 | "web_search_requests": 1 241 | } 242 | } 243 | ``` 244 | 245 | Web search is available on the Anthropic API for $10 per 1,000 searches, plus standard token costs for search-generated content. Web search results retrieved throughout a conversation are counted as input tokens, both in search iterations executed during the current turn and in subsequent conversation turns. 246 | 247 | Each web search counts as one use, regardless of the number of results returned. If an error occurs during web search, the web search will not be billed. -------------------------------------------------------------------------------- /ai_docs/claude-code-tutorials.md: -------------------------------------------------------------------------------- 1 | # Claude Code Tutorials 2 | 3 | This document provides step-by-step tutorials for common workflows with Claude Code, extracted from [Anthropic's official documentation](https://docs.anthropic.com/en/docs/claude-code/tutorials). 
4 | 5 | ## Table of contents 6 | 7 | - [Resume previous conversations](#resume-previous-conversations) 8 | - [Understand new codebases](#understand-new-codebases) 9 | - [Fix bugs efficiently](#fix-bugs-efficiently) 10 | - [Refactor code](#refactor-code) 11 | - [Work with tests](#work-with-tests) 12 | - [Create pull requests](#create-pull-requests) 13 | - [Handle documentation](#handle-documentation) 14 | - [Work with images](#work-with-images) 15 | - [Use extended thinking](#use-extended-thinking) 16 | - [Set up project memory](#set-up-project-memory) 17 | - [Set up Model Context Protocol (MCP)](#set-up-model-context-protocol-mcp) 18 | - [Use Claude as a unix-style utility](#use-claude-as-a-unix-style-utility) 19 | - [Create custom slash commands](#create-custom-slash-commands) 20 | - [Run parallel Claude Code sessions with Git worktrees](#run-parallel-claude-code-sessions-with-git-worktrees) 21 | 22 | ## Resume previous conversations 23 | 24 | ### Continue your work seamlessly 25 | 26 | **When to use:** You've been working on a task with Claude Code and need to continue where you left off in a later session. 27 | 28 | Claude Code provides two options for resuming previous conversations: 29 | 30 | - `--continue` to automatically continue the most recent conversation 31 | - `--resume` to display a conversation picker 32 | 33 | 1. Continue the most recent conversation: 34 | ```bash 35 | claude --continue 36 | ``` 37 | 38 | 2. Continue in non-interactive mode: 39 | ```bash 40 | claude --continue --print "Continue with my task" 41 | ``` 42 | 43 | 3. 
Show conversation picker: 44 | ```bash 45 | claude --resume 46 | ``` 47 | 48 | **Tips:** 49 | - Conversation history is stored locally on your machine 50 | - Use `--continue` for quick access to your most recent conversation 51 | - Use `--resume` when you need to select a specific past conversation 52 | - When resuming, you'll see the entire conversation history before continuing 53 | - The resumed conversation starts with the same model and configuration as the original 54 | 55 | ## Understand new codebases 56 | 57 | ### Get a quick codebase overview 58 | 59 | **When to use:** You've just joined a new project and need to understand its structure quickly. 60 | 61 | 1. Navigate to the project root directory: 62 | ```bash 63 | cd /path/to/project 64 | ``` 65 | 66 | 2. Start Claude Code: 67 | ```bash 68 | claude 69 | ``` 70 | 71 | 3. Ask for a high-level overview: 72 | ``` 73 | > give me an overview of this codebase 74 | ``` 75 | 76 | 4. Dive deeper into specific components: 77 | ``` 78 | > explain the main architecture patterns used here 79 | > what are the key data models? 80 | > how is authentication handled? 81 | ``` 82 | 83 | **Tips:** 84 | - Start with broad questions, then narrow down to specific areas 85 | - Ask about coding conventions and patterns used in the project 86 | - Request a glossary of project-specific terms 87 | 88 | ### Find relevant code 89 | 90 | **When to use:** You need to locate code related to a specific feature or functionality. 91 | 92 | 1. Ask Claude to find relevant files: 93 | ``` 94 | > find the files that handle user authentication 95 | ``` 96 | 97 | 2. Get context on how components interact: 98 | ``` 99 | > how do these authentication files work together? 100 | ``` 101 | 102 | 3. 
Understand the execution flow: 103 | ``` 104 | > trace the login process from front-end to database 105 | ``` 106 | 107 | ## Fix bugs efficiently 108 | 109 | ### Diagnose error messages 110 | 111 | **When to use:** You've encountered an error message and need to find and fix its source. 112 | 113 | 1. Share the error with Claude: 114 | ``` 115 | > I'm seeing an error when I run npm test 116 | ``` 117 | 118 | 2. Ask for fix recommendations: 119 | ``` 120 | > suggest a few ways to fix the @ts-ignore in user.ts 121 | ``` 122 | 123 | 3. Apply the fix: 124 | ``` 125 | > update user.ts to add the null check you suggested 126 | ``` 127 | 128 | **Tips:** 129 | - Tell Claude the command to reproduce the issue and get a stack trace 130 | - Mention any steps to reproduce the error 131 | - Let Claude know if the error is intermittent or consistent 132 | 133 | ## Refactor code 134 | 135 | ### Modernize legacy code 136 | 137 | **When to use:** You need to update old code to use modern patterns and practices. 138 | 139 | 1. Identify legacy code for refactoring: 140 | ``` 141 | > find deprecated API usage in our codebase 142 | ``` 143 | 144 | 2. Get refactoring recommendations: 145 | ``` 146 | > suggest how to refactor utils.js to use modern JavaScript features 147 | ``` 148 | 149 | 3. Apply the changes safely: 150 | ``` 151 | > refactor utils.js to use ES2024 features while maintaining the same behavior 152 | ``` 153 | 154 | 4. Verify the refactoring: 155 | ``` 156 | > run tests for the refactored code 157 | ``` 158 | 159 | **Tips:** 160 | - Ask Claude to explain the benefits of the modern approach 161 | - Request that changes maintain backward compatibility when needed 162 | - Do refactoring in small, testable increments 163 | 164 | ## Work with tests 165 | 166 | ### Add test coverage 167 | 168 | **When to use:** You need to add tests for uncovered code. 169 | 170 | 1. 
Identify untested code: 171 | ``` 172 | > find functions in NotificationsService.swift that are not covered by tests 173 | ``` 174 | 175 | 2. Generate test scaffolding: 176 | ``` 177 | > add tests for the notification service 178 | ``` 179 | 180 | 3. Add meaningful test cases: 181 | ``` 182 | > add test cases for edge conditions in the notification service 183 | ``` 184 | 185 | 4. Run and verify tests: 186 | ``` 187 | > run the new tests and fix any failures 188 | ``` 189 | 190 | **Tips:** 191 | - Ask for tests that cover edge cases and error conditions 192 | - Request both unit and integration tests when appropriate 193 | - Have Claude explain the testing strategy 194 | 195 | ## Create pull requests 196 | 197 | ### Generate comprehensive PRs 198 | 199 | **When to use:** You need to create a well-documented pull request for your changes. 200 | 201 | 1. Summarize your changes: 202 | ``` 203 | > summarize the changes I've made to the authentication module 204 | ``` 205 | 206 | 2. Generate a PR with Claude: 207 | ``` 208 | > create a pr 209 | ``` 210 | 211 | 3. Review and refine: 212 | ``` 213 | > enhance the PR description with more context about the security improvements 214 | ``` 215 | 216 | 4. Add testing details: 217 | ``` 218 | > add information about how these changes were tested 219 | ``` 220 | 221 | **Tips:** 222 | - Ask Claude directly to make a PR for you 223 | - Review Claude's generated PR before submitting 224 | - Ask Claude to highlight potential risks or considerations 225 | 226 | ## Handle documentation 227 | 228 | ### Generate code documentation 229 | 230 | **When to use:** You need to add or update documentation for your code. 231 | 232 | 1. Identify undocumented code: 233 | ``` 234 | > find functions without proper JSDoc comments in the auth module 235 | ``` 236 | 237 | 2. Generate documentation: 238 | ``` 239 | > add JSDoc comments to the undocumented functions in auth.js 240 | ``` 241 | 242 | 3. 
Review and enhance: 243 | ``` 244 | > improve the generated documentation with more context and examples 245 | ``` 246 | 247 | 4. Verify documentation: 248 | ``` 249 | > check if the documentation follows our project standards 250 | ``` 251 | 252 | **Tips:** 253 | - Specify the documentation style you want (JSDoc, docstrings, etc.) 254 | - Ask for examples in the documentation 255 | - Request documentation for public APIs, interfaces, and complex logic 256 | 257 | ## Work with images 258 | 259 | ### Analyze images and screenshots 260 | 261 | **When to use:** You need to work with images in your codebase or get Claude's help analyzing image content. 262 | 263 | 1. Add an image to the conversation using one of these methods: 264 | - Drag and drop an image into the Claude Code window 265 | - Copy an image and paste it into the CLI with cmd+v (on Mac) 266 | - Provide an image path: `claude "Analyze this image: /path/to/your/image.png"` 267 | 268 | 2. Ask Claude to analyze the image: 269 | ``` 270 | > What does this image show? 271 | > Describe the UI elements in this screenshot 272 | > Are there any problematic elements in this diagram? 273 | ``` 274 | 275 | 3. Use images for context: 276 | ``` 277 | > Here's a screenshot of the error. What's causing it? 278 | > This is our current database schema. How should we modify it for the new feature? 279 | ``` 280 | 281 | 4. Get code suggestions from visual content: 282 | ``` 283 | > Generate CSS to match this design mockup 284 | > What HTML structure would recreate this component? 
285 | ``` 286 | 287 | **Tips:** 288 | - Use images when text descriptions would be unclear or cumbersome 289 | - Include screenshots of errors, UI designs, or diagrams for better context 290 | - You can work with multiple images in a conversation 291 | - Image analysis works with diagrams, screenshots, mockups, and more 292 | 293 | ## Use extended thinking 294 | 295 | ### Leverage Claude's extended thinking for complex tasks 296 | 297 | **When to use:** When working on complex architectural decisions, challenging bugs, or planning multi-step implementations that require deep reasoning. 298 | 299 | 1. Provide context and ask Claude to think: 300 | ``` 301 | > I need to implement a new authentication system using OAuth2 for our API. Think deeply about the best approach for implementing this in our codebase. 302 | ``` 303 | 304 | 2. Refine the thinking with follow-up prompts: 305 | ``` 306 | > think about potential security vulnerabilities in this approach 307 | > think harder about edge cases we should handle 308 | ``` 309 | 310 | **Tips to get the most value out of extended thinking:** 311 | 312 | Extended thinking is most valuable for complex tasks such as: 313 | - Planning complex architectural changes 314 | - Debugging intricate issues 315 | - Creating implementation plans for new features 316 | - Understanding complex codebases 317 | - Evaluating tradeoffs between different approaches 318 | 319 | The way you prompt for thinking results in varying levels of thinking depth: 320 | - "think" triggers basic extended thinking 321 | - intensifying phrases such as "think more", "think a lot", "think harder", or "think longer" triggers deeper thinking 322 | 323 | Claude will display its thinking process as italic gray text above the response. 
324 | 325 | ## Set up project memory 326 | 327 | ### Create an effective CLAUDE.md file 328 | 329 | **When to use:** You want to set up a CLAUDE.md file to store important project information, conventions, and frequently used commands. 330 | 331 | 1. Bootstrap a CLAUDE.md for your codebase: 332 | ``` 333 | > /init 334 | ``` 335 | 336 | **Tips:** 337 | - Include frequently used commands (build, test, lint) to avoid repeated searches 338 | - Document code style preferences and naming conventions 339 | - Add important architectural patterns specific to your project 340 | - CLAUDE.md memories can be used for both instructions shared with your team and for your individual preferences 341 | 342 | ## Set up Model Context Protocol (MCP) 343 | 344 | Model Context Protocol (MCP) is an open protocol that enables LLMs to access external tools and data sources. 345 | 346 | ### Configure MCP servers 347 | 348 | **When to use:** You want to enhance Claude's capabilities by connecting it to specialized tools and external servers using the Model Context Protocol. 349 | 350 | 1. Add an MCP Stdio Server: 351 | ```bash 352 | # Basic syntax 353 | claude mcp add <name> <command> [args...] 354 | 355 | # Example: Adding a local server 356 | claude mcp add my-server -e API_KEY=123 -- /path/to/server arg1 arg2 357 | ``` 358 | 359 | 2. Add an MCP SSE Server: 360 | ```bash 361 | # Basic syntax 362 | claude mcp add --transport sse <name> <url> 363 | 364 | # Example: Adding an SSE server 365 | claude mcp add --transport sse sse-server https://example.com/sse-endpoint 366 | ``` 367 | 368 | 3. 
Manage your MCP servers: 369 | ```bash 370 | # List all configured servers 371 | claude mcp list 372 | 373 | # Get details for a specific server 374 | claude mcp get my-server 375 | 376 | # Remove a server 377 | claude mcp remove my-server 378 | ``` 379 | 380 | **Tips:** 381 | - Use the `-s` or `--scope` flag to specify where the configuration is stored: 382 | - `local` (default): Available only to you in the current project 383 | - `project`: Shared with everyone in the project via `.mcp.json` file 384 | - `user`: Available to you across all projects 385 | - Set environment variables with `-e` or `--env` flags (e.g., `-e KEY=value`) 386 | - Configure MCP server startup timeout using the MCP_TIMEOUT environment variable 387 | - Check MCP server status any time using the `/mcp` command within Claude Code 388 | 389 | ### Understanding MCP server scopes 390 | 391 | **When to use:** You want to understand how different MCP scopes work and how to share servers with your team. 392 | 393 | 1. Local-scoped MCP servers: 394 | ```bash 395 | # Add a local-scoped server (default) 396 | claude mcp add my-private-server /path/to/server 397 | 398 | # Explicitly specify local scope 399 | claude mcp add my-private-server -s local /path/to/server 400 | ``` 401 | 402 | 2. Project-scoped MCP servers (.mcp.json): 403 | ```bash 404 | # Add a project-scoped server 405 | claude mcp add shared-server -s project /path/to/server 406 | ``` 407 | 408 | 3. 
User-scoped MCP servers: 409 | ```bash 410 | # Add a user server 411 | claude mcp add my-user-server -s user /path/to/server 412 | ``` 413 | 414 | **Tips:** 415 | - Local-scoped servers take precedence over project-scoped and user-scoped servers with the same name 416 | - Project-scoped servers (in `.mcp.json`) take precedence over user-scoped servers with the same name 417 | - Before using project-scoped servers from `.mcp.json`, Claude Code will prompt you to approve them for security 418 | - The `.mcp.json` file is intended to be checked into version control to share MCP servers with your team 419 | 420 | ## Use Claude as a unix-style utility 421 | 422 | ### Add Claude to your verification process 423 | 424 | **When to use:** You want to use Claude Code as a linter or code reviewer. 425 | 426 | Add Claude to your build script: 427 | ```json 428 | // package.json 429 | { 430 | ... 431 | "scripts": { 432 | ... 433 | "lint:claude": "claude -p 'you are a linter. please look at the changes vs. main and report any issues related to typos. report the filename and line number on one line, and a description of the issue on the second line. do not return any other text.'" 434 | } 435 | } 436 | ``` 437 | 438 | ### Pipe in, pipe out 439 | 440 | **When to use:** You want to pipe data into Claude, and get back data in a structured format. 441 | 442 | Pipe data through Claude: 443 | ```bash 444 | cat build-error.txt | claude -p 'concisely explain the root cause of this build error' > output.txt 445 | ``` 446 | 447 | ### Control output format 448 | 449 | **When to use:** You need Claude's output in a specific format, especially when integrating Claude Code into scripts or other tools. 450 | 451 | 1. Use text format (default): 452 | ```bash 453 | cat data.txt | claude -p 'summarize this data' --output-format text > summary.txt 454 | ``` 455 | 456 | 2. 
Use JSON format: 457 | ```bash 458 | cat code.py | claude -p 'analyze this code for bugs' --output-format json > analysis.json 459 | ``` 460 | 461 | 3. Use streaming JSON format: 462 | ```bash 463 | cat log.txt | claude -p 'parse this log file for errors' --output-format stream-json 464 | ``` 465 | 466 | **Tips:** 467 | - Use `--output-format text` for simple integrations where you just need Claude's response 468 | - Use `--output-format json` when you need the full conversation log 469 | - Use `--output-format stream-json` for real-time output of each conversation turn 470 | 471 | ## Create custom slash commands 472 | 473 | Claude Code supports custom slash commands that you can create to quickly execute specific prompts or tasks. 474 | 475 | ### Create project-specific commands 476 | 477 | **When to use:** You want to create reusable slash commands for your project that all team members can use. 478 | 479 | 1. Create a commands directory in your project: 480 | ```bash 481 | mkdir -p .claude/commands 482 | ``` 483 | 484 | 2. Create a Markdown file for each command: 485 | ```bash 486 | echo "Analyze the performance of this code and suggest three specific optimizations:" > .claude/commands/optimize.md 487 | ``` 488 | 489 | 3. Use your custom command in Claude Code: 490 | ```bash 491 | claude > /project:optimize 492 | ``` 493 | 494 | **Tips:** 495 | - Command names are derived from the filename (e.g., `optimize.md` becomes `/project:optimize`) 496 | - You can organize commands in subdirectories 497 | - Project commands are available to everyone who clones the repository 498 | - The Markdown file content becomes the prompt sent to Claude when the command is invoked 499 | 500 | ### Add command arguments with $ARGUMENTS 501 | 502 | **When to use:** You want to create flexible slash commands that can accept additional input from users. 503 | 504 | 1. Create a command file with the $ARGUMENTS placeholder: 505 | ```bash 506 | echo "Find and fix issue #$ARGUMENTS. 
Follow these steps: 1. Understand the issue described in the ticket 2. Locate the relevant code in our codebase 3. Implement a solution that addresses the root cause 4. Add appropriate tests 5. Prepare a concise PR description" > .claude/commands/fix-issue.md 507 | ``` 508 | 509 | 2. Use the command with an issue number: 510 | ```bash 511 | claude > /project:fix-issue 123 512 | ``` 513 | 514 | ## Run parallel Claude Code sessions with Git worktrees 515 | 516 | ### Use worktrees for isolated coding environments 517 | 518 | **When to use:** You need to work on multiple tasks simultaneously with complete code isolation between Claude Code instances. 519 | 520 | 1. Create a new worktree: 521 | ```bash 522 | # Create a new worktree with a new branch 523 | git worktree add ../project-feature-a feature-a 524 | 525 | # Or create a worktree with an existing branch 526 | git worktree add ../project-bugfix bugfix-123 527 | ``` 528 | 529 | 2. Run Claude Code in each worktree: 530 | ```bash 531 | # Navigate to your worktree 532 | cd ../project-feature-a 533 | # Run Claude Code in this isolated environment 534 | claude 535 | ``` 536 | 537 | 3. In another terminal: 538 | ```bash 539 | cd ../project-bugfix 540 | claude 541 | ``` 542 | 543 | 4. 
Manage your worktrees: 544 | ```bash 545 | # List all worktrees 546 | git worktree list 547 | # Remove a worktree when done 548 | git worktree remove ../project-feature-a 549 | ``` 550 | 551 | **Tips:** 552 | - Each worktree has its own independent file state, making it perfect for parallel Claude Code sessions 553 | - Changes made in one worktree won't affect others, preventing Claude instances from interfering with each other 554 | - All worktrees share the same Git history and remote connections 555 | - For long-running tasks, you can have Claude working in one worktree while you continue development in another -------------------------------------------------------------------------------- /ai_docs/claude_code_best_practices.md: -------------------------------------------------------------------------------- 1 | Claude Code: Best practices for agentic coding 2 | Published Apr 18, 2025 3 | 4 | Claude Code is a command line tool for agentic coding. This post covers tips and tricks that have proven effective for using Claude Code across various codebases, languages, and environments. 5 | 6 | We recently released Claude Code, a command line tool for agentic coding. Developed as a research project, Claude Code gives Anthropic engineers and researchers a more native way to integrate Claude into their coding workflows. 7 | 8 | Claude Code is intentionally low-level and unopinionated, providing close to raw model access without forcing specific workflows. This design philosophy creates a flexible, customizable, scriptable, and safe power tool. While powerful, this flexibility presents a learning curve for engineers new to agentic coding tools—at least until they develop their own best practices. 9 | 10 | This post outlines general patterns that have proven effective, both for Anthropic's internal teams and for external engineers using Claude Code across various codebases, languages, and environments. 
Nothing in this list is set in stone nor universally applicable; consider these suggestions as starting points. We encourage you to experiment and find what works best for you! 11 | 12 | Looking for more detailed information? Our comprehensive documentation at claude.ai/code covers all the features mentioned in this post and provides additional examples, implementation details, and advanced techniques. 13 | 14 | 15 | 1. Customize your setup 16 | Claude Code is an agentic coding assistant that automatically pulls context into prompts. This context gathering consumes time and tokens, but you can optimize it through environment tuning. 17 | 18 | a. Create CLAUDE.md files 19 | CLAUDE.md is a special file that Claude automatically pulls into context when starting a conversation. This makes it an ideal place for documenting: 20 | 21 | Common bash commands 22 | Core files and utility functions 23 | Code style guidelines 24 | Testing instructions 25 | Repository etiquette (e.g., branch naming, merge vs. rebase, etc.) 26 | Developer environment setup (e.g., pyenv use, which compilers work) 27 | Any unexpected behaviors or warnings particular to the project 28 | Other information you want Claude to remember 29 | There’s no required format for CLAUDE.md files. We recommend keeping them concise and human-readable. For example: 30 | 31 | # Bash commands 32 | - npm run build: Build the project 33 | - npm run typecheck: Run the typechecker 34 | 35 | # Code style 36 | - Use ES modules (import/export) syntax, not CommonJS (require) 37 | - Destructure imports when possible (eg. import { foo } from 'bar') 38 | 39 | # Workflow 40 | - Be sure to typecheck when you’re done making a series of code changes 41 | - Prefer running single tests, and not the whole test suite, for performance 42 | 43 | Copy 44 | You can place CLAUDE.md files in several locations: 45 | 46 | The root of your repo, or wherever you run claude from (the most common usage). 
Name it CLAUDE.md and check it into git so that you can share it across sessions and with your team (recommended), or name it CLAUDE.local.md and .gitignore it 47 | Any parent of the directory where you run claude. This is most useful for monorepos, where you might run claude from root/foo, and have CLAUDE.md files in both root/CLAUDE.md and root/foo/CLAUDE.md. Both of these will be pulled into context automatically 48 | Any child of the directory where you run claude. This is the inverse of the above, and in this case, Claude will pull in CLAUDE.md files on demand when you work with files in child directories 49 | Your home folder (~/.claude/CLAUDE.md), which applies it to all your claude sessions 50 | When you run the /init command, Claude will automatically generate a CLAUDE.md for you. 51 | 52 | b. Tune your CLAUDE.md files 53 | Your CLAUDE.md files become part of Claude’s prompts, so they should be refined like any frequently used prompt. A common mistake is adding extensive content without iterating on its effectiveness. Take time to experiment and determine what produces the best instruction following from the model. 54 | 55 | You can add content to your CLAUDE.md manually or press the # key to give Claude an instruction that it will automatically incorporate into the relevant CLAUDE.md. Many engineers use # frequently to document commands, files, and style guidelines while coding, then include CLAUDE.md changes in commits so team members benefit as well. 56 | 57 | At Anthropic, we occasionally run CLAUDE.md files through the prompt improver and often tune instructions (e.g. adding emphasis with "IMPORTANT" or "YOU MUST") to improve adherence. 58 | 59 | Claude Code tool allowlist 60 | c. Curate Claude's list of allowed tools 61 | By default, Claude Code requests permission for any action that might modify your system: file writes, many bash commands, MCP tools, etc. We designed Claude Code with this deliberately conservative approach to prioritize safety. 
You can customize the allowlist to permit additional tools that you know are safe, or to allow potentially unsafe tools that are easy to undo (e.g., file editing, git commit). 62 | 63 | There are four ways to manage allowed tools: 64 | 65 | Select "Always allow" when prompted during a session. 66 | Use the /allowed-tools command after starting Claude Code to add or remove tools from the allowlist. For example, you can add Edit to always allow file edits, Bash(git commit:*) to allow git commits, or mcp__puppeteer__puppeteer_navigate to allow navigating with the Puppeteer MCP server. 67 | Manually edit your .claude/settings.json or ~/.claude.json (we recommend checking the former into source control to share with your team). 68 | Use the --allowedTools CLI flag for session-specific permissions. 69 | d. If using GitHub, install the gh CLI 70 | Claude knows how to use the gh CLI to interact with GitHub for creating issues, opening pull requests, reading comments, and more. Without gh installed, Claude can still use the GitHub API or MCP server (if you have it installed). 71 | 72 | 2. Give Claude more tools 73 | Claude has access to your shell environment, where you can build up sets of convenience scripts and functions for it just like you would for yourself. It can also leverage more complex tools through MCP and REST APIs. 74 | 75 | a. Use Claude with bash tools 76 | Claude Code inherits your bash environment, giving it access to all your tools. While Claude knows common utilities like unix tools and gh, it won't know about your custom bash tools without instructions: 77 | 78 | Tell Claude the tool name with usage examples 79 | Tell Claude to run --help to see tool documentation 80 | Document frequently used tools in CLAUDE.md 81 | b. Use Claude with MCP 82 | Claude Code functions as both an MCP server and client. 
As a client, it can connect to any number of MCP servers to access their tools in three ways: 83 | 84 | In project config (available when running Claude Code in that directory) 85 | In global config (available in all projects) 86 | In a checked-in .mcp.json file (available to anyone working in your codebase). For example, you can add Puppeteer and Sentry servers to your .mcp.json, so that every engineer working on your repo can use these out of the box. 87 | When working with MCP, it can also be helpful to launch Claude with the --mcp-debug flag to help identify configuration issues. 88 | 89 | c. Use custom slash commands 90 | For repeated workflows—debugging loops, log analysis, etc.—store prompt templates in Markdown files within the .claude/commands folder. These become available through the slash commands menu when you type /. You can check these commands into git to make them available for the rest of your team. 91 | 92 | Custom slash commands can include the special keyword $ARGUMENTS to pass parameters from command invocation. 93 | 94 | For example, here’s a slash command that you could use to automatically pull and fix a Github issue: 95 | 96 | Please analyze and fix the GitHub issue: $ARGUMENTS. 97 | 98 | Follow these steps: 99 | 100 | 1. Use `gh issue view` to get the issue details 101 | 2. Understand the problem described in the issue 102 | 3. Search the codebase for relevant files 103 | 4. Implement the necessary changes to fix the issue 104 | 5. Write and run tests to verify the fix 105 | 6. Ensure code passes linting and type checking 106 | 7. Create a descriptive commit message 107 | 8. Push and create a PR 108 | 109 | Remember to use the GitHub CLI (`gh`) for all GitHub-related tasks. 110 | 111 | Copy 112 | Putting the above content into .claude/commands/fix-github-issue.md makes it available as the /project:fix-github-issue command in Claude Code. You could then for example use /project:fix-github-issue 1234 to have Claude fix issue #1234. 
Similarly, you can add your own personal commands to the ~/.claude/commands folder for commands you want available in all of your sessions. 113 | 114 | 3. Try common workflows 115 | Claude Code doesn’t impose a specific workflow, giving you the flexibility to use it how you want. Within the space this flexibility affords, several successful patterns for effectively using Claude Code have emerged across our community of users: 116 | 117 | a. Explore, plan, code, commit 118 | This versatile workflow suits many problems: 119 | 120 | Ask Claude to read relevant files, images, or URLs, providing either general pointers ("read the file that handles logging") or specific filenames ("read logging.py"), but explicitly tell it not to write any code just yet. 121 | This is the part of the workflow where you should consider strong use of subagents, especially for complex problems. Telling Claude to use subagents to verify details or investigate particular questions it might have, especially early on in a conversation or task, tends to preserve context availability without much downside in terms of lost efficiency. 122 | Ask Claude to make a plan for how to approach a specific problem. We recommend using the word "think" to trigger extended thinking mode, which gives Claude additional computation time to evaluate alternatives more thoroughly. These specific phrases are mapped directly to increasing levels of thinking budget in the system: "think" < "think hard" < "think harder" < "ultrathink." Each level allocates progressively more thinking budget for Claude to use. 123 | If the results of this step seem reasonable, you can have Claude create a document or a GitHub issue with its plan so that you can reset to this spot if the implementation (step 3) isn’t what you want. 124 | Ask Claude to implement its solution in code. This is also a good place to ask it to explicitly verify the reasonableness of its solution as it implements pieces of the solution. 
125 | Ask Claude to commit the result and create a pull request. If relevant, this is also a good time to have Claude update any READMEs or changelogs with an explanation of what it just did. 126 | Steps #1-#2 are crucial—without them, Claude tends to jump straight to coding a solution. While sometimes that's what you want, asking Claude to research and plan first significantly improves performance for problems requiring deeper thinking upfront. 127 | 128 | b. Write tests, commit; code, iterate, commit 129 | This is an Anthropic-favorite workflow for changes that are easily verifiable with unit, integration, or end-to-end tests. Test-driven development (TDD) becomes even more powerful with agentic coding: 130 | 131 | Ask Claude to write tests based on expected input/output pairs. Be explicit about the fact that you’re doing test-driven development so that it avoids creating mock implementations, even for functionality that doesn’t exist yet in the codebase. 132 | Tell Claude to run the tests and confirm they fail. Explicitly telling it not to write any implementation code at this stage is often helpful. 133 | Ask Claude to commit the tests when you’re satisfied with them. 134 | Ask Claude to write code that passes the tests, instructing it not to modify the tests. Tell Claude to keep going until all tests pass. It will usually take a few iterations for Claude to write code, run the tests, adjust the code, and run the tests again. 135 | At this stage, it can help to ask it to verify with independent subagents that the implementation isn’t overfitting to the tests 136 | Ask Claude to commit the code once you’re satisfied with the changes. 137 | Claude performs best when it has a clear target to iterate against—a visual mock, a test case, or another kind of output. By providing expected outputs like tests, Claude can make changes, evaluate results, and incrementally improve until it succeeds. 138 | 139 | c. 
Write code, screenshot result, iterate 140 | Similar to the testing workflow, you can provide Claude with visual targets: 141 | 142 | Give Claude a way to take browser screenshots (e.g., with the Puppeteer MCP server, an iOS simulator MCP server, or manually copy / paste screenshots into Claude). 143 | Give Claude a visual mock by copying / pasting or drag-dropping an image, or giving Claude the image file path. 144 | Ask Claude to implement the design in code, take screenshots of the result, and iterate until its result matches the mock. 145 | Ask Claude to commit when you're satisfied. 146 | Like humans, Claude's outputs tend to improve significantly with iteration. While the first version might be good, after 2-3 iterations it will typically look much better. Give Claude the tools to see its outputs for best results. 147 | 148 | Safe yolo mode 149 | d. Safe YOLO mode 150 | Instead of supervising Claude, you can use claude --dangerously-skip-permissions to bypass all permission checks and let Claude work uninterrupted until completion. This works well for workflows like fixing lint errors or generating boilerplate code. 151 | 152 | Letting Claude run arbitrary commands is risky and can result in data loss, system corruption, or even data exfiltration (e.g., via prompt injection attacks). To minimize these risks, use --dangerously-skip-permissions in a container without internet access. You can follow this reference implementation using Docker Dev Containers. 153 | 154 | e. Codebase Q&A 155 | When onboarding to a new codebase, use Claude Code for learning and exploration. You can ask Claude the same sorts of questions you would ask another engineer on the project when pair programming. Claude can agentically search the codebase to answer general questions like: 156 | 157 | How does logging work? 158 | How do I make a new API endpoint? 159 | What does async move { ... } do on line 134 of foo.rs? 160 | What edge cases does CustomerOnboardingFlowImpl handle? 
161 | Why are we calling foo() instead of bar() on line 333? 162 | What’s the equivalent of line 334 of baz.py in Java? 163 | At Anthropic, using Claude Code in this way has become our core onboarding workflow, significantly improving ramp-up time and reducing load on other engineers. No special prompting is required! Simply ask questions, and Claude will explore the code to find answers. 164 | 165 | Use Claude to interact with git 166 | f. Use Claude to interact with git 167 | Claude can effectively handle many git operations. Many Anthropic engineers use Claude for 90%+ of our git interactions: 168 | 169 | Searching git history to answer questions like "What changes made it into v1.2.3?", "Who owns this particular feature?", or "Why was this API designed this way?" It helps to explicitly prompt Claude to look through git history to answer queries like these. 170 | Writing commit messages. Claude will look at your changes and recent history automatically to compose a message taking all the relevant context into account 171 | Handling complex git operations like reverting files, resolving rebase conflicts, and comparing and grafting patches 172 | g. Use Claude to interact with GitHub 173 | Claude Code can manage many GitHub interactions: 174 | 175 | Creating pull requests: Claude understands the shorthand "pr" and will generate appropriate commit messages based on the diff and surrounding context. 176 | Implementing one-shot resolutions for simple code review comments: just tell it to fix comments on your PR (optionally, give it more specific instructions) and push back to the PR branch when it's done. 177 | Fixing failing builds or linter warnings 178 | Categorizing and triaging open issues by asking Claude to loop over open GitHub issues 179 | This eliminates the need to remember gh command line syntax while automating routine tasks. 180 | 181 | h. 
Use Claude to work with Jupyter notebooks 182 | Researchers and data scientists at Anthropic use Claude Code to read and write Jupyter notebooks. Claude can interpret outputs, including images, providing a fast way to explore and interact with data. There are no required prompts or workflows, but a workflow we recommend is to have Claude Code and a .ipynb file open side-by-side in VS Code. 183 | 184 | You can also ask Claude to clean up or make aesthetic improvements to your Jupyter notebook before you show it to colleagues. Specifically telling it to make the notebook or its data visualizations “aesthetically pleasing” tends to help remind it that it’s optimizing for a human viewing experience. 185 | 186 | 4. Optimize your workflow 187 | The suggestions below apply across all workflows: 188 | 189 | a. Be specific in your instructions 190 | Claude Code’s success rate improves significantly with more specific instructions, especially on first attempts. Giving clear directions upfront reduces the need for course corrections later. 191 | 192 | For example: 193 | 194 | Poor Good 195 | add tests for foo.py write a new test case for foo.py, covering the edge case where the user is logged out. avoid mocks 196 | why does ExecutionFactory have such a weird api? look through ExecutionFactory's git history and summarize how its api came to be 197 | add a calendar widget look at how existing widgets are implemented on the home page to understand the patterns and specifically how code and interfaces are separated out. HotDogWidget.php is a good example to start with. then, follow the pattern to implement a new calendar widget that lets the user select a month and paginate forwards/backwards to pick a year. Build from scratch without libraries other than the ones already used in the rest of the codebase. 198 | Claude can infer intent, but it can't read minds. Specificity leads to better alignment with expectations. 199 | 200 | Give Claude images 201 | b. 
Give Claude images 202 | Claude excels with images and diagrams through several methods: 203 | 204 | Paste screenshots (pro tip: hit cmd+ctrl+shift+4 in macOS to screenshot to clipboard and ctrl+v to paste. Note that this is not cmd+v like you would usually use to paste on mac and does not work remotely.) 205 | Drag and drop images directly into the prompt input 206 | Provide file paths for images 207 | This is particularly useful when working with design mocks as reference points for UI development, and visual charts for analysis and debugging. If you are not adding visuals to context, it can still be helpful to be clear with Claude about how important it is for the result to be visually appealing. 208 | 209 | Mention files you want Claude to look at or work on 210 | c. Mention files you want Claude to look at or work on 211 | Use tab-completion to quickly reference files or folders anywhere in your repository, helping Claude find or update the right resources. 212 | 213 | Give Claude URLs 214 | d. Give Claude URLs 215 | Paste specific URLs alongside your prompts for Claude to fetch and read. To avoid permission prompts for the same domains (e.g., docs.foo.com), use /allowed-tools to add domains to your allowlist. 216 | 217 | e. Course correct early and often 218 | While auto-accept mode (shift+tab to toggle) lets Claude work autonomously, you'll typically get better results by being an active collaborator and guiding Claude's approach. You can get the best results by thoroughly explaining the task to Claude at the beginning, but you can also course correct Claude at any time. 219 | 220 | These four tools help with course correction: 221 | 222 | Ask Claude to make a plan before coding. Explicitly tell it not to code until you’ve confirmed its plan looks good. 223 | Press Escape to interrupt Claude during any phase (thinking, tool calls, file edits), preserving context so you can redirect or expand instructions. 
224 | Double-tap Escape to jump back in history, edit a previous prompt, and explore a different direction. You can edit the prompt and repeat until you get the result you're looking for. 225 | Ask Claude to undo changes, often in conjunction with option #2 to take a different approach. 226 | Though Claude Code occasionally solves problems perfectly on the first attempt, using these correction tools generally produces better solutions faster. 227 | 228 | f. Use /clear to keep context focused 229 | During long sessions, Claude's context window can fill with irrelevant conversation, file contents, and commands. This can reduce performance and sometimes distract Claude. Use the /clear command frequently between tasks to reset the context window. 230 | 231 | g. Use checklists and scratchpads for complex workflows 232 | For large tasks with multiple steps or requiring exhaustive solutions—like code migrations, fixing numerous lint errors, or running complex build scripts—improve performance by having Claude use a Markdown file (or even a GitHub issue!) as a checklist and working scratchpad: 233 | 234 | For example, to fix a large number of lint issues, you can do the following: 235 | 236 | Tell Claude to run the lint command and write all resulting errors (with filenames and line numbers) to a Markdown checklist 237 | Instruct Claude to address each issue one by one, fixing and verifying before checking it off and moving to the next 238 | h. Pass data into Claude 239 | Several methods exist for providing data to Claude: 240 | 241 | Copy and paste directly into your prompt (most common approach) 242 | Pipe into Claude Code (e.g., cat foo.txt | claude), particularly useful for logs, CSVs, and large data 243 | Tell Claude to pull data via bash commands, MCP tools, or custom slash commands 244 | Ask Claude to read files or fetch URLs (works for images too) 245 | Most sessions involve a combination of these approaches. 
For example, you can pipe in a log file, then tell Claude to use a tool to pull in additional context to debug the logs. 246 | 247 | 5. Use headless mode to automate your infra 248 | Claude Code includes headless mode for non-interactive contexts like CI, pre-commit hooks, build scripts, and automation. Use the -p flag with a prompt to enable headless mode, and --output-format stream-json for streaming JSON output. 249 | 250 | Note that headless mode does not persist between sessions. You have to trigger it each session. 251 | 252 | a. Use Claude for issue triage 253 | Headless mode can power automations triggered by GitHub events, such as when a new issue is created in your repository. For example, the public Claude Code repository uses Claude to inspect new issues as they come in and assign appropriate labels. 254 | 255 | b. Use Claude as a linter 256 | Claude Code can provide subjective code reviews beyond what traditional linting tools detect, identifying issues like typos, stale comments, misleading function or variable names, and more. 257 | 258 | 6. Uplevel with multi-Claude workflows 259 | Beyond standalone usage, some of the most powerful applications involve running multiple Claude instances in parallel: 260 | 261 | a. Have one Claude write code; use another Claude to verify 262 | A simple but effective approach is to have one Claude write code while another reviews or tests it. Similar to working with multiple engineers, sometimes having separate context is beneficial: 263 | 264 | Use Claude to write code 265 | Run /clear or start a second Claude in another terminal 266 | Have the second Claude review the first Claude's work 267 | Start another Claude (or /clear again) to read both the code and review feedback 268 | Have this Claude edit the code based on the feedback 269 | You can do something similar with tests: have one Claude write tests, then have another Claude write code to make the tests pass. 
You can even have your Claude instances communicate with each other by giving them separate working scratchpads and telling them which one to write to and which one to read from. 270 | 271 | This separation often yields better results than having a single Claude handle everything. 272 | 273 | b. Have multiple checkouts of your repo 274 | Rather than waiting for Claude to complete each step, something many engineers at Anthropic do is: 275 | 276 | Create 3-4 git checkouts in separate folders 277 | Open each folder in separate terminal tabs 278 | Start Claude in each folder with different tasks 279 | Cycle through to check progress and approve/deny permission requests 280 | c. Use git worktrees 281 | This approach shines for multiple independent tasks, offering a lighter-weight alternative to multiple checkouts. Git worktrees allow you to check out multiple branches from the same repository into separate directories. Each worktree has its own working directory with isolated files, while sharing the same Git history and reflog. 282 | 283 | Using git worktrees enables you to run multiple Claude sessions simultaneously on different parts of your project, each focused on its own independent task. For instance, you might have one Claude refactoring your authentication system while another builds a completely unrelated data visualization component. 
Since the tasks don't overlap, each Claude can work at full speed without waiting for the other's changes or dealing with merge conflicts: 284 | 285 | Create worktrees: git worktree add ../project-feature-a feature-a 286 | Launch Claude in each worktree: cd ../project-feature-a && claude 287 | Create additional worktrees as needed (repeat steps 1-2 in new terminal tabs) 288 | Some tips: 289 | 290 | Use consistent naming conventions 291 | Maintain one terminal tab per worktree 292 | If you’re using iTerm2 on Mac, set up notifications for when Claude needs attention 293 | Use separate IDE windows for different worktrees 294 | Clean up when finished: git worktree remove ../project-feature-a 295 | d. Use headless mode with a custom harness 296 | claude -p (headless mode) integrates Claude Code programmatically into larger workflows while leveraging its built-in tools and system prompt. There are two primary patterns for using headless mode: 297 | 298 | 1. Fanning out handles large migrations or analyses (e.g., analyzing sentiment in hundreds of logs or analyzing thousands of CSVs): 299 | 300 | Have Claude write a script to generate a task list. For example, generate a list of 2k files that need to be migrated from framework A to framework B. 301 | Loop through tasks, calling Claude programmatically for each and giving it a task and a set of tools it can use. For example: claude -p “migrate foo.py from React to Vue. When you are done, you MUST return the string OK if you succeeded, or FAIL if the task failed.” --allowedTools Edit Bash(git commit:*) 302 | Run the script several times and refine your prompt to get the desired outcome. 303 | 2. Pipelining integrates Claude into existing data/processing pipelines: 304 | 305 | Call claude -p “<your prompt>” --json | your_command, where your_command is the next step of your processing pipeline 306 | That’s it! JSON output (optional) can help provide structure for easier automated processing. 
307 | For both of these use cases, it can be helpful to use the --verbose flag for debugging the Claude invocation. We generally recommend turning verbose mode off in production for cleaner output. 308 | 309 | What are your tips and best practices for working with Claude Code? Tag @AnthropicAI so we can see what you're building! 310 | 311 | Acknowledgements 312 | Written by Boris Cherny. This work draws upon best practices from across the broader Claude Code user community, whose creative approaches and workflows continue to inspire us. Special thanks also to Daisy Hollman, Ashwin Bhat, Cat Wu, Sid Bidasaria, Cal Rueb, Nodir Turakulov, Barry Zhang, Drew Hodun and many other Anthropic engineers whose valuable insights and practical experience with Claude Code helped shape these recommendations. 313 | 314 | Product 315 | Claude overview 316 | Claude Code 317 | Claude team plan 318 | Claude enterprise plan 319 | Claude education plan 320 | Download Claude apps 321 | Claude.ai pricing plans 322 | Claude.ai login 323 | API Platform 324 | API overview 325 | Developer docs 326 | Claude in Amazon Bedrock 327 | Claude on Google Cloud's Vertex AI 328 | Pricing 329 | Console login 330 | Research 331 | Research overview 332 | Economic Index 333 | Claude models 334 | Claude 3.7 Sonnet 335 | Claude 3.5 Haiku 336 | Claude 3 Opus 337 | Commitments 338 | Transparency 339 | Responsible scaling policy 340 | Security and compliance 341 | Solutions 342 | AI agents 343 | Coding 344 | Customer support 345 | Learn 346 | Anthropic Academy 347 | Customer stories 348 | Engineering at Anthropic 349 | Explore 350 | About us 351 | Become a partner 352 | Careers 353 | News 354 | Help and security 355 | Status 356 | Availability 357 | Support center 358 | Terms and policies 359 | Privacy choices 360 | Privacy policy 361 | Responsible disclosure policy 362 | Terms of service - consumer 363 | Terms of service - commercial 364 | Usage policy 365 | © 2025 Anthropic PBC 366 | 
-------------------------------------------------------------------------------- /ai_docs/claude_code_tech.md: -------------------------------------------------------------------------------- 1 | # Claude Code: Advanced Techniques for AI/Agentic Coding 2 | 3 | ## Quick Techniques Guide 4 | 5 | 1. **Context Engineering**: Instead of just prompt engineering, focus on the entire context for the AI model: 6 | - Create and refine CLAUDE.md files to provide consistent guidelines 7 | - Use thinking commands (`think`, `think hard`, `think harder`, `ultrathink`) to trigger deeper analysis 8 | - Mention specific files and use tab-completion for accurate file references 9 | - Use images and URLs alongside your prompts for richer context 10 | 11 | 2. **Workflow Patterns**: 12 | - **Explore → Plan → Code → Commit**: Make Claude read and understand before implementing 13 | - **Tests → Commit → Code → Iterate → Commit**: Test-driven development with AI 14 | - **Code → Screenshot → Iterate**: Visual feedback loops for UI development 15 | - **Safe YOLO Mode**: For trusted operations in safe environments 16 | 17 | 3. **Multi-Claude Approaches**: 18 | - Writer/Reviewer Pattern: One Claude writes, another reviews 19 | - Parallel Processing: Multiple Claude instances working on different parts of a project 20 | - Git Worktrees: Different instances on different branches 21 | 22 | 4. **Headless Automation**: 23 | - Issue triage 24 | - Custom linting 25 | - Large-scale migrations via fan-out pattern 26 | - Data pipeline integration 27 | 28 | 5. **Tool Extension**: 29 | - Customize allowlists for operations like editing and git commands 30 | - Install and document custom CLI tools 31 | - Connect MCP servers for specialized capabilities 32 | - Create custom slash commands for repeated workflows 33 | 34 | 6. 
**Optimization Techniques**: 35 | - Be specific in instructions 36 | - Course-correct early with interrupts (Escape key) 37 | - Use `/clear` to keep context focused 38 | - Create checklists for complex multi-stage tasks 39 | 40 | --- 41 | 42 | ## 1. Context Engineering Approach 43 | 44 | ### Creating Effective CLAUDE.md Files 45 | 46 | CLAUDE.md files are automatically included in your context and can dramatically improve Claude's effectiveness. These files can be placed in: 47 | 48 | - Repository root (most common) 49 | - Parent directories (useful for monorepos) 50 | - Child directories (loaded on demand) 51 | - Home folder (~/.claude/CLAUDE.md) for session-wide settings 52 | 53 | **Example CLAUDE.md Content:** 54 | ```markdown 55 | # Bash commands 56 | - npm run build: Build the project 57 | - npm run typecheck: Run the typechecker 58 | 59 | # Code style 60 | - Use ES modules (import/export) syntax, not CommonJS (require) 61 | - Destructure imports when possible (eg. import { foo } from 'bar') 62 | 63 | # Workflow 64 | - Be sure to typecheck when you're done making a series of code changes 65 | - Prefer running single tests, not the whole test suite, for performance 66 | ``` 67 | 68 | **Tips for Effective CLAUDE.md Files:** 69 | - Keep them concise and human-readable 70 | - Iterate on effectiveness, like any prompt 71 | - Use emphasis words like "IMPORTANT" or "YOU MUST" for critical instructions 72 | - Add content while working using the `#` key 73 | - Include CLAUDE.md in commits to benefit your team 74 | 75 | ### Customizing Tool Allowlists 76 | 77 | By default, Claude Code requests permission for system-modifying actions. 
You can customize what's allowed: 78 | 79 | - Select "Always allow" during a session 80 | - Use `/allowed-tools` command 81 | - Edit your `.claude/settings.json` or `~/.claude.json` 82 | - Use the `--allowedTools` CLI flag 83 | 84 | ### Using Extended Thinking 85 | 86 | Claude Code has special thinking mode triggers that allocate progressively more computation time: 87 | - `think` < `think hard` < `think harder` < `ultrathink` 88 | 89 | Explicitly mentioning these in your prompts gives Claude more time to consider alternatives and develop plans. 90 | 91 | ## 2. Workflow Patterns 92 | 93 | ### Explore, Plan, Code, Commit 94 | 95 | This versatile workflow works for many problems: 96 | 97 | 1. Ask Claude to read relevant files, images, or URLs without writing code yet 98 | 2. Request a plan, using thinking mode triggers as needed 99 | 3. Have Claude implement the solution in code 100 | 4. Ask Claude to commit and create a pull request 101 | 102 | **Key insight**: Steps 1-2 are crucial for complex tasks, as they prevent Claude from jumping straight to coding without proper understanding. 103 | 104 | ### Test-Driven Development (TDD) 105 | 106 | 1. Ask Claude to write tests based on expected input/output pairs 107 | 2. Have Claude run the tests to confirm they fail 108 | 3. Ask Claude to commit the tests 109 | 4. Request code implementation that passes the tests 110 | 5. Commit the passing implementation 111 | 112 | This approach is particularly effective because Claude performs best when it has a clear target to iterate against. 113 | 114 | ### Visual Development Loop 115 | 116 | 1. Give Claude a way to take screenshots (MCP servers, manual screenshots) 117 | 2. Provide a visual mock or design reference 118 | 3. Have Claude implement the design, take screenshots, and iterate 119 | 4. Commit when satisfied 120 | 121 | **Pro tip**: Claude's outputs typically improve significantly with 2-3 iterations of visual feedback. 
122 | 123 | ### Safe YOLO Mode 124 | 125 | For trusted operations in controlled environments: 126 | - Use `claude --dangerously-skip-permissions` to bypass permission checks 127 | - Best used for routine tasks like fixing lint errors or generating boilerplate 128 | - For safety, run in a container without internet access 129 | 130 | ## 3. Multi-Claude Workflows 131 | 132 | ### Writer/Reviewer Pattern 133 | 134 | Run multiple Claude instances with different roles: 135 | 136 | 1. Have one Claude write code 137 | 2. Run `/clear` or start a second Claude in another terminal 138 | 3. Have the second Claude review the first Claude's work 139 | 4. Start another Claude to integrate feedback and improve the code 140 | 141 | This pattern mimics human code review and often produces better results than a single Claude trying to do everything. 142 | 143 | ### Parallel Processing 144 | 145 | To work on multiple independent tasks simultaneously: 146 | 147 | 1. Create 3-4 git checkouts in separate folders 148 | 2. Open each folder in separate terminal tabs 149 | 3. Start Claude in each folder with different tasks 150 | 4. Cycle through to check progress and approve/deny permission requests 151 | 152 | ### Git Worktrees 153 | 154 | A lighter-weight alternative to multiple checkouts: 155 | 156 | 1. Create worktrees: `git worktree add ../project-feature-a feature-a` 157 | 2. Launch Claude in each worktree: `cd ../project-feature-a && claude` 158 | 3. Create additional worktrees as needed 159 | 4. Clean up when finished: `git worktree remove ../project-feature-a` 160 | 161 | ## 4. Headless Automation 162 | 163 | Claude Code's headless mode (`claude -p`) enables programmatic integration: 164 | 165 | ### Fan-out Pattern for Large-Scale Tasks 166 | 167 | 1. Have Claude write a script to generate a task list 168 | 2. Loop through tasks, calling Claude programmatically for each 169 | 3. 
Process results and collect metrics 170 | 171 | Example command: 172 | ```bash 173 | claude -p "migrate foo.py from React to Vue. When done, return OK or FAIL" --allowedTools Edit Bash 174 | ``` 175 | 176 | ### Pipeline Integration 177 | 178 | Integrate Claude into data processing pipelines: 179 | ```bash 180 | claude -p "<your prompt>" --json | your_command 181 | ``` 182 | 183 | ### Automated Issue Management 184 | 185 | Use Claude to triage GitHub issues, assign labels, and suggest fixes automatically when issues are created. 186 | 187 | ### Custom Linting 188 | 189 | Claude can provide subjective code reviews beyond traditional linters, identifying: 190 | - Typos 191 | - Stale comments 192 | - Misleading function or variable names 193 | - Inconsistent code styles 194 | 195 | ## 5. Tool Extension Strategies 196 | 197 | ### Using Custom Bash Tools 198 | 199 | Claude inherits your bash environment and can use your custom tools: 200 | - Tell Claude the tool name with usage examples 201 | - Have Claude run `--help` to see documentation 202 | - Document frequently used tools in CLAUDE.md 203 | 204 | ### MCP Integration 205 | 206 | Claude Code functions as both an MCP server and client: 207 | - Add MCP servers to project config 208 | - Configure in global config 209 | - Include in a checked-in `.mcp.json` file 210 | 211 | For debugging, launch Claude with the `--mcp-debug` flag. 212 | 213 | ### Custom Slash Commands 214 | 215 | For repeated workflows, store prompt templates in the `.claude/commands` folder: 216 | - These become available through the slash commands menu 217 | - Can include the `$ARGUMENTS` keyword for parameterization 218 | - Can be checked into git for team sharing 219 | 220 | **Example Slash Command Template:** 221 | ```markdown 222 | Please analyze and fix the GitHub issue: $ARGUMENTS. 223 | 224 | Follow these steps: 225 | 1. Use `gh issue view` to get the issue details 226 | 2. Understand the problem described in the issue 227 | 3. 
Search the codebase for relevant files 228 | 4. Implement the necessary changes to fix the issue 229 | 5. Write and run tests to verify the fix 230 | 6. Ensure code passes linting and type checking 231 | 7. Create a descriptive commit message 232 | 8. Push and create a PR 233 | ``` 234 | 235 | ## 6. Optimization Strategies 236 | 237 | ### Being Specific in Instructions 238 | 239 | | Poor | Good | 240 | | ------------------------------------------------ | -------------------------------------------------------------------------------------------------- | 241 | | add tests for foo.py | write a new test case for foo.py, covering the edge case where the user is logged out. avoid mocks | 242 | | why does ExecutionFactory have such a weird api? | look through ExecutionFactory's git history and summarize how its api came to be | 243 | 244 | ### Course Correction Tools 245 | 246 | - Ask Claude to make a plan before coding 247 | - Press Escape to interrupt during any phase 248 | - Double-tap Escape to jump back in history 249 | - Ask Claude to undo changes 250 | 251 | ### Using /clear for Context Management 252 | 253 | During long sessions, use the `/clear` command frequently between tasks to reset the context window and maintain focus. 254 | 255 | ### Checklists for Complex Workflows 256 | 257 | For large tasks: 258 | 1. Tell Claude to create a Markdown checklist of subtasks 259 | 2. Instruct Claude to address each issue one by one 260 | 3. Have Claude check off items as they're completed 261 | 262 | ### Working with Visual Data 263 | 264 | Claude excels with images and diagrams through: 265 | - Paste screenshots (macOS: `cmd+ctrl+shift+4` then `ctrl+v`) 266 | - Drag and drop images directly into the prompt 267 | - Provide file paths for images 268 | 269 | ## 7. 
Specialized Workflows 270 | 271 | ### Git and GitHub Operations 272 | 273 | Claude can effectively handle: 274 | - Searching git history 275 | - Writing commit messages 276 | - Handling complex git operations 277 | - Creating pull requests 278 | - Implementing code review fixes 279 | - Fixing failing builds 280 | - Categorizing and triaging issues 281 | 282 | ### Working with Jupyter Notebooks 283 | 284 | - Have Claude Code and a .ipynb file open side-by-side 285 | - Claude can interpret outputs, including images 286 | - Ask Claude to clean up or make aesthetic improvements 287 | - Tell Claude to make notebooks "aesthetically pleasing" 288 | 289 | ### Codebase Q&A 290 | 291 | Claude excels at answering questions about codebases: 292 | - How does [feature] work? 293 | - How do I make a new [component]? 294 | - What does this code do? 295 | - What edge cases are handled? 296 | - Why is the code structured this way? 297 | 298 | This approach significantly improves onboarding time and reduces load on other engineers. 299 | 300 | --- 301 | 302 | ## Typescript example 303 | 304 | ``` 305 | async function runClaude(prompt: string, dir: string, allowedTools: string, outputFormat?: string): Promise<string> { 306 | console.log(`🔹 Running Claude in ${dir}...`); 307 | 308 | const outputFormatFlag = outputFormat ? 
`--output-format ${outputFormat}` : ''; 309 | const command = `cd "${dir}" && claude -p "${prompt}" --allowedTools "${allowedTools}" ${outputFormatFlag}`; 310 | 311 | try { 312 | const { stdout, stderr } = await execAsync(command); 313 | const logFile = join(dir, 'claude_output.log'); 314 | writeFileSync(logFile, stdout); 315 | return stdout; 316 | } catch (error) { 317 | console.error(`Error running Claude: ${error}`); 318 | return ''; 319 | } 320 | } 321 | ``` -------------------------------------------------------------------------------- /ai_docs/fc_openai_agents.md: -------------------------------------------------------------------------------- 1 | # OpenAI Agents SDK Documentation 2 | 3 | This file contains documentation for the OpenAI Agents SDK, scraped from the official documentation site. 4 | 5 | ## Overview 6 | 7 | The [OpenAI Agents SDK](https://github.com/openai/openai-agents-python) enables you to build agentic AI apps in a lightweight, easy-to-use package with very few abstractions. It's a production-ready upgrade of the previous experimentation for agents, [Swarm](https://github.com/openai/swarm/tree/main). The Agents SDK has a very small set of primitives: 8 | 9 | - **Agents**, which are LLMs equipped with instructions and tools 10 | - **Handoffs**, which allow agents to delegate to other agents for specific tasks 11 | - **Guardrails**, which enable the inputs to agents to be validated 12 | 13 | In combination with Python, these primitives are powerful enough to express complex relationships between tools and agents, and allow you to build real-world applications without a steep learning curve. In addition, the SDK comes with built-in **tracing** that lets you visualize and debug your agentic flows, as well as evaluate them and even fine-tune models for your application. 14 | 15 | ### Why use the Agents SDK 16 | 17 | The SDK has two driving design principles: 18 | 19 | 1. 
Enough features to be worth using, but few enough primitives to make it quick to learn. 20 | 2. Works great out of the box, but you can customize exactly what happens. 21 | 22 | Here are the main features of the SDK: 23 | 24 | - Agent loop: Built-in agent loop that handles calling tools, sending results to the LLM, and looping until the LLM is done. 25 | - Python-first: Use built-in language features to orchestrate and chain agents, rather than needing to learn new abstractions. 26 | - Handoffs: A powerful feature to coordinate and delegate between multiple agents. 27 | - Guardrails: Run input validations and checks in parallel to your agents, breaking early if the checks fail. 28 | - Function tools: Turn any Python function into a tool, with automatic schema generation and Pydantic-powered validation. 29 | - Tracing: Built-in tracing that lets you visualize, debug and monitor your workflows, as well as use the OpenAI suite of evaluation, fine-tuning and distillation tools. 30 | 31 | ### Installation 32 | 33 | ```bash 34 | pip install openai-agents 35 | ``` 36 | 37 | ### Hello world example 38 | 39 | ```python 40 | from agents import Agent, Runner 41 | 42 | agent = Agent(name="Assistant", instructions="You are a helpful assistant") 43 | 44 | result = Runner.run_sync(agent, "Write a haiku about recursion in programming.") 45 | print(result.final_output) 46 | 47 | # Code within the code, 48 | # Functions calling themselves, 49 | # Infinite loop's dance. 50 | ``` 51 | 52 | ## Quickstart 53 | 54 | ### Create a project and virtual environment 55 | 56 | ```bash 57 | mkdir my_project 58 | cd my_project 59 | python -m venv .venv 60 | source .venv/bin/activate 61 | pip install openai-agents 62 | export OPENAI_API_KEY=sk-... 63 | ``` 64 | 65 | ### Create your first agent 66 | 67 | ```python 68 | from agents import Agent 69 | 70 | agent = Agent( 71 | name="Math Tutor", 72 | instructions="You provide help with math problems. 
Explain your reasoning at each step and include examples", 73 | ) 74 | ``` 75 | 76 | ### Add a few more agents 77 | 78 | ```python 79 | from agents import Agent 80 | 81 | history_tutor_agent = Agent( 82 | name="History Tutor", 83 | handoff_description="Specialist agent for historical questions", 84 | instructions="You provide assistance with historical queries. Explain important events and context clearly.", 85 | ) 86 | 87 | math_tutor_agent = Agent( 88 | name="Math Tutor", 89 | handoff_description="Specialist agent for math questions", 90 | instructions="You provide help with math problems. Explain your reasoning at each step and include examples", 91 | ) 92 | ``` 93 | 94 | ### Define your handoffs 95 | 96 | ```python 97 | triage_agent = Agent( 98 | name="Triage Agent", 99 | instructions="You determine which agent to use based on the user's homework question", 100 | handoffs=[history_tutor_agent, math_tutor_agent] 101 | ) 102 | ``` 103 | 104 | ### Run the agent orchestration 105 | 106 | ```python 107 | from agents import Runner 108 | 109 | async def main(): 110 | result = await Runner.run(triage_agent, "What is the capital of France?") 111 | print(result.final_output) 112 | ``` 113 | 114 | ### Add a guardrail 115 | 116 | ```python 117 | from agents import GuardrailFunctionOutput, Agent, Runner 118 | from pydantic import BaseModel 119 | 120 | class HomeworkOutput(BaseModel): 121 | is_homework: bool 122 | reasoning: str 123 | 124 | guardrail_agent = Agent( 125 | name="Guardrail check", 126 | instructions="Check if the user is asking about homework.", 127 | output_type=HomeworkOutput, 128 | ) 129 | 130 | async def homework_guardrail(ctx, agent, input_data): 131 | result = await Runner.run(guardrail_agent, input_data, context=ctx.context) 132 | final_output = result.final_output_as(HomeworkOutput) 133 | return GuardrailFunctionOutput( 134 | output_info=final_output, 135 | tripwire_triggered=not final_output.is_homework, 136 | ) 137 | ``` 138 | 139 | ### Put it all 
together 140 | 141 | ```python 142 | from agents import Agent, InputGuardrail,GuardrailFunctionOutput, Runner 143 | from pydantic import BaseModel 144 | import asyncio 145 | 146 | class HomeworkOutput(BaseModel): 147 | is_homework: bool 148 | reasoning: str 149 | 150 | guardrail_agent = Agent( 151 | name="Guardrail check", 152 | instructions="Check if the user is asking about homework.", 153 | output_type=HomeworkOutput, 154 | ) 155 | 156 | math_tutor_agent = Agent( 157 | name="Math Tutor", 158 | handoff_description="Specialist agent for math questions", 159 | instructions="You provide help with math problems. Explain your reasoning at each step and include examples", 160 | ) 161 | 162 | history_tutor_agent = Agent( 163 | name="History Tutor", 164 | handoff_description="Specialist agent for historical questions", 165 | instructions="You provide assistance with historical queries. Explain important events and context clearly.", 166 | ) 167 | 168 | async def homework_guardrail(ctx, agent, input_data): 169 | result = await Runner.run(guardrail_agent, input_data, context=ctx.context) 170 | final_output = result.final_output_as(HomeworkOutput) 171 | return GuardrailFunctionOutput( 172 | output_info=final_output, 173 | tripwire_triggered=not final_output.is_homework, 174 | ) 175 | 176 | triage_agent = Agent( 177 | name="Triage Agent", 178 | instructions="You determine which agent to use based on the user's homework question", 179 | handoffs=[history_tutor_agent, math_tutor_agent], 180 | input_guardrails=[ 181 | InputGuardrail(guardrail_function=homework_guardrail), 182 | ], 183 | ) 184 | 185 | async def main(): 186 | result = await Runner.run(triage_agent, "who was the first president of the united states?") 187 | print(result.final_output) 188 | 189 | result = await Runner.run(triage_agent, "what is life") 190 | print(result.final_output) 191 | 192 | if __name__ == "__main__": 193 | asyncio.run(main()) 194 | ``` 195 | 196 | ## Agents 197 | 198 | Agents are the core 
building block in your apps. An agent is a large language model (LLM), configured with instructions and tools. 199 | 200 | ### Basic configuration 201 | 202 | The most common properties of an agent you'll configure are: 203 | 204 | - `instructions`: also known as a developer message or system prompt. 205 | - `model`: which LLM to use, and optional `model_settings` to configure model tuning parameters like temperature, top_p, etc. 206 | - `tools`: Tools that the agent can use to achieve its tasks. 207 | 208 | ```python 209 | from agents import Agent, ModelSettings, function_tool 210 | 211 | @function_tool 212 | def get_weather(city: str) -> str: 213 | return f"The weather in {city} is sunny" 214 | 215 | agent = Agent( 216 | name="Haiku agent", 217 | instructions="Always respond in haiku form", 218 | model="o3-mini", 219 | tools=[get_weather], 220 | ) 221 | ``` 222 | 223 | ### Context 224 | 225 | Agents are generic on their `context` type. Context is a dependency-injection tool: it's an object you create and pass to `Runner.run()`, that is passed to every agent, tool, handoff etc, and it serves as a grab bag of dependencies and state for the agent run. You can provide any Python object as the context. 226 | 227 | ### Output types 228 | 229 | By default, agents produce plain text (i.e. `str`) outputs. If you want the agent to produce a particular type of output, you can use the `output_type` parameter. 230 | 231 | ```python 232 | from pydantic import BaseModel 233 | 234 | class HomeworkOutput(BaseModel): 235 | is_homework: bool 236 | reasoning: str 237 | 238 | agent = Agent( 239 | name="Homework assistant", 240 | instructions="Check if the user is asking about homework.", 241 | output_type=HomeworkOutput, 242 | ) 243 | ``` 244 | 245 | 246 | 247 | ### Handoffs 248 | 249 | Handoffs are sub-agents that the agent can delegate to. You provide a list of handoffs, and the agent can choose to delegate to them if relevant. 
250 | 251 | ### Dynamic instructions 252 | 253 | In most cases, you can provide instructions when you create the agent. However, you can also provide dynamic instructions via a function. 254 | 255 | ### Lifecycle events (hooks) 256 | 257 | Sometimes, you want to observe the lifecycle of an agent. For example, you may want to log events, or pre-fetch data when certain events occur. 258 | 259 | ### Guardrails 260 | 261 | Guardrails allow you to run checks/validations on user input, in parallel to the agent running. 262 | 263 | ### Cloning/copying agents 264 | 265 | By using the `clone()` method on an agent, you can duplicate an Agent, and optionally change any properties you like. 266 | 267 | ## Handoffs 268 | 269 | Handoffs allow an agent to delegate tasks to another agent. This is particularly useful in scenarios where different agents specialize in distinct areas. 270 | 271 | ### Creating a handoff 272 | 273 | All agents have a `handoffs` param, which can either take an `Agent` directly, or a `Handoff` object that customizes the Handoff. 274 | 275 | ### Basic Usage 276 | 277 | ```python 278 | from agents import Agent, handoff 279 | 280 | billing_agent = Agent(name="Billing agent") 281 | refund_agent = Agent(name="Refund agent") 282 | 283 | triage_agent = Agent(name="Triage agent", handoffs=[billing_agent, handoff(refund_agent)]) 284 | ``` 285 | 286 | ### Customizing handoffs 287 | 288 | The `handoff()` function lets you customize various aspects like tool name, description, callbacks, and input filtering. 289 | 290 | ### Handoff inputs 291 | 292 | You can have the LLM provide data when calling a handoff, which is useful for logging or other purposes. 293 | 294 | ### Input filters 295 | 296 | When a handoff occurs, the new agent sees the entire previous conversation history by default. Input filters allow you to modify this behavior. 
297 | 298 | ### Recommended prompts 299 | 300 | To ensure LLMs understand handoffs properly, include information about handoffs in your agent instructions. 301 | 302 | ## Tools 303 | 304 | Tools let agents take actions: things like fetching data, running code, calling external APIs, and even using a computer. There are three classes of tools in the Agent SDK: 305 | 306 | - Hosted tools: run on LLM servers alongside the AI models 307 | - Function calling: allow you to use any Python function as a tool 308 | - Agents as tools: allow you to use an agent as a tool 309 | 310 | ### Hosted tools 311 | 312 | OpenAI offers built-in tools like `WebSearchTool`, `FileSearchTool`, and `ComputerTool`. 313 | 314 | ### Function tools 315 | 316 | You can use any Python function as a tool. The Agents SDK will automatically set up the tool with appropriate name, description and schema. 317 | 318 | ```python 319 | import json 320 | from typing_extensions import TypedDict 321 | 322 | from agents import Agent, FunctionTool, RunContextWrapper, function_tool 323 | 324 | class Location(TypedDict): 325 | lat: float 326 | long: float 327 | 328 | @function_tool 329 | async def fetch_weather(location: Location) -> str: 330 | """Fetch the weather for a given location. 331 | 332 | Args: 333 | location: The location to fetch the weather for. 334 | """ 335 | # In real life, we'd fetch the weather from a weather API 336 | return "sunny" 337 | 338 | @function_tool(name_override="fetch_data") 339 | def read_file(ctx: RunContextWrapper[Any], path: str, directory: str | None = None) -> str: 340 | """Read the contents of a file.""" 341 | # In real life, we'd read the file from the file system 342 | return "" 343 | ``` 344 | 345 | ### Agents as tools 346 | 347 | In some workflows, you may want a central agent to orchestrate a network of specialized agents, instead of handing off control. 
348 | 349 | ### Handling errors in function tools 350 | 351 | You can customize error handling for function tools using the `failure_error_function` parameter. 352 | 353 | ## Results 354 | 355 | When you call the `Runner.run` methods, you get either a `RunResult` or `RunResultStreaming` object containing information about the agent run. 356 | 357 | ### Final output 358 | 359 | The `final_output` property contains the final output of the last agent that ran. 360 | 361 | ### Inputs for the next turn 362 | 363 | You can use `result.to_input_list()` to turn the result into an input list that concatenates the original input you provided with items generated during the agent run. 364 | 365 | ### Last agent 366 | 367 | The `last_agent` property contains the last agent that ran, which can be useful for subsequent user interactions. 368 | 369 | ### New items 370 | 371 | The `new_items` property contains the new items generated during the run, including messages, tool calls, handoffs, etc. 372 | 373 | ## Running agents 374 | 375 | You can run agents via the `Runner` class with three options: 376 | 377 | 1. `Runner.run()` - async method returning a `RunResult` 378 | 2. `Runner.run_sync()` - sync wrapper around `run()` 379 | 3. `Runner.run_streamed()` - async method that streams LLM events as they occur 380 | 381 | ### The agent loop 382 | 383 | When you use the run method, the runner executes a loop: 384 | 385 | 1. Call the LLM for the current agent with the current input 386 | 2. Process the LLM output: 387 | - If it's a final output, end the loop and return the result 388 | - If it's a handoff, update the current agent and input, and re-run the loop 389 | - If it's tool calls, run the tools, append results, and re-run the loop 390 | 3. If max_turns is exceeded, raise an exception 391 | 392 | ### Run config 393 | 394 | The `run_config` parameter lets you configure various global settings for the agent run. 
395 | 396 | ### Conversations/chat threads 397 | 398 | Each run represents a single logical turn in a chat conversation. You can use `RunResultBase.to_input_list()` to get inputs for the next turn. 399 | 400 | ## Tracing 401 | 402 | The Agents SDK includes built-in tracing, collecting a comprehensive record of events during an agent run: LLM generations, tool calls, handoffs, guardrails, and custom events. 403 | 404 | ### Traces and spans 405 | 406 | - **Traces** represent a single end-to-end operation of a "workflow" 407 | - **Spans** represent operations that have a start and end time 408 | 409 | ### Default tracing 410 | 411 | By default, the SDK traces the entire run, each agent execution, LLM generations, function tool calls, guardrails, and handoffs. 412 | 413 | ### Higher level traces 414 | 415 | Sometimes, you might want multiple calls to `run()` to be part of a single trace: 416 | 417 | ```python 418 | from agents import Agent, Runner, trace 419 | 420 | async def main(): 421 | agent = Agent(name="Joke generator", instructions="Tell funny jokes.") 422 | 423 | with trace("Joke workflow"): 424 | first_result = await Runner.run(agent, "Tell me a joke") 425 | second_result = await Runner.run(agent, f"Rate this joke: {first_result.final_output}") 426 | print(f"Joke: {first_result.final_output}") 427 | print(f"Rating: {second_result.final_output}") 428 | ``` 429 | 430 | ### Custom trace processors 431 | 432 | You can customize tracing to send traces to alternative or additional backends: 433 | 434 | 1. `add_trace_processor()` adds an additional processor alongside the default one 435 | 2. `set_trace_processors()` replaces the default processor entirely 436 | 437 | ## Context Management 438 | 439 | Context is an overloaded term with two main aspects: 440 | 441 | 1. **Local context**: Data and dependencies available to your code during tool function execution, callbacks, lifecycle hooks, etc. 442 | 2. 
**LLM context**: Data the LLM sees when generating a response 443 | 444 | ### Local context 445 | 446 | This is represented via the `RunContextWrapper` class and allows you to pass any Python object to be available throughout the agent run: 447 | 448 | ```python 449 | import asyncio 450 | from dataclasses import dataclass 451 | 452 | from agents import Agent, RunContextWrapper, Runner, function_tool 453 | 454 | @dataclass 455 | class UserInfo: 456 | name: str 457 | uid: int 458 | 459 | @function_tool 460 | async def fetch_user_age(wrapper: RunContextWrapper[UserInfo]) -> str: 461 | return f"User {wrapper.context.name} is 47 years old" 462 | 463 | async def main(): 464 | user_info = UserInfo(name="John", uid=123) 465 | 466 | agent = Agent[UserInfo]( 467 | name="Assistant", 468 | tools=[fetch_user_age], 469 | ) 470 | 471 | result = await Runner.run( 472 | starting_agent=agent, 473 | input="What is the age of the user?", 474 | context=user_info, 475 | ) 476 | 477 | print(result.final_output) 478 | # The user John is 47 years old. 479 | ``` 480 | 481 | ### Agent/LLM context 482 | 483 | When an LLM is called, it can only see data from the conversation history. There are several ways to make data available: 484 | 485 | 1. Add it to the Agent `instructions` (system prompt) 486 | 2. Add it to the `input` when calling `Runner.run` 487 | 3. Expose it via function tools for on-demand access 488 | 4. Use retrieval or web search tools to fetch relevant contextual data 489 | 490 | ## Model Context Protocol (MCP) 491 | 492 | The [Model Context Protocol](https://modelcontextprotocol.io/introduction) (aka MCP) is a way to provide tools and context to the LLM. MCP provides a standardized way to connect AI models to different data sources and tools. 493 | 494 | ### MCP Servers 495 | 496 | The Agents SDK supports two types of MCP servers: 497 | 498 | 1. **stdio servers** run as a subprocess of your application (locally) 499 | 2. 
**HTTP over SSE servers** run remotely (connect via URL) 500 | 501 | You can use `MCPServerStdio` and `MCPServerSse` classes to connect to these servers: 502 | 503 | ```python 504 | from agents.mcp.server import MCPServerStdio, MCPServerSse 505 | 506 | # Example using the filesystem MCP server 507 | async with MCPServerStdio( 508 | params={ 509 | "command": "npx", 510 | "args": ["-y", "@modelcontextprotocol/server-filesystem", samples_dir], 511 | } 512 | ) as server: 513 | tools = await server.list_tools() 514 | ``` 515 | 516 | ### Using MCP Servers with Agents 517 | 518 | MCP servers can be added directly to Agents: 519 | 520 | ```python 521 | agent = Agent( 522 | name="Assistant", 523 | instructions="Use the tools to achieve the task", 524 | mcp_servers=[mcp_server_1, mcp_server_2] 525 | ) 526 | ``` 527 | 528 | When the Agent runs, it will automatically call `list_tools()` on all MCP servers, making the LLM aware of all available tools. When the LLM calls a tool from an MCP server, the SDK handles calling `call_tool()` on that server. 529 | 530 | ### Caching Tool Lists 531 | 532 | For better performance, especially with remote servers, you can cache the list of tools: 533 | 534 | ```python 535 | mcp_server = MCPServerSse( 536 | url="https://example.com/mcp", 537 | cache_tools_list=True # Enable caching 538 | ) 539 | 540 | # Later, if needed, clear the cache 541 | mcp_server.invalidate_tools_cache() 542 | ``` 543 | 544 | Only use caching when you're certain the tool list will not change during execution. 545 | 546 | ### Tracing MCP Operations 547 | 548 | The Agents SDK's tracing system automatically captures MCP operations, including: 549 | 550 | 1. Calls to MCP servers to list tools 551 | 2. MCP-related information on function calls 552 | 553 | This makes it easier to debug and analyze your agent's interactions with MCP tools. 
554 | 555 | ### Use a different LLM 556 | 557 | ```python 558 | import asyncio 559 | import os 560 | 561 | from openai import AsyncOpenAI 562 | 563 | from agents import Agent, OpenAIChatCompletionsModel, Runner, function_tool, set_tracing_disabled 564 | 565 | BASE_URL = os.getenv("EXAMPLE_BASE_URL") or "" 566 | API_KEY = os.getenv("EXAMPLE_API_KEY") or "" 567 | MODEL_NAME = os.getenv("EXAMPLE_MODEL_NAME") or "" 568 | 569 | if not BASE_URL or not API_KEY or not MODEL_NAME: 570 | raise ValueError( 571 | "Please set EXAMPLE_BASE_URL, EXAMPLE_API_KEY, EXAMPLE_MODEL_NAME via env var or code." 572 | ) 573 | 574 | """This example uses a custom provider for a specific agent. Steps: 575 | 1. Create a custom OpenAI client. 576 | 2. Create a `Model` that uses the custom client. 577 | 3. Set the `model` on the Agent. 578 | 579 | Note that in this example, we disable tracing under the assumption that you don't have an API key 580 | from platform.openai.com. If you do have one, you can either set the `OPENAI_API_KEY` env var 581 | or call set_tracing_export_api_key() to set a tracing specific key. 582 | """ 583 | client = AsyncOpenAI(base_url=BASE_URL, api_key=API_KEY) 584 | set_tracing_disabled(disabled=True) 585 | 586 | # An alternate approach that would also work: 587 | # PROVIDER = OpenAIProvider(openai_client=client) 588 | # agent = Agent(..., model="some-custom-model") 589 | # Runner.run(agent, ..., run_config=RunConfig(model_provider=PROVIDER)) 590 | 591 | 592 | @function_tool 593 | def get_weather(city: str): 594 | print(f"[debug] getting weather for {city}") 595 | return f"The weather in {city} is sunny." 
596 | 597 | 598 | async def main(): 599 | # This agent will use the custom LLM provider 600 | agent = Agent( 601 | name="Assistant", 602 | instructions="You only respond in haikus.", 603 | model=OpenAIChatCompletionsModel(model=MODEL_NAME, openai_client=client), 604 | tools=[get_weather], 605 | ) 606 | 607 | result = await Runner.run(agent, "What's the weather in Tokyo?") 608 | print(result.final_output) 609 | 610 | 611 | if __name__ == "__main__": 612 | asyncio.run(main()) 613 | ``` 614 | 615 | ## Model Settings 616 | 617 | ```md 618 | Model settings 619 | ModelSettings dataclass 620 | Settings to use when calling an LLM. 621 | 622 | This class holds optional model configuration parameters (e.g. temperature, top_p, penalties, truncation, etc.). 623 | 624 | Not all models/providers support all of these parameters, so please check the API documentation for the specific model and provider you are using. 625 | 626 | Source code in src/agents/model_settings.py 627 | 628 | @dataclass 629 | class ModelSettings: 630 | """Settings to use when calling an LLM. 631 | 632 | This class holds optional model configuration parameters (e.g. temperature, 633 | top_p, penalties, truncation, etc.). 634 | 635 | Not all models/providers support all of these parameters, so please check the API documentation 636 | for the specific model and provider you are using. 
637 | """ 638 | 639 | temperature: float | None = None 640 | """The temperature to use when calling the model.""" 641 | 642 | top_p: float | None = None 643 | """The top_p to use when calling the model.""" 644 | 645 | frequency_penalty: float | None = None 646 | """The frequency penalty to use when calling the model.""" 647 | 648 | presence_penalty: float | None = None 649 | """The presence penalty to use when calling the model.""" 650 | 651 | tool_choice: Literal["auto", "required", "none"] | str | None = None 652 | """The tool choice to use when calling the model.""" 653 | 654 | parallel_tool_calls: bool | None = None 655 | """Whether to use parallel tool calls when calling the model. 656 | Defaults to False if not provided.""" 657 | 658 | truncation: Literal["auto", "disabled"] | None = None 659 | """The truncation strategy to use when calling the model.""" 660 | 661 | max_tokens: int | None = None 662 | """The maximum number of output tokens to generate.""" 663 | 664 | reasoning: Reasoning | None = None 665 | """Configuration options for 666 | [reasoning models](https://platform.openai.com/docs/guides/reasoning). 667 | """ 668 | 669 | metadata: dict[str, str] | None = None 670 | """Metadata to include with the model response call.""" 671 | 672 | store: bool | None = None 673 | """Whether to store the generated model response for later retrieval. 674 | Defaults to True if not provided.""" 675 | 676 | include_usage: bool | None = None 677 | """Whether to include usage chunk. 
678 | Defaults to True if not provided.""" 679 | 680 | def resolve(self, override: ModelSettings | None) -> ModelSettings: 681 | """Produce a new ModelSettings by overlaying any non-None values from the 682 | override on top of this instance.""" 683 | if override is None: 684 | return self 685 | 686 | changes = { 687 | field.name: getattr(override, field.name) 688 | for field in fields(self) 689 | if getattr(override, field.name) is not None 690 | } 691 | return replace(self, **changes) 692 | temperature class-attribute instance-attribute 693 | 694 | temperature: float | None = None 695 | The temperature to use when calling the model. 696 | 697 | top_p class-attribute instance-attribute 698 | 699 | top_p: float | None = None 700 | The top_p to use when calling the model. 701 | 702 | frequency_penalty class-attribute instance-attribute 703 | 704 | frequency_penalty: float | None = None 705 | The frequency penalty to use when calling the model. 706 | 707 | presence_penalty class-attribute instance-attribute 708 | 709 | presence_penalty: float | None = None 710 | The presence penalty to use when calling the model. 711 | 712 | tool_choice class-attribute instance-attribute 713 | 714 | tool_choice: ( 715 | Literal["auto", "required", "none"] | str | None 716 | ) = None 717 | The tool choice to use when calling the model. 718 | 719 | parallel_tool_calls class-attribute instance-attribute 720 | 721 | parallel_tool_calls: bool | None = None 722 | Whether to use parallel tool calls when calling the model. Defaults to False if not provided. 723 | 724 | truncation class-attribute instance-attribute 725 | 726 | truncation: Literal['auto', 'disabled'] | None = None 727 | The truncation strategy to use when calling the model. 728 | 729 | max_tokens class-attribute instance-attribute 730 | 731 | max_tokens: int | None = None 732 | The maximum number of output tokens to generate. 
733 | 734 | reasoning class-attribute instance-attribute 735 | 736 | reasoning: Reasoning | None = None 737 | Configuration options for reasoning models. 738 | 739 | metadata class-attribute instance-attribute 740 | 741 | metadata: dict[str, str] | None = None 742 | Metadata to include with the model response call. 743 | 744 | store class-attribute instance-attribute 745 | 746 | store: bool | None = None 747 | Whether to store the generated model response for later retrieval. Defaults to True if not provided. 748 | 749 | include_usage class-attribute instance-attribute 750 | 751 | include_usage: bool | None = None 752 | Whether to include usage chunk. Defaults to True if not provided. 753 | 754 | resolve 755 | 756 | resolve(override: ModelSettings | None) -> ModelSettings 757 | Produce a new ModelSettings by overlaying any non-None values from the override on top of this instance. 758 | 759 | Source code in src/agents/model_settings.py 760 | ``` 761 | 762 | ## Dynamic System Prompts 763 | 764 | ``` 765 | import asyncio 766 | import random 767 | from typing import Literal 768 | 769 | from agents import Agent, RunContextWrapper, Runner 770 | 771 | 772 | class CustomContext: 773 | def __init__(self, style: Literal["haiku", "pirate", "robot"]): 774 | self.style = style 775 | 776 | 777 | def custom_instructions( 778 | run_context: RunContextWrapper[CustomContext], agent: Agent[CustomContext] 779 | ) -> str: 780 | context = run_context.context 781 | if context.style == "haiku": 782 | return "Only respond in haikus." 783 | elif context.style == "pirate": 784 | return "Respond as a pirate." 785 | else: 786 | return "Respond as a robot and say 'beep boop' a lot." 
787 | 788 | 789 | agent = Agent( 790 | name="Chat agent", 791 | instructions=custom_instructions, 792 | ) 793 | 794 | 795 | async def main(): 796 | choice: Literal["haiku", "pirate", "robot"] = random.choice(["haiku", "pirate", "robot"]) 797 | context = CustomContext(style=choice) 798 | print(f"Using style: {choice}\n") 799 | 800 | user_message = "Tell me a joke." 801 | print(f"User: {user_message}") 802 | result = await Runner.run(agent, user_message, context=context) 803 | 804 | print(f"Assistant: {result.final_output}") 805 | 806 | 807 | if __name__ == "__main__": 808 | asyncio.run(main()) 809 | 810 | """ 811 | $ python examples/basic/dynamic_system_prompt.py 812 | 813 | Using style: haiku 814 | 815 | User: Tell me a joke. 816 | Assistant: Why don't eggs tell jokes? 817 | They might crack each other's shells, 818 | leaving yolk on face. 819 | 820 | $ python examples/basic/dynamic_system_prompt.py 821 | Using style: robot 822 | 823 | User: Tell me a joke. 824 | Assistant: Beep boop! Why was the robot so bad at soccer? Beep boop... because it kept kicking up a debug! Beep boop! 825 | 826 | $ python examples/basic/dynamic_system_prompt.py 827 | Using style: pirate 828 | 829 | User: Tell me a joke. 830 | Assistant: Why did the pirate go to school? 831 | 832 | To improve his arrr-ticulation! Har har har! 
🏴‍☠️ 833 | """ 834 | ``` 835 | 836 | ## Life cycle events (hooks) 837 | 838 | ``` 839 | import asyncio 840 | import random 841 | from typing import Any 842 | 843 | from pydantic import BaseModel 844 | 845 | from agents import Agent, RunContextWrapper, RunHooks, Runner, Tool, Usage, function_tool 846 | 847 | 848 | class ExampleHooks(RunHooks): 849 | def __init__(self): 850 | self.event_counter = 0 851 | 852 | def _usage_to_str(self, usage: Usage) -> str: 853 | return f"{usage.requests} requests, {usage.input_tokens} input tokens, {usage.output_tokens} output tokens, {usage.total_tokens} total tokens" 854 | 855 | async def on_agent_start(self, context: RunContextWrapper, agent: Agent) -> None: 856 | self.event_counter += 1 857 | print( 858 | f"### {self.event_counter}: Agent {agent.name} started. Usage: {self._usage_to_str(context.usage)}" 859 | ) 860 | 861 | async def on_agent_end(self, context: RunContextWrapper, agent: Agent, output: Any) -> None: 862 | self.event_counter += 1 863 | print( 864 | f"### {self.event_counter}: Agent {agent.name} ended with output {output}. Usage: {self._usage_to_str(context.usage)}" 865 | ) 866 | 867 | async def on_tool_start(self, context: RunContextWrapper, agent: Agent, tool: Tool) -> None: 868 | self.event_counter += 1 869 | print( 870 | f"### {self.event_counter}: Tool {tool.name} started. Usage: {self._usage_to_str(context.usage)}" 871 | ) 872 | 873 | async def on_tool_end( 874 | self, context: RunContextWrapper, agent: Agent, tool: Tool, result: str 875 | ) -> None: 876 | self.event_counter += 1 877 | print( 878 | f"### {self.event_counter}: Tool {tool.name} ended with result {result}. Usage: {self._usage_to_str(context.usage)}" 879 | ) 880 | 881 | async def on_handoff( 882 | self, context: RunContextWrapper, from_agent: Agent, to_agent: Agent 883 | ) -> None: 884 | self.event_counter += 1 885 | print( 886 | f"### {self.event_counter}: Handoff from {from_agent.name} to {to_agent.name}. 
Usage: {self._usage_to_str(context.usage)}" 887 | ) 888 | 889 | 890 | hooks = ExampleHooks() 891 | 892 | ### 893 | 894 | 895 | @function_tool 896 | def random_number(max: int) -> int: 897 | """Generate a random number up to the provided max.""" 898 | return random.randint(0, max) 899 | 900 | 901 | @function_tool 902 | def multiply_by_two(x: int) -> int: 903 | """Return x times two.""" 904 | return x * 2 905 | 906 | 907 | class FinalResult(BaseModel): 908 | number: int 909 | 910 | 911 | multiply_agent = Agent( 912 | name="Multiply Agent", 913 | instructions="Multiply the number by 2 and then return the final result.", 914 | tools=[multiply_by_two], 915 | output_type=FinalResult, 916 | ) 917 | 918 | start_agent = Agent( 919 | name="Start Agent", 920 | instructions="Generate a random number. If it's even, stop. If it's odd, hand off to the multiplier agent.", 921 | tools=[random_number], 922 | output_type=FinalResult, 923 | handoffs=[multiply_agent], 924 | ) 925 | 926 | 927 | async def main() -> None: 928 | user_input = input("Enter a max number: ") 929 | await Runner.run( 930 | start_agent, 931 | hooks=hooks, 932 | input=f"Generate a random number between 0 and {user_input}.", 933 | ) 934 | 935 | print("Done!") 936 | 937 | 938 | if __name__ == "__main__": 939 | asyncio.run(main()) 940 | """ 941 | $ python examples/basic/lifecycle_example.py 942 | 943 | Enter a max number: 250 944 | ### 1: Agent Start Agent started. Usage: 0 requests, 0 input tokens, 0 output tokens, 0 total tokens 945 | ### 2: Tool random_number started. Usage: 1 requests, 148 input tokens, 15 output tokens, 163 total tokens 946 | ### 3: Tool random_number ended with result 101. Usage: 1 requests, 148 input tokens, 15 output tokens, 163 total tokens 947 | ### 4: Agent Start Agent started. Usage: 1 requests, 148 input tokens, 15 output tokens, 163 total tokens 948 | ### 5: Handoff from Start Agent to Multiply Agent. 
Usage: 2 requests, 323 input tokens, 30 output tokens, 353 total tokens 949 | ### 6: Agent Multiply Agent started. Usage: 2 requests, 323 input tokens, 30 output tokens, 353 total tokens 950 | ### 7: Tool multiply_by_two started. Usage: 3 requests, 504 input tokens, 46 output tokens, 550 total tokens 951 | ### 8: Tool multiply_by_two ended with result 202. Usage: 3 requests, 504 input tokens, 46 output tokens, 550 total tokens 952 | ### 9: Agent Multiply Agent started. Usage: 3 requests, 504 input tokens, 46 output tokens, 550 total tokens 953 | ### 10: Agent Multiply Agent ended with output number=202. Usage: 4 requests, 714 input tokens, 63 output tokens, 777 total tokens 954 | Done! 955 | 956 | """ 957 | ``` 958 | 959 | ### Lifecycle events details 960 | 961 | ```md 962 | Lifecycle 963 | RunHooks 964 | Bases: Generic[TContext] 965 | 966 | A class that receives callbacks on various lifecycle events in an agent run. Subclass and override the methods you need. 967 | 968 | on_agent_start async 969 | 970 | on_agent_start( 971 | context: RunContextWrapper[TContext], 972 | agent: Agent[TContext], 973 | ) -> None 974 | Called before the agent is invoked. Called each time the current agent changes. 975 | 976 | on_agent_end async 977 | 978 | on_agent_end( 979 | context: RunContextWrapper[TContext], 980 | agent: Agent[TContext], 981 | output: Any, 982 | ) -> None 983 | Called when the agent produces a final output. 984 | 985 | on_handoff async 986 | 987 | on_handoff( 988 | context: RunContextWrapper[TContext], 989 | from_agent: Agent[TContext], 990 | to_agent: Agent[TContext], 991 | ) -> None 992 | Called when a handoff occurs. 993 | 994 | on_tool_start async 995 | 996 | on_tool_start( 997 | context: RunContextWrapper[TContext], 998 | agent: Agent[TContext], 999 | tool: Tool, 1000 | ) -> None 1001 | Called before a tool is invoked. 
1002 | 1003 | on_tool_end async 1004 | 1005 | on_tool_end( 1006 | context: RunContextWrapper[TContext], 1007 | agent: Agent[TContext], 1008 | tool: Tool, 1009 | result: str, 1010 | ) -> None 1011 | Called after a tool is invoked. 1012 | 1013 | AgentHooks 1014 | Bases: Generic[TContext] 1015 | 1016 | A class that receives callbacks on various lifecycle events for a specific agent. You can set this on agent.hooks to receive events for that specific agent. 1017 | 1018 | Subclass and override the methods you need. 1019 | 1020 | on_start async 1021 | 1022 | on_start( 1023 | context: RunContextWrapper[TContext], 1024 | agent: Agent[TContext], 1025 | ) -> None 1026 | Called before the agent is invoked. Called each time the running agent is changed to this agent. 1027 | 1028 | on_end async 1029 | 1030 | on_end( 1031 | context: RunContextWrapper[TContext], 1032 | agent: Agent[TContext], 1033 | output: Any, 1034 | ) -> None 1035 | Called when the agent produces a final output. 1036 | 1037 | on_handoff async 1038 | 1039 | on_handoff( 1040 | context: RunContextWrapper[TContext], 1041 | agent: Agent[TContext], 1042 | source: Agent[TContext], 1043 | ) -> None 1044 | Called when the agent is being handed off to. The source is the agent that is handing off to this agent. 1045 | 1046 | on_tool_start async 1047 | 1048 | on_tool_start( 1049 | context: RunContextWrapper[TContext], 1050 | agent: Agent[TContext], 1051 | tool: Tool, 1052 | ) -> None 1053 | Called before a tool is invoked. 1054 | 1055 | on_tool_end async 1056 | 1057 | on_tool_end( 1058 | context: RunContextWrapper[TContext], 1059 | agent: Agent[TContext], 1060 | tool: Tool, 1061 | result: str, 1062 | ) -> None 1063 | Called after a tool is invoked. 
1064 | ``` 1065 | 1066 | ## Model Context Protocol Python Example 1067 | 1068 | ```python 1069 | import asyncio 1070 | import os 1071 | import shutil 1072 | 1073 | from agents import Agent, Runner, gen_trace_id, trace 1074 | from agents.mcp import MCPServer, MCPServerStdio 1075 | 1076 | 1077 | async def run(mcp_server: MCPServer): 1078 | agent = Agent( 1079 | name="Assistant", 1080 | instructions="Use the tools to read the filesystem and answer questions based on those files.", 1081 | mcp_servers=[mcp_server], 1082 | ) 1083 | 1084 | # List the files it can read 1085 | message = "Read the files and list them." 1086 | print(f"Running: {message}") 1087 | result = await Runner.run(starting_agent=agent, input=message) 1088 | print(result.final_output) 1089 | 1090 | # Ask about books 1091 | message = "What is my #1 favorite book?" 1092 | print(f"\n\nRunning: {message}") 1093 | result = await Runner.run(starting_agent=agent, input=message) 1094 | print(result.final_output) 1095 | 1096 | # Ask a question that reads then reasons. 1097 | message = "Look at my favorite songs. Suggest one new song that I might like." 
1098 | print(f"\n\nRunning: {message}") 1099 | result = await Runner.run(starting_agent=agent, input=message) 1100 | print(result.final_output) 1101 | 1102 | 1103 | async def main(): 1104 | current_dir = os.path.dirname(os.path.abspath(__file__)) 1105 | samples_dir = os.path.join(current_dir, "sample_files") 1106 | 1107 | async with MCPServerStdio( 1108 | name="Filesystem Server, via npx", 1109 | params={ 1110 | "command": "npx", 1111 | "args": ["-y", "@modelcontextprotocol/server-filesystem", samples_dir], 1112 | }, 1113 | ) as server: 1114 | trace_id = gen_trace_id() 1115 | with trace(workflow_name="MCP Filesystem Example", trace_id=trace_id): 1116 | print(f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}\n") 1117 | await run(server) 1118 | 1119 | 1120 | if __name__ == "__main__": 1121 | # Let's make sure the user has npx installed 1122 | if not shutil.which("npx"): 1123 | raise RuntimeError("npx is not installed. Please install it with `npm install -g npx`.") 1124 | 1125 | asyncio.run(main()) 1126 | ``` 1127 | -------------------------------------------------------------------------------- /ai_docs/uv-single-file-scripts.md: -------------------------------------------------------------------------------- 1 | # Running scripts with UV 2 | 3 | A Python script is a file intended for standalone execution, e.g., with `python