.
├── deep_research
│   ├── langgraph.json
│   ├── .env.example
│   ├── research_agent
│   │   ├── __init__.py
│   │   ├── tools.py
│   │   └── prompts.py
│   ├── pyproject.toml
│   ├── agent.py
│   ├── utils.py
│   └── README.md
├── .gitignore
└── README.md
/deep_research/langgraph.json:
--------------------------------------------------------------------------------
1 | {
2 | "dependencies": ["."],
3 | "graphs": {
4 | "research": "./agent.py:agent"
5 | },
6 | "env": ".env"
7 | }
--------------------------------------------------------------------------------
/deep_research/.env.example:
--------------------------------------------------------------------------------
1 | # API Keys for Deep Research Agent Example
2 | # Copy this file to .env and fill in your actual API keys
3 |
4 | # Anthropic API Key (for Claude Sonnet 4)
5 | ANTHROPIC_API_KEY=your_anthropic_api_key_here
6 |
7 | # OpenAI API Key (for GPT-4o-mini summarization)
8 | OPENAI_API_KEY=your_openai_api_key_here
9 |
10 | # Tavily API Key (for web search)
TAVILY_API_KEY=your_tavily_api_key_here

# Google API Key (required only if using the Gemini model - see README)
GOOGLE_API_KEY=your_google_api_key_here

13 | # LangSmith API Key (required for LangGraph local server)
14 | # Get your key at: https://smith.langchain.com/settings
15 | LANGSMITH_API_KEY=lsv2_pt_your_api_key_here
16 |
--------------------------------------------------------------------------------
/deep_research/research_agent/__init__.py:
--------------------------------------------------------------------------------
1 | """Deep Research Agent Example.
2 |
3 | This module demonstrates building a research agent using the deepagents package
4 | with custom tools for web search and strategic thinking.
5 | """
6 |
7 | from research_agent.prompts import (
8 | RESEARCHER_INSTRUCTIONS,
9 | RESEARCH_WORKFLOW_INSTRUCTIONS,
10 | SUBAGENT_DELEGATION_INSTRUCTIONS,
11 | )
12 | from research_agent.tools import tavily_search, think_tool
13 |
14 | __all__ = [
15 | "tavily_search",
16 | "think_tool",
17 | "RESEARCHER_INSTRUCTIONS",
18 | "RESEARCH_WORKFLOW_INSTRUCTIONS",
19 | "SUBAGENT_DELEGATION_INSTRUCTIONS",
20 | ]
21 |
--------------------------------------------------------------------------------
/deep_research/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "deep-research-example"
3 | version = "0.1.0"
4 | description = "Deep research agent example using deepagents package"
5 | requires-python = ">=3.11"
6 | dependencies = [
7 | "langchain-openai>=1.0.2",
8 | "langchain-anthropic>=1.0.3",
9 | "langchain_tavily>=0.2.13",
10 | "pydantic>=2.0.0",
11 | "rich>=14.0.0",
12 | "jupyter>=1.0.0",
13 | "ipykernel>=6.20.0",
14 | "tavily-python>=0.5.0",
15 | "httpx>=0.28.1",
16 | "markdownify>=1.2.0",
17 | "deepagents>=0.2.6",
18 | "python-dotenv>=1.0.0",
19 | "langgraph-cli[inmem]>=0.1.55",
20 | "langchain-google-genai>=3.1.0",
21 | ]
22 |
23 | [project.optional-dependencies]
24 | dev = [
25 | "mypy>=1.11.1",
26 | "ruff>=0.6.1",
27 | ]
28 |
29 | [build-system]
30 | requires = ["setuptools>=73.0.0", "wheel"]
31 | build-backend = "setuptools.build_meta"
32 |
33 | [tool.setuptools]
34 | packages = ["research_agent"]
35 |
36 | [tool.setuptools.package-data]
37 | "*" = ["py.typed"]
38 |
39 | [tool.ruff]
40 | lint.select = [
41 | "E", # pycodestyle
42 | "F", # pyflakes
43 | "I", # isort
44 | "D", # pydocstyle
45 | "D401", # First line should be in imperative mood
46 | "T201",
47 | "UP",
48 | ]
49 | lint.ignore = [
50 | "UP006",
51 | "UP007",
52 | "UP035",
53 | "D417",
54 | "E501",
55 | ]
56 |
57 | [tool.ruff.lint.per-file-ignores]
58 | "tests/*" = ["D", "UP"]
59 |
60 | [tool.ruff.lint.pydocstyle]
61 | convention = "google"
--------------------------------------------------------------------------------
/deep_research/agent.py:
--------------------------------------------------------------------------------
"""Research Agent - Standalone script for LangGraph deployment.

This module creates a deep research agent with custom tools and prompts
for conducting web research with strategic thinking and context management.
"""

from datetime import datetime

from langchain.chat_models import init_chat_model
from langchain_google_genai import ChatGoogleGenerativeAI
from deepagents import create_deep_agent

from research_agent.prompts import (
    RESEARCHER_INSTRUCTIONS,
    RESEARCH_WORKFLOW_INSTRUCTIONS,
    SUBAGENT_DELEGATION_INSTRUCTIONS,
)
from research_agent.tools import tavily_search, think_tool

# Resource limits injected into the delegation instructions below.
max_concurrent_research_units = 3
max_researcher_iterations = 3

# Today's date, formatted for the researcher sub-agent prompt.
current_date = datetime.now().strftime("%Y-%m-%d")

# Orchestrator system prompt: workflow rules plus delegation rules, joined
# with a visual divider. RESEARCHER_INSTRUCTIONS is reserved for sub-agents.
INSTRUCTIONS = "\n\n".join(
    [
        RESEARCH_WORKFLOW_INSTRUCTIONS,
        "=" * 80,
        SUBAGENT_DELEGATION_INSTRUCTIONS.format(
            max_concurrent_research_units=max_concurrent_research_units,
            max_researcher_iterations=max_researcher_iterations,
        ),
    ]
)

# Sub-agent that performs the actual web research, one topic per delegation.
research_sub_agent = {
    "name": "research-agent",
    "description": "Delegate research to the sub-agent researcher. Only give this researcher one topic at a time.",
    "system_prompt": RESEARCHER_INSTRUCTIONS.format(date=current_date),
    "tools": [tavily_search, think_tool],
}

# Alternative model: Gemini 3 (ChatGoogleGenerativeAI import kept for this).
# model = ChatGoogleGenerativeAI(model="gemini-3-pro-preview", temperature=0.0)

# Default model: Claude Sonnet 4.5.
model = init_chat_model(model="anthropic:claude-sonnet-4-5-20250929", temperature=0.0)

# The deployable graph object referenced by langgraph.json ("./agent.py:agent").
agent = create_deep_agent(
    model=model,
    tools=[tavily_search, think_tool],
    system_prompt=INSTRUCTIONS,
    subagents=[research_sub_agent],
)
60 |
--------------------------------------------------------------------------------
/deep_research/utils.py:
--------------------------------------------------------------------------------
1 | """Utility functions for displaying messages and prompts in Jupyter notebooks."""
2 |
3 | import json
4 |
5 | from rich.console import Console
6 | from rich.panel import Panel
7 | from rich.text import Text
8 |
9 | console = Console()
10 |
11 |
def format_message_content(message):
    """Convert message content to a displayable string.

    Handles plain-string content, Anthropic-style content block lists
    (``text`` and ``tool_use`` dicts, or bare strings), and OpenAI-style
    ``tool_calls`` attached directly to the message object.

    Args:
        message: A LangChain-style message with a ``content`` attribute
            and, optionally, a ``tool_calls`` attribute.

    Returns:
        A newline-joined string rendering of the message.
    """
    parts = []
    tool_calls_processed = False

    # Handle main content
    if isinstance(message.content, str):
        parts.append(message.content)
    elif isinstance(message.content, list):
        # Handle complex content like tool calls (Anthropic format)
        for item in message.content:
            # Content blocks may be bare strings rather than dicts; calling
            # .get() on a str would raise AttributeError.
            if isinstance(item, str):
                parts.append(item)
            elif item.get("type") == "text":
                parts.append(item["text"])
            elif item.get("type") == "tool_use":
                parts.append(f"\n🔧 Tool Call: {item['name']}")
                parts.append(f"   Args: {json.dumps(item['input'], indent=2)}")
                parts.append(f"   ID: {item.get('id', 'N/A')}")
                tool_calls_processed = True
    else:
        parts.append(str(message.content))

    # Handle tool calls attached to the message (OpenAI format) - only if not
    # already rendered from Anthropic-style content blocks above.
    if (
        not tool_calls_processed
        and hasattr(message, "tool_calls")
        and message.tool_calls
    ):
        for tool_call in message.tool_calls:
            parts.append(f"\n🔧 Tool Call: {tool_call['name']}")
            parts.append(f"   Args: {json.dumps(tool_call['args'], indent=2)}")
            parts.append(f"   ID: {tool_call['id']}")

    return "\n".join(parts)
45 |
46 |
def format_messages(messages):
    """Render each message as a Rich panel on the shared console.

    The panel title and border color are chosen from the message class
    name (HumanMessage -> "Human", AIMessage -> "Ai", ToolMessage ->
    "Tool"); anything else falls back to a generic white panel.
    """
    known_styles = {
        "Human": ("🧑 Human", "blue"),
        "Ai": ("🤖 Assistant", "green"),
        "Tool": ("🔧 Tool Output", "yellow"),
    }
    for message in messages:
        kind = message.__class__.__name__.replace("Message", "")
        body = format_message_content(message)
        title, border = known_styles.get(kind, (f"📝 {kind}", "white"))
        console.print(Panel(body, title=title, border_style=border))
61 |
62 |
def format_message(messages):
    """Render messages via :func:`format_messages` (legacy alias kept for
    backward compatibility with older notebooks)."""
    return format_messages(messages)
66 |
67 |
def show_prompt(prompt_text: str, title: str = "Prompt", border_style: str = "blue"):
    """Display a prompt with rich formatting and XML tag highlighting.

    Args:
        prompt_text: The prompt string to display
        title: Title for the panel (default: "Prompt")
        border_style: Border color style (default: "blue")
    """
    styled = Text(prompt_text)
    # Apply each highlight in order; later patterns win on overlapping spans
    # (so ### sub-headers end up cyan even though ## also matches them).
    highlight_rules = (
        (r"<[^>]+>", "bold blue"),  # XML tags
        (r"##[^#\n]+", "bold magenta"),  # headers
        (r"###[^#\n]+", "bold cyan"),  # sub-headers
    )
    for pattern, style in highlight_rules:
        styled.highlight_regex(pattern, style=style)

    # Wrap in a panel for better presentation.
    panel = Panel(
        styled,
        title=f"[bold green]{title}[/bold green]",
        border_style=border_style,
        padding=(1, 2),
    )
    console.print(panel)
95 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | ls-academy/
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | share/python-wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | MANIFEST
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 |
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 |
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .nox/
45 | .coverage
46 | .coverage.*
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 | *.cover
51 | *.py,cover
52 | .hypothesis/
53 | .pytest_cache/
54 | cover/
55 |
56 | # Translations
57 | *.mo
58 | *.pot
59 |
60 | # Django stuff:
61 | *.log
62 | local_settings.py
63 | db.sqlite3
64 | db.sqlite3-journal
65 |
66 | # Flask stuff:
67 | instance/
68 | .webassets-cache
69 |
70 | # Scrapy stuff:
71 | .scrapy
72 |
73 | # Sphinx documentation
74 | docs/_build/
75 |
76 | # PyBuilder
77 | .pybuilder/
78 | target/
79 |
80 | # Jupyter Notebook
81 | .ipynb_checkpoints
82 |
83 | # IPython
84 | profile_default/
85 | ipython_config.py
86 |
87 | # pyenv
88 | # For a library or package, you might want to ignore these files since the code is
89 | # intended to run in multiple environments; otherwise, check them in:
90 | # .python-version
91 |
92 | # pipenv
93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
96 | # install all needed dependencies.
97 | #Pipfile.lock
98 |
99 | # poetry
100 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
101 | # This is especially recommended for binary packages to ensure reproducibility, and is more
102 | # commonly ignored for libraries.
103 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
104 | #poetry.lock
105 |
106 | # pdm
107 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
108 | #pdm.lock
109 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
110 | # in version control.
111 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
112 | .pdm.toml
113 | .pdm-python
114 | .pdm-build/
115 |
116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
117 | __pypackages__/
118 |
119 | # Celery stuff
120 | celerybeat-schedule
121 | celerybeat.pid
122 |
123 | # SageMath parsed files
124 | *.sage.py
125 |
126 | # Environments
127 | .env
128 | .venv
129 | env/
130 | venv/
131 | ENV/
132 | env.bak/
133 | venv.bak/
134 |
135 | # Spyder project settings
136 | .spyderproject
137 | .spyproject
138 |
139 | # Rope project settings
140 | .ropeproject
141 |
142 | # mkdocs documentation
143 | /site
144 |
145 | # mypy
146 | .mypy_cache/
147 | .dmypy.json
148 | dmypy.json
149 |
150 | # Pyre type checker
151 | .pyre/
152 |
153 | # pytype static type analyzer
154 | .pytype/
155 |
156 | # Cython debug symbols
157 | cython_debug/
158 |
159 | # PyCharm
160 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
161 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
162 | # and can be added to the global gitignore or merged into this file. For a more nuclear
163 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
164 | #.idea/
165 |
166 | # Other
167 | .langgraph_api/
168 | .vscode/
169 | .DS_Store
--------------------------------------------------------------------------------
/deep_research/research_agent/tools.py:
--------------------------------------------------------------------------------
1 | """Research Tools.
2 |
3 | This module provides search and content processing utilities for the research agent,
4 | using Tavily for URL discovery and fetching full webpage content.
5 | """
6 |
7 | import httpx
8 | from langchain_core.tools import InjectedToolArg, tool
9 | from markdownify import markdownify
10 | from tavily import TavilyClient
11 | from typing_extensions import Annotated, Literal
12 |
13 | tavily_client = TavilyClient()
14 |
15 |
def fetch_webpage_content(url: str, timeout: float = 10.0) -> str:
    """Fetch and convert webpage content to markdown.

    Args:
        url: URL to fetch
        timeout: Request timeout in seconds

    Returns:
        Webpage content as markdown, or an error message string on any
        failure (best-effort: the caller embeds it in search results).
    """
    # Browser-like User-Agent avoids 403 responses from sites that block
    # default HTTP client agents.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    try:
        # httpx does not follow redirects by default; without
        # follow_redirects=True, raise_for_status() fails on every 3xx
        # response, breaking fetches of commonly-redirected URLs.
        response = httpx.get(
            url, headers=headers, timeout=timeout, follow_redirects=True
        )
        response.raise_for_status()
        return markdownify(response.text)
    except Exception as e:
        # Deliberate best-effort: surface the error as content rather than
        # aborting the whole search.
        return f"Error fetching content from {url}: {str(e)}"
36 |
37 |
@tool(parse_docstring=True)
def tavily_search(
    query: str,
    max_results: Annotated[int, InjectedToolArg] = 1,
    topic: Annotated[
        Literal["general", "news", "finance"], InjectedToolArg
    ] = "general",
) -> str:
    """Search the web for information on a given query.

    Uses Tavily to discover relevant URLs, then fetches and returns full webpage content as markdown.

    Args:
        query: Search query to execute
        max_results: Maximum number of results to return (default: 1)
        topic: Topic filter - 'general', 'news', or 'finance' (default: 'general')

    Returns:
        Formatted search results with full webpage content
    """
    # Tavily is used purely for URL discovery.
    discovered = tavily_client.search(
        query,
        max_results=max_results,
        topic=topic,
    )

    # Pull the complete page for every hit and render it as a section.
    sections = []
    for hit in discovered.get("results", []):
        url = hit["url"]
        title = hit["title"]
        page_markdown = fetch_webpage_content(url)
        sections.append(
            f"""## {title}
**URL:** {url}

{page_markdown}

---
"""
        )

    body = "\n".join(sections)
    return f"""🔍 Found {len(sections)} result(s) for '{query}':

{body}"""
89 |
90 |
@tool(parse_docstring=True)
def think_tool(reflection: str) -> str:
    """Tool for strategic reflection on research progress and decision-making.

    Use this tool after each search to analyze results and plan next steps systematically.
    This creates a deliberate pause in the research workflow for quality decision-making.

    When to use:
    - After receiving search results: What key information did I find?
    - Before deciding next steps: Do I have enough to answer comprehensively?
    - When assessing research gaps: What specific information am I still missing?
    - Before concluding research: Can I provide a complete answer now?

    Reflection should address:
    1. Analysis of current findings - What concrete information have I gathered?
    2. Gap assessment - What crucial information is still missing?
    3. Quality evaluation - Do I have sufficient evidence/examples for a good answer?
    4. Strategic decision - Should I continue searching or provide my answer?

    Args:
        reflection: Your detailed reflection on research progress, findings, gaps, and next steps

    Returns:
        Confirmation that reflection was recorded for decision-making
    """
    # No side effects: echoing the reflection back is enough to anchor it
    # in the conversation history for the model's next decision.
    return "Reflection recorded: " + reflection
117 |
--------------------------------------------------------------------------------
/deep_research/README.md:
--------------------------------------------------------------------------------
1 | # 🚀 Deep Research
2 |
3 | ## 🚀 Quickstart
4 |
5 | **Prerequisites**: Install [uv](https://docs.astral.sh/uv/) package manager:
6 | ```bash
7 | curl -LsSf https://astral.sh/uv/install.sh | sh
8 | ```
9 |
10 | Ensure you are in the `deep_research` directory:
11 | ```bash
12 | cd deep_research
13 | ```
14 |
15 | Install packages:
16 | ```bash
17 | uv sync
18 | ```
19 |
20 | Set your API keys in your environment:
21 |
22 | ```bash
23 | export ANTHROPIC_API_KEY=your_anthropic_api_key_here # Required for Claude model
24 | export GOOGLE_API_KEY=your_google_api_key_here # Required for Gemini model ([get one here](https://ai.google.dev/gemini-api/docs))
25 | export TAVILY_API_KEY=your_tavily_api_key_here # Required for web search ([get one here](https://www.tavily.com/)) with a generous free tier
26 | export LANGSMITH_API_KEY=your_langsmith_api_key_here # [LangSmith API key](https://smith.langchain.com/settings) (free to sign up)
27 | ```
28 |
29 | ## Usage Options
30 |
31 | You can run this quickstart in two ways:
32 |
33 | ### Option 1: Jupyter Notebook
34 |
35 | Run the interactive notebook to step through the research agent:
36 |
37 | ```bash
38 | uv run jupyter notebook research_agent.ipynb
39 | ```
40 |
41 | ### Option 2: LangGraph Server
42 |
43 | Run a local [LangGraph server](https://langchain-ai.github.io/langgraph/tutorials/langgraph-platform/local-server/) with a web interface:
44 |
45 | ```bash
46 | langgraph dev
47 | ```
48 |
LangGraph server will open a new browser window with the Studio interface, to which you can submit your search query:
50 |
51 |
52 |
53 | You can also connect the LangGraph server to a [UI specifically designed for deepagents](https://github.com/langchain-ai/deep-agents-ui):
54 |
55 | ```bash
56 | $ git clone https://github.com/langchain-ai/deep-agents-ui.git
57 | $ cd deep-agents-ui
58 | $ yarn install
59 | $ yarn dev
60 | ```
61 |
62 | Then follow the instructions in the [deep-agents-ui README](https://github.com/langchain-ai/deep-agents-ui?tab=readme-ov-file#connecting-to-a-langgraph-server) to connect the UI to the running LangGraph server.
63 |
64 | This provides a user-friendly chat interface and visualization of files in state.
65 |
66 |
67 |
68 | ## 📚 Resources
69 |
70 | - **[Deep Research Course](https://academy.langchain.com/courses/deep-research-with-langgraph)** - Full course on deep research with LangGraph
71 |
72 | ### Custom Model
73 |
74 | By default, `deepagents` uses `"claude-sonnet-4-5-20250929"`. You can customize this by passing any [LangChain model object](https://python.langchain.com/docs/integrations/chat/). See the Deepagents package [README](https://github.com/langchain-ai/deepagents?tab=readme-ov-file#model) for more details.
75 |
76 | ```python
77 | from langchain.chat_models import init_chat_model
78 | from deepagents import create_deep_agent
79 |
80 | # Using Claude
81 | model = init_chat_model(model="anthropic:claude-sonnet-4-5-20250929", temperature=0.0)
82 |
83 | # Using Gemini
84 | from langchain_google_genai import ChatGoogleGenerativeAI
85 | model = ChatGoogleGenerativeAI(model="gemini-3-pro-preview")
86 |
87 | agent = create_deep_agent(
88 | model=model,
89 | )
90 | ```
91 |
92 | ### Custom Instructions
93 |
94 | The deep research agent uses custom instructions defined in `deep_research/research_agent/prompts.py` that complement (rather than duplicate) the default middleware instructions. You can modify these in any way you want.
95 |
96 | | Instruction Set | Purpose |
97 | |----------------|---------|
98 | | `RESEARCH_WORKFLOW_INSTRUCTIONS` | Defines the 5-step research workflow: save request → plan with TODOs → delegate to sub-agents → synthesize → respond. Includes research-specific planning guidelines like batching similar tasks and scaling rules for different query types. |
99 | | `SUBAGENT_DELEGATION_INSTRUCTIONS` | Provides concrete delegation strategies with examples: simple queries use 1 sub-agent, comparisons use 1 per element, multi-faceted research uses 1 per aspect. Sets limits on parallel execution (max 3 concurrent) and iteration rounds (max 3). |
100 | | `RESEARCHER_INSTRUCTIONS` | Guides individual research sub-agents to conduct focused web searches. Includes hard limits (2-3 searches for simple queries, max 5 for complex), emphasizes using `think_tool` after each search for strategic reflection, and defines stopping criteria. |
101 |
102 | ### Custom Tools
103 |
104 | The deep research agent adds the following custom tools beyond the built-in deepagent tools. You can also use your own tools, including via MCP servers. See the Deepagents package [README](https://github.com/langchain-ai/deepagents?tab=readme-ov-file#mcp) for more details.
105 |
106 | | Tool Name | Description |
107 | |-----------|-------------|
108 | | `tavily_search` | Web search tool that uses Tavily purely as a URL discovery engine. Performs searches using Tavily API to find relevant URLs, fetches full webpage content via HTTP with proper User-Agent headers (avoiding 403 errors), converts HTML to markdown, and returns the complete content without summarization to preserve all information for the agent's analysis. Works with both Claude and Gemini models. |
109 | | `think_tool` | Strategic reflection mechanism that helps the agent pause and assess progress between searches, analyze findings, identify gaps, and plan next steps. |
110 |
111 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 🚀🧠 Deepagent Quickstarts
2 |
[Deepagents](https://github.com/langchain-ai/deepagents) is a simple, open source agent harness. It uses some common principles seen in popular agents such as [Claude Code](https://code.claude.com/docs) and [Manus](https://www.youtube.com/watch?v=6_BcCthVvb8), including **planning** (prior to task execution), **computer access** (giving the agent access to a shell and a filesystem), and **sub-agent delegation** (isolated task execution). This repo has a collection of quickstarts that demonstrate different agents that can be easily configured on top of the `deepagents` harness.
4 |
5 |
6 |
7 | ## 📚 Resources
8 |
9 | - **[Documentation](https://docs.langchain.com/oss/python/deepagents/overview)** - Full overview and API reference
10 | - **[Deepagents Repo](https://github.com/langchain-ai/deepagents)** - Deepagents package
11 |
12 | ## Quickstarts
13 |
14 | Here are the currently supported quickstarts:
15 |
16 | | Quickstart Name | Location | Description | Usage Options |
17 | |----------------|----------|-------------|---------------|
18 | | [Deep Research](deep_research/README.md) | `deep_research/` | A research agent that conducts multi-step web research using Tavily for URL discovery, fetches full webpage content, and coordinates work through parallel sub-agents and strategic reflection | **Jupyter Notebook** or **LangGraph Server** |
19 |
20 | ## Built-In Deepagent Components
21 |
22 | To use these quickstarts, it's important to understand the built-in components of the deepagent harness. You can see the deepagents [repo](https://github.com/langchain-ai/deepagents) for more details, but as a quick reference, here are the built-in tools and middleware:
23 |
24 | ### Tools
25 |
26 | Every deepagent comes with a set of general tools by default:
27 |
28 |
29 |
30 | | Tool Name | Description |
31 | |-----------|-------------|
32 | | `write_todos` | Create and manage structured task lists for tracking progress through complex workflows |
33 | | `ls` | List all files in a directory (requires absolute path) |
34 | | `read_file` | Read content from a file with optional pagination (offset/limit parameters) |
35 | | `write_file` | Create a new file or completely overwrite an existing file |
36 | | `edit_file` | Perform exact string replacements in files |
37 | | `glob` | Find files matching a pattern (e.g., `**/*.py`) |
38 | | `grep` | Search for text patterns within files |
39 | | `execute` | Run shell commands in a sandboxed environment (only if backend supports SandboxBackendProtocol) |
40 | | `task` | Delegate tasks to specialized sub-agents with isolated context windows |
41 |
42 | ### Middleware
43 |
Deepagents also uses some built-in ["middleware"](https://docs.langchain.com/oss/python/langchain/middleware/overview), which can:
45 |
46 | 1. **Provide tools** - Add new tools to the agent's toolkit (e.g., `FilesystemMiddleware` adds `ls`, `read_file`, `write_file`, etc.)
47 | 2. **Wrap model calls** - Inject system prompts and modify model requests before they're sent
48 | 3. **Wrap tool calls** - Process tool call results after tools execute (e.g., `SummarizationMiddleware` summarizes large conversation history)
49 |
50 | Every deepagent includes the following middleware by default (applied in order). Some middleware are provided by the `deepagents` package (`FilesystemMiddleware`, `SubAgentMiddleware`, `PatchToolCallsMiddleware`), while others come from `langchain` (`TodoListMiddleware`, `SummarizationMiddleware`, `HumanInTheLoopMiddleware`) and `langchain-anthropic` (`AnthropicPromptCachingMiddleware`):
51 |
52 | | Middleware | Tools Added | What It Does |
53 | |------------|-------------|--------------|
54 | | **TodoListMiddleware** | `write_todos`, `read_todos` | Task planning and progress tracking. Enables agents to create todo lists, break down tasks, and track completion. |
55 | | **FilesystemMiddleware** | `ls`, `read_file`, `write_file`, `edit_file`, `glob`, `grep`, `execute`* | File system operations and context offloading. Automatically saves large tool results (>20K tokens) to files to prevent context overflow. |
56 | | **SubAgentMiddleware** | `task` | Task delegation to specialized subagents with isolated contexts. Subagents handle complex subtasks independently and return summaries. |
57 | | **SummarizationMiddleware** | N/A | Automatic conversation summarization when context exceeds 170K tokens. Keeps last 6 messages intact while summarizing older content. |
58 | | **AnthropicPromptCachingMiddleware** | N/A | Prompt caching for Anthropic models to reduce API costs. Marks static system prompts for server-side caching. |
59 | | **PatchToolCallsMiddleware** | N/A | Fixes "dangling" tool calls from interruptions. Adds placeholder responses to prevent validation errors. |
60 | | **HumanInTheLoopMiddleware** | N/A | Human approval workflow for sensitive operations. Creates breakpoints for specified tools (only when `interrupt_on` configured). |
61 |
62 | \* The `execute` tool is only available if the backend implements `SandboxBackendProtocol`
63 |
64 | ## Writing Custom Instructions
65 |
66 | When building your own custom deepagent, you can provide a `system_prompt` parameter to `create_deep_agent()`. This custom prompt is **appended to** default instructions that are automatically injected by middleware. Understanding this layering is crucial for writing effective custom instructions. Read about the [default instructions in the deepagents README](https://github.com/langchain-ai/deepagents?tab=readme-ov-file#built-in-tools) below. You can follow some general guidelines below, and see specific examples in the quickstart folders.
67 |
68 | **Do:**
69 | - ✅ Define domain-specific workflows (e.g., research methodology, data analysis steps)
70 | - ✅ Provide concrete examples for your use case
71 | - ✅ Add specialized guidance (e.g., "batch similar research tasks into a single TODO")
72 | - ✅ Define stopping criteria and resource limits
73 | - ✅ Explain how tools work together in your workflow
74 |
75 | **Don't:**
76 | - ❌ Re-explain what standard tools do (already covered by middleware)
77 | - ❌ Duplicate middleware instructions about tool usage
78 | - ❌ Contradict default instructions (work with them, not against them)
--------------------------------------------------------------------------------
/deep_research/research_agent/prompts.py:
--------------------------------------------------------------------------------
1 | """Prompt templates and tool descriptions for the research deepagent."""
2 |
# Orchestrator prompt: defines the end-to-end research workflow, planning
# rules, final-report structure, and citation format. Formatted for the
# top-level agent only; sub-agents receive RESEARCHER_INSTRUCTIONS instead.
# NOTE: this string is sent verbatim to the model — do not edit casually.
RESEARCH_WORKFLOW_INSTRUCTIONS = """# Research Workflow

Follow this workflow for all research requests:

1. **Plan**: Create a todo list with write_todos to break down the research into focused tasks
2. **Save the request**: Use write_file() to save the user's research question to `/research_request.md`
3. **Research**: Delegate research tasks to sub-agents using the task() tool - ALWAYS use sub-agents for research, never conduct research yourself
4. **Synthesize**: Review all sub-agent findings and consolidate citations (each unique URL gets one number across all findings)
5. **Write Report**: Write a comprehensive final report to `/final_report.md` (see Report Writing Guidelines below)
6. **Verify**: Read `/research_request.md` and confirm you've addressed all aspects with proper citations and structure

## Research Planning Guidelines
- Batch similar research tasks into a single TODO to minimize overhead
- For simple fact-finding questions, use 1 sub-agent
- For comparisons or multi-faceted topics, delegate to multiple parallel sub-agents
- Each sub-agent should research one specific aspect and return findings

## Report Writing Guidelines

When writing the final report to `/final_report.md`, follow these structure patterns:

**For comparisons:**
1. Introduction
2. Overview of topic A
3. Overview of topic B
4. Detailed comparison
5. Conclusion

**For lists/rankings:**
Simply list items with details - no introduction needed:
1. Item 1 with explanation
2. Item 2 with explanation
3. Item 3 with explanation

**For summaries/overviews:**
1. Overview of topic
2. Key concept 1
3. Key concept 2
4. Key concept 3
5. Conclusion

**General guidelines:**
- Use clear section headings (## for sections, ### for subsections)
- Write in paragraph form by default - be text-heavy, not just bullet points
- Do NOT use self-referential language ("I found...", "I researched...")
- Write as a professional report without meta-commentary
- Each section should be comprehensive and detailed
- Use bullet points only when listing is more appropriate than prose

**Citation format:**
- Cite sources inline using [1], [2], [3] format
- Assign each unique URL a single citation number across ALL sub-agent findings
- End report with ### Sources section listing each numbered source
- Number sources sequentially without gaps (1,2,3,4...)
- Format: [1] Source Title: URL (each on separate line for proper list rendering)
- Example:

Some important finding [1]. Another key insight [2].

### Sources
[1] AI Research Paper: https://example.com/paper
[2] Industry Analysis: https://example.com/analysis
"""
66 |
# System prompt for an individual research sub-agent (exported via
# research_agent/__init__.py). Contains one placeholder, {date} — presumably
# filled with today's date via str.format() by the caller; TODO confirm.
# NOTE(review): the runs of double blank lines below look like residue of
# stripped section markers — confirm they are intentional before reflowing,
# since this text is delivered to the model verbatim.
RESEARCHER_INSTRUCTIONS = """You are a research assistant conducting research on the user's input topic. For context, today's date is {date}.


Your job is to use tools to gather information about the user's input topic.
You can use any of the research tools provided to you to find resources that can help answer the research question.
You can call these tools in series or in parallel, your research is conducted in a tool-calling loop.



You have access to two specific research tools:
1. **tavily_search**: For conducting web searches to gather information
2. **think_tool**: For reflection and strategic planning during research
**CRITICAL: Use think_tool after each search to reflect on results and plan next steps**



Think like a human researcher with limited time. Follow these steps:

1. **Read the question carefully** - What specific information does the user need?
2. **Start with broader searches** - Use broad, comprehensive queries first
3. **After each search, pause and assess** - Do I have enough to answer? What's still missing?
4. **Execute narrower searches as you gather information** - Fill in the gaps
5. **Stop when you can answer confidently** - Don't keep searching for perfection



**Tool Call Budgets** (Prevent excessive searching):
- **Simple queries**: Use 2-3 search tool calls maximum
- **Complex queries**: Use up to 5 search tool calls maximum
- **Always stop**: After 5 search tool calls if you cannot find the right sources

**Stop Immediately When**:
- You can answer the user's question comprehensively
- You have 3+ relevant examples/sources for the question
- Your last 2 searches returned similar information



After each search tool call, use think_tool to analyze the results:
- What key information did I find?
- What's missing?
- Do I have enough to answer the question comprehensively?
- Should I search more or provide my answer?



When providing your findings back to the orchestrator:

1. **Structure your response**: Organize findings with clear headings and detailed explanations
2. **Cite sources inline**: Use [1], [2], [3] format when referencing information from your searches
3. **Include Sources section**: End with ### Sources listing each numbered source with title and URL

Example:
```
## Key Findings

Context engineering is a critical technique for AI agents [1]. Studies show that proper context management can improve performance by 40% [2].

### Sources
[1] Context Engineering Guide: https://example.com/context-guide
[2] AI Performance Study: https://example.com/study
```

The orchestrator will consolidate citations from all sub-agents into the final report.

"""
133 |
# Description prefix for the task() delegation tool. Placeholder
# {other_agents} is presumably substituted with the roster of available
# sub-agents by the framework at tool-registration time — TODO confirm
# against the caller.
TASK_DESCRIPTION_PREFIX = """Delegate a task to a specialized sub-agent with isolated context. Available agents for delegation are:
{other_agents}
"""
137 |
# Orchestrator-side prompt governing how research work is fanned out to
# sub-agents (exported via research_agent/__init__.py). Two placeholders —
# {max_concurrent_research_units} and {max_researcher_iterations} — are
# presumably filled via str.format() with configured limits; TODO confirm
# the caller supplies both, since a missing key would raise KeyError.
SUBAGENT_DELEGATION_INSTRUCTIONS = """# Sub-Agent Research Coordination

Your role is to coordinate research by delegating tasks from your TODO list to specialized research sub-agents.

## Delegation Strategy

**DEFAULT: Start with 1 sub-agent** for most queries:
- "What is quantum computing?" → 1 sub-agent (general overview)
- "List the top 10 coffee shops in San Francisco" → 1 sub-agent
- "Summarize the history of the internet" → 1 sub-agent
- "Research context engineering for AI agents" → 1 sub-agent (covers all aspects)

**ONLY parallelize when the query EXPLICITLY requires comparison or has clearly independent aspects:**

**Explicit comparisons** → 1 sub-agent per element:
- "Compare OpenAI vs Anthropic vs DeepMind AI safety approaches" → 3 parallel sub-agents
- "Compare Python vs JavaScript for web development" → 2 parallel sub-agents

**Clearly separated aspects** → 1 sub-agent per aspect (use sparingly):
- "Research renewable energy adoption in Europe, Asia, and North America" → 3 parallel sub-agents (geographic separation)
- Only use this pattern when aspects cannot be covered efficiently by a single comprehensive search

## Key Principles
- **Bias towards single sub-agent**: One comprehensive research task is more token-efficient than multiple narrow ones
- **Avoid premature decomposition**: Don't break "research X" into "research X overview", "research X techniques", "research X applications" - just use 1 sub-agent for all of X
- **Parallelize only for clear comparisons**: Use multiple sub-agents when comparing distinct entities or geographically separated data

## Parallel Execution Limits
- Use at most {max_concurrent_research_units} parallel sub-agents per iteration
- Make multiple task() calls in a single response to enable parallel execution
- Each sub-agent returns findings independently

## Research Limits
- Stop after {max_researcher_iterations} delegation rounds if you haven't found adequate sources
- Stop when you have sufficient information to answer comprehensively
- Bias towards focused research over exhaustive exploration"""
--------------------------------------------------------------------------------