├── .env.example
├── .gitignore
├── LICENSE
├── README.md
├── examples
│   ├── arxiv.md
│   ├── inference-market-gpt45.md
│   ├── inference-market.md
│   └── pubmed.md
├── langgraph.json
├── pyproject.toml
├── src
│   └── open_deep_research
│       ├── __init__.py
│       ├── configuration.py
│       ├── graph.ipynb
│       ├── graph.py
│       ├── multi_agent.ipynb
│       ├── multi_agent.py
│       ├── prompts.py
│       ├── state.py
│       └── utils.py
├── tests
│   ├── __init__.py
│   ├── conftest.py
│   ├── run_test.py
│   └── test_report_quality.py
└── uv.lock
/.env.example:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=sk-xxx
2 | ANTHROPIC_API_KEY=sk-xxx
3 | TAVILY_API_KEY=xxx
4 | GROQ_API_KEY=xxx
5 | PERPLEXITY_API_KEY=xxx
6 | LINKUP_API_KEY=xxx
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | *.egg-info
3 | *.pyc
4 | 
5 | # Python
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 | *.so
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # Virtual environments
28 | venv/
29 | env/
30 | ENV/
31 | .env
32 | 
33 | # IDE specific files
34 | .idea/
35 | .vscode/
36 | *.swp
37 | *.swo
38 | .DS_Store
39 | 
40 | # Jupyter Notebook
41 | .ipynb_checkpoints
42 | 
43 | # Testing
44 | .coverage
45 | htmlcov/
46 | .pytest_cache/
47 | .tox/
48 | 
49 | # Logs
50 | *.log
51 | logs/
52 | 
53 | # Local development
54 | .env.local
55 | .env.development.local
56 | .env.test.local
57 | .env.production.local
58 | 
59 | # Dependencies
60 | node_modules/
61 | 
62 | # LangGraph specific
63 | .langgraph/
64 | 
65 | # Temporary files
66 | tmp/
67 | temp/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2025 LangChain
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Open Deep Research
2 | 
3 | Open Deep Research is an experimental, fully open-source research assistant that automates deep research and produces comprehensive reports on any topic.
It features two implementations - a [workflow](https://langchain-ai.github.io/langgraph/tutorials/workflows/) and a multi-agent architecture - each with distinct advantages. You can customize the entire research and writing process with specific models, prompts, report structure, and search tools.
4 | 
5 | #### Workflow
6 | 
7 | ![open-deep-research-overview](https://github.com/user-attachments/assets/a171660d-b735-4587-ab2f-cd771f773756)
8 | 
9 | #### Multi-agent
10 | 
11 | ![multi-agent-researcher](https://github.com/user-attachments/assets/3c734c3c-57aa-4bc0-85dd-74e2ec2c0880)
12 | 
13 | ### 🚀 Quickstart
14 | 
15 | Clone the repository:
16 | ```bash
17 | git clone https://github.com/langchain-ai/open_deep_research.git
18 | cd open_deep_research
19 | ```
20 | 
21 | Then copy the example environment file and edit `.env` to customize the environment variables (for model selection, search tools, and other configuration settings):
22 | ```bash
23 | cp .env.example .env
24 | ```
25 | 
26 | Launch the assistant with the LangGraph server running locally; the Studio UI will open in your browser:
27 | 
28 | #### Mac
29 | 
30 | ```bash
31 | # Install uv package manager
32 | curl -LsSf https://astral.sh/uv/install.sh | sh
33 | 
34 | # Install dependencies and start the LangGraph server
35 | uvx --refresh --from "langgraph-cli[inmem]" --with-editable . --python 3.11 langgraph dev --allow-blocking
36 | ```
37 | 
38 | #### Windows / Linux
39 | 
40 | ```powershell
41 | # Install dependencies
42 | pip install -e .
43 | pip install -U "langgraph-cli[inmem]"
44 | 
45 | # Start the LangGraph server
46 | langgraph dev
47 | ```
48 | 
49 | Use the following URLs to open the Studio UI:
50 | ```
51 | - 🚀 API: http://127.0.0.1:2024
52 | - 🎨 Studio UI: https://smith.langchain.com/studio/?baseUrl=http://127.0.0.1:2024
53 | - 📚 API Docs: http://127.0.0.1:2024/docs
54 | ```
55 | 
56 | #### Multi-agent
57 | 
58 | (1) Chat with the agent about your topic of interest, and it will initiate report generation:
59 | 
60 | input
61 | 
62 | (2) The report is produced as markdown.
63 | 
64 | #### Workflow
65 | 
66 | (1) Provide a `Topic`:
67 | 
68 | input
69 | 
70 | (2) This will generate a report plan and present it to the user for review.
71 | 
72 | (3) We can pass a string (`"..."`) with feedback to regenerate the plan.
73 | 
74 | feedback
75 | 
76 | (4) Or, we can just pass `true` to the JSON input box in Studio to accept the plan.
77 | 
78 | accept
79 | 
80 | (5) Once accepted, the report sections will be generated.
81 | 
82 | report_gen
83 | 
84 | The report is produced as markdown.
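
You can also drive the same flow from code against the locally running server using the LangGraph SDK. The sketch below is a minimal example, not the canonical client: it assumes `pip install langgraph-sdk` and that the workflow graph is registered in `langgraph.json` under the name `open_deep_research` (check that file for the actual graph names).

```python
# Minimal sketch: start a report run against the local `langgraph dev` server.
# The graph name "open_deep_research" is an assumption; see langgraph.json.
import asyncio

from langgraph_sdk import get_client


async def main():
    client = get_client(url="http://127.0.0.1:2024")
    thread = await client.threads.create()

    # Stream node-by-node updates as the report is planned and written.
    async for chunk in client.runs.stream(
        thread["thread_id"],
        "open_deep_research",  # assumed graph name from langgraph.json
        input={"topic": "Overview of the AI inference market"},
        stream_mode="updates",
    ):
        print(chunk.event, chunk.data)


asyncio.run(main())
```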
85 | 
86 | report
87 | 
88 | ### Search Tools
89 | 
90 | Available search tools:
91 | 
92 | * [Tavily API](https://tavily.com/) - General web search
93 | * [Perplexity API](https://www.perplexity.ai/hub/blog/introducing-the-sonar-pro-api) - General web search
94 | * [Exa API](https://exa.ai/) - Powerful neural search for web content
95 | * [ArXiv](https://arxiv.org/) - Academic papers in physics, mathematics, computer science, and more
96 | * [PubMed](https://pubmed.ncbi.nlm.nih.gov/) - Biomedical literature from MEDLINE, life science journals, and online books
97 | * [Linkup API](https://www.linkup.so/) - General web search
98 | * [DuckDuckGo API](https://duckduckgo.com/) - General web search
99 | * [Google Search API/Scraper](https://google.com/) - Create a custom search engine [here](https://programmablesearchengine.google.com/controlpanel/all) and get an API key [here](https://developers.google.com/custom-search/v1/introduction)
100 | * [Microsoft Azure AI Search](https://azure.microsoft.com/en-us/products/ai-services/ai-search) - Cloud-based vector database solution
101 | 
102 | Open Deep Research is compatible with many different LLMs:
103 | 
104 | * You can select any model that is integrated [with the `init_chat_model()` API](https://python.langchain.com/docs/how_to/chat_models_universal_init/)
105 | * See the full list of supported integrations [here](https://python.langchain.com/api_reference/langchain/chat_models/langchain.chat_models.base.init_chat_model.html)
106 | 
107 | ### Using the package
108 | 
109 | ```bash
110 | pip install open-deep-research
111 | ```
112 | 
113 | See [src/open_deep_research/graph.ipynb](src/open_deep_research/graph.ipynb) and [src/open_deep_research/multi_agent.ipynb](src/open_deep_research/multi_agent.ipynb) for example usage in a Jupyter notebook.
114 | 
115 | ## Open Deep Research Implementations
116 | 
117 | Open Deep Research features two distinct implementation approaches, each with its own strengths:
118 | 
119 | ## 1. Graph-based Workflow Implementation (`src/open_deep_research/graph.py`)
120 | 
121 | The graph-based implementation follows a structured plan-and-execute workflow:
122 | 
123 | - **Planning Phase**: Uses a planner model to analyze the topic and generate a structured report plan
124 | - **Human-in-the-Loop**: Allows for human feedback and approval of the report plan before proceeding
125 | - **Sequential Research Process**: Creates sections one by one with reflection between search iterations
126 | - **Section-Specific Research**: Each section has dedicated search queries and content retrieval
127 | - **Supports Multiple Search Tools**: Works with all search providers (Tavily, Perplexity, Exa, ArXiv, PubMed, Linkup, etc.)
128 | 
129 | This implementation provides a more interactive experience with greater control over the report structure, making it ideal for situations where report quality and accuracy are critical.
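
To run the workflow outside of Studio, the pattern from `src/open_deep_research/graph.ipynb` can be condensed roughly as follows. This is a sketch, not the canonical notebook code: it assumes the module exposes a `builder` graph, that the plan-review interrupt is resumed with a LangGraph `Command`, and that the finished report lives under a `final_report` state key; consult the notebook for the exact names.

```python
# Rough sketch of the notebook usage pattern (see graph.ipynb for the canonical version).
# Assumes open_deep_research.graph exposes a `builder` StateGraph.
import asyncio
import uuid

from langgraph.checkpoint.memory import MemorySaver
from langgraph.types import Command

from open_deep_research.graph import builder


async def main():
    graph = builder.compile(checkpointer=MemorySaver())
    thread = {"configurable": {
        "thread_id": str(uuid.uuid4()),
        "search_api": "tavily",
        "planner_provider": "anthropic",
        "planner_model": "claude-3-7-sonnet-latest",
        "writer_provider": "anthropic",
        "writer_model": "claude-3-5-sonnet-latest",
        "max_search_depth": 2,
    }}

    # 1) Submit a topic; the graph interrupts once it has a report plan to review.
    async for event in graph.astream(
        {"topic": "Overview of the AI inference market"}, thread, stream_mode="updates"
    ):
        print(event)

    # 2) Approve the plan with True (or pass a feedback string to regenerate it).
    async for event in graph.astream(Command(resume=True), thread, stream_mode="updates"):
        print(event)

    # The finished report is available on the graph state (key name assumed).
    print(graph.get_state(thread).values.get("final_report"))


asyncio.run(main())
```

The keys passed under `configurable` in this sketch are the workflow parameters described next.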
130 | 
131 | You can customize the research assistant workflow through several parameters:
132 | 
133 | - `report_structure`: Define a custom structure for your report (defaults to a standard research report format)
134 | - `number_of_queries`: Number of search queries to generate per section (default: 2)
135 | - `max_search_depth`: Maximum number of reflection and search iterations (default: 2)
136 | - `planner_provider`: Model provider for planning phase (default: "anthropic", but can be any provider from supported integrations with `init_chat_model` as listed [here](https://python.langchain.com/api_reference/langchain/chat_models/langchain.chat_models.base.init_chat_model.html))
137 | - `planner_model`: Specific model for planning (default: "claude-3-7-sonnet-latest")
138 | - `planner_model_kwargs`: Additional parameters for the planner model
139 | - `writer_provider`: Model provider for writing phase (default: "anthropic", but can be any provider from supported integrations with `init_chat_model` as listed [here](https://python.langchain.com/api_reference/langchain/chat_models/langchain.chat_models.base.init_chat_model.html))
140 | - `writer_model`: Model for writing the report (default: "claude-3-5-sonnet-latest")
141 | - `writer_model_kwargs`: Additional parameters for the writer model
142 | - `search_api`: API to use for web searches (default: "tavily", options include "perplexity", "exa", "arxiv", "pubmed", "linkup")
143 | 
144 | ## 2. Multi-Agent Implementation (`src/open_deep_research/multi_agent.py`)
145 | 
146 | The multi-agent implementation uses a supervisor-researcher architecture:
147 | 
148 | - **Supervisor Agent**: Manages the overall research process, plans sections, and assembles the final report
149 | - **Researcher Agents**: Multiple independent agents work in parallel, each responsible for researching and writing a specific section
150 | - **Parallel Processing**: All sections are researched simultaneously, significantly reducing report generation time
151 | - **Specialized Tool Design**: Each agent has access to specific tools for its role (search for researchers, section planning for the supervisor)
152 | - **Currently Limited to Tavily Search**: The multi-agent implementation only works with Tavily for search, though the framework is designed to support additional search tools in the future
153 | 
154 | This implementation focuses on efficiency and parallelization, making it ideal for faster report generation with less direct user involvement.
155 | 
156 | ## Search API Configuration
157 | 
158 | Not all search APIs support additional configuration parameters.
Here are the ones that do:
159 | 
160 | - **Exa**: `max_characters`, `num_results`, `include_domains`, `exclude_domains`, `subpages`
161 |   - Note: `include_domains` and `exclude_domains` cannot be used together
162 |   - Particularly useful when you need to narrow your research to specific trusted sources, ensure information accuracy, or when your research requires using specified domains (e.g., academic journals, government sites)
163 |   - Provides AI-generated summaries tailored to your specific query, making it easier to extract relevant information from search results
164 | - **ArXiv**: `load_max_docs`, `get_full_documents`, `load_all_available_meta`
165 | - **PubMed**: `top_k_results`, `email`, `api_key`, `doc_content_chars_max`
166 | - **Linkup**: `depth`
167 | 
168 | Example with Exa configuration:
169 | ```python
170 | thread = {"configurable": {"thread_id": str(uuid.uuid4()),
171 |                            "search_api": "exa",
172 |                            "search_api_config": {
173 |                                "num_results": 5,
174 |                                "include_domains": ["nature.com", "sciencedirect.com"]
175 |                            },
176 |                            # Other configuration...
177 |                            }}
178 | ```
179 | 
180 | ## Model Considerations
181 | 
182 | (1) You can use models supported with [the `init_chat_model()` API](https://python.langchain.com/docs/how_to/chat_models_universal_init/). See the full list of supported integrations [here](https://python.langchain.com/api_reference/langchain/chat_models/langchain.chat_models.base.init_chat_model.html).
183 | 
184 | (2) ***The workflow planner and writer models need to support structured outputs***: Check whether structured outputs are supported by the model you are using [here](https://python.langchain.com/docs/integrations/chat/).
185 | 
186 | (3) ***The agent models need to support tool calling:*** Ensure tool calling is well supported; tests have been done with Claude 3.7, o3, o3-mini, and GPT-4.1. See [here](https://smith.langchain.com/public/adc5d60c-97ee-4aa0-8b2c-c776fb0d7bd6/d).
187 | 
188 | (4) With Groq, there are tokens per minute (TPM) limits if you are on the `on_demand` service tier:
189 | - The `on_demand` service tier has a limit of `6000 TPM`
190 | - You will want a [paid plan](https://github.com/cline/cline/issues/47#issuecomment-2640992272) for section writing with Groq models
191 | 
192 | (5) `deepseek-R1` [is not strong at function calling](https://api-docs.deepseek.com/guides/reasoning_model), which the assistant relies on to generate structured outputs for report sections and report section grading. See example traces [here](https://smith.langchain.com/public/07d53997-4a6d-4ea8-9a1f-064a85cd6072/r).
193 | - Consider providers that are strong at function calling such as OpenAI, Anthropic, and certain OSS models like Groq's `llama-3.3-70b-versatile`.
194 | - If you see the following error, it is likely due to the model not being able to produce structured outputs (see [trace](https://smith.langchain.com/public/8a6da065-3b8b-4a92-8df7-5468da336cbe/r)):
195 | ```
196 | groq.APIError: Failed to call a function. Please adjust your prompt. See 'failed_generation' for more details.
197 | ```
198 | 
199 | (6) Follow the instructions [here](https://github.com/langchain-ai/open_deep_research/issues/75#issuecomment-2811472408) to use OpenRouter.
200 | 
201 | (7) For working with local models via Ollama, see [here](https://github.com/langchain-ai/open_deep_research/issues/65#issuecomment-2743586318).
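
Before committing to a planner or writer model, it can save time to verify points (2) and (3) directly. The snippet below is a small sanity check using the same `init_chat_model()` API; the `Section` schema is an illustrative placeholder, not the schema the assistant actually uses.

```python
# Quick sanity check that a candidate model supports structured outputs
# (and, implicitly, tool calling) via init_chat_model.
from pydantic import BaseModel
from langchain.chat_models import init_chat_model


class Section(BaseModel):
    # Illustrative schema only; the assistant defines its own section schema.
    name: str
    description: str


llm = init_chat_model(model="claude-3-5-sonnet-latest", model_provider="anthropic")

# Models without structured-output / tool-calling support will fail here.
structured_llm = llm.with_structured_output(Section)
print(structured_llm.invoke("Propose one section for a report on the AI inference market."))
```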
202 | 203 | ## Testing Report Quality 204 | 205 | To compare the quality of reports generated by both implementations: 206 | 207 | ```bash 208 | # Test with default Anthropic models 209 | python tests/run_test.py --all 210 | 211 | # Test with OpenAI o3 models 212 | python tests/run_test.py --all \ 213 | --supervisor-model "openai:o3" \ 214 | --researcher-model "openai:o3" \ 215 | --planner-provider "openai" \ 216 | --planner-model "o3" \ 217 | --writer-provider "openai" \ 218 | --writer-model "o3" \ 219 | --eval-model "openai:o3" \ 220 | --search-api "tavily" 221 | ``` 222 | 223 | The test results will be logged to LangSmith, allowing you to compare the quality of reports generated by each implementation with different model configurations. 224 | 225 | ## UX 226 | 227 | ### Local deployment 228 | 229 | Follow the [quickstart](#-quickstart) to start LangGraph server locally. 230 | 231 | ### Hosted deployment 232 | 233 | You can easily deploy to [LangGraph Platform](https://langchain-ai.github.io/langgraph/concepts/#deployment-options). 234 | -------------------------------------------------------------------------------- /examples/arxiv.md: -------------------------------------------------------------------------------- 1 | # Obesity Among Young Adults in the United States: A Growing Public Health Challenge 2 | 3 | The obesity epidemic among young adults in the United States represents a complex public health crisis shaped by interconnected social, economic, and environmental factors. Recent research reveals that over one-third of US adults suffer from obesity, with rates disproportionately affecting disadvantaged communities. This health challenge extends beyond individual choices, as built environment characteristics and socioeconomic conditions explain up to 90% of obesity prevalence variation across American cities. Understanding these systemic influences is crucial for developing effective interventions that address both individual and community-level factors contributing to obesity among young adults. 4 | 5 | ## Obesity Prevalence and Trends in US Young Adults 6 | 7 | **Over one-third of US adults suffer from obesity, with the condition showing strong correlations to socioeconomic and environmental factors that disproportionately affect disadvantaged communities.** National data reveals systematic variations in obesity rates that map closely to neighborhood characteristics and built environment features. 8 | 9 | Advanced analysis using satellite imagery and machine learning has demonstrated that built environment characteristics explain 72-90% of obesity prevalence variation at the census tract level across major US cities. These correlations are particularly pronounced in disadvantaged neighborhoods where multiple social determinants of health intersect. 10 | 11 | Key factors associated with higher adult obesity rates include: 12 | - Lower median household income 13 | - Limited health insurance coverage 14 | - Higher concentration of rental housing 15 | - Reduced access to physical activity resources 16 | - Higher poverty rates 17 | 18 | A comprehensive study in Shelby County, Tennessee exemplifies these patterns, showing significantly higher obesity prevalence in areas with multiple socioeconomic challenges. The findings suggest that addressing structural and environmental factors may be as crucial as individual interventions for reducing obesity rates. 
19 | 20 | ### Sources 21 | - Association Between Neighborhood Factors and Adult Obesity in Shelby County, Tennessee (2022): http://arxiv.org/abs/2208.05335v1 22 | - Using Deep Learning to Examine the Association between the Built Environment and Neighborhood Adult Obesity Prevalence (2017): http://arxiv.org/abs/1711.00885v1 23 | - Progress of the anti-obesity of Berberine (2025): http://arxiv.org/abs/2501.02282v1 24 | 25 | ## Socioeconomic Determinants of Obesity in Young Adults 26 | 27 | **Social and economic disparities create stark differences in obesity prevalence among young adults, with disadvantaged neighborhoods showing up to 90% higher rates compared to affluent areas.** Research from Shelby County, Tennessee demonstrates how multiple socioeconomic factors intersect to influence obesity risk through both direct and indirect pathways. 28 | 29 | Key social determinants shaping obesity outcomes include: 30 | * Median household income - Affects access to healthy food options 31 | * Insurance status - Determines preventive care availability 32 | * Housing conditions - Influences exposure to obesity-promoting environments 33 | * Education level - Impacts health literacy and dietary choices 34 | * Geographic location - Correlates with neighborhood resources 35 | 36 | Advanced geospatial analysis reveals that built environment characteristics explain 72-90% of obesity variation across cities. In Shelby County, census tracts with higher percentages of uninsured residents, home renters, and individuals living below the poverty level demonstrated significantly elevated obesity rates. 37 | 38 | These findings emphasize the need for obesity interventions that address systemic inequalities rather than focusing solely on individual behavior modification. Public health initiatives must consider how social determinants create barriers to healthy weight maintenance. 39 | 40 | ### Sources 41 | - Association Between Neighborhood Factors and Adult Obesity in Shelby County, Tennessee: http://arxiv.org/abs/2208.05335v1 42 | - Using Deep Learning to Examine the Association between the Built Environment and Neighborhood Adult Obesity Prevalence: http://arxiv.org/abs/1711.00885v1 43 | 44 | ## Built Environment's Impact on Obesity 45 | 46 | **The physical design of urban spaces significantly influences obesity rates, with walkability and food accessibility emerging as critical factors that can increase obesity risk by up to 42% in underserved areas.** Research demonstrates that neighborhood characteristics create complex ecosystems affecting dietary health and physical activity patterns. 47 | 48 | The built environment shapes obesity risk through three primary mechanisms: food accessibility, physical activity opportunities, and socioeconomic factors. Studies reveal that areas with limited walkability and higher concentrations of fast-food establishments, particularly through online food delivery platforms, create "cyber food swamps" that contribute to unhealthy dietary choices. A 10% increase in accessible fast-food options raises the probability of unhealthy food orders by 22%. 
49 | 50 | Key built environment factors affecting obesity include: 51 | * Walking infrastructure and neighborhood walkability 52 | * Distance to healthy food retailers versus fast food 53 | * Availability of recreational facilities 54 | * Transportation access 55 | * Socioeconomic status of the area 56 | 57 | Recent research in tertiary education campuses demonstrates that improving walkability can increase positive walking experiences by 9.75%, suggesting that targeted modifications to the built environment could help reduce obesity rates. 58 | 59 | ### Sources 60 | - Using Tableau and Google Map API for Understanding the Impact of Walkability on Dublin City: http://arxiv.org/abs/2310.07563v1 61 | - Exploring the Causal Relationship between Walkability and Affective Walking Experience: http://arxiv.org/abs/2311.06262v1 62 | - Cyber Food Swamps: Investigating the Impacts of Online-to-Offline Food Delivery Platforms: http://arxiv.org/abs/2409.16601v2 63 | - The association between neighborhood obesogenic factors and prostate cancer risk and mortality: http://arxiv.org/abs/2405.18456v1 64 | 65 | ## Machine Learning Applications in Obesity Analysis 66 | 67 | **Advanced machine learning and deep learning techniques are revolutionizing obesity research by uncovering complex patterns in environmental, behavioral, and socioeconomic factors, with prediction accuracies reaching up to 88% for adolescent obesity risk.** 68 | 69 | Recent studies using deep learning analysis of satellite imagery have demonstrated that built environment features can explain 72-90% of obesity prevalence variation across U.S. cities. This breakthrough enables automated assessment of neighborhood characteristics that influence obesity rates at the census tract level. 70 | 71 | Machine learning models have identified key social determinants of health strongly correlated with adult obesity, including: 72 | * Median household income 73 | * Housing status (rental vs. ownership) 74 | * Insurance coverage 75 | * Race and ethnicity demographics 76 | * Age distribution 77 | * Marital status 78 | 79 | Novel applications include DeepHealthNet, which achieves 88.4% accuracy in adolescent obesity prediction by analyzing physical activity patterns and health metrics. Similarly, recurrent neural networks analyzing longitudinal patient records and wearable device data have achieved 77-86% accuracy in predicting obesity status improvements. 80 | 81 | These insights are particularly valuable for public health decision-making, enabling targeted interventions in disadvantaged neighborhoods where obesity prevalence is significantly higher. 82 | 83 | ### Sources 84 | - Using Deep Learning to Examine the Built Environment and Neighborhood Adult Obesity: http://arxiv.org/abs/1711.00885v1 85 | - DeepHealthNet: Adolescent Obesity Prediction System: http://arxiv.org/abs/2308.14657v2 86 | - Association Between Neighborhood Factors and Adult Obesity in Shelby County, Tennessee: http://arxiv.org/abs/2208.05335v1 87 | - Recurrent Neural Networks based Obesity Status Prediction: http://arxiv.org/abs/1809.07828v1 88 | 89 | ## Current Interventions and Policy Recommendations 90 | 91 | **Current obesity interventions targeting young adults must shift from individual-focused approaches to addressing systemic neighborhood-level factors that drive health disparities.** Research demonstrates that built environment characteristics explain up to 90% of obesity prevalence variation across cities, highlighting the critical role of structural determinants. 
92 | 93 | Recent geospatial analyses have identified key social determinants that shape obesity rates in disadvantaged communities, including housing stability, food access, and neighborhood infrastructure. The Shelby County, Tennessee case study reveals significant associations between obesity prevalence and multiple socioeconomic factors, particularly in areas with lower median household incomes and higher percentages of uninsured residents. 94 | 95 | To develop more effective interventions, policymakers should prioritize: 96 | * Implementing zoning policies that promote physical activity 97 | * Improving access to healthy food options in underserved areas 98 | * Addressing housing stability through rental assistance programs 99 | * Expanding health insurance coverage in high-risk communities 100 | * Investing in neighborhood infrastructure improvements 101 | 102 | These evidence-based policy measures represent a crucial shift toward addressing the root causes of obesity through coordinated community-level interventions rather than focusing solely on individual behavior change. 103 | 104 | ### Sources 105 | - Association Between Neighborhood Factors and Adult Obesity in Shelby County, Tennessee: http://arxiv.org/abs/2208.05335v1 106 | - Using Deep Learning to Examine the Built Environment and Neighborhood Adult Obesity Prevalence: http://arxiv.org/abs/1711.00885v1 107 | - Structured psychosocial stress and the US obesity epidemic: http://arxiv.org/abs/q-bio/0312011v1 108 | 109 | # Obesity in Young Adults: A Complex Public Health Challenge 110 | 111 | The rising prevalence of obesity among young adults in the United States represents a critical public health challenge shaped by interconnected social, economic, and environmental factors. Recent research reveals that over one-third of US adults suffer from obesity, with rates disproportionately affecting disadvantaged communities. Advanced analysis demonstrates that neighborhood characteristics and built environment features explain up to 90% of obesity prevalence variation across major cities, highlighting how systemic inequalities create barriers to maintaining healthy weight. 112 | 113 | ## Key Findings and Future Directions 114 | 115 | The evidence demonstrates that obesity in young adults stems from complex interactions between built environment, socioeconomic factors, and healthcare access. Machine learning analyses have revolutionized our understanding of these relationships, achieving prediction accuracies up to 88% for obesity risk. The research points to critical areas requiring immediate intervention: 116 | 117 | * Built Environment Modifications 118 | - Improve neighborhood walkability 119 | - Increase access to recreational facilities 120 | - Address food desert challenges 121 | - Regulate "cyber food swamps" 122 | 123 | * Policy Interventions 124 | - Expand health insurance coverage 125 | - Implement supportive housing policies 126 | - Develop targeted community programs 127 | - Enhance public transportation access 128 | 129 | Success in reducing obesity rates will require coordinated efforts that address these systemic factors rather than focusing solely on individual behavior change. Future initiatives must prioritize evidence-based structural interventions that promote health equity across all communities. 
-------------------------------------------------------------------------------- /examples/inference-market-gpt45.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | The AI inference market is rapidly expanding, driven by growing demand for real-time data processing and advancements in specialized hardware and cloud-based solutions. This report examines three innovative companies—Fireworks AI, Together.ai, and Groq—that are shaping the competitive landscape. Fireworks AI offers flexible, multimodal inference solutions; Together.ai emphasizes optimized performance for open-source models; and Groq delivers unmatched speed through custom hardware. By analyzing their technologies, market positioning, and performance metrics, this report provides insights into how these key players are influencing the future of AI inference. 4 | 5 | ## Market Overview of AI Inference 6 | 7 | **The global AI inference server market is experiencing rapid growth, projected to expand from USD 38.4 billion in 2023 to USD 166.7 billion by 2031, at a CAGR of 18%.** This growth is driven by increasing demand for real-time data processing, advancements in AI technologies, and widespread adoption of cloud-based and edge computing solutions. 8 | 9 | North America currently dominates the market, accounting for approximately 38% of global revenue, due to its advanced technological infrastructure, significant R&D investments, and presence of major industry players such as NVIDIA, Intel, and Dell. Asia-Pacific is expected to exhibit the highest growth rate, driven by rapid digital transformation initiatives and government support for AI adoption, particularly in China, India, and Japan. 10 | 11 | Key factors influencing market growth include: 12 | 13 | - Rising adoption of AI-driven applications in healthcare, finance, automotive, and retail sectors. 14 | - Increased deployment of specialized hardware (GPUs, TPUs, FPGAs) optimized for AI workloads. 15 | - Growing preference for cloud-based deployment models due to scalability and cost-effectiveness. 16 | 17 | However, high initial implementation costs, complexity of integration, and data privacy concerns remain significant challenges. 18 | 19 | ### Sources 20 | 21 | - AI Inference Server Market Size, Scope, Growth, and Forecast : https://www.verifiedmarketresearch.com/product/ai-inference-server-market/ 22 | - AI Server Market Size & Share, Growth Forecasts Report 2032 : https://www.gminsights.com/industry-analysis/ai-server-market 23 | - AI Inference Server Market Forecast To 2032 : https://www.businessresearchinsights.com/market-reports/ai-inference-server-market-118293 24 | 25 | ## Deep Dive: Fireworks AI 26 | 27 | **Fireworks AI provides a flexible inference platform optimized for deploying and fine-tuning large language models (LLMs), emphasizing ease of use, scalability, and performance customization.** 28 | 29 | The platform supports two primary deployment modes: serverless inference and dedicated deployments. Serverless inference allows quick experimentation with popular pre-deployed models like Llama 3.1 405B, billed per token without guaranteed SLAs. Dedicated deployments offer private, GPU-based infrastructure with performance guarantees, supporting both base models and efficient Low-Rank Adaptation (LoRA) addons. 30 | 31 | Fireworks AI's Document Inlining feature notably extends text-based models into multimodal capabilities, enabling visual reasoning tasks by seamlessly integrating image and PDF content. 
Performance optimization techniques include quantization, batching, and caching, tailored to specific use cases such as chatbots and coding assistants requiring low latency. 32 | 33 | Competitively, Fireworks AI positions itself against providers like OpenAI and Cohere, with a recent Series B funding round of $52M, total funding of $77M, and estimated annual recurring revenue (ARR) around $6M. 34 | 35 | - Founded: 2022 36 | - Headquarters: Redwood City, CA 37 | - Employees: ~60 38 | - Key Investors: Sequoia Capital, NVIDIA, AMD Ventures 39 | 40 | ### Sources 41 | - Overview - Fireworks AI Docs : https://docs.fireworks.ai/models/overview 42 | - Performance optimization - Fireworks AI Docs : https://docs.fireworks.ai/faq/deployment/performance/optimization 43 | - DeepSeek R1 Just Got Eyes with Fireworks AI Document Inlining : https://fireworks.ai/blog/deepseek-r1-got-eyes 44 | - Fireworks AI 2025 Company Profile: Valuation, Funding & Investors : https://pitchbook.com/profiles/company/561272-14 45 | - Fireworks AI: Contact Details, Revenue, Funding, Employees and Company Profile : https://siliconvalleyjournals.com/company/fireworks-ai/ 46 | - Fireworks AI - Overview, News & Similar companies - ZoomInfo : https://www.zoominfo.com/c/fireworks-ai-inc/5000025791 47 | - Fireworks AI Stock Price, Funding, Valuation, Revenue & Financial : https://www.cbinsights.com/company/fireworks-ai/financials 48 | 49 | ## Deep Dive: Together.ai 50 | 51 | **Together.ai differentiates itself in the AI inference market through its comprehensive cloud platform, optimized for rapid inference, extensive model selection, and flexible GPU infrastructure.** 52 | 53 | Together.ai provides a robust cloud-based solution for training, fine-tuning, and deploying generative AI models, emphasizing high-performance inference capabilities. Its inference engine leverages proprietary technologies such as FlashAttention-3 and speculative decoding, achieving inference speeds up to four times faster than competitors. The platform supports over 100 open-source models, including popular large language models (LLMs) like Llama-2 and RedPajama, enabling developers to quickly experiment and deploy tailored AI solutions. 54 | 55 | Together.ai's flexible GPU clusters, featuring NVIDIA H100 and H200 GPUs interconnected via high-speed Infiniband networks, facilitate scalable distributed training and inference workloads. This infrastructure positions Together.ai competitively against GPU cloud providers like CoreWeave and Lambda Labs, particularly for startups and enterprises requiring variable compute resources. 56 | 57 | Financially, Together.ai has demonstrated rapid growth, reaching an estimated $130M ARR in 2024, driven by increasing demand for generative AI applications and developer-friendly tooling. 
58 | 59 | ### Sources 60 | - Together AI: Reviews, Features, Pricing, Guides, and Alternatives : https://aipure.ai/products/together-ai 61 | - Together AI revenue, valuation & growth rate | Sacra : https://sacra.com/c/together-ai/ 62 | - AI Solutions with Together.ai: Inference, Fine-Tuning & Models : https://pwraitools.com/generative-ai-tools/ai-solutions-with-together-ai-inference-fine-tuning-and-models/ 63 | 64 | ## Deep Dive: Groq 65 | 66 | **Groq's vertically integrated Tensor Streaming Processor (TSP) architecture delivers unmatched inference performance and energy efficiency, significantly outperforming traditional GPUs.** 67 | 68 | Groq's TSP chip achieves inference speeds of 500-700 tokens per second on large language models, representing a 5-10x improvement over Nvidia's latest GPUs. Independent benchmarks confirm Groq's LPU (Language Processing Unit) reaches 276 tokens per second on Meta's Llama 3.3 70B model, maintaining consistent performance across varying context lengths without typical latency trade-offs. 69 | 70 | Groq's unique hardware-software co-design eliminates external memory dependencies, embedding memory directly on-chip. This approach reduces data movement, resulting in up to 10x greater energy efficiency compared to GPUs. GroqCloud, the company's cloud inference platform, supports popular open-source models and has attracted over 360,000 developers. 71 | 72 | Financially, Groq has raised $640 million in a Series D round at a $2.8 billion valuation, reflecting strong market confidence. Groq plans to deploy over 108,000 LPUs by early 2025, positioning itself as a leading provider of low-latency AI inference infrastructure. 73 | 74 | ### Sources 75 | - Groq revenue, valuation & funding | Sacra : https://sacra.com/c/groq/ 76 | - Groq Raises $640M To Meet Soaring Demand for Fast AI Inference : https://groq.com/news_press/groq-raises-640m-to-meet-soaring-demand-for-fast-ai-inference/ 77 | - New AI Inference Speed Benchmark for Llama 3.3 70B, Powered by Groq : https://groq.com/new-ai-inference-speed-benchmark-for-llama-3-3-70b-powered-by-groq/ 78 | - Groq Inference Performance, Quality, & Cost Savings : https://groq.com/inference/ 79 | - GroqThoughts PowerPaper 2024 : https://groq.com/wp-content/uploads/2024/07/GroqThoughts_PowerPaper_2024.pdf 80 | 81 | ## Comparative Analysis 82 | 83 | **Fireworks AI, Together.ai, and Groq each offer distinct strengths in AI inference, targeting different market segments and performance needs.** 84 | 85 | Fireworks AI emphasizes speed and scalability through its proprietary FireAttention inference engine, delivering multi-modal capabilities (text, image, audio) with low latency. It prioritizes data privacy, maintaining HIPAA and SOC2 compliance, and offers flexible deployment options including serverless and on-demand models. 86 | 87 | Together.ai differentiates itself by providing optimized inference for over 200 open-source large language models (LLMs). It achieves sub-100ms latency through automated infrastructure optimizations such as token caching, load balancing, and model quantization. Its cost-effective approach makes it attractive for developers requiring extensive model variety and scalability. 88 | 89 | Groq specializes in hardware-accelerated inference, leveraging its custom Tensor Streaming Processor (TSP) chip architecture. GroqCloud provides ultra-low latency inference performance (500-700 tokens/second), significantly outperforming traditional GPUs. 
Groq targets latency-sensitive enterprise applications, including conversational AI and autonomous systems, with both cloud and on-premises deployment options. 90 | 91 | | Feature | Fireworks AI | Together.ai | Groq | 92 | |---------------------|------------------------------|------------------------------|-------------------------------| 93 | | Technology | Proprietary inference engine | Optimized open-source models | Custom hardware (TSP chips) | 94 | | Market Positioning | Multi-modal, privacy-focused | Cost-effective, scalable | Ultra-low latency enterprise | 95 | | Revenue Estimates | Not publicly available | Not publicly available | $3.4M (2023) | 96 | | Performance Metrics | Low latency, multi-modal | Sub-100ms latency | 500-700 tokens/sec inference | 97 | 98 | ### Sources 99 | - Fireworks AI vs GroqCloud Platform Comparison 2025 | PeerSpot : https://www.peerspot.com/products/comparisons/fireworks-ai_vs_groqcloud-platform 100 | - Fireworks AI vs Together Inference Comparison 2025 | PeerSpot : https://www.peerspot.com/products/comparisons/fireworks-ai_vs_together-inference 101 | - Top 10 AI Inference Platforms in 2025 - DEV Community : https://dev.to/lina_lam_9ee459f98b67e9d5/top-10-ai-inference-platforms-in-2025-56kd 102 | - Groq revenue, valuation & funding | Sacra : https://sacra.com/c/groq/ 103 | 104 | ## Conclusion and Synthesis 105 | 106 | The AI inference market is rapidly expanding, projected to reach $166.7 billion by 2031, driven by demand for real-time processing and specialized hardware. Fireworks AI, Together.ai, and Groq each offer distinct competitive advantages: 107 | 108 | | Feature | Fireworks AI | Together.ai | Groq | 109 | |--------------------|-----------------------------------|----------------------------------|----------------------------------| 110 | | Core Strength | Multi-modal, privacy-focused | Extensive open-source support | Custom hardware, ultra-low latency | 111 | | Technology | Proprietary inference engine | Optimized GPU infrastructure | Tensor Streaming Processor (TSP) | 112 | | Revenue Estimates | ~$6M ARR | ~$130M ARR | ~$3.4M ARR | 113 | | Performance | Low latency, flexible deployment | Sub-100ms latency | 500-700 tokens/sec inference | 114 | 115 | Next steps include monitoring Groq's hardware adoption, evaluating Together.ai's scalability for diverse models, and assessing Fireworks AI's multimodal capabilities for specialized enterprise applications. -------------------------------------------------------------------------------- /examples/inference-market.md: -------------------------------------------------------------------------------- 1 | # The AI Inference Market: Analyzing Emerging Leaders 2 | 3 | The AI inference market is experiencing unprecedented growth, projected to reach $133.2 billion by 2034, as specialized providers challenge traditional semiconductor dominance. While established chip manufacturers control over 80% of the market, new entrants like Fireworks, Together.ai, and Groq are reshaping the competitive landscape through innovative approaches to inference optimization and pricing. 4 | 5 | This analysis examines how these emerging players are disrupting the market through differentiated technologies, aggressive pricing strategies, and superior performance metrics, particularly in the rapidly expanding cloud-based inference segment that now represents 55% of total market share. Their success highlights a fundamental shift in how AI computation is being delivered and monetized. 
6 | 7 | ## AI Inference Market Overview 8 | 9 | **The global AI inference market is experiencing unprecedented growth, projected to reach $133.2 billion by 2034, with a transformative shift occurring in market dynamics as new specialized providers challenge traditional semiconductor dominance.** 10 | 11 | While established chip manufacturers (NVIDIA, AMD, Intel) control 80-82% of the market, emerging players are gaining traction through differentiated approaches. The market expansion is particularly evident in cloud-based deployments, which now represent 55% of total market share. 12 | 13 | Key factors driving market evolution include: 14 | * Increasing demand for real-time processing capabilities 15 | * Shift toward token-based pricing models 16 | * Rising adoption of specialized AI hardware 17 | * Growth in open-source model deployment 18 | * Integration of edge computing solutions 19 | 20 | North America maintains market leadership with 38% global share, generating $9.34 billion in revenue (2024). This dominance stems from robust digital infrastructure and concentrated presence of technology companies, particularly in the United States where revenue reaches $8.6 billion. 21 | 22 | The market shows sustained growth potential, supported by ongoing infrastructure investments and technological innovation, particularly in cloud-based deployments where North America maintains clear leadership. 23 | 24 | ### Sources 25 | - AI Inference Server Market Forecast : https://www.einpresswire.com/article/779610673/ai-inference-server-market-supports-new-technology-with-usd-133-2-billion-by-2034-regional-growth-at-usd-9-34-billion 26 | - SemiAnalysis Market Report : https://semianalysis.com/2024/02/21/groq-inference-tokenomics-speed-but/ 27 | - Markets and Markets AI Inference Report : https://www.marketsandmarkets.com/Market-Reports/ai-inference-market-189921964.html 28 | 29 | ## Fireworks.ai Profile 30 | 31 | **Fireworks.ai has emerged as a significant AI inference provider by focusing on performance optimization, reaching a $552M valuation in 2024 with an estimated $44M in annual revenue.** Their platform serves over 25 billion tokens daily to more than 23,000 developers through a tiered pricing structure that scales with usage. 32 | 33 | The company's technical differentiation comes from custom optimizations like FireAttention, which demonstrates superior performance metrics compared to competitors. Benchmark tests show up to 5.6x higher throughput and 12.2x lower latency versus vLLM for Mixtral 8x7B models in fp8 format. 34 | 35 | Their pricing model combines usage-based tiers with flexible deployment options: 36 | * Basic tier: $50/month spending limit 37 | * Growth tier: $500/month spending limit 38 | * Scale tier: $5,000/month spending limit 39 | * Enterprise tier: Custom limits with dedicated support 40 | * On-demand GPU deployments: $2.90-$9.99 per hour 41 | 42 | Notable enterprise customers including DoorDash, Quora, and Upwork validate their approach. Since founding in 2022, Fireworks has secured $77M in funding from investors like Benchmark and Sequoia Capital. 
43 | 44 | ### Sources 45 | - Fireworks AI Valued at $552M: https://www.pymnts.com/news/investment-tracker/2024/fireworks-ai-valued-552-million-dollars-after-new-funding-round/ 46 | - FireAttention v3 Performance Metrics: https://fireworks.ai/blog/fireattention-v3 47 | - AWS Case Study: https://aws.amazon.com/solutions/case-studies/fireworks-ai-case-study/ 48 | 49 | ## Together.ai Profile 50 | 51 | **Together.ai has established itself as a major AI inference provider by combining competitive pricing with superior technical performance, reaching a $3.3B valuation in early 2024.** Their platform supports over 200 open-source models and serves both individual developers and enterprise customers through a tiered pricing structure. 52 | 53 | The company's technical advantage stems from their integrated inference stack, which delivers up to 400 tokens per second on Llama models. This performance translates to significant cost savings, with their 70B parameter models priced at $0.88 per million tokens—substantially below market rates. 54 | 55 | Their pricing strategy segments customers into three tiers: 56 | - Build: Pay-as-you-go with $1 free credit for developers 57 | - Scale: Reserved GPU instances for production workloads 58 | - Enterprise: Private deployments with custom optimization 59 | 60 | Notable enterprise adoption includes Salesforce, Zoom, and The Washington Post, validating their platform's capabilities. Together.ai's recent $305M Series B funding demonstrates strong market confidence in their approach to democratizing AI infrastructure. 61 | 62 | ### Sources 63 | - Together.ai Series B Announcement: https://www.together.ai/blog/together-ai-announcing-305m-series-b 64 | - Together.ai Pricing Strategy: https://canvasbusinessmodel.com/blogs/marketing-strategy/together-ai-marketing-strategy 65 | - Salesforce Ventures Investment: https://salesforceventures.com/perspectives/welcome-together-ai/ 66 | 67 | ## Groq Profile 68 | 69 | **Groq's Language Processing Unit (LPU) represents a radical departure from traditional GPU architectures, delivering superior inference performance at significantly lower costs.** Their proprietary tensor-streaming processor achieves 241 tokens per second for Llama 2 Chat (70B), more than double competing solutions, while maintaining exceptional energy efficiency at 1-3 joules per token. 70 | 71 | The company's aggressive pricing strategy undercuts competitors, offering Mixtral 8x7B inference at $0.24 per million tokens compared to Fireworks' $0.50. This pricing advantage stems from lower manufacturing costs ($6,000 per 14nm wafer vs. $16,000 for NVIDIA's 5nm H100) and architectural efficiencies. 72 | 73 | Key competitive advantages: 74 | - Superior inference speed: Up to 18x faster than cloud competitors 75 | - Cost efficiency: $20,000 per LPU vs $25,000+ for NVIDIA H100 76 | - Energy optimization: 80 TB/s bandwidth with 750 TOPS at INT8 77 | 78 | Recently valued at $2.8 billion after raising $640M, Groq has gained significant traction with over 360,000 developers on GroqCloud. While 2023 revenue was modest at $3.4M, planned deployment of 108,000 LPUs by Q1 2025 positions them for substantial growth in the expanding inference market. 
79 | 80 | ### Sources 81 | - Groq Report Analysis: https://notice-reports.s3.amazonaws.com/Groq%20Report%202024.12.23_17.58.23.pdf 82 | - SemiAnalysis Pricing Study: https://semianalysis.com/2024/02/21/groq-inference-tokenomics-speed-but/ 83 | - Groq Funding Announcement: https://www.prnewswire.com/news-releases/groq-raises-640m-to-meet-soaring-demand-for-fast-ai-inference-302214097.html 84 | 85 | ## Comparative Performance Analysis 86 | 87 | **Recent benchmarks reveal Groq as the current performance leader in LLM inference, with Together.ai and Fireworks competing for second position across key metrics.** Independent testing from ArtificialAnalysis.ai shows significant variations in core performance indicators: 88 | 89 | | Provider | TTFT (seconds) | Tokens/Second | Cost (per 1M tokens) | 90 | |----------|---------------|---------------|---------------------| 91 | | Groq | 0.22 | 241 | $0.27 | 92 | | Together | 0.50 | 117 | $0.88 | 93 | | Fireworks | 0.40 | 98 | $0.90 | 94 | 95 | Performance advantages can vary significantly based on specific workloads and model sizes. Together.ai's Inference Engine 2.0 demonstrates strong performance with smaller models, while Fireworks maintains consistent performance across their model range. 96 | 97 | A notable limitation emerges with larger inputs - Groq shows a 560% increase in TTFT when processing 10K versus 1K input tokens. This suggests optimal use cases may differ between providers despite headline performance metrics. 98 | 99 | The competitive landscape remains dynamic, with providers regularly releasing optimization updates that can significantly impact these metrics. 100 | 101 | ### Sources 102 | - ArtificialAnalysis.ai LLM Benchmark: https://wandb.ai/capecape/benchmark_llama_70b/reports/Is-the-new-Cerebras-API-the-fastest-LLM-service-provider 103 | - Comparative Analysis of AI API Providers: https://friendli.ai/blog/comparative-analysis-ai-api-provider 104 | - Together Inference Engine Analysis: https://www.together.ai/blog/together-inference-engine-v1 105 | 106 | ## Conclusion and Market Outlook 107 | 108 | The AI inference market is rapidly evolving with specialized providers challenging traditional semiconductor dominance. Our analysis reveals distinct competitive advantages among emerging leaders: 109 | 110 | | Provider | Key Strength | Performance | Pricing | Market Position | 111 | |----------|--------------|-------------|----------|-----------------| 112 | | Groq | Custom LPU Architecture | 241 tokens/sec | $0.24/M tokens | $2.8B valuation, disruptive hardware | 113 | | Together.ai | Model Variety | 117 tokens/sec | $0.88/M tokens | $3.3B valuation, broad adoption | 114 | | Fireworks | Optimization Tech | 98 tokens/sec | $0.90/M tokens | $552M valuation, developer focus | 115 | 116 | Looking ahead, Groq's superior performance metrics and aggressive pricing position them to capture significant market share, particularly in high-throughput applications. Together.ai's extensive model support and enterprise relationships suggest continued growth in the mid-market segment, while Fireworks' optimization technology provides a strong foundation for specialized use cases. As the market expands toward $133.2B by 2034, these providers are well-positioned to challenge NVIDIA's dominance through differentiated approaches to inference delivery. 
-------------------------------------------------------------------------------- /examples/pubmed.md: -------------------------------------------------------------------------------- 1 | # Diabetic Nephropathy Treatment: Current Approaches and Future Directions 2 | 3 | Diabetic nephropathy has emerged as the leading cause of end-stage renal disease worldwide, affecting approximately 40% of diabetes patients. The condition's progressive nature and complex pathophysiology demand early intervention through comprehensive treatment strategies. Recent advances in therapeutic options, from SGLT2 inhibitors to non-steroidal mineralocorticoid receptor antagonists, have transformed the management landscape. This report examines current treatment protocols, emerging therapies, and diagnostic approaches, with particular emphasis on the growing importance of personalized medicine and integrated care models in improving patient outcomes. 4 | 5 | ## Key Treatment Advances and Future Directions 6 | 7 | Modern diabetic nephropathy management has evolved into a sophisticated, multi-faceted approach that combines established treatments with innovative therapies. The emergence of the four-pillar treatment strategy, incorporating RAS blockers, SGLT2 inhibitors, GLP-1 receptor agonists, and finerenone, represents a significant advancement in care standards. Technological progress in diagnostic tools, particularly multiparametric MRI and novel biomarkers, enables earlier intervention and more precise monitoring of disease progression. 8 | 9 | Key developments driving treatment evolution: 10 | * Integration of multiple therapeutic agents for enhanced outcomes 11 | * Adoption of personalized medicine approaches using proteomics 12 | * Implementation of comprehensive care models showing cost-effective results 13 | * Advanced imaging techniques enabling non-invasive monitoring 14 | * Emergence of novel biomarkers for earlier detection 15 | 16 | The future of diabetic nephropathy treatment lies in closing the evidence-to-practice gap and expanding access to these advanced therapeutic options. 17 | 18 | ## Prevalence and Mechanisms of Diabetic Nephropathy 19 | 20 | **Diabetic nephropathy has become the leading cause of end-stage renal disease worldwide, affecting approximately 40% of diabetes patients and contributing to 38% of renal disease cases in regions like the Philippines.** 21 | 22 | The pathogenesis involves complex interactions between metabolic and hemodynamic factors. Hyperglycemia triggers increased production of advanced glycation end-products (AGEs) and activates inflammatory pathways, while concurrent hypertension amplifies kidney damage through elevated glomerular pressure. The condition typically develops over 10-15 years as these mechanisms progressively damage the kidney's filtering system. 23 | 24 | Key risk factors that accelerate nephropathy progression include: 25 | * Poorly controlled blood glucose (HbA1c >7%) 26 | * Sustained hypertension (>130/80 mmHg) 27 | * Genetic variants in ACE and APOL1 genes 28 | * Obesity and smoking 29 | * Limited access to regular screening 30 | 31 | Recent guidelines from KDIGO emphasize the importance of early detection and holistic care through multidisciplinary teams. The initial presentation typically involves microalbuminuria, which can progress to overt proteinuria and declining glomerular filtration rate without intervention. 
Research shows that aggressive early treatment can delay or prevent progression, particularly when addressing both glycemic control and blood pressure management. 32 | 33 | ### Sources 34 | - Diabetic Nephropathy: StatPearls : https://pubmed.ncbi.nlm.nih.gov/30480939/ 35 | - Current status of diabetes mellitus care in the Philippines : https://pubmed.ncbi.nlm.nih.gov/38382166/ 36 | - Lifestyle Modifications in Delaying CKD Progression : https://pubmed.ncbi.nlm.nih.gov/36874334/ 37 | 38 | ## Biomarkers for Early Detection of Diabetic Nephropathy 39 | 40 | **The landscape of diabetic nephropathy detection is rapidly evolving beyond traditional microalbuminuria testing, as emerging biomarkers offer more precise and earlier disease identification.** While microalbuminuria remains the clinical standard, its limited predictive power has driven research into more sophisticated detection methods. 41 | 42 | Recent studies have identified several promising biomarker categories that can detect kidney damage before albumin changes become apparent. These include markers of specific nephron damage sites, oxidative stress indicators, and inflammatory signals. A comprehensive 2024 review highlighted five key biomarker categories: 43 | 44 | - Glomerular damage markers 45 | - Tubular damage indicators 46 | - Oxidative stress biomarkers 47 | - Inflammatory biomarkers 48 | - Novel molecular markers (miRNAs, proteomics, metabolomics) 49 | 50 | A significant advancement comes from combining multiple biomarker types. For example, integrating serum creatinine with cystatin C measurements has demonstrated superior accuracy in detecting early kidney dysfunction, particularly when using newer race-free prediction equations. This multi-marker approach reflects the complex pathophysiology of diabetic kidney disease and enables more personalized intervention strategies. 51 | 52 | ### Sources 53 | - Insights into the Novel Biomarkers Expressed in Diabetic Nephropathy (2024): https://pubmed.ncbi.nlm.nih.gov/39415582/ 54 | - Diagnostic challenges of diabetic kidney disease (2023): https://pubmed.ncbi.nlm.nih.gov/37545693/ 55 | - Urinary biomarkers for early diabetic nephropathy (2014): https://pubmed.ncbi.nlm.nih.gov/25060761/ 56 | 57 | ## Treatment Protocols for Diabetic Nephropathy 58 | 59 | **Modern diabetic nephropathy management requires a comprehensive approach combining established treatments with emerging therapeutic options to effectively slow disease progression and protect kidney function.** The foundation remains strict glycemic control (HbA1c <7%) and blood pressure management (<130/80 mmHg in patients with albuminuria). 60 | 61 | Renin-angiotensin system (RAS) blockers, particularly ACE inhibitors and ARBs, continue as first-line treatments for their dual action on blood pressure and nephroprotection. Recent evidence supports combination therapy with newer agents for enhanced outcomes. 
62 | 63 | Key therapeutic advances include: 64 | * SGLT2 inhibitors (dapagliflozin, empagliflozin) - reduce disease progression by promoting urinary potassium excretion and normalizing plasma potassium levels 65 | * Non-steroidal mineralocorticoid receptor antagonists (finerenone) - decrease albuminuria and cardiovascular complications 66 | * Lifestyle modifications - Mediterranean diet adherence and regular exercise show significant benefits 67 | * Antioxidant interventions - target oxidative stress mechanisms 68 | 69 | The SONAR trial demonstrated that atrasentan, an endothelin receptor antagonist, significantly decreased renal events in diabetic kidney disease patients. Regular monitoring of kidney function, albuminuria, and electrolyte levels remains essential for optimizing treatment outcomes. 70 | 71 | ### Sources 72 | - What Not to Overlook in the Management of Patients with Type 2 Diabetes Mellitus: https://pubmed.ncbi.nlm.nih.gov/39062970/ 73 | - Lifestyle Modifications and Nutritional and Therapeutic Interventions: https://pubmed.ncbi.nlm.nih.gov/36874334/ 74 | - Diabetic Kidney Disease: https://pubmed.ncbi.nlm.nih.gov/25905328/ 75 | - Impaired distal renal potassium handling in diabetic mice: https://pubmed.ncbi.nlm.nih.gov/38779755/ 76 | 77 | ## Recent Advances in Diabetic Nephropathy Treatment 78 | 79 | **The emergence of a four-pillar treatment approach represents a paradigm shift in diabetic nephropathy management, moving beyond the traditional reliance on RAS blockade alone to include multiple complementary therapeutic agents.** This comprehensive strategy has demonstrated superior cardiorenal protection compared to single-agent approaches. 80 | 81 | The four essential pillars of modern treatment include: 82 | 83 | * RAS blockers (ACE inhibitors/ARBs) as foundational therapy 84 | * SGLT2 inhibitors for reducing kidney disease progression 85 | * GLP-1 receptor agonists for glycemic control and renoprotection 86 | * Finerenone, a non-steroidal mineralocorticoid receptor antagonist, for additional protection 87 | 88 | Recent clinical trials suggest that combining these therapies may provide additive benefits, though ongoing studies are still evaluating optimal combinations. The PRIORITY study exemplifies the movement toward personalized medicine, using urinary proteomics to predict treatment response and guide therapy selection. 89 | 90 | Implementation challenges persist, with many eligible patients not receiving recommended combinations. Healthcare systems are addressing this through specialized clinics and electronic health record-based decision support tools to narrow the evidence-to-practice gap. 91 | 92 | ### Sources 93 | - Finerenone: Do We Really Need an Additional Therapy in Type 2 Diabetes Mellitus and Kidney Disease?: https://pubmed.ncbi.nlm.nih.gov/39862018/ 94 | - Slowing the Progression of Chronic Kidney Disease in Patients with Type 2 Diabetes Using Four Pillars of Therapy: https://pubmed.ncbi.nlm.nih.gov/39259460/ 95 | - Updated evidence on cardiovascular and renal effects of GLP-1 receptor agonists: https://pubmed.ncbi.nlm.nih.gov/39548500/ 96 | 97 | ## Noninvasive MRI Techniques for Diabetic Nephropathy Assessment 98 | 99 | **Multiparametric MRI represents a breakthrough in noninvasive renal assessment, enabling detailed evaluation of kidney structure and function without radiation or contrast agents.** This technology combines multiple specialized imaging sequences to provide comprehensive insights into kidney health. 
100 | 101 | The diffusion-weighted imaging (DWI) sequence measures water molecule movement, offering early detection of interstitial fibrosis and predictive value for renal function deterioration in diabetic nephropathy. Blood oxygen level-dependent (BOLD) MRI assesses tissue oxygenation by detecting deoxyhemoglobin levels, proving particularly valuable for monitoring chronic kidney disease progression. 102 | 103 | Key MRI sequences and their clinical applications: 104 | - T1/T2 Relaxometry: Evaluates tissue water content and fibrosis; corticomedullary changes correlate with filtration rate 105 | - DWI: Measures microstructural changes and fibrosis development 106 | - BOLD: Monitors tissue oxygenation and predicts functional decline 107 | - Arterial Spin Labeling: Assesses renal hemodynamics without contrast 108 | 109 | While these techniques show promise for early disease detection and monitoring, further clinical trials are needed before widespread implementation. The technology's potential for personalized treatment decisions and virtual biopsy capabilities represents a significant advance in diabetic nephropathy management. 110 | 111 | ### Sources 112 | - Multiparametric MRI: can we assess renal function differently? (2024): https://pubmed.ncbi.nlm.nih.gov/40008350/ 113 | - Noninvasive Assessment of Diabetic Kidney Disease With MRI: Hype or Hope? (2023): https://pubmed.ncbi.nlm.nih.gov/37675919/ 114 | 115 | ## Integrated Care and Systemic Challenges in Diabetic Nephropathy Management 116 | 117 | **Quality improvement collaboratives in integrated diabetes care settings can significantly improve patient outcomes while remaining cost-effective, with studies showing increased life expectancy of nearly one year for male patients and 0.76 years for female patients.** The success of such integrated approaches demonstrates the critical importance of coordinated care between specialists in managing diabetic nephropathy. 118 | 119 | However, implementing effective integrated care faces several systemic barriers that must be addressed: 120 | 121 | * Limited specialist availability in rural regions 122 | * Poor communication between healthcare providers 123 | * Insurance coverage restrictions 124 | * Lack of standardized protocols 125 | * Delayed specialist referrals 126 | 127 | A notable example comes from a Netherlands study of integrated diabetes care across 37 general practices and 13 outpatient clinics. Their collaborative care model reduced cardiovascular event risk (hazard ratio: 0.83 for men, 0.98 for women) and cardiovascular mortality (hazard ratio: 0.78 for men, 0.88 for women). The program cost approximately €22 per patient initially, with lifetime costs increasing by €860 for men and €645 for women – proving highly cost-effective at under €2,000 per quality-adjusted life year. 128 | 129 | ### Sources 130 | - Cost-effectiveness of a quality improvement collaborative focusing on patients with diabetes: https://pubmed.ncbi.nlm.nih.gov/20808258/ 131 | 132 | # Diabetic Nephropathy Treatment: Current Approaches and Future Directions 133 | 134 | Diabetic nephropathy has emerged as the leading cause of end-stage renal disease globally, affecting 40% of diabetes patients and demanding increasingly sophisticated treatment approaches. The evolution of treatment strategies from single-agent protocols to comprehensive four-pillar approaches, combined with advances in early detection and monitoring, has transformed the management landscape. 
This report examines current best practices, emerging therapies, and the critical role of integrated care in improving patient outcomes. 135 | 136 | ## Key Findings and Treatment Framework 137 | 138 | Modern diabetic nephropathy management has evolved into a multi-faceted approach requiring careful coordination of therapeutic strategies. The evidence supports a structured treatment framework that combines established protocols with emerging innovations. 139 | 140 | * Foundation Treatments 141 | - Glycemic control (HbA1c <7%) 142 | - Blood pressure management (<130/80 mmHg) 143 | - RAS blockers (ACE inhibitors/ARBs) 144 | - Lifestyle modifications 145 | 146 | * Emerging Therapeutic Advances 147 | - SGLT2 inhibitors for disease progression 148 | - Non-steroidal mineralocorticoid receptor antagonists 149 | - GLP-1 receptor agonists 150 | - Multiparametric MRI for monitoring 151 | 152 | The path forward requires addressing implementation challenges through integrated care models while leveraging new diagnostic tools and biomarkers for earlier intervention. Success depends on bridging the evidence-to-practice gap through specialized clinics and improved coordination among healthcare providers. -------------------------------------------------------------------------------- /langgraph.json: -------------------------------------------------------------------------------- 1 | { 2 | "dockerfile_lines": [], 3 | "graphs": { 4 | "open_deep_research": "./src/open_deep_research/graph.py:graph", 5 | "open_deep_research_multi_agent": "./src/open_deep_research/multi_agent.py:graph" 6 | }, 7 | "python_version": "3.11", 8 | "env": "./.env", 9 | "dependencies": [ 10 | "." 11 | ] 12 | } -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "open_deep_research" 3 | version = "0.0.15" 4 | description = "Planning, research, and report generation." 
5 | authors = [ 6 | { name = "Lance Martin" } 7 | ] 8 | readme = "README.md" 9 | license = { text = "MIT" } 10 | requires-python = ">=3.10" 11 | dependencies = [ 12 | "langgraph>=0.2.55", 13 | "langchain-community>=0.3.9", 14 | "langchain-openai>=0.3.7", 15 | "langchain-anthropic>=0.3.9", 16 | "openai>=1.61.0", 17 | "tavily-python>=0.5.0", 18 | "langchain-groq>=0.2.4", 19 | "arxiv>=2.1.3", 20 | "pymupdf>=1.25.3", 21 | "xmltodict>=0.14.2", 22 | "linkup-sdk>=0.2.3", 23 | "duckduckgo-search>=3.0.0", 24 | "exa-py>=1.8.8", 25 | "requests>=2.32.3", 26 | "beautifulsoup4==4.13.3", 27 | "langchain-deepseek>=0.1.2", 28 | "python-dotenv>=1.0.1", 29 | "langgraph_supervisor", 30 | "langchain_tavily", 31 | "pytest", 32 | "httpx>=0.24.0", 33 | "markdownify>=0.11.6", 34 | "azure-identity>=1.21.0", 35 | "azure-search>=1.0.0b2", 36 | "azure-search-documents>=11.5.2" 37 | ] 38 | 39 | [project.optional-dependencies] 40 | dev = ["mypy>=1.11.1", "ruff>=0.6.1"] 41 | 42 | [build-system] 43 | requires = ["setuptools>=73.0.0", "wheel"] 44 | build-backend = "setuptools.build_meta" 45 | 46 | [tool.setuptools] 47 | packages = ["open_deep_research"] 48 | 49 | [tool.setuptools.package-dir] 50 | "open_deep_research" = "src/open_deep_research" 51 | 52 | [tool.setuptools.package-data] 53 | "*" = ["py.typed"] 54 | 55 | [tool.ruff] 56 | lint.select = [ 57 | "E", # pycodestyle 58 | "F", # pyflakes 59 | "I", # isort 60 | "D", # pydocstyle 61 | "D401", # First line should be in imperative mood 62 | "T201", 63 | "UP", 64 | ] 65 | lint.ignore = [ 66 | "UP006", 67 | "UP007", 68 | "UP035", 69 | "D417", 70 | "E501", 71 | ] 72 | 73 | [tool.ruff.lint.per-file-ignores] 74 | "tests/*" = ["D", "UP"] 75 | 76 | [tool.ruff.lint.pydocstyle] 77 | convention = "google" 78 | -------------------------------------------------------------------------------- /src/open_deep_research/__init__.py: -------------------------------------------------------------------------------- 1 | """Planning, research, and report generation.""" 2 | 3 | __version__ = "0.0.15" -------------------------------------------------------------------------------- /src/open_deep_research/configuration.py: -------------------------------------------------------------------------------- 1 | import os 2 | from enum import Enum 3 | from dataclasses import dataclass, fields 4 | from typing import Any, Optional, Dict 5 | 6 | from langchain_core.language_models.chat_models import BaseChatModel 7 | from langchain_core.runnables import RunnableConfig 8 | from dataclasses import dataclass 9 | 10 | DEFAULT_REPORT_STRUCTURE = """Use this structure to create a report on the user-provided topic: 11 | 12 | 1. Introduction (no research needed) 13 | - Brief overview of the topic area 14 | 15 | 2. Main Body Sections: 16 | - Each section should focus on a sub-topic of the user-provided topic 17 | 18 | 3. 
Conclusion 19 | - Aim for 1 structural element (either a list or table) that distills the main body sections 20 | - Provide a concise summary of the report""" 21 | 22 | class SearchAPI(Enum): 23 | PERPLEXITY = "perplexity" 24 | TAVILY = "tavily" 25 | EXA = "exa" 26 | ARXIV = "arxiv" 27 | PUBMED = "pubmed" 28 | LINKUP = "linkup" 29 | DUCKDUCKGO = "duckduckgo" 30 | GOOGLESEARCH = "googlesearch" 31 | 32 | @dataclass(kw_only=True) 33 | class Configuration: 34 | """The configurable fields for the chatbot.""" 35 | # Common configuration 36 | report_structure: str = DEFAULT_REPORT_STRUCTURE # Defaults to the default report structure 37 | search_api: SearchAPI = SearchAPI.TAVILY # Default to TAVILY 38 | search_api_config: Optional[Dict[str, Any]] = None 39 | 40 | # Graph-specific configuration 41 | number_of_queries: int = 2 # Number of search queries to generate per iteration 42 | max_search_depth: int = 2 # Maximum number of reflection + search iterations 43 | planner_provider: str = "anthropic" # Defaults to Anthropic as provider 44 | planner_model: str = "claude-3-7-sonnet-latest" # Defaults to claude-3-7-sonnet-latest 45 | planner_model_kwargs: Optional[Dict[str, Any]] = None # kwargs for planner_model 46 | writer_provider: str = "anthropic" # Defaults to Anthropic as provider 47 | writer_model: str = "claude-3-5-sonnet-latest" # Defaults to claude-3-5-sonnet-latest 48 | writer_model_kwargs: Optional[Dict[str, Any]] = None # kwargs for writer_model 49 | 50 | # Multi-agent specific configuration 51 | supervisor_model: str = "openai:gpt-4.1" # Model for supervisor agent in multi-agent setup 52 | researcher_model: str = "openai:gpt-4.1" # Model for research agents in multi-agent setup 53 | 54 | @classmethod 55 | def from_runnable_config( 56 | cls, config: Optional[RunnableConfig] = None 57 | ) -> "Configuration": 58 | """Create a Configuration instance from a RunnableConfig.""" 59 | configurable = ( 60 | config["configurable"] if config and "configurable" in config else {} 61 | ) 62 | values: dict[str, Any] = { 63 | f.name: os.environ.get(f.name.upper(), configurable.get(f.name)) 64 | for f in fields(cls) 65 | if f.init 66 | } 67 | return cls(**{k: v for k, v in values.items() if v}) 68 | -------------------------------------------------------------------------------- /src/open_deep_research/graph.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Research Workflow\n", 8 | "\n", 9 | "This notebook demonstrates the research [workflow](https://langchain-ai.github.io/langgraph/tutorials/workflows/) that creates comprehensive reports through a series of focused steps. The system:\n", 10 | "\n", 11 | "1. Uses a **graph workflow** with specialized nodes for each report creation stage\n", 12 | "2. Enables user **feedback and approval** at critical planning points \n", 13 | "3. 
Produces a well-structured report with introduction, researched body sections, and conclusion\n", 14 | "\n", 15 | "## From repo " 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 1, 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "name": "stdout", 25 | "output_type": "stream", 26 | "text": [ 27 | "/Users/rlm/Desktop/Code/open_deep_research/src\n" 28 | ] 29 | }, 30 | { 31 | "name": "stderr", 32 | "output_type": "stream", 33 | "text": [ 34 | "/Users/rlm/Desktop/Code/open_deep_research/open-deep-research-env/lib/python3.11/site-packages/IPython/core/magics/osm.py:417: UserWarning: This is now an optional IPython functionality, setting dhist requires you to install the `pickleshare` library.\n", 35 | " self.shell.db['dhist'] = compress_dhist(dhist)[-100:]\n" 36 | ] 37 | } 38 | ], 39 | "source": [ 40 | "%cd ..\n", 41 | "%load_ext autoreload\n", 42 | "%autoreload 2" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "## From package " 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 15, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "\n", 62 | "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.0.1\u001b[0m\n", 63 | "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" 64 | ] 65 | } 66 | ], 67 | "source": [ 68 | "! pip install -U -q open-deep-research" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "# Compile the Graph-Based Research Workflow\n", 76 | "\n", 77 | "The next step is to compile the LangGraph workflow that orchestrates the report creation process. This defines the sequence of operations and decision points in the research pipeline." 
78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "# Import required modules and initialize the builder from open_deep_research\n", 87 | "import uuid \n", 88 | "import os, getpass\n", 89 | "import open_deep_research \n", 90 | "print(open_deep_research.__version__) \n", 91 | "from IPython.display import Image, display, Markdown\n", 92 | "from langgraph.types import Command\n", 93 | "from langgraph.checkpoint.memory import MemorySaver\n", 94 | "from open_deep_research.graph import builder" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "# Create a memory-based checkpointer and compile the graph\n", 104 | "# This enables state persistence and tracking throughout the workflow execution\n", 105 | "\n", 106 | "memory = MemorySaver()\n", 107 | "graph = builder.compile(checkpointer=memory)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "# Visualize the graph structure\n", 117 | "# This shows the nodes and edges in the research workflow\n", 118 | "\n", 119 | "display(Image(graph.get_graph(xray=1).draw_mermaid_png()))" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "# Helper function to set environment variables for API keys\n", 129 | "# This ensures all necessary credentials are available for various services\n", 130 | "\n", 131 | "def _set_env(var: str):\n", 132 | " if not os.environ.get(var):\n", 133 | " os.environ[var] = getpass.getpass(f\"{var}: \")\n", 134 | "\n", 135 | "# Set the API keys used for any model or search tool selections below, such as:\n", 136 | "_set_env(\"OPENAI_API_KEY\")\n", 137 | "_set_env(\"ANTHROPIC_API_KEY\")\n", 138 | "_set_env(\"TAVILY_API_KEY\")\n", 139 | "_set_env(\"GROQ_API_KEY\")\n", 140 | "_set_env(\"PERPLEXITY_API_KEY\")" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "# Define report structure template and configure the research workflow\n", 150 | "# This sets parameters for models, search tools, and report organization\n", 151 | "\n", 152 | "REPORT_STRUCTURE = \"\"\"Use this structure to create a report on the user-provided topic:\n", 153 | "\n", 154 | "1. Introduction (no research needed)\n", 155 | " - Brief overview of the topic area\n", 156 | "\n", 157 | "2. Main Body Sections:\n", 158 | " - Each section should focus on a sub-topic of the user-provided topic\n", 159 | " \n", 160 | "3. 
Conclusion\n", 161 | " - Aim for 1 structural element (either a list of table) that distills the main body sections \n", 162 | " - Provide a concise summary of the report\"\"\"\n", 163 | "\n", 164 | "# Configuration option 1: Claude 3.7 Sonnet for planning with perplexity search\n", 165 | "thread = {\"configurable\": {\"thread_id\": str(uuid.uuid4()),\n", 166 | " \"search_api\": \"perplexity\",\n", 167 | " \"planner_provider\": \"anthropic\",\n", 168 | " \"planner_model\": \"claude-3-7-sonnet-latest\",\n", 169 | " # \"planner_model_kwargs\": {\"temperature\":0.8}, # if set custom parameters\n", 170 | " \"writer_provider\": \"anthropic\",\n", 171 | " \"writer_model\": \"claude-3-5-sonnet-latest\",\n", 172 | " # \"writer_model_kwargs\": {\"temperature\":0.8}, # if set custom parameters\n", 173 | " \"max_search_depth\": 2,\n", 174 | " \"report_structure\": REPORT_STRUCTURE,\n", 175 | " }}\n", 176 | "\n", 177 | "# Configuration option 2: DeepSeek-R1-Distill-Llama-70B for planning and llama-3.3-70b-versatile for writing\n", 178 | "thread = {\"configurable\": {\"thread_id\": str(uuid.uuid4()),\n", 179 | " \"search_api\": \"tavily\",\n", 180 | " \"planner_provider\": \"groq\",\n", 181 | " \"planner_model\": \"deepseek-r1-distill-llama-70b\",\n", 182 | " \"writer_provider\": \"groq\",\n", 183 | " \"writer_model\": \"llama-3.3-70b-versatile\",\n", 184 | " \"report_structure\": REPORT_STRUCTURE,\n", 185 | " \"max_search_depth\": 1,}\n", 186 | " }\n", 187 | "\n", 188 | "# Configuration option 3: Use OpenAI o3 for both planning and writing (selected option)\n", 189 | "thread = {\"configurable\": {\"thread_id\": str(uuid.uuid4()),\n", 190 | " \"search_api\": \"tavily\",\n", 191 | " \"planner_provider\": \"openai\",\n", 192 | " \"planner_model\": \"o3\",\n", 193 | " \"writer_provider\": \"openai\",\n", 194 | " \"writer_model\": \"o3\",\n", 195 | " \"max_search_depth\": 2,\n", 196 | " \"report_structure\": REPORT_STRUCTURE,\n", 197 | " }}\n", 198 | "\n", 199 | "# Define research topic about Model Context Protocol\n", 200 | "topic = \"Overview of Model Context Protocol (MCP), an Anthropic‑backed open standard for integrating external context and tools with LLMs. Give an architectural overview for developers, tell me about interesting MCP servers, and compare to google Agent2Agent (A2A) protocol.\"\n", 201 | "\n", 202 | "# Run the graph workflow until first interruption (waiting for user feedback)\n", 203 | "async for event in graph.astream({\"topic\":topic,}, thread, stream_mode=\"updates\"):\n", 204 | " if '__interrupt__' in event:\n", 205 | " interrupt_value = event['__interrupt__'][0].value\n", 206 | " display(Markdown(interrupt_value))" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "# User Feedback Phase\n", 214 | "\n", 215 | "* This allows for providing directed feedback on the initial report plan\n", 216 | "* The user can review the proposed report structure and provide specific guidance\n", 217 | "* The system will incorporate this feedback into the final report plan" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "# Submit feedback on the report plan\n", 227 | "# The system will continue execution with the updated requirements\n", 228 | "\n", 229 | "# Provide specific feedback to focus and refine the report structure\n", 230 | "async for event in graph.astream(Command(resume=\"Looks great! 
Just do one section related to Agent2Agent (A2A) protocol, introducing it and comparing to MCP.\"), thread, stream_mode=\"updates\"):\n", 231 | " if '__interrupt__' in event:\n", 232 | " interrupt_value = event['__interrupt__'][0].value\n", 233 | " display(Markdown(interrupt_value))" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "# Final Approval Phase\n", 241 | "* After incorporating feedback, approve the plan to start content generation" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "# Approve the final plan and execute the report generation\n", 251 | "# This triggers the research and writing phases for all sections\n", 252 | "\n", 253 | "# The system will now:\n", 254 | "# 1. Research each section topic\n", 255 | "# 2. Generate content with citations\n", 256 | "# 3. Create introduction and conclusion\n", 257 | "# 4. Compile the final report\n", 258 | "\n", 259 | "async for event in graph.astream(Command(resume=True), thread, stream_mode=\"updates\"):\n", 260 | " print(event)\n", 261 | " print(\"\\n\")" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 9, 267 | "metadata": {}, 268 | "outputs": [ 269 | { 270 | "data": { 271 | "text/markdown": [ 272 | "# Introduction \n", 273 | "Large language models excel at reasoning, but without structured access to the outside world they remain isolated. The Model Context Protocol (MCP) bridges this gap, defining an open, vendor‑neutral way for models to tap files, databases, APIs, and other tools through simple JSON‑RPC exchanges. This report walks developers through the protocol’s architecture, surveys real‑world MCP servers that showcase its flexibility, and contrasts MCP with Google’s emerging Agent‑to‑Agent (A2A) standard. By the end, you should know when, why, and how to weave MCP into your own agentic systems.\n", 274 | "\n", 275 | "## MCP Architectural Overview for Developers\n", 276 | "\n", 277 | "MCP uses a client‑host‑server model: a host process spawns isolated clients, and every client keeps a 1‑to‑1, stateful session with a single server that exposes prompts, resources, and tools through JSON‑RPC 2.0 messages [1][5]. \n", 278 | "\n", 279 | "A session passes through three phases — initialize, operation, shutdown. The client begins with an initialize request that lists its protocolVersion and capabilities; the server replies with a compatible version and its own capabilities. After the client’s initialized notification, both sides may exchange requests, responses, or one‑way notifications under the agreed capabilities [2]. \n", 280 | "\n", 281 | "Two official transports exist. Stdio is ideal for local child processes, while HTTP (SSE/“streamable HTTP”) supports multi‑client, remote scenarios. Both must preserve JSON‑RPC framing, and servers should validate Origin headers, bind to localhost where possible, and apply TLS or authentication to block DNS‑rebind or similar attacks [1][3]. \n", 282 | "\n", 283 | "To integrate MCP, developers can: \n", 284 | "1) implement a server that registers needed primitives and advertises them in initialize.result.capabilities; \n", 285 | "2) validate all inputs and set reasonable timeouts; \n", 286 | "3) or consume existing servers via SDKs—select a transport, send initialize, then invoke or subscribe to tools/resources exactly as negotiated [4][5]. 
\n", 287 | "\n", 288 | "### Sources \n", 289 | "[1] MCP Protocol Specification: https://www.claudemcp.com/specification \n", 290 | "[2] Lifecycle – Model Context Protocol: https://modelcontextprotocol.info/specification/draft/basic/lifecycle/ \n", 291 | "[3] Transports – Model Context Protocol: https://modelcontextprotocol.io/specification/2025-03-26/basic/transports \n", 292 | "[4] Core Architecture – Model Context Protocol: https://modelcontextprotocol.io/docs/concepts/architecture \n", 293 | "[5] Architecture – Model Context Protocol Specification: https://spec.modelcontextprotocol.io/specification/2025-03-26/architecture/\n", 294 | "\n", 295 | "## Ecosystem Spotlight: Notable MCP Servers\n", 296 | "\n", 297 | "Hundreds of MCP servers now exist, spanning core data access, commercial platforms, and hobby projects—proof that the protocol can wrap almost any tool or API [1][2].\n", 298 | "\n", 299 | "Reference servers maintained by Anthropic demonstrate the basics. Filesystem, PostgreSQL, Git, and Slack servers cover file I/O, SQL queries, repository ops, and chat workflows. Developers can launch them in seconds with commands like \n", 300 | "`npx -y @modelcontextprotocol/server-filesystem` (TypeScript) or `uvx mcp-server-git` (Python) and then point any MCP‑aware client, such as Claude Desktop, at the spawned process [1].\n", 301 | "\n", 302 | "Platform vendors are adding “first‑party” connectors. Microsoft cites the GitHub MCP Server and a Playwright browser‑automation server as popular examples that let C# or .NET apps drive code reviews or end‑to‑end tests through a uniform interface [3]. Other partner servers—e.g., Cloudflare for edge resources or Stripe for payments—expose full product APIs while still enforcing user approval through MCP’s tool‑calling flow [2].\n", 303 | "\n", 304 | "Community builders rapidly fill remaining gaps. Docker and Kubernetes servers give agents controlled shell access; Snowflake, Neon, and Qdrant handle cloud databases; Todoist and Obsidian servers tackle personal productivity. Because every server follows the same JSON‑RPC schema and ships as a small CLI, developers can fork an existing TypeScript or Python implementation and swap in their own SDK calls to create new connectors in hours, not weeks [2]. \n", 305 | "\n", 306 | "### Sources \n", 307 | "[1] Example Servers – Model Context Protocol: https://modelcontextprotocol.io/examples \n", 308 | "[2] Model Context Protocol Servers Repository: https://github.com/madhukarkumar/anthropic-mcp-servers \n", 309 | "[3] Microsoft partners with Anthropic to create official C# SDK for Model Context Protocol: https://devblogs.microsoft.com/blog/microsoft-partners-with-anthropic-to-create-official-c-sdk-for-model-context-protocol\n", 310 | "\n", 311 | "## Agent‑to‑Agent (A2A) Protocol and Comparison with MCP \n", 312 | "\n", 313 | "Google’s Agent‑to‑Agent (A2A) protocol, announced in April 2025, gives autonomous agents a common way to talk directly across vendors and clouds [2]. Its goal is to let one “client” agent delegate work to a “remote” agent without sharing internal code or memory, enabling true multi‑agent systems. \n", 314 | "\n", 315 | "Discovery starts with a JSON Agent Card served at /.well‑known/agent.json, which lists version, skills and endpoints [3]. After discovery, the client opens a Task—an atomic unit that moves through states and exchanges Messages and multimodal Artifacts. 
HTTP request/response, Server‑Sent Events, or push notifications are chosen based on task length to stream progress safely [2]. \n", 316 | "\n", 317 | "Anthropic’s Model Context Protocol (MCP) tackles a different layer: it links a single language model to external tools and data through a Host‑Client‑Server triad, exposing Resources, Tools and Prompts over JSON‑RPC [1]. Communication is model‑to‑tool, not agent‑to‑agent. \n", 318 | "\n", 319 | "Google therefore calls A2A “complementary” to MCP: use MCP to give each agent the data and actions it needs; use A2A to let those empowered agents discover one another, coordinate plans and exchange results [1]. In practice, developers might pipe an A2A task that, mid‑flow, invokes an MCP tool or serve an MCP connector as an A2A remote agent, showing the standards can interlock instead of compete. \n", 320 | "\n", 321 | "### Sources \n", 322 | "[1] MCP vs A2A: Comprehensive Comparison of AI Agent Protocols: https://www.toolworthy.ai/blog/mcp-vs-a2a-protocol-comparison \n", 323 | "[2] Google A2A vs MCP: The New Protocol Standard Developers Need to Know: https://www.trickle.so/blog/google-a2a-vs-mcp \n", 324 | "[3] A2A vs MCP: Comparing AI Standards for Agent Interoperability: https://www.ikangai.com/a2a-vs-mcp-ai-standards/\n", 325 | "\n", 326 | "## Conclusion\n", 327 | "\n", 328 | "Model Context Protocol (MCP) secures a model’s immediate tool belt, while Google’s Agent‑to‑Agent (A2A) protocol enables those empowered agents to find and hire one another. Their scopes differ but interlock, giving developers a layered recipe for robust, multi‑agent applications.\n", 329 | "\n", 330 | "| Aspect | MCP | A2A |\n", 331 | "| --- | --- | --- |\n", 332 | "| Layer | Model‑to‑tool RPC | Agent‑to‑agent orchestration |\n", 333 | "| Session start | `initialize` handshake | Task creation lifecycle |\n", 334 | "| Discovery | Client‑supplied server URI | `/.well‑known/agent.json` card |\n", 335 | "| Streaming | Stdio or HTTP/SSE | HTTP, SSE, or push |\n", 336 | "| Best fit | Embed filesystems, DBs, SaaS APIs into one agent | Delegate subtasks across clouds or vendors |\n", 337 | "\n", 338 | "Next steps: prototype an A2A task that internally calls an MCP PostgreSQL server; harden both layers with TLS and capability scoping; finally, contribute a new open‑source MCP connector to accelerate community adoption." 
339 | ], 340 | "text/plain": [ 341 | "" 342 | ] 343 | }, 344 | "execution_count": 9, 345 | "metadata": {}, 346 | "output_type": "execute_result" 347 | } 348 | ], 349 | "source": [ 350 | "# Display the final generated report\n", 351 | "# Retrieve the completed report from the graph's state and format it for display\n", 352 | "\n", 353 | "final_state = graph.get_state(thread)\n", 354 | "report = final_state.values.get('final_report')\n", 355 | "Markdown(report)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "Trace: \n", 363 | "\n", 364 | "> Note: uses 80k tokens \n", 365 | "\n", 366 | "https://smith.langchain.com/public/31eca7c9-beae-42a3-bef4-5bce9488d7be/r" 367 | ] 368 | } 369 | ], 370 | "metadata": { 371 | "kernelspec": { 372 | "display_name": "open-deep-research-env", 373 | "language": "python", 374 | "name": "python3" 375 | }, 376 | "language_info": { 377 | "codemirror_mode": { 378 | "name": "ipython", 379 | "version": 3 380 | }, 381 | "file_extension": ".py", 382 | "mimetype": "text/x-python", 383 | "name": "python", 384 | "nbconvert_exporter": "python", 385 | "pygments_lexer": "ipython3", 386 | "version": "3.11.6" 387 | } 388 | }, 389 | "nbformat": 4, 390 | "nbformat_minor": 2 391 | } 392 | -------------------------------------------------------------------------------- /src/open_deep_research/graph.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | from langchain.chat_models import init_chat_model 4 | from langchain_core.messages import HumanMessage, SystemMessage 5 | from langchain_core.runnables import RunnableConfig 6 | 7 | from langgraph.constants import Send 8 | from langgraph.graph import START, END, StateGraph 9 | from langgraph.types import interrupt, Command 10 | 11 | from open_deep_research.state import ( 12 | ReportStateInput, 13 | ReportStateOutput, 14 | Sections, 15 | ReportState, 16 | SectionState, 17 | SectionOutputState, 18 | Queries, 19 | Feedback 20 | ) 21 | 22 | from open_deep_research.prompts import ( 23 | report_planner_query_writer_instructions, 24 | report_planner_instructions, 25 | query_writer_instructions, 26 | section_writer_instructions, 27 | final_section_writer_instructions, 28 | section_grader_instructions, 29 | section_writer_inputs 30 | ) 31 | 32 | from open_deep_research.configuration import Configuration 33 | from open_deep_research.utils import ( 34 | format_sections, 35 | get_config_value, 36 | get_search_params, 37 | select_and_execute_search 38 | ) 39 | 40 | ## Nodes -- 41 | 42 | async def generate_report_plan(state: ReportState, config: RunnableConfig): 43 | """Generate the initial report plan with sections. 44 | 45 | This node: 46 | 1. Gets configuration for the report structure and search parameters 47 | 2. Generates search queries to gather context for planning 48 | 3. Performs web searches using those queries 49 | 4. Uses an LLM to generate a structured plan with sections 50 | 51 | Args: 52 | state: Current graph state containing the report topic 53 | config: Configuration for models, search APIs, etc. 
54 | 55 | Returns: 56 | Dict containing the generated sections 57 | """ 58 | 59 | # Inputs 60 | topic = state["topic"] 61 | 62 | # Get list of feedback on the report plan 63 | feedback_list = state.get("feedback_on_report_plan", []) 64 | 65 | # Concatenate feedback on the report plan into a single string 66 | feedback = " /// ".join(feedback_list) if feedback_list else "" 67 | 68 | # Get configuration 69 | configurable = Configuration.from_runnable_config(config) 70 | report_structure = configurable.report_structure 71 | number_of_queries = configurable.number_of_queries 72 | search_api = get_config_value(configurable.search_api) 73 | search_api_config = configurable.search_api_config or {} # Get the config dict, default to empty 74 | params_to_pass = get_search_params(search_api, search_api_config) # Filter parameters 75 | 76 | # Convert JSON object to string if necessary 77 | if isinstance(report_structure, dict): 78 | report_structure = str(report_structure) 79 | 80 | # Set writer model (model used for query writing) 81 | writer_provider = get_config_value(configurable.writer_provider) 82 | writer_model_name = get_config_value(configurable.writer_model) 83 | writer_model_kwargs = get_config_value(configurable.writer_model_kwargs or {}) 84 | writer_model = init_chat_model(model=writer_model_name, model_provider=writer_provider, model_kwargs=writer_model_kwargs) 85 | structured_llm = writer_model.with_structured_output(Queries) 86 | 87 | # Format system instructions 88 | system_instructions_query = report_planner_query_writer_instructions.format(topic=topic, report_organization=report_structure, number_of_queries=number_of_queries) 89 | 90 | # Generate queries 91 | results = await structured_llm.ainvoke([SystemMessage(content=system_instructions_query), 92 | HumanMessage(content="Generate search queries that will help with planning the sections of the report.")]) 93 | 94 | # Web search 95 | query_list = [query.search_query for query in results.queries] 96 | 97 | # Search the web with parameters 98 | source_str = await select_and_execute_search(search_api, query_list, params_to_pass) 99 | 100 | # Format system instructions 101 | system_instructions_sections = report_planner_instructions.format(topic=topic, report_organization=report_structure, context=source_str, feedback=feedback) 102 | 103 | # Set the planner 104 | planner_provider = get_config_value(configurable.planner_provider) 105 | planner_model = get_config_value(configurable.planner_model) 106 | planner_model_kwargs = get_config_value(configurable.planner_model_kwargs or {}) 107 | 108 | # Report planner instructions 109 | planner_message = """Generate the sections of the report. Your response must include a 'sections' field containing a list of sections. 
110 | Each section must have: name, description, research, and content fields.""" 111 | 112 | # Run the planner 113 | if planner_model == "claude-3-7-sonnet-latest": 114 | # Allocate a thinking budget for claude-3-7-sonnet-latest as the planner model 115 | planner_llm = init_chat_model(model=planner_model, 116 | model_provider=planner_provider, 117 | max_tokens=20_000, 118 | thinking={"type": "enabled", "budget_tokens": 16_000}) 119 | 120 | else: 121 | # With other models, thinking tokens are not specifically allocated 122 | planner_llm = init_chat_model(model=planner_model, 123 | model_provider=planner_provider, 124 | model_kwargs=planner_model_kwargs) 125 | 126 | # Generate the report sections 127 | structured_llm = planner_llm.with_structured_output(Sections) 128 | report_sections = await structured_llm.ainvoke([SystemMessage(content=system_instructions_sections), 129 | HumanMessage(content=planner_message)]) 130 | 131 | # Get sections 132 | sections = report_sections.sections 133 | 134 | return {"sections": sections} 135 | 136 | def human_feedback(state: ReportState, config: RunnableConfig) -> Command[Literal["generate_report_plan","build_section_with_web_research"]]: 137 | """Get human feedback on the report plan and route to next steps. 138 | 139 | This node: 140 | 1. Formats the current report plan for human review 141 | 2. Gets feedback via an interrupt 142 | 3. Routes to either: 143 | - Section writing if plan is approved 144 | - Plan regeneration if feedback is provided 145 | 146 | Args: 147 | state: Current graph state with sections to review 148 | config: Configuration for the workflow 149 | 150 | Returns: 151 | Command to either regenerate plan or start section writing 152 | """ 153 | 154 | # Get sections 155 | topic = state["topic"] 156 | sections = state['sections'] 157 | sections_str = "\n\n".join( 158 | f"Section: {section.name}\n" 159 | f"Description: {section.description}\n" 160 | f"Research needed: {'Yes' if section.research else 'No'}\n" 161 | for section in sections 162 | ) 163 | 164 | # Get feedback on the report plan from interrupt 165 | interrupt_message = f"""Please provide feedback on the following report plan. 166 | \n\n{sections_str}\n 167 | \nDoes the report plan meet your needs?\nPass 'true' to approve the report plan.\nOr, provide feedback to regenerate the report plan:""" 168 | 169 | feedback = interrupt(interrupt_message) 170 | 171 | # If the user approves the report plan, kick off section writing 172 | if isinstance(feedback, bool) and feedback is True: 173 | # Treat this as approve and kick off section writing 174 | return Command(goto=[ 175 | Send("build_section_with_web_research", {"topic": topic, "section": s, "search_iterations": 0}) 176 | for s in sections 177 | if s.research 178 | ]) 179 | 180 | # If the user provides feedback, regenerate the report plan 181 | elif isinstance(feedback, str): 182 | # Treat this as feedback and append it to the existing list 183 | return Command(goto="generate_report_plan", 184 | update={"feedback_on_report_plan": [feedback]}) 185 | else: 186 | raise TypeError(f"Interrupt value of type {type(feedback)} is not supported.") 187 | 188 | async def generate_queries(state: SectionState, config: RunnableConfig): 189 | """Generate search queries for researching a specific section. 190 | 191 | This node uses an LLM to generate targeted search queries based on the 192 | section topic and description. 
193 | 194 | Args: 195 | state: Current state containing section details 196 | config: Configuration including number of queries to generate 197 | 198 | Returns: 199 | Dict containing the generated search queries 200 | """ 201 | 202 | # Get state 203 | topic = state["topic"] 204 | section = state["section"] 205 | 206 | # Get configuration 207 | configurable = Configuration.from_runnable_config(config) 208 | number_of_queries = configurable.number_of_queries 209 | 210 | # Generate queries 211 | writer_provider = get_config_value(configurable.writer_provider) 212 | writer_model_name = get_config_value(configurable.writer_model) 213 | writer_model_kwargs = get_config_value(configurable.writer_model_kwargs or {}) 214 | writer_model = init_chat_model(model=writer_model_name, model_provider=writer_provider, model_kwargs=writer_model_kwargs) 215 | structured_llm = writer_model.with_structured_output(Queries) 216 | 217 | # Format system instructions 218 | system_instructions = query_writer_instructions.format(topic=topic, 219 | section_topic=section.description, 220 | number_of_queries=number_of_queries) 221 | 222 | # Generate queries 223 | queries = await structured_llm.ainvoke([SystemMessage(content=system_instructions), 224 | HumanMessage(content="Generate search queries on the provided topic.")]) 225 | 226 | return {"search_queries": queries.queries} 227 | 228 | async def search_web(state: SectionState, config: RunnableConfig): 229 | """Execute web searches for the section queries. 230 | 231 | This node: 232 | 1. Takes the generated queries 233 | 2. Executes searches using configured search API 234 | 3. Formats results into usable context 235 | 236 | Args: 237 | state: Current state with search queries 238 | config: Search API configuration 239 | 240 | Returns: 241 | Dict with search results and updated iteration count 242 | """ 243 | 244 | # Get state 245 | search_queries = state["search_queries"] 246 | 247 | # Get configuration 248 | configurable = Configuration.from_runnable_config(config) 249 | search_api = get_config_value(configurable.search_api) 250 | search_api_config = configurable.search_api_config or {} # Get the config dict, default to empty 251 | params_to_pass = get_search_params(search_api, search_api_config) # Filter parameters 252 | 253 | # Web search 254 | query_list = [query.search_query for query in search_queries] 255 | 256 | # Search the web with parameters 257 | source_str = await select_and_execute_search(search_api, query_list, params_to_pass) 258 | 259 | return {"source_str": source_str, "search_iterations": state["search_iterations"] + 1} 260 | 261 | async def write_section(state: SectionState, config: RunnableConfig) -> Command[Literal[END, "search_web"]]: 262 | """Write a section of the report and evaluate if more research is needed. 263 | 264 | This node: 265 | 1. Writes section content using search results 266 | 2. Evaluates the quality of the section 267 | 3. 
Either: 268 | - Completes the section if quality passes 269 | - Triggers more research if quality fails 270 | 271 | Args: 272 | state: Current state with search results and section info 273 | config: Configuration for writing and evaluation 274 | 275 | Returns: 276 | Command to either complete section or do more research 277 | """ 278 | 279 | # Get state 280 | topic = state["topic"] 281 | section = state["section"] 282 | source_str = state["source_str"] 283 | 284 | # Get configuration 285 | configurable = Configuration.from_runnable_config(config) 286 | 287 | # Format system instructions 288 | section_writer_inputs_formatted = section_writer_inputs.format(topic=topic, 289 | section_name=section.name, 290 | section_topic=section.description, 291 | context=source_str, 292 | section_content=section.content) 293 | 294 | # Generate section 295 | writer_provider = get_config_value(configurable.writer_provider) 296 | writer_model_name = get_config_value(configurable.writer_model) 297 | writer_model_kwargs = get_config_value(configurable.writer_model_kwargs or {}) 298 | writer_model = init_chat_model(model=writer_model_name, model_provider=writer_provider, model_kwargs=writer_model_kwargs) 299 | 300 | section_content = await writer_model.ainvoke([SystemMessage(content=section_writer_instructions), 301 | HumanMessage(content=section_writer_inputs_formatted)]) 302 | 303 | # Write content to the section object 304 | section.content = section_content.content 305 | 306 | # Grade prompt 307 | section_grader_message = ("Grade the report and consider follow-up questions for missing information. " 308 | "If the grade is 'pass', return empty strings for all follow-up queries. " 309 | "If the grade is 'fail', provide specific search queries to gather missing information.") 310 | 311 | section_grader_instructions_formatted = section_grader_instructions.format(topic=topic, 312 | section_topic=section.description, 313 | section=section.content, 314 | number_of_follow_up_queries=configurable.number_of_queries) 315 | 316 | # Use planner model for reflection 317 | planner_provider = get_config_value(configurable.planner_provider) 318 | planner_model = get_config_value(configurable.planner_model) 319 | planner_model_kwargs = get_config_value(configurable.planner_model_kwargs or {}) 320 | 321 | if planner_model == "claude-3-7-sonnet-latest": 322 | # Allocate a thinking budget for claude-3-7-sonnet-latest as the planner model 323 | reflection_model = init_chat_model(model=planner_model, 324 | model_provider=planner_provider, 325 | max_tokens=20_000, 326 | thinking={"type": "enabled", "budget_tokens": 16_000}).with_structured_output(Feedback) 327 | else: 328 | reflection_model = init_chat_model(model=planner_model, 329 | model_provider=planner_provider, model_kwargs=planner_model_kwargs).with_structured_output(Feedback) 330 | # Generate feedback 331 | feedback = await reflection_model.ainvoke([SystemMessage(content=section_grader_instructions_formatted), 332 | HumanMessage(content=section_grader_message)]) 333 | 334 | # If the section is passing or the max search depth is reached, publish the section to completed sections 335 | if feedback.grade == "pass" or state["search_iterations"] >= configurable.max_search_depth: 336 | # Publish the section to completed sections 337 | return Command( 338 | update={"completed_sections": [section]}, 339 | goto=END 340 | ) 341 | 342 | # Update the existing section with new content and update search queries 343 | else: 344 | return Command( 345 | update={"search_queries": 
feedback.follow_up_queries, "section": section}, 346 | goto="search_web" 347 | ) 348 | 349 | async def write_final_sections(state: SectionState, config: RunnableConfig): 350 | """Write sections that don't require research using completed sections as context. 351 | 352 | This node handles sections like conclusions or summaries that build on 353 | the researched sections rather than requiring direct research. 354 | 355 | Args: 356 | state: Current state with completed sections as context 357 | config: Configuration for the writing model 358 | 359 | Returns: 360 | Dict containing the newly written section 361 | """ 362 | 363 | # Get configuration 364 | configurable = Configuration.from_runnable_config(config) 365 | 366 | # Get state 367 | topic = state["topic"] 368 | section = state["section"] 369 | completed_report_sections = state["report_sections_from_research"] 370 | 371 | # Format system instructions 372 | system_instructions = final_section_writer_instructions.format(topic=topic, section_name=section.name, section_topic=section.description, context=completed_report_sections) 373 | 374 | # Generate section 375 | writer_provider = get_config_value(configurable.writer_provider) 376 | writer_model_name = get_config_value(configurable.writer_model) 377 | writer_model_kwargs = get_config_value(configurable.writer_model_kwargs or {}) 378 | writer_model = init_chat_model(model=writer_model_name, model_provider=writer_provider, model_kwargs=writer_model_kwargs) 379 | 380 | section_content = await writer_model.ainvoke([SystemMessage(content=system_instructions), 381 | HumanMessage(content="Generate a report section based on the provided sources.")]) 382 | 383 | # Write content to section 384 | section.content = section_content.content 385 | 386 | # Write the updated section to completed sections 387 | return {"completed_sections": [section]} 388 | 389 | def gather_completed_sections(state: ReportState): 390 | """Format completed sections as context for writing final sections. 391 | 392 | This node takes all completed research sections and formats them into 393 | a single context string for writing summary sections. 394 | 395 | Args: 396 | state: Current state with completed sections 397 | 398 | Returns: 399 | Dict with formatted sections as context 400 | """ 401 | 402 | # List of completed sections 403 | completed_sections = state["completed_sections"] 404 | 405 | # Format completed section to str to use as context for final sections 406 | completed_report_sections = format_sections(completed_sections) 407 | 408 | return {"report_sections_from_research": completed_report_sections} 409 | 410 | def compile_final_report(state: ReportState): 411 | """Compile all sections into the final report. 412 | 413 | This node: 414 | 1. Gets all completed sections 415 | 2. Orders them according to original plan 416 | 3. 
Combines them into the final report 417 | 418 | Args: 419 | state: Current state with all completed sections 420 | 421 | Returns: 422 | Dict containing the complete report 423 | """ 424 | 425 | # Get sections 426 | sections = state["sections"] 427 | completed_sections = {s.name: s.content for s in state["completed_sections"]} 428 | 429 | # Update sections with completed content while maintaining original order 430 | for section in sections: 431 | section.content = completed_sections[section.name] 432 | 433 | # Compile final report 434 | all_sections = "\n\n".join([s.content for s in sections]) 435 | 436 | return {"final_report": all_sections} 437 | 438 | def initiate_final_section_writing(state: ReportState): 439 | """Create parallel tasks for writing non-research sections. 440 | 441 | This edge function identifies sections that don't need research and 442 | creates parallel writing tasks for each one. 443 | 444 | Args: 445 | state: Current state with all sections and research context 446 | 447 | Returns: 448 | List of Send commands for parallel section writing 449 | """ 450 | 451 | # Kick off section writing in parallel via Send() API for any sections that do not require research 452 | return [ 453 | Send("write_final_sections", {"topic": state["topic"], "section": s, "report_sections_from_research": state["report_sections_from_research"]}) 454 | for s in state["sections"] 455 | if not s.research 456 | ] 457 | 458 | # Report section sub-graph -- 459 | 460 | # Add nodes 461 | section_builder = StateGraph(SectionState, output=SectionOutputState) 462 | section_builder.add_node("generate_queries", generate_queries) 463 | section_builder.add_node("search_web", search_web) 464 | section_builder.add_node("write_section", write_section) 465 | 466 | # Add edges 467 | section_builder.add_edge(START, "generate_queries") 468 | section_builder.add_edge("generate_queries", "search_web") 469 | section_builder.add_edge("search_web", "write_section") 470 | 471 | # Outer graph for initial report plan compiling results from each section -- 472 | 473 | # Add nodes 474 | builder = StateGraph(ReportState, input=ReportStateInput, output=ReportStateOutput, config_schema=Configuration) 475 | builder.add_node("generate_report_plan", generate_report_plan) 476 | builder.add_node("human_feedback", human_feedback) 477 | builder.add_node("build_section_with_web_research", section_builder.compile()) 478 | builder.add_node("gather_completed_sections", gather_completed_sections) 479 | builder.add_node("write_final_sections", write_final_sections) 480 | builder.add_node("compile_final_report", compile_final_report) 481 | 482 | # Add edges 483 | builder.add_edge(START, "generate_report_plan") 484 | builder.add_edge("generate_report_plan", "human_feedback") 485 | builder.add_edge("build_section_with_web_research", "gather_completed_sections") 486 | builder.add_conditional_edges("gather_completed_sections", initiate_final_section_writing, ["write_final_sections"]) 487 | builder.add_edge("write_final_sections", "compile_final_report") 488 | builder.add_edge("compile_final_report", END) 489 | 490 | graph = builder.compile() 491 | -------------------------------------------------------------------------------- /src/open_deep_research/multi_agent.py: -------------------------------------------------------------------------------- 1 | from typing import List, Annotated, TypedDict, operator, Literal 2 | from pydantic import BaseModel, Field 3 | 4 | from langchain.chat_models import init_chat_model 5 | from langchain_core.tools 
import tool 6 | from langchain_core.runnables import RunnableConfig 7 | from langgraph.graph import MessagesState 8 | 9 | from langgraph.types import Command, Send 10 | from langgraph.graph import START, END, StateGraph 11 | 12 | from open_deep_research.configuration import Configuration 13 | from open_deep_research.utils import get_config_value, tavily_search, duckduckgo_search 14 | from open_deep_research.prompts import SUPERVISOR_INSTRUCTIONS, RESEARCH_INSTRUCTIONS 15 | 16 | ## Tools factory - will be initialized based on configuration 17 | def get_search_tool(config: RunnableConfig): 18 | """Get the appropriate search tool based on configuration""" 19 | configurable = Configuration.from_runnable_config(config) 20 | search_api = get_config_value(configurable.search_api) 21 | 22 | # TODO: Configure other search functions as tools 23 | if search_api.lower() == "tavily": 24 | return tavily_search 25 | elif search_api.lower() == "duckduckgo": 26 | return duckduckgo_search 27 | else: 28 | raise NotImplementedError( 29 | f"The search API '{search_api}' is not yet supported in the multi-agent implementation. " 30 | f"Currently, only Tavily/DuckDuckGo is supported. Please use the graph-based implementation in " 31 | f"src/open_deep_research/graph.py for other search APIs, or set search_api to 'tavily' or 'duckduckgo'." 32 | ) 33 | 34 | @tool 35 | class Section(BaseModel): 36 | """Section of the report.""" 37 | name: str = Field( 38 | description="Name for this section of the report.", 39 | ) 40 | description: str = Field( 41 | description="Research scope for this section of the report.", 42 | ) 43 | content: str = Field( 44 | description="The content of the section." 45 | ) 46 | 47 | @tool 48 | class Sections(BaseModel): 49 | """List of section titles of the report.""" 50 | sections: List[str] = Field( 51 | description="Sections of the report.", 52 | ) 53 | 54 | @tool 55 | class Introduction(BaseModel): 56 | """Introduction to the report.""" 57 | name: str = Field( 58 | description="Name for the report.", 59 | ) 60 | content: str = Field( 61 | description="The content of the introduction, giving an overview of the report." 62 | ) 63 | 64 | @tool 65 | class Conclusion(BaseModel): 66 | """Conclusion to the report.""" 67 | name: str = Field( 68 | description="Name for the conclusion of the report.", 69 | ) 70 | content: str = Field( 71 | description="The content of the conclusion, summarizing the report." 
72 | ) 73 | 74 | ## State 75 | class ReportStateOutput(TypedDict): 76 | final_report: str # Final report 77 | 78 | class ReportState(MessagesState): 79 | sections: list[str] # List of report sections 80 | completed_sections: Annotated[list, operator.add] # Send() API key 81 | final_report: str # Final report 82 | 83 | class SectionState(MessagesState): 84 | section: str # Report section 85 | completed_sections: list[Section] # Final key we duplicate in outer state for Send() API 86 | 87 | class SectionOutputState(TypedDict): 88 | completed_sections: list[Section] # Final key we duplicate in outer state for Send() API 89 | 90 | # Tool lists will be built dynamically based on configuration 91 | def get_supervisor_tools(config: RunnableConfig): 92 | """Get supervisor tools based on configuration""" 93 | search_tool = get_search_tool(config) 94 | tool_list = [search_tool, Sections, Introduction, Conclusion] 95 | return tool_list, {tool.name: tool for tool in tool_list} 96 | 97 | def get_research_tools(config: RunnableConfig): 98 | """Get research tools based on configuration""" 99 | search_tool = get_search_tool(config) 100 | tool_list = [search_tool, Section] 101 | return tool_list, {tool.name: tool for tool in tool_list} 102 | 103 | async def supervisor(state: ReportState, config: RunnableConfig): 104 | """LLM decides whether to call a tool or not""" 105 | 106 | # Messages 107 | messages = state["messages"] 108 | 109 | # Get configuration 110 | configurable = Configuration.from_runnable_config(config) 111 | supervisor_model = get_config_value(configurable.supervisor_model) 112 | 113 | # Initialize the model 114 | llm = init_chat_model(model=supervisor_model) 115 | 116 | # If sections have been completed, but we don't yet have the final report, then we need to initiate writing the introduction and conclusion 117 | if state.get("completed_sections") and not state.get("final_report"): 118 | research_complete_message = {"role": "user", "content": "Research is complete. Now write the introduction and conclusion for the report. 
Here are the completed main body sections: \n\n" + "\n\n".join([s.content for s in state["completed_sections"]])} 119 | messages = messages + [research_complete_message] 120 | 121 | # Get tools based on configuration 122 | supervisor_tool_list, _ = get_supervisor_tools(config) 123 | 124 | # Invoke 125 | return { 126 | "messages": [ 127 | await llm.bind_tools(supervisor_tool_list, parallel_tool_calls=False).ainvoke( 128 | [ 129 | {"role": "system", 130 | "content": SUPERVISOR_INSTRUCTIONS, 131 | } 132 | ] 133 | + messages 134 | ) 135 | ] 136 | } 137 | 138 | async def supervisor_tools(state: ReportState, config: RunnableConfig) -> Command[Literal["supervisor", "research_team", "__end__"]]: 139 | """Performs the tool call and sends to the research agent""" 140 | 141 | result = [] 142 | sections_list = [] 143 | intro_content = None 144 | conclusion_content = None 145 | 146 | # Get tools based on configuration 147 | _, supervisor_tools_by_name = get_supervisor_tools(config) 148 | 149 | # First process all tool calls to ensure we respond to each one (required for OpenAI) 150 | for tool_call in state["messages"][-1].tool_calls: 151 | # Get the tool 152 | tool = supervisor_tools_by_name[tool_call["name"]] 153 | # Perform the tool call - use ainvoke for async tools 154 | if hasattr(tool, 'ainvoke'): 155 | observation = await tool.ainvoke(tool_call["args"]) 156 | else: 157 | observation = tool.invoke(tool_call["args"]) 158 | 159 | # Append to messages 160 | result.append({"role": "tool", 161 | "content": observation, 162 | "name": tool_call["name"], 163 | "tool_call_id": tool_call["id"]}) 164 | 165 | # Store special tool results for processing after all tools have been called 166 | if tool_call["name"] == "Sections": 167 | sections_list = observation.sections 168 | elif tool_call["name"] == "Introduction": 169 | # Format introduction with proper H1 heading if not already formatted 170 | if not observation.content.startswith("# "): 171 | intro_content = f"# {observation.name}\n\n{observation.content}" 172 | else: 173 | intro_content = observation.content 174 | elif tool_call["name"] == "Conclusion": 175 | # Format conclusion with proper H2 heading if not already formatted 176 | if not observation.content.startswith("## "): 177 | conclusion_content = f"## {observation.name}\n\n{observation.content}" 178 | else: 179 | conclusion_content = observation.content 180 | 181 | # After processing all tool calls, decide what to do next 182 | if sections_list: 183 | # Send the sections to the research agents 184 | return Command(goto=[Send("research_team", {"section": s}) for s in sections_list], update={"messages": result}) 185 | elif intro_content: 186 | # Store introduction while waiting for conclusion 187 | # Append to messages to guide the LLM to write conclusion next 188 | result.append({"role": "user", "content": "Introduction written. 
Now write a conclusion section."}) 189 | return Command(goto="supervisor", update={"final_report": intro_content, "messages": result}) 190 | elif conclusion_content: 191 | # Get all sections and combine in proper order: Introduction, Body Sections, Conclusion 192 | intro = state.get("final_report", "") 193 | body_sections = "\n\n".join([s.content for s in state["completed_sections"]]) 194 | 195 | # Assemble final report in correct order 196 | complete_report = f"{intro}\n\n{body_sections}\n\n{conclusion_content}" 197 | 198 | # Append to messages to indicate completion 199 | result.append({"role": "user", "content": "Report is now complete with introduction, body sections, and conclusion."}) 200 | return Command(goto="supervisor", update={"final_report": complete_report, "messages": result}) 201 | else: 202 | # Default case (for search tools, etc.) 203 | return Command(goto="supervisor", update={"messages": result}) 204 | 205 | async def supervisor_should_continue(state: ReportState) -> Literal["supervisor_tools", END]: 206 | """Decide if we should continue the loop or stop based upon whether the LLM made a tool call""" 207 | 208 | messages = state["messages"] 209 | last_message = messages[-1] 210 | 211 | # If the LLM makes a tool call, then perform an action 212 | if last_message.tool_calls: 213 | return "supervisor_tools" 214 | 215 | # Else end because the supervisor asked a question or is finished 216 | else: 217 | return END 218 | 219 | async def research_agent(state: SectionState, config: RunnableConfig): 220 | """LLM decides whether to call a tool or not""" 221 | 222 | # Get configuration 223 | configurable = Configuration.from_runnable_config(config) 224 | researcher_model = get_config_value(configurable.researcher_model) 225 | 226 | # Initialize the model 227 | llm = init_chat_model(model=researcher_model) 228 | 229 | # Get tools based on configuration 230 | research_tool_list, _ = get_research_tools(config) 231 | 232 | return { 233 | "messages": [ 234 | # Enforce tool calling to either perform more search or call the Section tool to write the section 235 | await llm.bind_tools(research_tool_list).ainvoke( 236 | [ 237 | {"role": "system", 238 | "content": RESEARCH_INSTRUCTIONS.format(section_description=state["section"]) 239 | } 240 | ] 241 | + state["messages"] 242 | ) 243 | ] 244 | } 245 | 246 | async def research_agent_tools(state: SectionState, config: RunnableConfig): 247 | """Performs the tool call and route to supervisor or continue the research loop""" 248 | 249 | result = [] 250 | completed_section = None 251 | 252 | # Get tools based on configuration 253 | _, research_tools_by_name = get_research_tools(config) 254 | 255 | # Process all tool calls first (required for OpenAI) 256 | for tool_call in state["messages"][-1].tool_calls: 257 | # Get the tool 258 | tool = research_tools_by_name[tool_call["name"]] 259 | # Perform the tool call - use ainvoke for async tools 260 | if hasattr(tool, 'ainvoke'): 261 | observation = await tool.ainvoke(tool_call["args"]) 262 | else: 263 | observation = tool.invoke(tool_call["args"]) 264 | # Append to messages 265 | result.append({"role": "tool", 266 | "content": observation, 267 | "name": tool_call["name"], 268 | "tool_call_id": tool_call["id"]}) 269 | 270 | # Store the section observation if a Section tool was called 271 | if tool_call["name"] == "Section": 272 | completed_section = observation 273 | 274 | # After processing all tools, decide what to do next 275 | if completed_section: 276 | # Write the completed section to state and 
return to the supervisor 277 | return {"messages": result, "completed_sections": [completed_section]} 278 | else: 279 | # Continue the research loop for search tools, etc. 280 | return {"messages": result} 281 | 282 | async def research_agent_should_continue(state: SectionState) -> Literal["research_agent_tools", END]: 283 | """Decide if we should continue the loop or stop based upon whether the LLM made a tool call""" 284 | 285 | messages = state["messages"] 286 | last_message = messages[-1] 287 | 288 | # If the LLM makes a tool call, then perform an action 289 | if last_message.tool_calls: 290 | return "research_agent_tools" 291 | 292 | else: 293 | return END 294 | 295 | """Build the multi-agent workflow""" 296 | 297 | # Research agent workflow 298 | research_builder = StateGraph(SectionState, output=SectionOutputState, config_schema=Configuration) 299 | research_builder.add_node("research_agent", research_agent) 300 | research_builder.add_node("research_agent_tools", research_agent_tools) 301 | research_builder.add_edge(START, "research_agent") 302 | research_builder.add_conditional_edges( 303 | "research_agent", 304 | research_agent_should_continue, 305 | { 306 | # Name returned by should_continue : Name of next node to visit 307 | "research_agent_tools": "research_agent_tools", 308 | END: END, 309 | }, 310 | ) 311 | research_builder.add_edge("research_agent_tools", "research_agent") 312 | 313 | # Supervisor workflow 314 | supervisor_builder = StateGraph(ReportState, input=MessagesState, output=ReportStateOutput, config_schema=Configuration) 315 | supervisor_builder.add_node("supervisor", supervisor) 316 | supervisor_builder.add_node("supervisor_tools", supervisor_tools) 317 | supervisor_builder.add_node("research_team", research_builder.compile()) 318 | 319 | # Flow of the supervisor agent 320 | supervisor_builder.add_edge(START, "supervisor") 321 | supervisor_builder.add_conditional_edges( 322 | "supervisor", 323 | supervisor_should_continue, 324 | { 325 | # Name returned by should_continue : Name of next node to visit 326 | "supervisor_tools": "supervisor_tools", 327 | END: END, 328 | }, 329 | ) 330 | supervisor_builder.add_edge("research_team", "supervisor") 331 | 332 | graph = supervisor_builder.compile() -------------------------------------------------------------------------------- /src/open_deep_research/prompts.py: -------------------------------------------------------------------------------- 1 | report_planner_query_writer_instructions="""You are performing research for a report. 2 | 3 | 4 | {topic} 5 | 6 | 7 | 8 | {report_organization} 9 | 10 | 11 | 12 | Your goal is to generate {number_of_queries} web search queries that will help gather information for planning the report sections. 13 | 14 | The queries should: 15 | 16 | 1. Be related to the Report topic 17 | 2. Help satisfy the requirements specified in the report organization 18 | 19 | Make the queries specific enough to find high-quality, relevant sources while covering the breadth needed for the report structure. 20 | 21 | 22 | 23 | Call the Queries tool 24 | 25 | """ 26 | 27 | report_planner_instructions="""I want a plan for a report that is concise and focused. 28 | 29 | 30 | The topic of the report is: 31 | {topic} 32 | 33 | 34 | 35 | The report should follow this organization: 36 | {report_organization} 37 | 38 | 39 | 40 | Here is context to use to plan the sections of the report: 41 | {context} 42 | 43 | 44 | 45 | Generate a list of sections for the report. 
Your plan should be tight and focused with NO overlapping sections or unnecessary filler. 46 | 47 | For example, a good report structure might look like: 48 | 1/ intro 49 | 2/ overview of topic A 50 | 3/ overview of topic B 51 | 4/ comparison between A and B 52 | 5/ conclusion 53 | 54 | Each section should have the fields: 55 | 56 | - Name - Name for this section of the report. 57 | - Description - Brief overview of the main topics covered in this section. 58 | - Research - Whether to perform web research for this section of the report. IMPORTANT: Main body sections (not intro/conclusion) MUST have Research=True. A report must have AT LEAST 2-3 sections with Research=True to be useful. 59 | - Content - The content of the section, which you will leave blank for now. 60 | 61 | Integration guidelines: 62 | - Include examples and implementation details within main topic sections, not as separate sections 63 | - Ensure each section has a distinct purpose with no content overlap 64 | - Combine related concepts rather than separating them 65 | - CRITICAL: Every section MUST be directly relevant to the main topic 66 | - Avoid tangential or loosely related sections that don't directly address the core topic 67 | 68 | Before submitting, review your structure to ensure it has no redundant sections and follows a logical flow. 69 | 70 | 71 | 72 | Here is feedback on the report structure from review (if any): 73 | {feedback} 74 | 75 | 76 | 77 | Call the Sections tool 78 | 79 | """ 80 | 81 | query_writer_instructions="""You are an expert technical writer crafting targeted web search queries that will gather comprehensive information for writing a technical report section. 82 | 83 | 84 | {topic} 85 | 86 | 87 |
88 | {section_topic} 89 |
90 | 91 | 92 | Your goal is to generate {number_of_queries} search queries that will help gather comprehensive information about the section topic. 93 | 94 | The queries should: 95 | 96 | 1. Be related to the topic 97 | 2. Examine different aspects of the topic 98 | 99 | Make the queries specific enough to find high-quality, relevant sources. 100 | 101 | 102 | 103 | Call the Queries tool 104 | 105 | """ 106 | 107 | section_writer_instructions = """Write one section of a research report. 108 | 109 | 110 | 1. Review the report topic, section name, and section topic carefully. 111 | 2. If present, review any existing section content. 112 | 3. Then, look at the provided Source material. 113 | 4. Decide the sources that you will use to write a report section. 114 | 5. Write the report section and list your sources. 115 | 116 | 117 | 118 | - If existing section content is not populated, write from scratch 119 | - If existing section content is populated, synthesize it with the source material 120 | - Strict 150-200 word limit 121 | - Use simple, clear language 122 | - Use short paragraphs (2-3 sentences max) 123 | - Use ## for section title (Markdown format) 124 | 125 | 126 | 127 | - Assign each unique URL a single citation number in your text 128 | - End with ### Sources that lists each source with corresponding numbers 129 | - IMPORTANT: Number sources sequentially without gaps (1,2,3,4...) in the final list regardless of which sources you choose 130 | - Example format: 131 | [1] Source Title: URL 132 | [2] Source Title: URL 133 | 134 | 135 | 136 | 1. Verify that EVERY claim is grounded in the provided Source material 137 | 2. Confirm each URL appears ONLY ONCE in the Source list 138 | 3. Verify that sources are numbered sequentially (1,2,3...) without any gaps 139 | 140 | """ 141 | 142 | section_writer_inputs=""" 143 | 144 | {topic} 145 | 146 | 147 |
148 | {section_name} 149 |
150 | 151 |
152 | {section_topic} 153 |
154 | 155 | 156 | {section_content} 157 | 158 | 159 | 160 | {context} 161 | 162 | """ 163 | 164 | section_grader_instructions = """Review a report section relative to the specified topic: 165 | 166 | 167 | {topic} 168 | 169 | 170 |
171 | {section_topic} 172 |
173 | 174 |
175 | {section} 176 |
177 | 178 | 179 | Evaluate whether the section content adequately addresses the section topic. 180 | 181 | If the section content does not adequately address the section topic, generate {number_of_follow_up_queries} follow-up search queries to gather missing information. 182 | 183 | 184 | 185 | Call the Feedback tool and output with the following schema: 186 | 187 | grade: Literal["pass","fail"] = Field( 188 | description="Evaluation result indicating whether the response meets requirements ('pass') or needs revision ('fail')." 189 | ) 190 | follow_up_queries: List[SearchQuery] = Field( 191 | description="List of follow-up search queries.", 192 | ) 193 | 194 | """ 195 | 196 | final_section_writer_instructions="""You are an expert technical writer crafting a section that synthesizes information from the rest of the report. 197 | 198 | 199 | {topic} 200 | 201 | 202 |
203 | {section_name} 204 |
205 | 206 |
207 | {section_topic} 208 |
209 | 210 | 211 | {context} 212 | 213 | 214 | 215 | 1. Section-Specific Approach: 216 | 217 | For Introduction: 218 | - Use # for report title (Markdown format) 219 | - 50-100 word limit 220 | - Write in simple and clear language 221 | - Focus on the core motivation for the report in 1-2 paragraphs 222 | - Use a clear narrative arc to introduce the report 223 | - Include NO structural elements (no lists or tables) 224 | - No sources section needed 225 | 226 | For Conclusion/Summary: 227 | - Use ## for section title (Markdown format) 228 | - 100-150 word limit 229 | - For comparative reports: 230 | * Must include a focused comparison table using Markdown table syntax 231 | * Table should distill insights from the report 232 | * Keep table entries clear and concise 233 | - For non-comparative reports: 234 | * Only use ONE structural element IF it helps distill the points made in the report: 235 | * Either a focused table comparing items present in the report (using Markdown table syntax) 236 | * Or a short list using proper Markdown list syntax: 237 | - Use `*` or `-` for unordered lists 238 | - Use `1.` for ordered lists 239 | - Ensure proper indentation and spacing 240 | - End with specific next steps or implications 241 | - No sources section needed 242 | 243 | 3. Writing Approach: 244 | - Use concrete details over general statements 245 | - Make every word count 246 | - Focus on your single most important point 247 | 248 | 249 | 250 | - For introduction: 50-100 word limit, # for report title, no structural elements, no sources section 251 | - For conclusion: 100-150 word limit, ## for section title, only ONE structural element at most, no sources section 252 | - Markdown format 253 | - Do not include word count or any preamble in your response 254 | """ 255 | 256 | 257 | ## Supervisor 258 | SUPERVISOR_INSTRUCTIONS = """ 259 | You are scoping research for a report based on a user-provided topic. 260 | 261 | ### Your responsibilities: 262 | 263 | 1. **Gather Background Information** 264 | Based upon the user's topic, use the `enhanced_tavily_search` to collect relevant information about the topic. 265 | - You MUST perform ONLY ONE search to gather comprehensive context 266 | - Create a highly targeted search query that will yield the most valuable information 267 | - Take time to analyze and synthesize the search results before proceeding 268 | - Do not proceed to the next step until you have an understanding of the topic 269 | 270 | 2. **Clarify the Topic** 271 | After your initial research, engage with the user to clarify any questions that arose. 272 | - Ask ONE SET of follow-up questions based on what you learned from your searches 273 | - Do not proceed until you fully understand the topic, goals, constraints, and any preferences 274 | - Synthesize what you've learned so far before asking questions 275 | - You MUST engage in at least one clarification exchange with the user before proceeding 276 | 277 | 3. 
**Define Report Structure** 278 | Only after completing both research AND clarification with the user: 279 | - Use the `Sections` tool to define a list of report sections 280 | - Each section should be a written description with: a section name and a section research plan 281 | - Do not include sections for introductions or conclusions (We'll add these later) 282 | - Ensure sections are scoped to be independently researchable 283 | - Base your sections on both the search results AND user clarifications 284 | - Format your sections as a list of strings, with each string having the scope of research for that section. 285 | 286 | 4. **Assemble the Final Report** 287 | When all sections are returned: 288 | - IMPORTANT: First check your previous messages to see what you've already completed 289 | - If you haven't created an introduction yet, use the `Introduction` tool to generate one 290 | - Set content to include report title with a single # (H1 level) at the beginning 291 | - Example: "# [Report Title]\n\n[Introduction content...]" 292 | - After the introduction, use the `Conclusion` tool to summarize key insights 293 | - Set content to include conclusion title with ## (H2 level) at the beginning 294 | - Example: "## Conclusion\n\n[Conclusion content...]" 295 | - Only use ONE structural element IF it helps distill the points made in the report: 296 | - Either a focused table comparing items present in the report (using Markdown table syntax) 297 | - Or a short list using proper Markdown list syntax: 298 | - Use `*` or `-` for unordered lists 299 | - Use `1.` for ordered lists 300 | - Ensure proper indentation and spacing 301 | - Do not call the same tool twice - check your message history 302 | 303 | ### Additional Notes: 304 | - You are a reasoning model. Think through problems step-by-step before acting. 305 | - IMPORTANT: Do not rush to create the report structure. Gather information thoroughly first. 306 | - Use multiple searches to build a complete picture before drawing conclusions. 307 | - Maintain a clear, informative, and professional tone throughout.""" 308 | 309 | RESEARCH_INSTRUCTIONS = """ 310 | You are a researcher responsible for completing a specific section of a report. 311 | 312 | ### Your goals: 313 | 314 | 1. **Understand the Section Scope** 315 | Begin by reviewing the section scope of work. This defines your research focus. Use it as your objective. 316 | 317 |
318 | {section_description} 319 |
320 | 321 | 2. **Strategic Research Process** 322 | Follow this precise research strategy: 323 | 324 | a) **First Query**: Begin with a SINGLE, well-crafted search query with `enhanced_tavily_search` that directly addresses the core of the section topic. 325 | - Formulate ONE targeted query that will yield the most valuable information 326 | - Avoid generating multiple similar queries (e.g., 'Benefits of X', 'Advantages of X', 'Why use X') 327 | - Example: "Model Context Protocol developer benefits and use cases" is better than separate queries for benefits and use cases 328 | 329 | b) **Analyze Results Thoroughly**: After receiving search results: 330 | - Carefully read and analyze ALL provided content 331 | - Identify specific aspects that are well-covered and those that need more information 332 | - Assess how well the current information addresses the section scope 333 | 334 | c) **Follow-up Research**: If needed, conduct targeted follow-up searches: 335 | - Create ONE follow-up query that addresses SPECIFIC missing information 336 | - Example: If general benefits are covered but technical details are missing, search for "Model Context Protocol technical implementation details" 337 | - AVOID redundant queries that would return similar information 338 | 339 | d) **Research Completion**: Continue this focused process until you have: 340 | - Comprehensive information addressing ALL aspects of the section scope 341 | - At least 3 high-quality sources with diverse perspectives 342 | - Both breadth (covering all aspects) and depth (specific details) of information 343 | 344 | 3. **Use the Section Tool** 345 | Only after thorough research, write a high-quality section using the Section tool: 346 | - `name`: The title of the section 347 | - `description`: The scope of research you completed (brief, 1-2 sentences) 348 | - `content`: The completed body of text for the section, which MUST: 349 | - Begin with the section title formatted as "## [Section Title]" (H2 level with ##) 350 | - Be formatted in Markdown style 351 | - Be MAXIMUM 200 words (strictly enforce this limit) 352 | - End with a "### Sources" subsection (H3 level with ###) containing a numbered list of URLs used 353 | - Use clear, concise language with bullet points where appropriate 354 | - Include relevant facts, statistics, or expert opinions 355 | 356 | Example format for content: 357 | ``` 358 | ## [Section Title] 359 | 360 | [Body text in markdown format, maximum 200 words...] 361 | 362 | ### Sources 363 | 1. [URL 1] 364 | 2. [URL 2] 365 | 3. [URL 3] 366 | ``` 367 | 368 | --- 369 | 370 | ### Research Decision Framework 371 | 372 | Before each search query or when writing the section, think through: 373 | 374 | 1. **What information do I already have?** 375 | - Review all information gathered so far 376 | - Identify the key insights and facts already discovered 377 | 378 | 2. **What information is still missing?** 379 | - Identify specific gaps in knowledge relative to the section scope 380 | - Prioritize the most important missing information 381 | 382 | 3. 
**What is the most effective next action?** 383 | - Determine if another search is needed (and what specific aspect to search for) 384 | - Or if enough information has been gathered to write a comprehensive section 385 | 386 | --- 387 | 388 | ### Notes: 389 | - Focus on QUALITY over QUANTITY of searches 390 | - Each search should have a clear, distinct purpose 391 | - Do not write introductions or conclusions unless explicitly part of your section 392 | - Keep a professional, factual tone 393 | - Always follow markdown formatting 394 | - Stay within the 200 word limit for the main content 395 | """ 396 | -------------------------------------------------------------------------------- /src/open_deep_research/state.py: -------------------------------------------------------------------------------- 1 | from typing import Annotated, List, TypedDict, Literal 2 | from pydantic import BaseModel, Field 3 | import operator 4 | 5 | class Section(BaseModel): 6 | name: str = Field( 7 | description="Name for this section of the report.", 8 | ) 9 | description: str = Field( 10 | description="Brief overview of the main topics and concepts to be covered in this section.", 11 | ) 12 | research: bool = Field( 13 | description="Whether to perform web research for this section of the report." 14 | ) 15 | content: str = Field( 16 | description="The content of the section." 17 | ) 18 | 19 | class Sections(BaseModel): 20 | sections: List[Section] = Field( 21 | description="Sections of the report.", 22 | ) 23 | 24 | class SearchQuery(BaseModel): 25 | search_query: str = Field(None, description="Query for web search.") 26 | 27 | class Queries(BaseModel): 28 | queries: List[SearchQuery] = Field( 29 | description="List of search queries.", 30 | ) 31 | 32 | class Feedback(BaseModel): 33 | grade: Literal["pass","fail"] = Field( 34 | description="Evaluation result indicating whether the response meets requirements ('pass') or needs revision ('fail')." 
35 | ) 36 | follow_up_queries: List[SearchQuery] = Field( 37 | description="List of follow-up search queries.", 38 | ) 39 | 40 | class ReportStateInput(TypedDict): 41 | topic: str # Report topic 42 | 43 | class ReportStateOutput(TypedDict): 44 | final_report: str # Final report 45 | 46 | class ReportState(TypedDict): 47 | topic: str # Report topic 48 | feedback_on_report_plan: Annotated[list[str], operator.add] # List of feedback on the report plan 49 | sections: list[Section] # List of report sections 50 | completed_sections: Annotated[list, operator.add] # Send() API key 51 | report_sections_from_research: str # String of any completed sections from research to write final sections 52 | final_report: str # Final report 53 | 54 | class SectionState(TypedDict): 55 | topic: str # Report topic 56 | section: Section # Report section 57 | search_iterations: int # Number of search iterations done 58 | search_queries: list[SearchQuery] # List of search queries 59 | source_str: str # String of formatted source content from web search 60 | report_sections_from_research: str # String of any completed sections from research to write final sections 61 | completed_sections: list[Section] # Final key we duplicate in outer state for Send() API 62 | 63 | class SectionOutputState(TypedDict): 64 | completed_sections: list[Section] # Final key we duplicate in outer state for Send() API 65 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Test package initialization -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pytest configuration for open_deep_research tests. 
3 | """ 4 | 5 | import pytest 6 | 7 | def pytest_addoption(parser): 8 | """Add command-line options to pytest.""" 9 | parser.addoption("--research-agent", action="store", help="Agent type: multi_agent or graph") 10 | parser.addoption("--search-api", action="store", help="Search API to use") 11 | parser.addoption("--eval-model", action="store", help="Model for evaluation") 12 | parser.addoption("--supervisor-model", action="store", help="Model for supervisor agent") 13 | parser.addoption("--researcher-model", action="store", help="Model for researcher agent") 14 | parser.addoption("--planner-provider", action="store", help="Provider for planner model") 15 | parser.addoption("--planner-model", action="store", help="Model for planning") 16 | parser.addoption("--writer-provider", action="store", help="Provider for writer model") 17 | parser.addoption("--writer-model", action="store", help="Model for writing") 18 | parser.addoption("--max-search-depth", action="store", help="Maximum search depth") -------------------------------------------------------------------------------- /tests/run_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import subprocess 4 | import sys 5 | import argparse 6 | 7 | ''' 8 | Example w/ o3 -- 9 | python tests/run_test.py --all \ 10 | --supervisor-model "openai:o3" \ 11 | --researcher-model "openai:o3" \ 12 | --planner-provider "openai" \ 13 | --planner-model "o3" \ 14 | --writer-provider "openai" \ 15 | --writer-model "o3" \ 16 | --eval-model "openai:o3" \ 17 | --search-api "tavily" 18 | 19 | Example w/ gpt-4.1 -- 20 | python tests/run_test.py --all \ 21 | --supervisor-model "openai:gpt-4.1" \ 22 | --researcher-model "openai:gpt-4.1" \ 23 | --planner-provider "openai" \ 24 | --planner-model "gpt-4.1" \ 25 | --writer-provider "openai" \ 26 | --writer-model "gpt-4.1" \ 27 | --eval-model "openai:o3" \ 28 | --search-api "tavily" 29 | ''' 30 | 31 | def main(): 32 | # LangSmith project name for report quality testing 33 | langsmith_project = "Open Deep Research: Report Quality Testing" 34 | 35 | # Parse command line arguments 36 | parser = argparse.ArgumentParser(description="Run report quality tests for different agents") 37 | parser.add_argument("--rich-output", action="store_true", help="Show rich output in terminal") 38 | parser.add_argument("--experiment-name", help="Name for the LangSmith experiment") 39 | parser.add_argument("--agent", choices=["multi_agent", "graph"], help="Run tests for a specific agent") 40 | parser.add_argument("--all", action="store_true", help="Run tests for all agents") 41 | 42 | # Model configuration options 43 | parser.add_argument("--supervisor-model", help="Model for supervisor agent (e.g., 'anthropic:claude-3-7-sonnet-latest')") 44 | parser.add_argument("--researcher-model", help="Model for researcher agent (e.g., 'anthropic:claude-3-5-sonnet-latest')") 45 | parser.add_argument("--planner-provider", help="Provider for planner model (e.g., 'anthropic')") 46 | parser.add_argument("--planner-model", help="Model for planner in graph-based agent (e.g., 'claude-3-7-sonnet-latest')") 47 | parser.add_argument("--writer-provider", help="Provider for writer model (e.g., 'anthropic')") 48 | parser.add_argument("--writer-model", help="Model for writer in graph-based agent (e.g., 'claude-3-5-sonnet-latest')") 49 | parser.add_argument("--eval-model", help="Model for evaluating report quality (default: openai:gpt-4-turbo)") 50 | 51 | # Search API configuration 52 | 
parser.add_argument("--search-api", choices=["tavily", "duckduckgo"], 53 | help="Search API to use for content retrieval") 54 | 55 | args = parser.parse_args() 56 | 57 | # Base pytest options 58 | base_pytest_options = ["-v", "--disable-warnings", "--langsmith-output"] 59 | if args.rich_output: 60 | base_pytest_options.append("--rich-output") 61 | 62 | # Define available agents 63 | agents = ["multi_agent", "graph"] 64 | 65 | # Determine which agents to test 66 | if args.agent: 67 | if args.agent in agents: 68 | agents_to_test = [args.agent] 69 | else: 70 | print(f"Error: Unknown agent '{args.agent}'") 71 | print(f"Available agents: {', '.join(agents)}") 72 | return 1 73 | elif args.all: 74 | agents_to_test = agents 75 | else: 76 | # Default to testing all agents 77 | agents_to_test = agents 78 | 79 | # Run tests for each agent 80 | for agent in agents_to_test: 81 | print(f"\nRunning tests for {agent}...") 82 | 83 | # Set up LangSmith environment for this agent 84 | os.environ["LANGSMITH_PROJECT"] = langsmith_project 85 | os.environ["LANGSMITH_TEST_SUITE"] = langsmith_project 86 | 87 | # Ensure tracing is enabled 88 | os.environ["LANGCHAIN_TRACING_V2"] = "true" 89 | 90 | # Create a fresh copy of the pytest options for this run 91 | pytest_options = base_pytest_options.copy() 92 | 93 | # We're now using direct pytest command line arguments instead of environment variables 94 | # No need to set environment variables for test parameters 95 | 96 | # Test file path 97 | test_file = "tests/test_report_quality.py" 98 | 99 | # Set up experiment name 100 | experiment_name = args.experiment_name if args.experiment_name else f"Report Quality Test | Agent: {agent}" 101 | print(f" Project: {langsmith_project}") 102 | print(f" Experiment: {experiment_name}") 103 | 104 | os.environ["LANGSMITH_EXPERIMENT"] = experiment_name 105 | 106 | print(f"\nℹ️ Test results for {agent} are being logged to LangSmith") 107 | 108 | # Run the test with direct pytest arguments instead of environment variables 109 | cmd = ["python", "-m", "pytest", test_file] + pytest_options + [ 110 | f"--research-agent={agent}" 111 | ] 112 | 113 | # Add model configurations if provided 114 | if args.supervisor_model: 115 | cmd.append(f"--supervisor-model={args.supervisor_model}") 116 | if args.researcher_model: 117 | cmd.append(f"--researcher-model={args.researcher_model}") 118 | if args.planner_provider: 119 | cmd.append(f"--planner-provider={args.planner_provider}") 120 | if args.planner_model: 121 | cmd.append(f"--planner-model={args.planner_model}") 122 | if args.writer_provider: 123 | cmd.append(f"--writer-provider={args.writer_provider}") 124 | if args.writer_model: 125 | cmd.append(f"--writer-model={args.writer_model}") 126 | if args.eval_model: 127 | cmd.append(f"--eval-model={args.eval_model}") 128 | if args.search_api: 129 | cmd.append(f"--search-api={args.search_api}") 130 | 131 | print(f"Running command: {' '.join(cmd)}") 132 | 133 | result = subprocess.run(cmd, capture_output=True, text=True) 134 | 135 | # Print test output with highlighting for section relevance analysis 136 | stdout = result.stdout 137 | 138 | # Highlight section relevance information with colors if supported 139 | if "POTENTIAL IRRELEVANT SECTIONS DETECTED" in stdout: 140 | # Extract and highlight the section relevance analysis 141 | import re 142 | section_analysis = re.search(r'⚠️ POTENTIAL IRRELEVANT SECTIONS DETECTED:.*?(?=\n\n|\Z)', 143 | stdout, re.DOTALL) 144 | if section_analysis: 145 | analysis_text = section_analysis.group(0) 146 | # Use ANSI 
color codes for highlighting (red for irrelevant sections) 147 | highlighted_analysis = f"\033[1;31m{analysis_text}\033[0m" 148 | stdout = stdout.replace(analysis_text, highlighted_analysis) 149 | 150 | print(stdout) 151 | 152 | if result.stderr: 153 | print("Errors/warnings:") 154 | print(result.stderr) 155 | 156 | # Print a summary of section relevance issues 157 | if "POTENTIAL IRRELEVANT SECTIONS DETECTED" in result.stdout: 158 | print("\n\033[1;33m==== SECTION RELEVANCE ISSUES DETECTED ====\033[0m") 159 | print("Some sections may not be relevant to the main topic.") 160 | print("Review the detailed analysis in the test output above.") 161 | print("Consider updating the prompts to improve section relevance.") 162 | 163 | if __name__ == "__main__": 164 | sys.exit(main() or 0) -------------------------------------------------------------------------------- /tests/test_report_quality.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import uuid 5 | import pytest 6 | import asyncio 7 | from pydantic import BaseModel, Field 8 | from langchain.chat_models import init_chat_model 9 | from langsmith import testing as t 10 | 11 | from langgraph.checkpoint.memory import MemorySaver 12 | from langgraph.types import Command 13 | 14 | # Import the report generation agents 15 | from open_deep_research.graph import builder 16 | from open_deep_research.multi_agent import supervisor_builder 17 | 18 | class CriteriaGrade(BaseModel): 19 | """Score the response against specific criteria.""" 20 | grade: bool = Field(description="Does the response meet the provided criteria?") 21 | justification: str = Field(description="The justification for the grade and score, including specific examples from the response.") 22 | 23 | # Function to create evaluation LLM at test time 24 | def get_evaluation_llm(eval_model=None): 25 | """Create and return an evaluation LLM. 26 | 27 | Args: 28 | eval_model: Model identifier to use for evaluation 29 | Format: "provider:model_name" (e.g., "openai:gpt-4-turbo") 30 | If None, it will use environment variable or default 31 | 32 | Returns: 33 | Structured LLM for generating evaluation grades 34 | """ 35 | # Use provided model, then environment variable, then default 36 | model_to_use = eval_model or os.environ.get("EVAL_MODEL", "openai:gpt-4-turbo") 37 | 38 | criteria_eval_llm = init_chat_model(model_to_use) 39 | return criteria_eval_llm.with_structured_output(CriteriaGrade) 40 | 41 | RESPONSE_CRITERIA_SYSTEM_PROMPT = """ 42 | You are evaluating the quality of a research report. Please assess the report against the following criteria, being especially strict about section relevance. 43 | 44 | 1. Topic Relevance (Overall): Does the report directly address the user's input topic thoroughly? 45 | 46 | 2. Section Relevance (Critical): CAREFULLY assess each individual section for relevance to the main topic: 47 | - Identify each section by its ## header 48 | - For each section, determine if it is directly relevant to the primary topic 49 | - Flag any sections that seem tangential, off-topic, or only loosely connected to the main topic 50 | - A high-quality report should have NO irrelevant sections 51 | 52 | 3. Structure and Flow: Do the sections flow logically from one to the next, creating a cohesive narrative? 53 | 54 | 4. Introduction Quality: Does the introduction effectively provide context and set up the scope of the report? 55 | 56 | 5. 
Conclusion Quality: Does the conclusion meaningfully summarize key findings and insights from the report? 57 | 58 | 6. Structural Elements: Does the report use structural elements (e.g., tables, lists) to effectively convey information? 59 | 60 | 7. Section Headers: Are section headers properly formatted with Markdown (# for title, ## for sections, ### for subsections)? 61 | 62 | 8. Citations: Does the report properly cite sources in each main body section? 63 | 64 | 9. Overall Quality: Is the report well-researched, accurate, and professionally written? 65 | 66 | Evaluation Instructions: 67 | - Be STRICT about section relevance - ALL sections must clearly connect to the primary topic 68 | - A report with even ONE irrelevant section should be considered flawed 69 | - You must individually mention each section by name and assess its relevance 70 | - Provide specific examples from the report to justify your evaluation for each criterion 71 | - The report fails if any sections are irrelevant to the main topic, regardless of other qualities 72 | """ 73 | 74 | # Define fixtures for test configuration 75 | @pytest.fixture 76 | def research_agent(request): 77 | """Get the research agent type from command line or environment variable.""" 78 | return request.config.getoption("--research-agent") or os.environ.get("RESEARCH_AGENT", "multi_agent") 79 | 80 | @pytest.fixture 81 | def search_api(request): 82 | """Get the search API from command line or environment variable.""" 83 | return request.config.getoption("--search-api") or os.environ.get("SEARCH_API", "tavily") 84 | 85 | @pytest.fixture 86 | def eval_model(request): 87 | """Get the evaluation model from command line or environment variable.""" 88 | return request.config.getoption("--eval-model") or os.environ.get("EVAL_MODEL", "openai:gpt-4-turbo") 89 | 90 | @pytest.fixture 91 | def models(request, research_agent): 92 | """Get model configurations based on agent type.""" 93 | if research_agent == "multi_agent": 94 | return { 95 | "supervisor_model": ( 96 | request.config.getoption("--supervisor-model") or 97 | os.environ.get("SUPERVISOR_MODEL", "anthropic:claude-3-7-sonnet-latest") 98 | ), 99 | "researcher_model": ( 100 | request.config.getoption("--researcher-model") or 101 | os.environ.get("RESEARCHER_MODEL", "anthropic:claude-3-5-sonnet-latest") 102 | ), 103 | } 104 | else: # graph agent 105 | return { 106 | "planner_provider": ( 107 | request.config.getoption("--planner-provider") or 108 | os.environ.get("PLANNER_PROVIDER", "anthropic") 109 | ), 110 | "planner_model": ( 111 | request.config.getoption("--planner-model") or 112 | os.environ.get("PLANNER_MODEL", "claude-3-7-sonnet-latest") 113 | ), 114 | "writer_provider": ( 115 | request.config.getoption("--writer-provider") or 116 | os.environ.get("WRITER_PROVIDER", "anthropic") 117 | ), 118 | "writer_model": ( 119 | request.config.getoption("--writer-model") or 120 | os.environ.get("WRITER_MODEL", "claude-3-5-sonnet-latest") 121 | ), 122 | "max_search_depth": int( 123 | request.config.getoption("--max-search-depth") or 124 | os.environ.get("MAX_SEARCH_DEPTH", "2") 125 | ), 126 | } 127 | 128 | # Note: Command line options are defined in conftest.py 129 | # These fixtures still work with options defined there 130 | 131 | @pytest.mark.langsmith 132 | def test_response_criteria_evaluation(research_agent, search_api, models, eval_model): 133 | """Test if a report meets the specified quality criteria.""" 134 | print(f"Testing {research_agent} report generation with {search_api} search...") 135 | 
print(f"Models: {models}") 136 | print(f"Eval model: {eval_model}") 137 | 138 | # Log inputs to LangSmith 139 | t.log_inputs({ 140 | "agent_type": research_agent, 141 | "search_api": search_api, 142 | "models": models, 143 | "eval_model": eval_model, 144 | "test": "report_quality_evaluation", 145 | "description": f"Testing report quality for {research_agent} with {search_api}" 146 | }) 147 | 148 | # Run the appropriate agent based on the parameter 149 | if research_agent == "multi_agent": 150 | 151 | # Initial messages 152 | initial_msg = [{"role": "user", "content": "What is model context protocol?"}] 153 | followup_msg = [{"role": "user", "content": "high-level overview of MCP, tell me about interesting specific MCP servers, developer audience, just focus on MCP. generate the report now and don't ask any more follow-up questions."}] 154 | 155 | # Checkpointer for the multi-agent approach 156 | checkpointer = MemorySaver() 157 | graph = supervisor_builder.compile(checkpointer=checkpointer) 158 | 159 | # Create configuration with the provided parameters 160 | config = { 161 | "thread_id": str(uuid.uuid4()), 162 | "search_api": search_api, 163 | "supervisor_model": models.get("supervisor_model"), 164 | "researcher_model": models.get("researcher_model"), 165 | } 166 | 167 | thread_config = {"configurable": config} 168 | 169 | # Run the workflow with asyncio 170 | asyncio.run(graph.ainvoke({"messages": initial_msg}, config=thread_config)) 171 | asyncio.run(graph.ainvoke({"messages": followup_msg}, config=thread_config)) 172 | 173 | # Get the final state once both invocations are complete 174 | final_state = graph.get_state(thread_config) 175 | print(f"Final state values: {final_state.values}") 176 | report = final_state.values.get('final_report', "No report generated") 177 | print(f"Report length: {len(report)}") 178 | 179 | elif research_agent == "graph": 180 | 181 | # Topic query 182 | topic_query = "What is model context protocol? 
high-level overview of MCP, tell me about interesting specific MCP servers, developer audience, just focus on MCP" 183 | 184 | # Checkpointer for the graph approach 185 | checkpointer = MemorySaver() 186 | graph = builder.compile(checkpointer=checkpointer) 187 | 188 | # Configuration for the graph agent with provided parameters 189 | thread = {"configurable": { 190 | "thread_id": str(uuid.uuid4()), 191 | "search_api": search_api, 192 | "planner_provider": models.get("planner_provider", "anthropic"), 193 | "planner_model": models.get("planner_model", "claude-3-7-sonnet-latest"), 194 | "writer_provider": models.get("writer_provider", "anthropic"), 195 | "writer_model": models.get("writer_model", "claude-3-5-sonnet-latest"), 196 | "max_search_depth": models.get("max_search_depth", 2), 197 | }} 198 | 199 | async def run_graph_agent(thread): 200 | # Run the graph until the interruption 201 | async for event in graph.astream({"topic":topic_query}, thread, stream_mode="updates"): 202 | if '__interrupt__' in event: 203 | interrupt_value = event['__interrupt__'][0].value 204 | 205 | # Pass True to approve the report plan 206 | async for event in graph.astream(Command(resume=True), thread, stream_mode="updates"): 207 | print(event) 208 | print("\n") 209 | 210 | final_state = graph.get_state(thread) 211 | report = final_state.values.get('final_report', "No report generated") 212 | return report 213 | 214 | report = asyncio.run(run_graph_agent(thread)) 215 | 216 | # Get evaluation LLM using the specified model 217 | criteria_eval_structured_llm = get_evaluation_llm(eval_model) 218 | 219 | # Evaluate the report against our quality criteria 220 | eval_result = criteria_eval_structured_llm.invoke([ 221 | {"role": "system", "content": RESPONSE_CRITERIA_SYSTEM_PROMPT}, 222 | {"role": "user", "content": f"""\n\n Report: \n\n{report}\n\nEvaluate whether the report meets the criteria and provide detailed justification for your evaluation."""} 223 | ]) 224 | 225 | # Extract section headers for analysis 226 | import re 227 | section_headers = re.findall(r'##\s+([^\n]+)', report) 228 | 229 | # Print the evaluation results 230 | print(f"Evaluation result: {'PASSED' if eval_result.grade else 'FAILED'}") 231 | print(f"Report contains {len(section_headers)} sections: {', '.join(section_headers)}") 232 | print(f"Justification: {eval_result.justification}") 233 | 234 | # Log outputs to LangSmith 235 | t.log_outputs({ 236 | "report": report, 237 | "evaluation_result": eval_result.grade, 238 | "justification": eval_result.justification, 239 | "report_length": len(report), 240 | "section_count": len(section_headers), 241 | "section_headers": section_headers, 242 | }) 243 | 244 | # Test passes if the evaluation criteria are met 245 | assert eval_result.grade --------------------------------------------------------------------------------
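For readers who want to drive the agents defined above outside the pytest harness, the following is a minimal sketch of running the multi-agent supervisor graph directly. It mirrors the invocation pattern used in `tests/test_report_quality.py`; the topic strings are illustrative placeholders, and the model identifiers are simply the defaults the test fixtures fall back to — substitute whatever providers your `.env` configures.

```python
# Minimal usage sketch (not part of the repository): compile and run the
# multi-agent supervisor graph, following the pattern in tests/test_report_quality.py.
import asyncio
import uuid

from langgraph.checkpoint.memory import MemorySaver
from open_deep_research.multi_agent import supervisor_builder

checkpointer = MemorySaver()
graph = supervisor_builder.compile(checkpointer=checkpointer)

# Configuration keys are read via Configuration.from_runnable_config();
# the model names below are assumptions (the defaults used by the test fixtures).
thread_config = {"configurable": {
    "thread_id": str(uuid.uuid4()),
    "search_api": "tavily",
    "supervisor_model": "anthropic:claude-3-7-sonnet-latest",
    "researcher_model": "anthropic:claude-3-5-sonnet-latest",
}}

# First turn: the supervisor researches the topic and may ask a clarifying question.
asyncio.run(graph.ainvoke(
    {"messages": [{"role": "user", "content": "What is the Model Context Protocol?"}]},
    config=thread_config,
))

# Second turn on the same thread: answer the clarification and ask for the report,
# as the report-quality test does.
asyncio.run(graph.ainvoke(
    {"messages": [{"role": "user", "content": "Focus on a developer audience. Generate the report now."}]},
    config=thread_config,
))

final_state = graph.get_state(thread_config)
print(final_state.values.get("final_report", "No report generated"))
```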