├── .gitignore ├── LICENSE ├── README.md ├── assets └── logo_full.png ├── benchmark ├── README.md ├── baselines │ ├── __init__.py │ └── smolagents_baseline.py ├── benchmark_configs │ ├── base_llm_config_ds_v3.yaml │ ├── base_llm_config_llama4_m.yaml │ ├── base_llm_config_qwen2p5.yaml │ ├── langchain_deep_researcher_ds_v3.yaml │ ├── langchain_deep_researcher_llama4_m.yaml │ ├── langchain_deep_researcher_qwen2p5.yaml │ ├── open_deep_researcher_1step_ds_v3.yaml │ ├── open_deep_researcher_1step_llama4_m.yaml │ ├── open_deep_researcher_1step_qwen2p5.yaml │ ├── open_deep_researcher_3step_ds_v3.yaml │ ├── open_deep_researcher_3step_llama4_m.yaml │ ├── open_deep_researcher_3step_qwen2p5.yaml │ ├── smolagent_config_ds_v3.yaml │ ├── smolagent_config_llama4_m.yaml │ └── smolagent_config_qwen2p5.yaml ├── scoring.py └── summary.py ├── config.env.dev ├── configs ├── base_llm_config.yaml ├── open_deep_researcher_config.yaml └── smolagent_config.yaml ├── pyproject.toml ├── requirements.txt ├── src ├── libs │ ├── __init__.py │ └── utils │ │ ├── __init__.py │ │ ├── agent_factory.py │ │ ├── data_types.py │ │ ├── evals.py │ │ ├── generation.py │ │ ├── llms.py │ │ ├── log.py │ │ ├── podcast.py │ │ └── tavily_search.py ├── prompts.yaml ├── together_open_deep_research.py └── webapp.py └── uv.lock /.gitignore: -------------------------------------------------------------------------------- 1 | debug.txt 2 | pyrightconfig.json 3 | benchmark/benchmark_results/* 4 | benchmark/datasets/* 5 | *.html 6 | *.pdf 7 | *.mp3 8 | *.env 9 | .venv 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | .vscode/ 16 | # C extensions 17 | *.so 18 | 19 | # Ruff stuff: 20 | .ruff_cache/ 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Together 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | Together Open Deep Research Logo
3 | Together Open Deep Research
4 | AI-powered comprehensive research on complex topics
5 |
6 |
7 | Blog Post •
8 | Overview •
9 | Features •
10 | Installation •
11 | Usage •
12 | Disclaimer
13 |
14 |
15 | 16 | --- 17 | 18 | ## ✨ Overview 19 | 20 | Together Open Deep Research is an agentic LLM workflow that delivers in-depth research on complex topics requiring multi-hop reasoning. It enhances traditional web search by producing comprehensive, well-cited content that mimics the human research process - planning, searching, evaluating information, and iterating until completion. 21 | 22 | ## 🎯 Features 23 | 24 | - **Comprehensive Research Reports** - Generates long-form, well-cited content on complex topics 25 | - **Multi-Stage Process** - Uses multiple self-reflection stages for quality information gathering 26 | - **Source Verification** - Provides citations for all information sources 27 | - **Extensible Architecture** - Designed with a flexible foundation for community extension 28 | 29 | ## 🔧 Installation 30 | 31 | ### Prerequisites 32 | 33 | Before installing, ensure you have Python 3.12+ and the following tools: 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 |
| Tool | macOS | Ubuntu/Debian | Windows |
| --- | --- | --- | --- |
| Pandoc | `brew install pandoc` | `sudo apt-get install pandoc` | Download installer |
| pdfLaTeX | `brew install basictex` | `sudo apt-get install texlive-xetex` | Download MiKTeX |
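To confirm both tools are on your `PATH` before generating PDF reports (a quick sanity check, assuming standard installations):

```bash
pandoc --version
pdflatex --version
```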
55 | 56 | ### Setup Environment 57 | 58 | ```bash 59 | # Install uv (faster alternative to pip) 60 | curl -LsSf https://astral.sh/uv/install.sh | sh 61 | 62 | # Create and activate virtual environment 63 | uv venv --python=3.12 64 | source .venv/bin/activate 65 | 66 | # Install project dependencies 67 | uv pip install -r pyproject.toml 68 | uv lock --check 69 | 70 | # Optional: install with open-deep-research package (for langgraph evals) 71 | uv pip install -e ".[with-open-deep-research]" 72 | ``` 73 | 74 | ### Configure API Keys 75 | 76 | ```bash 77 | export TOGETHER_API_KEY=your_key_here 78 | export TAVILY_API_KEY=your_key_here 79 | export HUGGINGFACE_TOKEN=your_token_here 80 | ``` 81 | 82 | ## 🚀 Usage 83 | 84 | Run the deep research workflow: 85 | 86 | ```bash 87 | # Set Python path 88 | export PYTHONPATH=$PYTHONPATH:$(pwd)/src 89 | 90 | # Run with default options 91 | python src/together_open_deep_research.py --config configs/open_deep_researcher_config.yaml 92 | ``` 93 | 94 | Or run the gradio webapp: 95 | 96 | ```bash 97 | python src/webapp.py 98 | ``` 99 | 100 | ### Options 101 | 102 | - `--write-pdf` - Generate a PDF document of the report 103 | - `--write-html` - Create an HTML version of the report 104 | - `--write-podcast` - Create a Podcast of the entire artcle 105 | - `--add-toc-image` - Add a visual table of contents image 106 | - `--config PATH` - Specify a custom configuration file (default: `configs/open_deep_researcher_config.yaml`) 107 | 108 | ## ⚠️ Disclaimer 109 | 110 | As an LLM-based system, this tool may occasionally: 111 | 112 | - Generate hallucinations or fabricate information that appears plausible 113 | - Contain biases present in its training data 114 | - Misinterpret complex queries or provide incomplete analyses 115 | - Present outdated information 116 | 117 | **Always verify important information from generated reports with primary sources.** 118 | 119 | -------------------------------------------------------------------------------- /assets/logo_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/togethercomputer/open_deep_research/66e43b47bfb8722ef9aad38139453922d1feef2a/assets/logo_full.png -------------------------------------------------------------------------------- /benchmark/README.md: -------------------------------------------------------------------------------- 1 | ### Running Benchmarks 2 | 3 | ```bash 4 | # Set Python path 5 | export PYTHONPATH=$PYTHONPATH:$(pwd)/../src 6 | 7 | # Run benchmarks 8 | python scoring.py --datasets together-search-bench --agent-config ../configs/base_llm_config.yaml --max-workers 10 9 | 10 | # Summarize results 11 | python summary.py 12 | ``` 13 | 14 | #### Benchmark Options 15 | - `--datasets`: Benchmark dataset to use (e.g., `together-search-bench`) 16 | - `--limit`: Number of examples to process 17 | - `--agent-config`: Path to LLM configuration file 18 | - `--max-workers`: Number of parallel workers 19 | 20 | > **Note:** For LangChain's Open-Deep-Research benchmarks, we replace instructions with "You must perform in-depth research to answer the question" instead of "Rules: The answer is usually very short. It might be a number or two words. It's definitely not a sentence." Otherwise, the planner will refuse to generate research reports. 
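The scoring pipeline can also be driven from Python instead of the CLI. A minimal sketch, assuming the same environment variables and `PYTHONPATH` as above and that it is run from the `benchmark/` directory:

```python
# Minimal programmatic benchmark run (sketch; mirrors scoring.py's CLI path).
from scoring import load_questions, run_benchmark
from libs.utils.evals import llm_as_a_judge_scoring

# Load the Together-Search-Bench questions and keep a small sample (like --limit 5).
questions = load_questions(["together-search-bench"])[:5]

accuracy, details = run_benchmark(
    questions,
    callback=llm_as_a_judge_scoring,
    agent_config="../configs/base_llm_config.yaml",
    max_workers=2,
)
print(f"Accuracy: {accuracy:.2%} over {len(details)} questions")
```

Unlike the CLI entry point, this sketch does not write the JSON results file that `summary.py` consumes.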
-------------------------------------------------------------------------------- /benchmark/baselines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/togethercomputer/open_deep_research/66e43b47bfb8722ef9aad38139453922d1feef2a/benchmark/baselines/__init__.py -------------------------------------------------------------------------------- /benchmark/baselines/smolagents_baseline.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from smolagents import Tool 4 | 5 | from libs.utils.tavily_search import SearchResults, tavily_search 6 | 7 | 8 | class SmolAgentsTavilySearchTool(Tool): 9 | name = "tavily_web_search" 10 | description = ( 11 | """Performs a Tavily web search based on your query (similar to a Google search) and returns the top search results.""" 12 | ) 13 | inputs = {"query": {"type": "string", 14 | "description": "The search query to perform."}} 15 | output_type = "string" 16 | 17 | def __init__(self, max_results=3, include_raw=True, **kwargs): 18 | super().__init__() 19 | self.max_results = max_results 20 | self.include_raw = include_raw 21 | 22 | if not os.getenv("TAVILY_API_KEY"): 23 | raise ValueError("TAVILY_API_KEY environment variable is not set") 24 | 25 | def forward(self, query: str) -> str: 26 | try: 27 | results: SearchResults = tavily_search( 28 | query=query, max_results=self.max_results, include_raw=self.include_raw) 29 | 30 | if len(results.results) == 0: 31 | raise Exception( 32 | "No results found! Try a less restrictive/shorter query.") 33 | 34 | postprocessed_results = [] 35 | for result in results.results: 36 | postprocessed_results.append( 37 | f"[{result.title}]({result.link})\n{result.content}") 38 | 39 | return "## Search Results\n\n" + "\n\n".join(postprocessed_results) 40 | except Exception as e: 41 | return f"Error performing search: {str(e)}" 42 | 43 | 44 | if __name__ == "__main__": 45 | from smolagents import CodeAgent, LiteLLMModel 46 | 47 | model = LiteLLMModel(model_id="together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo", 48 | api_key=os.environ["TOGETHER_API_KEY"]) 49 | agent = CodeAgent(tools=[SmolAgentsTavilySearchTool()], model=model) 50 | 51 | result = agent.run( 52 | "How many seconds would it take for a leopard at full speed to run through the Eiffel Tower?") 53 | print(result) 54 | -------------------------------------------------------------------------------- /benchmark/benchmark_configs/base_llm_config_ds_v3.yaml: -------------------------------------------------------------------------------- 1 | agent: 2 | type: base_llm 3 | model: "together_ai/deepseek-ai/DeepSeek-V3" 4 | -------------------------------------------------------------------------------- /benchmark/benchmark_configs/base_llm_config_llama4_m.yaml: -------------------------------------------------------------------------------- 1 | agent: 2 | type: base_llm 3 | model: "together_ai/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8" 4 | -------------------------------------------------------------------------------- /benchmark/benchmark_configs/base_llm_config_qwen2p5.yaml: -------------------------------------------------------------------------------- 1 | agent: 2 | type: base_llm 3 | model: "together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo" 4 | -------------------------------------------------------------------------------- /benchmark/benchmark_configs/langchain_deep_researcher_ds_v3.yaml: 
-------------------------------------------------------------------------------- 1 | agent: 2 | type: langchain_deep_researcher 3 | search_api: "tavily" 4 | planner_provider: "together" 5 | planner_model: "deepseek-ai/DeepSeek-V3" 6 | writer_provider: "together" 7 | writer_model: "deepseek-ai/DeepSeek-V3" 8 | max_search_depth: 2 -------------------------------------------------------------------------------- /benchmark/benchmark_configs/langchain_deep_researcher_llama4_m.yaml: -------------------------------------------------------------------------------- 1 | agent: 2 | type: langchain_deep_researcher 3 | search_api: "tavily" 4 | planner_provider: "together" 5 | planner_model: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8" 6 | writer_provider: "together" 7 | writer_model: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8" 8 | max_search_depth: 2 -------------------------------------------------------------------------------- /benchmark/benchmark_configs/langchain_deep_researcher_qwen2p5.yaml: -------------------------------------------------------------------------------- 1 | agent: 2 | type: langchain_deep_researcher 3 | search_api: "tavily" 4 | planner_provider: "together" 5 | planner_model: "Qwen/Qwen2.5-72B-Instruct-Turbo" 6 | writer_provider: "together" 7 | writer_model: "Qwen/Qwen2.5-72B-Instruct-Turbo" 8 | max_search_depth: 2 -------------------------------------------------------------------------------- /benchmark/benchmark_configs/open_deep_researcher_1step_ds_v3.yaml: -------------------------------------------------------------------------------- 1 | agent: 2 | type: deep_researcher 3 | max_steps: 0 4 | max_queries: 2 5 | max_sources: 10 6 | max_completion_tokens: 8192 7 | user_timeout: 30.0 8 | interactive: false 9 | use_cache: true 10 | remove_thinking_tags: true 11 | debug_file_path: "" 12 | planning_model: "together_ai/deepseek-ai/DeepSeek-V3" 13 | summarization_model: "together_ai/deepseek-ai/DeepSeek-V3" 14 | json_model: "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo" 15 | answer_model: "together_ai/deepseek-ai/DeepSeek-V3" 16 | -------------------------------------------------------------------------------- /benchmark/benchmark_configs/open_deep_researcher_1step_llama4_m.yaml: -------------------------------------------------------------------------------- 1 | agent: 2 | type: deep_researcher 3 | max_steps: 0 4 | max_queries: 2 5 | max_sources: 10 6 | max_completion_tokens: 8192 7 | user_timeout: 30.0 8 | interactive: false 9 | use_cache: true 10 | remove_thinking_tags: true 11 | debug_file_path: "" 12 | planning_model: "together_ai/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8" 13 | summarization_model: "together_ai/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8" 14 | json_model: "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo" 15 | answer_model: "together_ai/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8" 16 | -------------------------------------------------------------------------------- /benchmark/benchmark_configs/open_deep_researcher_1step_qwen2p5.yaml: -------------------------------------------------------------------------------- 1 | agent: 2 | type: deep_researcher 3 | max_steps: 0 4 | max_queries: 2 5 | max_sources: 10 6 | max_completion_tokens: 8192 7 | user_timeout: 30.0 8 | interactive: false 9 | use_cache: true 10 | remove_thinking_tags: true 11 | debug_file_path: "" 12 | planning_model: "together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo" 13 | summarization_model: "together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo" 14 | 
json_model: "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo" 15 | answer_model: "together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo" 16 | -------------------------------------------------------------------------------- /benchmark/benchmark_configs/open_deep_researcher_3step_ds_v3.yaml: -------------------------------------------------------------------------------- 1 | agent: 2 | type: deep_researcher 3 | max_steps: 2 4 | max_queries: 2 5 | max_sources: 10 6 | max_completion_tokens: 8192 7 | user_timeout: 30.0 8 | interactive: false 9 | use_cache: true 10 | remove_thinking_tags: true 11 | debug_file_path: "" 12 | planning_model: "together_ai/deepseek-ai/DeepSeek-V3" 13 | summarization_model: "together_ai/deepseek-ai/DeepSeek-V3" 14 | json_model: "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo" 15 | answer_model: "together_ai/deepseek-ai/DeepSeek-V3" 16 | -------------------------------------------------------------------------------- /benchmark/benchmark_configs/open_deep_researcher_3step_llama4_m.yaml: -------------------------------------------------------------------------------- 1 | agent: 2 | type: deep_researcher 3 | max_steps: 2 4 | max_queries: 2 5 | max_sources: 10 6 | max_completion_tokens: 8192 7 | user_timeout: 30.0 8 | interactive: false 9 | use_cache: true 10 | remove_thinking_tags: true 11 | debug_file_path: "" 12 | planning_model: "together_ai/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8" 13 | summarization_model: "together_ai/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8" 14 | json_model: "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo" 15 | answer_model: "together_ai/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8" 16 | -------------------------------------------------------------------------------- /benchmark/benchmark_configs/open_deep_researcher_3step_qwen2p5.yaml: -------------------------------------------------------------------------------- 1 | agent: 2 | type: deep_researcher 3 | max_steps: 2 4 | max_queries: 2 5 | max_sources: 10 6 | max_completion_tokens: 8192 7 | user_timeout: 30.0 8 | interactive: false 9 | use_cache: true 10 | remove_thinking_tags: true 11 | debug_file_path: "" 12 | planning_model: "together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo" 13 | summarization_model: "together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo" 14 | json_model: "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo" 15 | answer_model: "together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo" 16 | -------------------------------------------------------------------------------- /benchmark/benchmark_configs/smolagent_config_ds_v3.yaml: -------------------------------------------------------------------------------- 1 | agent: 2 | type: smolagents 3 | model: together_ai/deepseek-ai/DeepSeek-V3 4 | tools: 5 | - TavilySearch: 6 | params: 7 | include_raw: false -------------------------------------------------------------------------------- /benchmark/benchmark_configs/smolagent_config_llama4_m.yaml: -------------------------------------------------------------------------------- 1 | agent: 2 | type: smolagents 3 | model: together_ai/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 4 | tools: 5 | - TavilySearch: 6 | params: 7 | include_raw: false -------------------------------------------------------------------------------- /benchmark/benchmark_configs/smolagent_config_qwen2p5.yaml: -------------------------------------------------------------------------------- 1 | agent: 2 | type: smolagents 3 | model: together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo 4 | tools: 5 | - 
TavilySearch: 6 | params: 7 | include_raw: false -------------------------------------------------------------------------------- /benchmark/scoring.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import concurrent.futures 3 | import json 4 | import os 5 | import time 6 | from typing import Any, Tuple 7 | 8 | from datasets import load_dataset 9 | from huggingface_hub import login 10 | 11 | from libs.utils.agent_factory import create_agent 12 | from libs.utils.evals import Result, ScoringFunction, llm_as_a_judge_scoring 13 | from libs.utils.log import AgentLogger 14 | 15 | logging = AgentLogger() 16 | 17 | 18 | def authenticate_huggingface(): 19 | """Authenticate with Hugging Face Hub using token from environment variable.""" 20 | token = os.getenv("HUGGINGFACE_TOKEN") 21 | if not token: 22 | raise ValueError( 23 | "HUGGINGFACE_TOKEN environment variable not set. " 24 | "Please set it with your token from https://huggingface.co/settings/tokens" 25 | ) 26 | 27 | try: 28 | login(token=token) 29 | print("Successfully authenticated with Hugging Face Hub") 30 | except Exception as e: 31 | raise RuntimeError( 32 | f"Failed to authenticate with Hugging Face Hub: {str(e)}") 33 | 34 | 35 | def load_questions(dataset_names: list[str] | None = None) -> list[dict[str, str]]: 36 | """ 37 | Load questions from the specified Hugging Face dataset configurations. 38 | 39 | Args: 40 | dataset_names: List of dataset configurations to load 41 | Options: 42 | "smolagents:simpleqa", 43 | "hotpotqa", 44 | "simpleqa", 45 | "together-search-bench" 46 | If None, all available configurations except hotpotqa will be loaded 47 | 48 | Returns: 49 | List of question-answer pairs 50 | """ 51 | if dataset_names is None: 52 | dataset_names = ["smolagents:simpleqa"] 53 | 54 | all_questions = [] 55 | 56 | # Authenticate with Hugging Face Hub (once and for all) 57 | authenticate_huggingface() 58 | 59 | for dataset_name in dataset_names: 60 | print(f"Loading dataset: {dataset_name}") 61 | 62 | try: 63 | if dataset_name == "together-search-bench": 64 | # Load Together-Search-Bench dataset 65 | dataset_path = "togethercomputer/together-search-bench" 66 | ds = load_dataset(dataset_path) 67 | if "test" in ds: 68 | split_data = ds["test"] 69 | else: 70 | print( 71 | f"No 'test' split found in dataset at {dataset_path}") 72 | continue 73 | 74 | for i in range(len(split_data)): 75 | item = split_data[i] 76 | question_data = { 77 | "question": item["question"], 78 | "answer": item["answer"], 79 | "dataset": item.get("dataset", "together-search-bench"), 80 | } 81 | all_questions.append(question_data) 82 | 83 | print( 84 | f"Loaded {len(split_data)} questions from together-search-bench dataset") 85 | continue 86 | 87 | elif dataset_name == "hotpotqa": 88 | # Load HotpotQA dataset (using distractor version for validation) 89 | ds = load_dataset("hotpotqa/hotpot_qa", 90 | "distractor", trust_remote_code=True) 91 | split_name = "validation" 92 | elif dataset_name == "simpleqa": 93 | ds = load_dataset("basicv8vc/SimpleQA") 94 | split_name = "test" 95 | else: 96 | # Strip "smolagents:" prefix when loading the dataset 97 | actual_dataset = dataset_name.split(":")[-1] 98 | ds = load_dataset("smolagents/benchmark-v1", actual_dataset) 99 | split_name = "test" 100 | 101 | except Exception as e: 102 | print(f"Failed to load dataset {dataset_name}: {str(e)}") 103 | continue # Skip this dataset if it fails to load 104 | 105 | print(f"Dataset structure for {dataset_name}: {ds}") 106 | 
print(f"Available splits: {list(ds)}") 107 | 108 | split_data = ds[split_name] # type: ignore 109 | 110 | for i in range(len(split_data)): 111 | item = split_data[i] 112 | 113 | if dataset_name == "hotpotqa": 114 | # we remove questions that are easy or medium (if any) just to reduce the number of questions 115 | if item["level"] != "hard": 116 | continue 117 | 118 | question_data = { 119 | "question": item["question"], 120 | "answer": item["answer"], 121 | "dataset": dataset_name, 122 | } 123 | elif dataset_name == "simpleqa": 124 | # Handle SimpleQA dataset format 125 | question_data = { 126 | "question": item["problem"], 127 | "answer": item["answer"], 128 | "dataset": dataset_name, 129 | } 130 | else: 131 | question_data = { 132 | "question": item["question"], 133 | "answer": item["true_answer"], 134 | "dataset": dataset_name, 135 | } 136 | 137 | all_questions.append(question_data) 138 | 139 | print(f"Loaded {len(all_questions)} questions in total") 140 | return all_questions 141 | 142 | 143 | def process_single_question( 144 | question_data: dict[str, str], 145 | idx: int, 146 | total: int, 147 | callback: ScoringFunction, 148 | agent_config: str = "../configs/base_llm_config.yaml", 149 | ) -> dict[str, Any]: 150 | """ 151 | Process a single benchmark question with the agent. 152 | 153 | Args: 154 | question_data: Dictionary containing question and answer 155 | idx: Index of the current question 156 | total: Total number of questions 157 | model: LLM model to use 158 | max_steps: Maximum steps for the agent 159 | callback: Optional callback function for evaluation 160 | agent_config: Path to the agent config (default: ../configs/base_llm_config.yaml) 161 | 162 | Returns: 163 | Dictionary with question, answers and evaluation results 164 | """ 165 | question = question_data["question"] 166 | correct_answer = question_data["answer"] 167 | 168 | question = ( 169 | f"Question: {question}" 170 | ) 171 | 172 | print(f"Running question {idx+1}/{total}") 173 | 174 | agent = create_agent(agent_config) 175 | 176 | agent_answer = agent(goal=question) 177 | 178 | result = Result(question=question, agent_answer=agent_answer, 179 | correct_answer=correct_answer) 180 | 181 | evaluation = callback(result) 182 | 183 | single_benchmark_result = { 184 | "question": question, 185 | "correct_answer": correct_answer, 186 | "agent_answer": agent_answer, 187 | "evaluation": evaluation, 188 | "metadata": {k: v for k, v in question_data.items() if k not in ["question", "answer"]}, 189 | } 190 | 191 | return single_benchmark_result 192 | 193 | 194 | def run_benchmark( 195 | questions: list[dict[str, str]], 196 | callback: ScoringFunction, 197 | agent_config: str = "../configs/base_llm_config.yaml", 198 | max_workers: int = 2, 199 | ) -> Tuple[float, list[dict[str, Any]]]: 200 | """ 201 | Run the benchmark on a list of questions concurrently. 
202 | 203 | Args: 204 | questions: List of question-answer pairs 205 | callback: Function to evaluate agent answers against ground truth 206 | model: LLM model to use for the agent 207 | max_steps: Maximum number of steps the agent can take 208 | max_workers: Number of concurrent threads to use 209 | agent_type: Type of agent to create (default: research_agent) 210 | 211 | Returns: 212 | Tuple of (accuracy score, detailed results) 213 | """ 214 | results = [] 215 | total_questions = len(questions) 216 | details = [] 217 | 218 | # Use ThreadPoolExecutor to run questions concurrently 219 | with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: 220 | # Create a list of future objects 221 | future_to_idx = { 222 | executor.submit(process_single_question, question_data, idx, total_questions, callback, agent_config): idx 223 | for idx, question_data in enumerate(questions) 224 | } 225 | 226 | # Process results as they complete 227 | for future in concurrent.futures.as_completed(future_to_idx): 228 | idx = future_to_idx[future] 229 | try: 230 | result = future.result() 231 | results.append(result["evaluation"]) 232 | details.append(result) 233 | print(f"Completed question {idx+1}/{total_questions}") 234 | except Exception as exc: 235 | import traceback 236 | traceback.print_exc() 237 | print(f"Question {idx+1} generated an exception: {exc}") 238 | results.append(0) 239 | details.append({"question": questions[idx]["question"], "agent_answer": str( 240 | exc), "evaluation": 0}) 241 | 242 | return sum(results) / len(results), details 243 | 244 | 245 | def main(): 246 | """ 247 | Main function to run the benchmark. 248 | """ 249 | 250 | # Set up argument parser 251 | parser = argparse.ArgumentParser( 252 | description="Run scoring with benchmarking options") 253 | parser.add_argument( 254 | "--datasets", 255 | nargs="+", 256 | choices=["smolagents:simpleqa", "hotpotqa", 257 | "simpleqa", "together-search-bench"], 258 | help="Specific datasets to load (default: all)", 259 | default=["together-search-bench"], 260 | ) 261 | parser.add_argument("--limit", type=int, default=None, 262 | help="Limit number of questions to process (default: all)") 263 | parser.add_argument( 264 | "--agent-config", 265 | default="../configs/base_llm_config.yaml", 266 | help="Agent config to use (default: ../configs/base_llm_config.yaml)", 267 | ) 268 | parser.add_argument( 269 | "--max-workers", 270 | type=int, 271 | default=1, 272 | help="Number of concurrent workers (default: 1)", 273 | ) 274 | 275 | args = parser.parse_args() 276 | 277 | questions = load_questions(args.datasets) 278 | 279 | if args.limit is not None: 280 | questions = questions[: args.limit] 281 | print(f"Limited to {len(questions)} questions") 282 | 283 | results, details = run_benchmark( 284 | questions, 285 | callback=llm_as_a_judge_scoring, 286 | max_workers=args.max_workers, 287 | agent_config=args.agent_config, 288 | ) 289 | 290 | print(f"Completed benchmark with {results} accuracy") 291 | 292 | benchmark_results_dir = os.path.join(os.path.dirname( 293 | os.path.dirname(__file__)), "benchmark", "benchmark_results") 294 | os.makedirs(benchmark_results_dir, exist_ok=True) 295 | 296 | output_file = os.path.join( 297 | benchmark_results_dir, 298 | f"benchmark_{'_'.join(args.datasets)}_{time.strftime('%Y-%m-%d_%H-%M-%S', time.localtime())}.json", 299 | ) 300 | 301 | output_data = { 302 | "metadata": { 303 | "timestamp": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), 304 | "datasets": args.datasets, 305 | "agent_config": 
args.agent_config, 306 | "scoring_method": "llm_as_a_judge_scoring", 307 | "sample_count": len(questions), 308 | }, 309 | "overall_accuracy": results, 310 | "question_details": details, 311 | } 312 | 313 | with open(output_file, "w") as f: 314 | json.dump(output_data, f, indent=2) 315 | 316 | return results 317 | 318 | 319 | if __name__ == "__main__": 320 | main() 321 | -------------------------------------------------------------------------------- /benchmark/summary.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import glob 3 | import json 4 | import os 5 | 6 | import pandas as pd 7 | import yaml 8 | 9 | 10 | def process_benchmark_results(directory_path): 11 | # Get all JSON files in the directory 12 | json_files = glob.glob(os.path.join(directory_path, "*.json")) 13 | 14 | # Prepare CSV output 15 | csv_file = os.path.join(directory_path, "benchmark_stats.csv") 16 | 17 | results = [] 18 | 19 | for file_path in json_files: 20 | try: 21 | with open(file_path, 'r') as f: 22 | data = json.load(f) 23 | 24 | metadata = data.get('metadata', {}) 25 | # Get the config information 26 | config_file_path = metadata.get('agent_config', {}) 27 | try: 28 | with open(config_file_path, 'r') as config_file: 29 | config_data = yaml.safe_load(config_file) 30 | 31 | agent = config_data.get('agent', {}) 32 | # Extract config values 33 | model = agent.get('model', 'unknown') 34 | if agent.get('type') == 'base_llm': 35 | agent_type = 'base_llm' 36 | model = agent.get('model', 'unknown') 37 | elif agent.get('type') == 'deep_researcher': 38 | if '1step' in config_file_path: 39 | agent_type = 'deep_researcher_1step' 40 | else: 41 | agent_type = 'deep_researcher_3step' 42 | model = agent.get('answer_model', 'unknown') 43 | elif agent.get('type') == 'smolagents': 44 | agent_type = 'smolagents' 45 | model = agent.get('model', 'unknown') 46 | elif agent.get('type') == 'langchain_deep_researcher': 47 | agent_type = 'langchain_deep_researcher' 48 | model = agent.get('writer_model', 'unknown') 49 | else: 50 | agent_type = 'unknown' 51 | model = 'unknown' 52 | 53 | except Exception as e: 54 | print(f"Error reading config file {config_file_path}: {e}") 55 | 56 | # Process the results 57 | total_count = 0 58 | correct_count = 0 59 | invalid_count = 0 60 | 61 | for result in data.get('question_details', []): 62 | evaluation = result.get('evaluation') 63 | 64 | # Skip entries with no evaluation 65 | if evaluation is None: 66 | continue 67 | 68 | total_count += 1 69 | 70 | # Count correct answers 71 | if evaluation == True: 72 | correct_count += 1 73 | 74 | # Count invalid answers 75 | if str(evaluation) == '0': 76 | invalid_count += 1 77 | 78 | # Calculate metrics 79 | overall_accuracy = correct_count / total_count if total_count > 0 else 0 80 | corrected_accuracy = correct_count / (total_count - invalid_count) if (total_count - invalid_count) > 0 else 0 81 | 82 | results.append({ 83 | 'agent_type': agent_type, 84 | 'model': model, 85 | 'valid_count': total_count - invalid_count, 86 | 'total_count': total_count, 87 | 'overall_accuracy': overall_accuracy, 88 | 'corrected_accuracy': corrected_accuracy, 89 | 'file': os.path.basename(file_path), 90 | }) 91 | 92 | except Exception as e: 93 | print(f"Error processing {file_path}: {e}") 94 | 95 | # Sort results by agent_type and model 96 | results.sort(key=lambda x: (x['agent_type'], x['model'])) 97 | 98 | # Write to CSV 99 | if results: 100 | fields = [ 101 | 'agent_type', 'model', 'valid_count', 'total_count', 102 | 
'overall_accuracy', 'corrected_accuracy', 'file' 103 | ] 104 | 105 | with open(csv_file, 'w', newline='') as f: 106 | writer = csv.DictWriter(f, fieldnames=fields) 107 | writer.writeheader() 108 | writer.writerows(results) 109 | 110 | print(f"Results saved to {csv_file}") 111 | else: 112 | print("No results to save") 113 | 114 | 115 | def analyze_benchmark_results(csv_path): 116 | # Read the CSV file 117 | df = pd.read_csv(csv_path) 118 | 119 | # Group by agent_type and model, and calculate mean and std for relevant metrics 120 | grouped_stats = df.groupby(['agent_type', 'model']).agg({ 121 | 'valid_count': ['mean', 'std'], 122 | 'total_count': ['mean', 'std'], 123 | 'overall_accuracy': ['mean', 'std'], 124 | 'corrected_accuracy': ['mean', 'std'] 125 | }).reset_index() 126 | 127 | # Flatten the multi-level column names 128 | grouped_stats.columns = [ 129 | '_'.join(col).strip('_') for col in grouped_stats.columns.values 130 | ] 131 | 132 | # Create output file path 133 | directory = os.path.dirname(csv_path) 134 | output_path = os.path.join(directory, "benchmark_summary.csv") 135 | 136 | # Write to CSV 137 | grouped_stats.to_csv(output_path, index=False) 138 | print(f"Statistics saved to {output_path}") 139 | 140 | # Print the summary for quick review 141 | print("\nSummary Statistics:") 142 | for _, row in grouped_stats.iterrows(): 143 | print(f"\nAgent Type: {row['agent_type']}") 144 | print(f"Model: {row['model']}") 145 | print(f"Overall Accuracy: {row['overall_accuracy_mean']:.4f} ± {row['overall_accuracy_std']:.4f}") 146 | print(f"Corrected Accuracy: {row['corrected_accuracy_mean']:.4f} ± {row['corrected_accuracy_std']:.4f}") 147 | print(f"Valid Count: {row['valid_count_mean']:.1f} ± {row['valid_count_std']:.1f}") 148 | print("-" * 50) 149 | 150 | return grouped_stats 151 | 152 | 153 | if __name__ == "__main__": 154 | directory_path = "./benchmark_results" 155 | process_benchmark_results(directory_path) 156 | analyze_benchmark_results(os.path.join(directory_path, "benchmark_stats.csv")) 157 | -------------------------------------------------------------------------------- /config.env.dev: -------------------------------------------------------------------------------- 1 | TOGETHER_API_KEY= 2 | TAVILY_API_KEY= 3 | HUGGINGFACE_TOKEN= 4 | -------------------------------------------------------------------------------- /configs/base_llm_config.yaml: -------------------------------------------------------------------------------- 1 | agent: 2 | type: base_llm 3 | model: "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo" 4 | -------------------------------------------------------------------------------- /configs/open_deep_researcher_config.yaml: -------------------------------------------------------------------------------- 1 | agent: 2 | type: deep_researcher 3 | max_steps: 2 4 | max_queries: 5 5 | max_sources: 40 6 | max_completion_tokens: 8192 7 | user_timeout: 30.0 8 | interactive: true 9 | use_cache: true 10 | remove_thinking_tags: true 11 | debug_file_path: "" 12 | planning_model: "together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo" 13 | summarization_model: "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo" 14 | json_model: "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo" 15 | answer_model: "together_ai/deepseek-ai/DeepSeek-V3" 16 | -------------------------------------------------------------------------------- /configs/smolagent_config.yaml: -------------------------------------------------------------------------------- 1 | agent: 2 | type: smolagents 3 | model: 
together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo 4 | tools: 5 | - TavilySearch: 6 | params: 7 | include_raw: false -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "together-open-deep-research" 3 | version = "0.1.0" 4 | authors = [{ name = "Your Name", email = "your.email@example.com" }] 5 | description = "Together Open Deep Research" 6 | readme = "README.md" 7 | requires-python = ">=3.11" 8 | dependencies = [ 9 | "pydantic", 10 | "litellm", 11 | "datasets", 12 | "commonmark", 13 | "xhtml2pdf", 14 | "pypandoc", 15 | "pandoc", 16 | "filelock", 17 | "together>=1.3.5", 18 | "pandas>=1.5.0", 19 | "tavily-python>=0.5.1", 20 | "tenacity>=9.0.0", 21 | "pymdown-extensions>=10.14.3", 22 | "smolagents>=1.13.0", 23 | "langgraph>=0.3.29", 24 | "langchain-together>=0.3.0", 25 | "langchain>=0.3.23", 26 | "gradio>=5.25.0", 27 | ] 28 | 29 | [project.optional-dependencies] 30 | dev = [] 31 | with-open-deep-research = ["open-deep-research"] 32 | 33 | [build-system] 34 | requires = ["hatchling"] 35 | build-backend = "hatchling.build" 36 | 37 | [tool.hatch.build.targets.wheel] 38 | packages = ["apollo"] 39 | 40 | [tool.ruff] 41 | line-length = 130 42 | target-version = "py311" 43 | 44 | [tool.ruff.lint] 45 | select = ["E", "F", "I"] 46 | fixable = ["ALL"] 47 | unfixable = [] 48 | 49 | [tool.ruff.format] 50 | quote-style = "double" 51 | line-ending = "auto" 52 | 53 | [tool.ruff.lint.per-file-ignores] 54 | "tests/*" = ["E501"] 55 | "src/artemis/tests/*" = ["E501"] 56 | [tool.pytest.ini_options] 57 | pythonpath = ["."] 58 | 59 | [tool.poetry.dependencies] 60 | pytest = "^7.0.0" 61 | pytest-asyncio = "^0.18.0" 62 | 63 | [dependency-groups] 64 | dev = ["ruff>=0.11.2"] 65 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pydantic 2 | litellm 3 | datasets 4 | commonmark 5 | xhtml2pdf 6 | pypandoc 7 | pandoc 8 | filelock 9 | together>=1.3.5 10 | pandas>=1.5.0 11 | tavily-python>=0.5.1 12 | tenacity>=9.0.0 13 | pymdown-extensions>=10.14.3 14 | smolagents>=1.13.0 15 | pytest>=7.0.0 16 | pytest-asyncio>=0.18.0 17 | ruff>=0.11.2 -------------------------------------------------------------------------------- /src/libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/togethercomputer/open_deep_research/66e43b47bfb8722ef9aad38139453922d1feef2a/src/libs/__init__.py -------------------------------------------------------------------------------- /src/libs/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/togethercomputer/open_deep_research/66e43b47bfb8722ef9aad38139453922d1feef2a/src/libs/utils/__init__.py -------------------------------------------------------------------------------- /src/libs/utils/agent_factory.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import yaml 4 | 5 | from together_open_deep_research import DeepResearcher 6 | 7 | 8 | def load_config(config_path: str): 9 | with open(config_path, "r") as file: 10 | return yaml.safe_load(file) 11 | 12 | 13 | def create_agent(config: str, return_instance: bool = False) -> Any: 14 | """ 15 | Factory method to create an agent with specified configuration. 
16 | """ 17 | 18 | config_dict = load_config(config) 19 | 20 | agent_config = config_dict.get("agent") 21 | agent_type = agent_config.pop("type") 22 | 23 | if agent_type == "deep_researcher": 24 | agent_config["budget"] = agent_config.pop("max_steps") 25 | researcher = DeepResearcher(**agent_config) 26 | 27 | if return_instance: 28 | return researcher 29 | 30 | def research_wrapper(goal: str): 31 | import asyncio 32 | 33 | return asyncio.run(researcher.research_topic(goal)) 34 | 35 | return research_wrapper 36 | 37 | elif agent_type == "langchain_deep_researcher": 38 | try: 39 | import uuid 40 | 41 | from langgraph.checkpoint.memory import MemorySaver 42 | from open_deep_research.graph import builder 43 | except ImportError as e: 44 | raise ImportError( 45 | f"Failed to import required modules for langchain deep researcher: {e}. Make sure langgraph and open_deep_research are installed. Also make sure that the benchmark directory is in your path. Also, you might need to install the with-open-deep-research extra dependencies (see README.md)." 46 | ) 47 | 48 | memory = MemorySaver() 49 | graph = builder.compile(checkpointer=memory) 50 | 51 | REPORT_STRUCTURE = """Use this structure to create a report on the user-provided topic: 52 | 53 | 1. Introduction (no research needed) 54 | - Brief overview of the topic area 55 | 56 | 2. Main Body Sections: 57 | - Each section should focus on a sub-topic of the user-provided topic 58 | 59 | 3. Conclusion 60 | - Aim for 1 structural element (either a list of table) that distills the main body sections 61 | - Provide a concise summary of the report""" 62 | 63 | # Extract configuration parameters 64 | search_api = agent_config.get("search_api", "tavily") 65 | planner_provider = agent_config.get("planner_provider") 66 | planner_model = agent_config.get("planner_model") 67 | writer_provider = agent_config.get("writer_provider") 68 | writer_model = agent_config.get("writer_model") 69 | max_search_depth = agent_config.get("max_search_depth", 3) 70 | 71 | def langchain_wrapper(goal: str): 72 | import asyncio 73 | 74 | thread = { 75 | "configurable": { 76 | "thread_id": str(uuid.uuid4()), 77 | "search_api": search_api, 78 | "planner_provider": planner_provider, 79 | "planner_model": planner_model, 80 | "writer_provider": writer_provider, 81 | "writer_model": writer_model, 82 | "max_search_depth": max_search_depth, 83 | "report_structure": REPORT_STRUCTURE 84 | } 85 | } 86 | 87 | # NOTE: add research prompt to the goal for robust benchmarking purposes 88 | goal=goal + " You must perform in-depth research to answer the question." 89 | 90 | results = [] 91 | 92 | async def run_graph(): 93 | async for event in graph.astream({"topic": goal}, thread, stream_mode="updates"): 94 | results.append(event) 95 | 96 | from langgraph.types import Command 97 | async for event in graph.astream(Command(resume=True), thread, stream_mode="updates"): 98 | results.append(event) 99 | 100 | final_state = graph.get_state(thread) 101 | report = final_state.values.get('final_report') 102 | 103 | return report 104 | 105 | return asyncio.run(run_graph()) 106 | 107 | return langchain_wrapper 108 | 109 | elif agent_type == "base_llm": 110 | model = agent_config.get("model") 111 | 112 | def base_llm_wrapper(goal: str): 113 | import asyncio 114 | 115 | from libs.utils.llms import asingle_shot_llm_call 116 | 117 | system_prompt = ( 118 | "You are a helpful AI assistant. Answer the user's question accurately and concisely. " 119 | "Reason through the problem step by step." 
120 | ) 121 | 122 | async def get_answer(): 123 | return await asingle_shot_llm_call(model=model, system_prompt=system_prompt, message=goal) 124 | 125 | return asyncio.run(get_answer()) 126 | 127 | return base_llm_wrapper 128 | 129 | elif agent_type == "smolagents": 130 | try: 131 | from baselines.smolagents_baseline import SmolAgentsTavilySearchTool 132 | from smolagents import CodeAgent, LiteLLMModel 133 | from smolagents.default_tools import VisitWebpageTool 134 | except ImportError as e: 135 | raise ImportError( 136 | f"Failed to import required modules for smolagents: {e}. Make sure the benchmark directory is in your path." 137 | ) 138 | 139 | model_id = agent_config.get( 140 | "model", "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo") 141 | 142 | import os 143 | 144 | api_key = os.environ.get("TOGETHER_API_KEY") 145 | if not api_key: 146 | raise ValueError( 147 | "API key not provided and TOGETHER_API_KEY not found in environment") 148 | 149 | model = LiteLLMModel(model_id=model_id, api_key=api_key) 150 | 151 | tools = [] 152 | 153 | if "tools" in agent_config: 154 | tool_configs = agent_config.pop("tools") 155 | for item in tool_configs: 156 | if isinstance(item, str) and item == "TavilySearch": 157 | tools.append(SmolAgentsTavilySearchTool()) 158 | elif isinstance(item, dict): 159 | tool_name = list(item.keys())[0] 160 | if tool_name == "TavilySearch": 161 | params = item.get(tool_name, {}).get("params", {}) 162 | tools.append(SmolAgentsTavilySearchTool(**params)) 163 | 164 | tools.append(VisitWebpageTool()) 165 | agent = CodeAgent( 166 | tools=tools, 167 | model=model, 168 | additional_authorized_imports=["numpy", "sympy"], 169 | max_steps=10, 170 | ) 171 | def smolagents_wrapper(goal: str): 172 | return agent.run(goal) 173 | 174 | return smolagents_wrapper 175 | 176 | else: 177 | raise ValueError(f"Unknown agent type: {agent_type}") 178 | -------------------------------------------------------------------------------- /src/libs/utils/data_types.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import select 3 | import sys 4 | from dataclasses import dataclass 5 | 6 | from pydantic import BaseModel, Field 7 | 8 | from libs.utils.tavily_search import SearchResult, SearchResults 9 | 10 | 11 | class ResearchPlan(BaseModel): 12 | queries: list[str] = Field( 13 | description="A list of search queries to thoroughly research the topic") 14 | 15 | 16 | class SourceList(BaseModel): 17 | sources: list[int] = Field( 18 | description="A list of source numbers from the search results") 19 | 20 | 21 | class UserCommunication: 22 | """Handles user input/output interactions with timeout functionality.""" 23 | 24 | @staticmethod 25 | async def get_input_with_timeout(prompt: str, timeout: float = 30.0) -> str: 26 | """ 27 | Get user input with a timeout. 28 | Returns empty string if timeout occurs or no input is provided. 
29 | 30 | Args: 31 | prompt: The prompt to display to the user 32 | timeout: Number of seconds to wait for user input (default: 30.0) 33 | 34 | Returns: 35 | str: User input or empty string if timeout occurs 36 | """ 37 | print(prompt, end="", flush=True) 38 | 39 | # Different implementation for Windows vs Unix-like systems 40 | if sys.platform == "win32": 41 | # Windows implementation 42 | try: 43 | # Run input in an executor to make it async 44 | loop = asyncio.get_event_loop() 45 | user_input = await asyncio.wait_for(loop.run_in_executor(None, input), timeout) 46 | return user_input.strip() 47 | except TimeoutError: 48 | print("\nTimeout reached, continuing...") 49 | return "" 50 | else: 51 | # Unix-like implementation 52 | i, _, _ = select.select([sys.stdin], [], [], timeout) 53 | if i: 54 | return sys.stdin.readline().strip() 55 | else: 56 | print("\nTimeout reached, continuing...") 57 | return "" 58 | 59 | 60 | @dataclass(frozen=True, kw_only=True) 61 | class DeepResearchResult(SearchResult): 62 | """Wrapper on top of SearchResults to adapt it to the DeepResearch. 63 | 64 | This class extends the basic SearchResult by adding a filtered version of the raw content 65 | that has been processed and refined for the specific research context. It maintains 66 | the original search result while providing additional research-specific information. 67 | 68 | Attributes: 69 | filtered_raw_content: A processed version of the raw content that has been filtered 70 | and refined for relevance to the research topic 71 | """ 72 | 73 | filtered_raw_content: str 74 | 75 | def __str__(self): 76 | return f"Title: {self.title}\n" f"Link: {self.link}\n" f"Refined Content: {self.filtered_raw_content[:10000]}" 77 | 78 | def short_str(self): 79 | return f"Title: {self.title}\nLink: {self.link}\nRaw Content: {self.content[:10000]}" 80 | 81 | 82 | @dataclass(frozen=True, kw_only=True) 83 | class DeepResearchResults(SearchResults): 84 | results: list[DeepResearchResult] 85 | 86 | def __add__(self, other): 87 | return DeepResearchResults(results=self.results + other.results) 88 | 89 | def dedup(self): 90 | def deduplicate_by_link(results): 91 | seen_links = set() 92 | unique_results = [] 93 | 94 | for result in results: 95 | if result.link not in seen_links: 96 | seen_links.add(result.link) 97 | unique_results.append(result) 98 | 99 | return unique_results 100 | 101 | return DeepResearchResults(results=deduplicate_by_link(self.results)) 102 | -------------------------------------------------------------------------------- /src/libs/utils/evals.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Callable 3 | 4 | import tenacity 5 | from litellm import completion 6 | 7 | 8 | @dataclass 9 | class Result: 10 | question: str 11 | agent_answer: str 12 | correct_answer: str 13 | 14 | 15 | ScoringFunction = Callable[[Result], bool] 16 | 17 | 18 | @tenacity.retry(stop=tenacity.stop_after_attempt(3), wait=tenacity.wait_exponential(multiplier=1, min=4, max=15)) 19 | def llm_as_a_judge_scoring(result: Result) -> bool: 20 | prompt = f""" 21 | Given the following question and answer, evaluate the answer against the correct answer: 22 | 23 | 24 | {result.question} 25 | 26 | 27 | 28 | {result.agent_answer} 29 | 30 | 31 | 32 | {result.correct_answer} 33 | 34 | 35 | Note that the agent answer might be a long text containing a lot of information or it might be a short answer. 
36 | 37 | You should read the entire text and think if the agent answers the question somewhere 38 | in the text. You should try to be flexible with the answer but careful. 39 | 40 | For example, answering with names instead of name and surname is fine. 41 | 42 | The important thing is that the answer of the agent either contains the correct answer or is equal to the correct answer. 43 | 44 | 45 | The agent answer is correct because I can read that .... 46 | 47 | 48 | 49 | 1 50 | 51 | 52 | Otherwise, return 53 | 54 | 55 | The agent answer is incorrect because there is ... 56 | 57 | 58 | 59 | 0 60 | 61 | 62 | """ 63 | 64 | messages = [ 65 | {"role": "system", "content": "You are an helpful assistant that returns a number between 0 and 1."}, 66 | {"role": "user", "content": prompt}, 67 | ] 68 | answer = ( 69 | completion( 70 | model="together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo", 71 | messages=messages, 72 | max_tokens=1000, 73 | temperature=0.0, 74 | ) 75 | .choices[0] # type: ignore 76 | .message["content"] # type: ignore 77 | ) 78 | 79 | return bool(int(answer.split("")[1].split("")[0].strip())) 80 | -------------------------------------------------------------------------------- /src/libs/utils/generation.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Optional 3 | 4 | import pypandoc 5 | from pymdownx.superfences import SuperFencesCodeExtension 6 | 7 | 8 | def generate_pdf(answer: str, filename: str = "research_report.pdf"): 9 | """ 10 | Generate a PDF report from the markdown formatted research answer. 11 | Uses the first line of the answer as the title. 12 | 13 | Attempts to use pypandoc first, with fallbacks to: 14 | 1. commonmark + xhtml2pdf if pypandoc fails 15 | 2. A clear error message if all methods fail 16 | """ 17 | # Extract the first line as title and rest as content 18 | lines = answer.split("\n") 19 | title = lines[0].strip("# ") # Remove any markdown heading characters 20 | content = "\n".join(lines[1:]).strip() # Get the rest of the content 21 | 22 | # Remove mermaid diagram blocks for pdf rendering 23 | content = re.sub(r"\*Figure.*?\*.*?```mermaid.*?```|```mermaid.*?```.*?\*Figure.*?\*", "\n", content, flags=re.DOTALL) 24 | content = content.strip() # Remove any extra whitespace that might remain 25 | 26 | disclaimer = ( 27 | "Disclaimer: This AI-generated report may contain hallucinations, bias, or inaccuracies. Always verify information " 28 | "from independent sources before making decisions based on this content." 
29 | ) 30 | content = f"{disclaimer}\n\n{content}" 31 | 32 | # Create markdown with the extracted title - properly quote the title for YAML 33 | markdown_with_title = f'---\ntitle: "{title}"\n---\n\n{content}' 34 | 35 | # Try pypandoc first 36 | try: 37 | pdf_options = [ 38 | "--pdf-engine=pdflatex", 39 | "--variable", 40 | "urlcolor=blue", 41 | "--variable", 42 | "colorlinks=true", 43 | "--variable", 44 | "linkcolor=blue", 45 | "--variable", 46 | "geometry:margin=1in", 47 | ] 48 | 49 | pypandoc.convert_text(markdown_with_title, "pdf", format="markdown", outputfile=filename, extra_args=pdf_options) 50 | print(f"PDF generated successfully using pypandoc: {filename}") 51 | return 52 | except Exception as pandoc_error: 53 | print(f"Pypandoc conversion failed: {str(pandoc_error)}") 54 | print("Trying alternative conversion methods...") 55 | 56 | # Try commonmark + xhtml2pdf as a backup 57 | try: 58 | import commonmark 59 | from xhtml2pdf import pisa 60 | 61 | # Convert markdown to HTML using commonmark 62 | html_content = commonmark.commonmark(content) 63 | 64 | # Add basic HTML structure with the title 65 | html_doc = f""" 66 | 67 | 68 | 69 | {title} 70 | 71 | 78 | 79 | 80 |

{title}

81 | {html_content} 82 | 83 | 84 | """ 85 | 86 | # Convert HTML to PDF using xhtml2pdf 87 | with open(filename, "w+b") as pdf_file: 88 | pisa_status = pisa.CreatePDF(html_doc, dest=pdf_file) 89 | 90 | if pisa_status.err: 91 | raise Exception("xhtml2pdf encountered errors") 92 | else: 93 | print(f"PDF generated successfully using commonmark + xhtml2pdf: {filename}") 94 | return 95 | 96 | except Exception as alt_error: 97 | error_msg = f"All PDF conversion methods failed. Last error: {str(alt_error)}" 98 | print(error_msg) 99 | raise Exception(error_msg) 100 | 101 | 102 | def generate_html( 103 | markdown_content: str, toc_image_url: Optional[str] = None, title: Optional[str] = None, base64_audio: Optional[str] = None 104 | ) -> str: 105 | """ 106 | Generate an HTML report from markdown formatted content. 107 | Returns the generated HTML as a string. 108 | """ 109 | try: 110 | import datetime 111 | 112 | import markdown 113 | 114 | year = datetime.datetime.now().year 115 | month = datetime.datetime.now().strftime("%B") 116 | day = datetime.datetime.now().day 117 | 118 | # Extract title from first line if not provided 119 | lines = markdown_content.split("\n") 120 | if not title: 121 | # Remove any markdown heading characters 122 | title = lines[0].strip("# ") 123 | 124 | content = markdown_content 125 | 126 | # Convert markdown to HTML with table support 127 | html_body = markdown.markdown( 128 | content, 129 | extensions=[ 130 | "tables", 131 | "fenced_code", 132 | SuperFencesCodeExtension(custom_fences=[{"name": "mermaid", "class": "mermaid", "format": fence_mermaid}]), 133 | ], 134 | ) 135 | 136 | # Add mermaid header 137 | mermaid_header = """""" 141 | 142 | # Directly parse HTML to extract headings and build TOC 143 | heading_pattern = re.compile(r'([^<]+)') 144 | toc_items = [] 145 | section_count = 0 146 | subsection_counts = {} 147 | 148 | # First pass: Add IDs to all headings that don't have them 149 | modified_html = html_body 150 | for match in heading_pattern.finditer(html_body): 151 | level = match.group(1) 152 | heading_id = match.group(2) 153 | heading_text = match.group(3) 154 | 155 | # If heading doesn't have an ID, create one and update the HTML 156 | if not heading_id: 157 | heading_id = re.sub(r"[^\w\-]", "-", heading_text.lower()) 158 | heading_id = re.sub(r"-+", "-", heading_id).strip("-") 159 | 160 | # Replace the heading without ID with one that has an ID 161 | original = f"{heading_text}" 162 | replacement = f'{heading_text}' 163 | modified_html = modified_html.replace(original, replacement) 164 | 165 | # Update the HTML body with the added IDs 166 | html_body = modified_html 167 | 168 | # Second pass: Build the TOC items 169 | for match in heading_pattern.finditer(modified_html): 170 | level = match.group(1) 171 | heading_id = match.group(2) or re.sub(r"[^\w\-]", "-", match.group(3).lower()) 172 | heading_text = match.group(3) 173 | 174 | if level == "2": # Main section (h2) 175 | section_count += 1 176 | subsection_counts[section_count] = 0 177 | toc_items.append(f'{section_count}. {heading_text}') 178 | elif level == "3": # Subsection (h3) 179 | parent_section = section_count 180 | subsection_counts[parent_section] += 1 181 | subsection_num = subsection_counts[parent_section] 182 | toc_items.append( 183 | f'{parent_section}.{subsection_num}. 
{heading_text}' 184 | ) 185 | 186 | current_date = datetime.datetime.now().strftime("%B %Y") 187 | 188 | # Create a complete HTML document with enhanced styling and structure 189 | html_doc = f""" 190 | 191 | 192 | 193 | 194 | 195 | {title} 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | {mermaid_header} 205 | 380 | 381 | 382 |
383 |
384 |
385 | Research Report | Published: {current_date} 386 |
387 | {f'Report Header Image' if toc_image_url else ''} 388 |

{title}

389 |
390 |

391 | Disclaimer: 392 | This AI-generated report may contain hallucinations, bias, or inaccuracies. 393 | Always verify information from independent sources before making 394 | decisions based on this content. 395 |

396 |
397 | {f'
' 398 | f'' 399 | f'
' if base64_audio else ''} 403 |
404 | 412 |
413 |
Table of Contents
414 | 417 |
418 | {html_body} 419 |
420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 451 | 452 | 453 | """ 454 | 455 | return html_doc 456 | 457 | except Exception as error: 458 | error_msg = f"HTML conversion failed: {str(error)}" 459 | raise Exception(error_msg) 460 | 461 | 462 | def save_and_generate_html( 463 | markdown_content: str, 464 | filename: Optional[str] = None, 465 | toc_image_url: Optional[str] = None, 466 | title: Optional[str] = None, 467 | base64_audio: Optional[str] = None, 468 | ) -> str: 469 | """ 470 | Generate an HTML report from markdown formatted content and save it to a file if filename is provided. 471 | Returns the generated HTML. 472 | """ 473 | # Generate the HTML content 474 | html_doc = generate_html(markdown_content, toc_image_url, title, base64_audio) 475 | 476 | # Save to file if filename is provided 477 | if filename: 478 | # Ensure the filename has an .html extension 479 | if not filename.lower().endswith(".html"): 480 | filename += ".html" 481 | 482 | with open(filename, "w", encoding="utf-8") as f: 483 | f.write(html_doc) 484 | print(f"HTML report generated successfully: {filename}") 485 | 486 | return html_doc 487 | 488 | 489 | def fence_mermaid(source, language, css_class, options, md, **kwargs): 490 | """Clean and process mermaid code blocks.""" 491 | # Filter out title lines and clean whitespace 492 | cleaned_lines = [line.rstrip() for line in source.split("\n") if "title" not in line] 493 | cleaned_source = "\n".join(cleaned_lines).strip() 494 | 495 | return f'
{cleaned_source}
' 496 | -------------------------------------------------------------------------------- /src/libs/utils/llms.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional 2 | 3 | import tenacity 4 | from litellm import acompletion, completion 5 | from together import Together 6 | 7 | 8 | @tenacity.retry(stop=tenacity.stop_after_attempt(3), wait=tenacity.wait_exponential(multiplier=1, min=4, max=15)) 9 | async def asingle_shot_llm_call( 10 | model: str, 11 | system_prompt: str, 12 | message: str, 13 | response_format: Optional[dict[str, str | dict[str, Any]]] = None, 14 | max_completion_tokens: int | None = None, 15 | ) -> str: 16 | response = await acompletion( 17 | model=model, 18 | messages=[{"role": "system", "content": system_prompt}, 19 | {"role": "user", "content": message}], 20 | temperature=0.0, 21 | response_format=response_format, 22 | # NOTE: max_token is deprecated per OpenAI API docs, use max_completion_tokens instead if possible 23 | # NOTE: max_completion_tokens is not currently supported by Together AI, so we use max_tokens instead 24 | max_tokens=max_completion_tokens, 25 | timeout=600, 26 | ) 27 | return response.choices[0].message["content"] # type: ignore 28 | 29 | 30 | @tenacity.retry(stop=tenacity.stop_after_attempt(3), wait=tenacity.wait_exponential(multiplier=1, min=4, max=15)) 31 | def single_shot_llm_call( 32 | model: str, 33 | system_prompt: str, 34 | message: str, 35 | response_format: Optional[dict[str, str | dict[str, Any]]] = None, 36 | max_completion_tokens: int | None = None, 37 | ) -> str: 38 | response = completion( 39 | model=model, 40 | messages=[{"role": "system", "content": system_prompt}, 41 | {"role": "user", "content": message}], 42 | temperature=0.0, 43 | response_format=response_format, 44 | # NOTE: max_token is deprecated per OpenAI API docs, use max_completion_tokens instead if possible 45 | # NOTE: max_completion_tokens is not currently supported by Together AI, so we use max_tokens instead 46 | max_tokens=max_completion_tokens, 47 | timeout=600, 48 | ) 49 | return response.choices[0].message["content"] # type: ignore 50 | 51 | 52 | 53 | def generate_toc_image(prompt: str, planning_model: str, topic: str) -> str: 54 | """Generate a table of contents image""" 55 | 56 | image_generation_prompt = single_shot_llm_call( 57 | model=planning_model, system_prompt=prompt, message=f"Research Topic: {topic}") 58 | 59 | if image_generation_prompt is None: 60 | raise ValueError("Image generation prompt is None") 61 | 62 | # HERE WE CALL THE TOGETHER API SINCE IT'S AN IMAGE GENERATION REQUEST 63 | client = Together() 64 | imageCompletion = client.images.generate( 65 | model="black-forest-labs/FLUX.1-dev", 66 | width=1024, 67 | height=768, 68 | steps=28, 69 | prompt=image_generation_prompt, 70 | ) 71 | 72 | return imageCompletion.data[0].url # type: ignore 73 | 74 | 75 | -------------------------------------------------------------------------------- /src/libs/utils/log.py: -------------------------------------------------------------------------------- 1 | """Logging utilities for Together Open Deep Research.""" 2 | 3 | import logging 4 | import sys 5 | from pathlib import Path 6 | from typing import Optional, Union 7 | 8 | import litellm 9 | 10 | 11 | class AgentLogger: 12 | """Logger class for agent operations.""" 13 | 14 | def __init__( 15 | self, 16 | name: str = "root", 17 | level: Union[int, str, None] = logging.INFO, 18 | log_file: Optional[Path] = None, 19 | configure_root: bool = False, 
20 | ): 21 | self.logger = logging.getLogger(name) 22 | 23 | # Prevent propagation to parent loggers to avoid duplicate messages 24 | self.logger.propagate = False 25 | 26 | # Handle level parameter correctly regardless of type 27 | if isinstance(level, str): 28 | level_value = getattr(logging, level.upper(), logging.INFO) 29 | else: 30 | level_value = level if level is not None else logging.INFO 31 | 32 | self.logger.setLevel(level_value) 33 | 34 | # Clear existing handlers if any 35 | if self.logger.handlers: 36 | self.logger.handlers.clear() 37 | 38 | # Create formatters 39 | console_formatter = logging.Formatter( 40 | "%(asctime)s - %(name)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S", style="%", validate=True 41 | ) 42 | file_formatter = logging.Formatter( 43 | "%(asctime)s - %(name)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s", 44 | datefmt="%Y-%m-%d %H:%M:%S", 45 | style="%", 46 | validate=True, 47 | ) 48 | 49 | # Console handler 50 | console_handler = logging.StreamHandler(sys.stdout) 51 | console_handler.setFormatter(console_formatter) 52 | self.logger.addHandler(console_handler) 53 | 54 | # File handler (optional) 55 | if log_file: 56 | file_handler = logging.FileHandler( 57 | str(log_file)) # Convert Path to str 58 | file_handler.setFormatter(file_formatter) 59 | self.logger.addHandler(file_handler) 60 | 61 | # If configure_root is True, also configure the root logger and suppress noisy loggers 62 | if configure_root: 63 | # Configure root logger 64 | root_logger = logging.getLogger() 65 | root_logger.setLevel(level_value) 66 | 67 | # Clear any existing handlers on root logger 68 | if root_logger.handlers: 69 | root_logger.handlers.clear() 70 | 71 | # Add a handler to root logger 72 | root_handler = logging.StreamHandler(sys.stdout) 73 | root_handler.setFormatter(console_formatter) 74 | root_logger.addHandler(root_handler) 75 | 76 | # Suppress noisy third-party loggers 77 | logging.getLogger("httpx").setLevel(logging.ERROR) 78 | 79 | litellm.suppress_debug_info = True 80 | 81 | # Disable specific litellm loggers 82 | litellm_loggers = ["LiteLLM Proxy", "LiteLLM Router", "LiteLLM"] 83 | for logger_name in litellm_loggers: 84 | logger = logging.getLogger(logger_name) 85 | # Set higher than any standard level 86 | logger.setLevel(logging.CRITICAL + 1) 87 | logger.propagate = False # Also prevent propagation to parent loggers 88 | 89 | def debug(self, msg: str) -> None: 90 | """Log debug message.""" 91 | self.logger.debug(msg) 92 | 93 | def info(self, msg: str) -> None: 94 | """Log info message.""" 95 | self.logger.info(msg) 96 | 97 | def warning(self, msg: str) -> None: 98 | """Log warning message.""" 99 | self.logger.warning(msg) 100 | 101 | def error(self, msg: str) -> None: 102 | """Log error message.""" 103 | self.logger.error(msg) 104 | 105 | def critical(self, msg: str) -> None: 106 | """Log critical message.""" 107 | self.logger.critical(msg) 108 | -------------------------------------------------------------------------------- /src/libs/utils/podcast.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import os 3 | import subprocess 4 | import tempfile 5 | from dataclasses import dataclass 6 | from typing import List, Literal 7 | 8 | from pydantic import BaseModel, ValidationError 9 | 10 | from libs.utils.llms import single_shot_llm_call 11 | 12 | 13 | @dataclass(frozen=True, kw_only=True) 14 | class PodcastDialogue: 15 | speaker: str 16 | text: str 17 | 18 | 19 | @dataclass(frozen=True, 
kw_only=True) 20 | class PodcastScript: 21 | title: str 22 | host_voice: str 23 | guest_voice: str 24 | dialogue: List[PodcastDialogue] 25 | 26 | 27 | class LineItem(BaseModel): 28 | speaker: Literal["Host", "Guest"] 29 | text: str 30 | 31 | 32 | class Script(BaseModel): 33 | script_data: List[LineItem] 34 | 35 | 36 | def _generate_audio_segment(text: str, voice: str) -> bytes: 37 | """Generate a single audio segment using Together AI API.""" 38 | import requests 39 | 40 | url = "https://api.together.ai/v1/audio/generations" 41 | 42 | headers = {"Authorization": f"Bearer {os.getenv('TOGETHER_API_KEY')}"} 43 | 44 | data = { 45 | "input": "-" + text, # Add hyphen for slight pause 46 | "voice": voice, 47 | "response_format": "mp3", # Get MP3 directly instead of raw PCM 48 | "sample_rate": 44100, 49 | "stream": False, 50 | "model": "cartesia/sonic", 51 | } 52 | 53 | response = requests.post(url, headers=headers, json=data) 54 | response.raise_for_status() 55 | return response.content 56 | 57 | 58 | def generate_podcast_audio(script: PodcastScript) -> bytes: 59 | """ 60 | Generate podcast audio from a script using the Together AI API. 61 | 62 | Parameters: 63 | script (PodcastScript): The podcast script to generate 64 | api_key (str): Together AI API key 65 | 66 | Returns: 67 | bytes: Raw audio data from the API 68 | 69 | Raises: 70 | ValueError: If API key is missing or voices are invalid 71 | """ 72 | 73 | available_voices = ["laidback woman", "customer support man"] 74 | 75 | if script.host_voice not in available_voices or script.guest_voice not in available_voices: 76 | raise ValueError(f"Invalid voice selected. Available voices: {', '.join(available_voices)}") 77 | 78 | # Combine all audio segments into one response 79 | audio_data = bytearray() 80 | for line in script.dialogue: 81 | voice = script.host_voice if line.speaker == "Host" else script.guest_voice 82 | segment_audio = _generate_audio_segment(line.text, voice) 83 | audio_data.extend(segment_audio) 84 | 85 | return bytes(audio_data) 86 | 87 | 88 | def generate_podcast_script( 89 | *, 90 | system_prompt: str, 91 | input_text: str, 92 | podcast_name: str = "My Podcast", 93 | host_voice: str = "laidback woman", 94 | guest_voice: str = "customer support man" 95 | ) -> PodcastScript: 96 | """ 97 | Generate a podcast script using an LLM. 
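
    Example (an illustrative sketch; the prompt text would normally come from
    src/prompts.yaml and the input text here is hypothetical):

        script = generate_podcast_script(
            system_prompt="Generate a podcast script from the following text",
            input_text="A short research summary about recent AI trends...",
            podcast_name="AI Trends Podcast",
        )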
98 | 99 | Args: 100 | system_prompt: The prompt to guide the LLM 101 | input_text: The content to base the podcast on 102 | podcast_name: Title of the podcast 103 | host_voice: Voice style for the host 104 | guest_voice: Voice style for the guest 105 | 106 | Returns: 107 | A PodcastScript object with the generated dialogue 108 | """ 109 | try: 110 | response = single_shot_llm_call( 111 | model="together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", 112 | system_prompt=system_prompt, 113 | message=input_text, 114 | response_format={ 115 | "type": "json_object", 116 | "schema": Script.model_json_schema(), 117 | }, 118 | ) 119 | llm_script = Script.model_validate_json(response) 120 | except ValidationError as e: 121 | raise ValueError(f"Invalid script format: {e}") 122 | 123 | 124 | dialogue = [ 125 | PodcastDialogue( 126 | speaker="Host" if item.speaker == "Host" else "Guest", 127 | text=item.text 128 | ) for item in llm_script.script_data 129 | ] 130 | 131 | return PodcastScript( 132 | title=podcast_name, 133 | host_voice=host_voice, 134 | guest_voice=guest_voice, 135 | dialogue=dialogue 136 | ) 137 | 138 | 139 | # Available voices in the cartesia/sonic model 140 | AVAILABLE_VOICES = [ 141 | "laidback woman", # Good for hosts 142 | "customer support man", # Good for guests 143 | ] 144 | 145 | 146 | def full_podcast_generation(*, system_prompt: str, text: str, podcast_name: str = "My Podcast", host_voice: str = "laidback woman", guest_voice: str = "customer support man") -> str: 147 | script = generate_podcast_script(system_prompt=system_prompt, input_text=text, podcast_name=podcast_name, host_voice=host_voice, guest_voice=guest_voice) 148 | audio = generate_podcast_audio(script) 149 | base64_audio = get_base64_audio(audio) 150 | return base64_audio 151 | 152 | 153 | def pcm_to_wav_bytes(pcm_data, sample_rate=44100): 154 | """ 155 | Convert raw PCM float32le data to WAV format using ffmpeg. 156 | This creates temporary files but doesn't save the audio permanently. 157 | """ 158 | with tempfile.NamedTemporaryFile(suffix='.pcm', delete=True) as pcm_file: 159 | with tempfile.NamedTemporaryFile(suffix='.wav', delete=True) as wav_file: 160 | # Write PCM data to temp file 161 | pcm_file.write(pcm_data) 162 | pcm_file.flush() 163 | 164 | # Use ffmpeg to convert (same as the working implementation) 165 | cmd = [ 166 | "ffmpeg", 167 | "-y", # Overwrite output files 168 | "-f", "f32le", # Input format is 32-bit float PCM 169 | "-ar", str(sample_rate), # Sample rate 170 | "-ac", "1", # Mono audio 171 | "-i", pcm_file.name, # Input file 172 | wav_file.name # Output file 173 | ] 174 | 175 | # Run ffmpeg 176 | result = subprocess.run(cmd, capture_output=True, text=True) 177 | if result.returncode != 0: 178 | raise RuntimeError(f"Error converting audio format: {result.stderr}") 179 | 180 | # Read the WAV data 181 | wav_file.seek(0) 182 | return wav_file.read() 183 | 184 | 185 | def get_base64_audio(audio_bytes: bytes) -> str: 186 | """ 187 | Convert audio bytes to a base64-encoded data URL for HTML embedding. 188 | 189 | Args: 190 | audio_bytes: Raw audio data bytes 191 | 192 | Returns: 193 | String containing a data URL with base64-encoded audio 194 | """ 195 | encoded = base64.b64encode(audio_bytes).decode('utf-8') 196 | return f"data:audio/mp3;base64,{encoded}" 197 | 198 | 199 | def save_podcast_html(script: PodcastScript, audio_bytes: bytes, output_path: str = "podcast_test.html"): 200 | """ 201 | Save a podcast as an HTML file with embedded audio player. 
202 | 203 | Args: 204 | script: The podcast script 205 | audio_bytes: Raw audio data bytes 206 | output_path: Where to save the HTML file 207 | 208 | Returns: 209 | Path to the HTML file 210 | """ 211 | # Convert audio to base64 212 | base64_audio = get_base64_audio(audio_bytes) 213 | 214 | # Create HTML content 215 | html_content = f""" 216 | 217 | 218 | {script.title} 219 | 227 | 228 | 229 |

{script.title}

230 | 231 |
232 |

Listen to the Podcast

233 | 237 |
238 | 239 |
240 |

Transcript

241 | {''.join(f'
{line.speaker}: {line.text}
' for line in script.dialogue)} 242 |
243 | 244 | 245 | """ 246 | 247 | # Save to file 248 | with open(output_path, "w", encoding="utf-8") as f: 249 | f.write(html_content) 250 | 251 | return output_path 252 | 253 | 254 | def save_podcast_to_disk(audio_bytes: bytes, output_path: str) -> str: 255 | """ 256 | Save podcast audio bytes to disk in MP3 format. 257 | 258 | Args: 259 | audio_bytes: Raw MP3 audio data bytes 260 | output_path: Path to save the audio file 261 | 262 | Returns: 263 | Path to the saved audio file 264 | """ 265 | os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True) 266 | 267 | with open(output_path, "wb") as f: 268 | f.write(audio_bytes) 269 | 270 | return output_path 271 | 272 | 273 | if __name__ == "__main__": 274 | text = "today we are talking about the latest trends in AI and the future of the industry" 275 | 276 | # Generate podcast script and audio 277 | system_prompt = "Generate a podcast script from the following text" 278 | script = generate_podcast_script( 279 | system_prompt=system_prompt, 280 | input_text=text, 281 | podcast_name="AI Trends Podcast", 282 | host_voice="laidback woman", 283 | guest_voice="customer support man" 284 | ) 285 | print(script) 286 | audio = generate_podcast_audio(script) 287 | 288 | # Save as HTML with embedded audio 289 | html_path = save_podcast_html(script, audio) 290 | print(f"HTML file with embedded audio generated at: {html_path}") -------------------------------------------------------------------------------- /src/libs/utils/tavily_search.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | from dataclasses import dataclass 4 | from typing import Optional 5 | 6 | from tavily import AsyncTavilyClient, TavilyClient 7 | 8 | 9 | @dataclass(frozen=True, kw_only=True) 10 | class SearchResult: 11 | title: str 12 | link: str 13 | content: str 14 | raw_content: Optional[str] = None 15 | 16 | def __str__(self, include_raw=True): 17 | result = f"Title: {self.title}\n" f"Link: {self.link}\n" f"Content: {self.content}" 18 | if include_raw and self.raw_content: 19 | result += f"\nRaw Content: {self.raw_content}" 20 | return result 21 | 22 | def short_str(self): 23 | return self.__str__(include_raw=False) 24 | 25 | 26 | @dataclass(frozen=True, kw_only=True) 27 | class SearchResults: 28 | results: list[SearchResult] 29 | 30 | def __str__(self, short=False): 31 | if short: 32 | result_strs = [result.short_str() for result in self.results] 33 | else: 34 | result_strs = [str(result) for result in self.results] 35 | return "\n\n".join(f"[{i+1}] {result_str}" for i, result_str in enumerate(result_strs)) 36 | 37 | def __add__(self, other): 38 | return SearchResults(results=self.results + other.results) 39 | 40 | def short_str(self): 41 | return self.__str__(short=True) 42 | 43 | 44 | def extract_tavily_results(response) -> SearchResults: 45 | """Extract key information from Tavily search results.""" 46 | results = [] 47 | for item in response.get("results", []): 48 | results.append( 49 | SearchResult( 50 | title=item.get("title", ""), 51 | link=item.get("url", ""), 52 | content=item.get("content", ""), 53 | raw_content=item.get("raw_content", ""), 54 | ) 55 | ) 56 | return SearchResults(results=results) 57 | 58 | 59 | def tavily_search(query: str, max_results=3, include_raw: bool = True) -> SearchResults: 60 | """ 61 | Perform a search using the Tavily Search API with the official client. 62 | 63 | Parameters: 64 | query (str): The search query. 
65 | search_depth (str): The depth of search - 'basic' or 'deep'. 66 | max_results (int): Maximum number of results to return. 67 | 68 | Returns: 69 | list: Formatted search results with title, link, and snippet. 70 | """ 71 | api_key = os.getenv("TAVILY_API_KEY") 72 | 73 | if not api_key: 74 | raise ValueError("TAVILY_API_KEY environment variable is not set") 75 | 76 | client = TavilyClient(api_key) 77 | 78 | response = client.search(query=query, search_depth="basic", max_results=max_results, include_raw_content=include_raw) 79 | 80 | return extract_tavily_results(response) 81 | 82 | 83 | async def atavily_search_results(query: str, max_results=3, include_raw: bool = True) -> SearchResults: 84 | """ 85 | Perform asynchronous search using the Tavily Search API with the official client. 86 | 87 | Parameters: 88 | query (str): The search query. 89 | max_results (int): Maximum number of results to return. 90 | """ 91 | api_key = os.getenv("TAVILY_API_KEY") 92 | 93 | if not api_key: 94 | raise ValueError("TAVILY_API_KEY environment variable is not set") 95 | 96 | client = AsyncTavilyClient(api_key) 97 | 98 | response = await client.search(query=query, search_depth="basic", max_results=max_results, include_raw_content=include_raw) 99 | 100 | return extract_tavily_results(response) 101 | 102 | 103 | if __name__ == "__main__": 104 | print(asyncio.run(atavily_search_results("What is the capital of France?"))) -------------------------------------------------------------------------------- /src/prompts.yaml: -------------------------------------------------------------------------------- 1 | clarification_prompt: | 2 | You are a research assistant helping to clarify research topics. 3 | Analyze the given topic and if needed, ask focused questions to better understand: 4 | 1. The scope and specific aspects to be researched 5 | 2. Any time period or geographical constraints 6 | 3. The desired depth and technical level 7 | 4. Any specific aspects to include or exclude 8 | 9 | If the topic is already clear and specific, acknowledge that and don't ask unnecessary questions. 10 | Keep your response concise and focused. 11 | 12 | answer_prompt: | 13 | You are a senior research analyst tasked with creating a professional, publication-ready report. 14 | Using ONLY the provided sources, produce a markdown document (at least 5 pages) following these exact requirements: 15 | 16 | # Structure Guidelines 17 | 18 | 1. **Abstract** 19 | - Provide a concise (250-300 words) summary of the entire research 20 | - State the main research question/objective 21 | - Highlight key findings and their significance 22 | - Summarize major conclusions and implications 23 | - Write in a self-contained manner that can stand alone 24 | 2. **Introduction** 25 | - Contextualize the research topic 26 | - State the report's scope and objectives 27 | - Preview key themes 28 | 3. **Analysis** 29 | - Group findings into thematic categories 30 | - Compare/contrast different sources' perspectives 31 | - Highlight patterns, contradictions, and evidence quality 32 | - MUST include numbered citations [1][2]... to support all key claims and analysis. Never make factual statements without providing the corresponding citation. Format citations as [n] directly after the relevant text. 33 | 4. **Conclusion** 34 | - Synthesize overarching insights 35 | - Discuss practical implications 36 | - Identify knowledge gaps and research limitations 37 | - Suggest areas for further investigation 38 | 5. 
**References** 39 | - MUST be included in the report to improve the readability and credibility. 40 | - Include ALL sources in the references section, even those not directly cited in the report 41 | - Number references consecutively (1, 2, 3...) without gaps 42 | 43 | # Composition Rules 44 | * Strict source adherence: Every claim must cite sources using [n] notation 45 | * Analytical depth: Prioritize insight generation over mere information listing 46 | * Objective framing: Present conflicting evidence without bias 47 | * Information hierarchy: Use H2 headers for main sections, H3 for subsections 48 | * Visual clarity: Format tables with | delimiters and alignment markers 49 | * Citation integrity: Include numbered references with full source metadata 50 | 51 | # Prohibitions 52 | * Bullet points/listicles 53 | * Unsupported assertions 54 | * Informal language 55 | * Repetitive content 56 | * Source aggregation without analysis 57 | * External knowledge beyond provided sources 58 | 59 | # Formatting Requirements 60 | 61 | [Research Topic] 62 | 63 | ## Abstract 64 | [Abstract content...] 65 | 66 | ## Introduction 67 | [Cohesive opening paragraph...] 68 | [More details about the research topic...] 69 | [General overview of the report...] 70 | 71 | ## [Primary Theme] 72 | [Detailed analysis with integrated citations [1][3]. Compare multiple sources...] 73 | [Additional details)] 74 | 75 | ### [Subtheme] 76 | [Specific insights...] 77 | 78 | ### [Subtheme Where Table or Chart is Helpful] 79 | 80 | [Table Analysis in full paragraphs, avoid bullet points...] 81 | 82 | *Table X: Caption...[citation] (MUST be put above the table and seperated by a blank line)* 83 | 84 | | Comparison Aspect | Source A [2] | Source B [4] | 85 | |--------------------|--------------|--------------| 86 | | Key metric | xx% | xx% | 87 | 88 | 89 | [Chart Analysis in full paragraphs, avoid bullet points...] 90 | ```mermaid 91 | %% Choose one: flowchart, sequenceDiagram, classDiagram, stateDiagram, gantt, pie, xychart-beta 92 | %% DO NOT PUT TITLE in MERMAID CODE! titles should be put in THE FIGURE CAPTION 93 | %% To reduce the rendering difficulty, avoid multiple series, stacked charts, or complex features. 94 | %% DATA ARRAYS and AXIS RANGES MUST CONTAIN NUMBERS ONLY [10, 20, 30], e.g. for units like heights, use inches (74) instead of feet inches (6'2") 95 | %% NEVER include values that are null, n/a, or undefined in the data series. 96 | [CHART_TYPE] 97 | %% For xy/bar charts: 98 | xlabel "[X_AXIS_LABEL]" 99 | ylabel "[Y_AXIS_LABEL]" 100 | 101 | %% For data series, use one of these formats: 102 | %% Format 1 - Simple bar/line: 103 | "[LABEL1]" [VALUE1] 104 | "[LABEL2]" [VALUE2] 105 | 106 | %% Format 2 - Array style (xychart-beta): 107 | %% For measurements with special units (feet/inches, degrees°, minutes', arc-seconds''), you MUST use double single-quotes ('') to escape, e.g., ["6'2''", "45°2''", "23'45''"] NOT ["6'2\"", "45°2\""] 108 | xychart-beta 109 | x-axis "[X_AXIS_LABEL]" ["Label1", "Label2", "Label3"] 110 | y-axis "[Y_AXIS_LABEL]" MIN_VALUE --> MAX_VALUE 111 | bar [value1, value2, value3] 112 | ``` 113 | *Figure X: Caption...[citation] (MUST be put below the figure and seperated by a blank line)* 114 | 115 | ## Conclusion 116 | [Synthesized takeaways...] [5][6] 117 | [Explicit limitations discussion...] 118 | [Overall summary with 5/6 paragraphs] 119 | 120 | ### References 121 | 1. [Title of Source](https://url-of-source) 122 | 2. 
[Complete Source Title](https://example.com/full-url) 123 | 124 | # Reference Rules 125 | * Number all citations consecutively: [1], [2], [3], etc. 126 | * Include ALL sources in the reference list, whether cited in the report or not 127 | * No gaps allowed in the reference numbering 128 | * Format each reference as: [Title](URL) 129 | * For consecutive citations in text, use ranges: [1-3] instead of [1][2][3] 130 | 131 | # Example 132 | If your research report mentioned sources 1, 3, list ALL of them in references including 2 to avoid gaps: 133 | 1. [First Source](https://example.com/first) 134 | 2. [Second Source](https://example.com/second) 135 | 3. [Third Source](https://example.com/third) 136 | 137 | Begin by analyzing source relationships before writing. Verify all citations match reference numbers. Maintain academic tone throughout. 138 | While you think, consider that the sections you need to write should be 3/4 paragraphs each. We do not want to end up with a list of bullet points. Or very short sections. 139 | Think like a writer, you are optimizing coherence and readability. 140 | In terms of content is like you are writing the chapter of a book, with a few headings and lots of paragraphs. Plan to write at least 3 paragraphs for each heading you want to 141 | include in the report. 142 | 143 | 144 | raw_content_summarizer_prompt: | 145 | You are a research extraction specialist. Given a research topic and raw web content, create a thoroughly detailed synthesis as a cohesive narrative that flows naturally between key concepts. 146 | 147 | Extract the most valuable information related to the research topic, including relevant facts, statistics, methodologies, claims, and contextual information. Preserve technical terminology and domain-specific language from the source material. 148 | 149 | Structure your synthesis as a coherent document with natural transitions between ideas. Begin with an introduction that captures the core thesis and purpose of the source material. Develop the narrative by weaving together key findings and their supporting details, ensuring each concept flows logically to the next. 150 | 151 | Integrate specific metrics, dates, and quantitative information within their proper context. Explore how concepts interconnect within the source material, highlighting meaningful relationships between ideas. Acknowledge limitations by noting where information related to aspects of the research topic may be missing or incomplete. 152 | 153 | Important guidelines: 154 | - Maintain original data context (e.g., "2024 study of 150 patients" rather than generic "recent study") 155 | - Preserve the integrity of information by keeping details anchored to their original context 156 | - Create a cohesive narrative rather than disconnected bullet points or lists 157 | - Use paragraph breaks only when transitioning between major themes 158 | 159 | Critical Reminder: If content lacks a specific aspect of the research topic, clearly state that in the synthesis, and you should NEVER make up information and NEVER rely on external knowledge. 160 | 161 | evaluation_parsing_prompt: | 162 | You are a research assistant, you will be provided with a some reasoning and a list of queries, and you will need to parse the list into a list of queries. 163 | 164 | 165 | evaluation_prompt: | 166 | You are a research query optimizer. Your task is to analyze search results against the original research goal and generate follow-up queries to fill in missing information. 167 | 168 | PROCESS: 169 | 1. 
Identify ALL information explicitly requested in the original research goal 170 | 2. Analyze what specific information has been successfully retrieved in the search results 171 | 3. Identify ALL information gaps between what was requested and what was found 172 | 4. For entity-specific gaps: Create targeted queries for each missing attribute of identified entities 173 | 5. For general knowledge gaps: Create focused queries to find the missing conceptual information 174 | 175 | QUERY GENERATION RULES: 176 | - IF specific entities were identified AND specific attributes are missing: 177 | * Create direct queries for each entity-attribute pair (e.g., "LeBron James height") 178 | - IF general knowledge gaps exist: 179 | * Create focused queries to address each conceptual gap (e.g., "criteria for ranking basketball players") 180 | - Queries must be constructed to directly retrieve EXACTLY the missing information 181 | - Avoid tangential or merely interesting information not required by the original goal 182 | - Prioritize queries that will yield the most critical missing information first 183 | 184 | OUTPUT FORMAT: 185 | First, briefly state: 186 | 1. What specific information was found 187 | 2. What specific information is still missing 188 | 3. What type of knowledge gaps exist (entity-specific or general knowledge) 189 | 190 | Then provide up to 5 targeted queries that directly address the identified gaps, ordered by importance. Please consider that you 191 | need to generate queries that tackle a single goal at a time (searching for A AND B will return bad results). Be specific! 192 | 193 | 194 | plan_parsing_prompt: | 195 | You are a research assistant. You will be provided with a plan of action to research a topic; identify the queries that we should run to search for the topic. Look carefully 196 | at the general plan provided and identify the key queries that we should run. For dependent queries (those requiring results from earlier searches), leave them for later execution and focus only on the self-contained queries that can be run immediately. 197 | 198 | planning_prompt: | 199 | You are a strategic research planner with expertise in breaking down complex questions into logical search steps. When given a research topic or question, you'll analyze what specific information is needed and develop a sequential research plan. 200 | 201 | First, identify the core components of the question and any implicit information needs. 202 | 203 | Then provide a numbered list of 3-5 sequential search queries. 204 | 205 | Your queries should be: 206 | - Specific and focused (avoid broad queries that return general information) 207 | - Written in natural language without Boolean operators (no AND/OR) 208 | - Designed to progress logically from foundational to specific information 209 | 210 | It's perfectly acceptable to start with exploratory queries to "test the waters" before diving deeper. Initial queries can help establish baseline information or verify assumptions before proceeding to more targeted searches. 211 | 212 | search_prompt: | 213 | You are a web-search assistant. You will be provided with a user query, and you will need to come up with a list of web-search queries to research the topic. 214 | 215 | These should be relevant and precise queries. Also generate at most 2 queries. You must add the user query at the beginning of the query list. 216 | 217 | Queries should be single-shot queries; don't use OR or AND in the queries. 
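
# Illustrative input/output for search_prompt (hypothetical topic and queries, shown
# only to clarify the expected format: the user query comes first, at most two queries
# in total, no Boolean operators):
#   user query:        "effects of intermittent fasting on sleep quality"
#   generated queries: ["effects of intermittent fasting on sleep quality",
#                       "intermittent fasting sleep quality clinical studies"]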
218 | 219 | filter_prompt: | 220 | You are a web-search filter assistant. Your task is to filter and rank search results based on the research topic, to help your colleague create a comprehensive, in-depth, and detailed research report. 221 | 222 | You will be given the research topic, and the current search results: their titles, links, and contents. Your goal is to: 223 | 1. Rank ALL results that have ANY relevance to the topic, even if the connection is indirect 224 | 2. Use the following relevance categories: 225 | - High relevance: Directly addresses the main topic 226 | - Medium relevance: Contains useful supporting information or related concepts 227 | - Low relevance: Has tangential or contextual information that might be valuable for background or broader perspective 228 | - No relevance: Completely unrelated or irrelevant (only these should be excluded) 229 | 230 | Remember: 231 | - Keep sources that might provide valuable context or supporting information, even if not directly focused on the main topic 232 | - Sources with partial relevance should be ranked lower rather than excluded 233 | - Consider how each source might contribute to different aspects of the research report (background, context, examples, etc.) 234 | 235 | At the end of your response, return a LIST of source numbers in order of relevance, including ALL sources that have any potential value (high, medium, or low relevance). Only exclude sources that are completely irrelevant to the topic. 236 | 237 | filter_parsing_prompt: | 238 | You are a research assistant. You will be provided with a relevance analysis of the search results. 239 | 240 | You need to return a list of source numbers corresponding to the search results, in the order of relevance to the research topic. 241 | 242 | data_visualization_prompt: | 243 | You are a creative designer. You will be provided with a research topic, and you need to 244 | come up with an idea that will help your colleague create a cool figure that will engage the reader. 245 | 246 | You need to return a descriptive phrase for the drawing. 247 | The goal is not to address the topic, but to create a figure that will be interesting and engaging. 248 | 249 | Any specific names, brands, or other trademarked contents are STRICTLY PROHIBITED. ONLY reply with the idea. 250 | 251 | 252 | 253 | create_podcast_script_prompt: | 254 | 255 | You are a world-class podcast producer tasked with transforming the provided input text into an engaging and informative podcast script. The input may be unstructured or messy, sourced from PDFs or web pages. Your goal is to extract the most interesting and insightful content for a compelling podcast discussion. 256 | 257 | # Steps to Follow: 258 | 259 | 1. **Analyze the Input:** 260 | Carefully examine the text, identifying key topics, points, and interesting facts or anecdotes that could drive an engaging podcast conversation. Disregard irrelevant information or formatting issues. 261 | 262 | 2. **Brainstorm Ideas:** 263 | In the ``, creatively brainstorm ways to present the key points engagingly. Consider: 264 | - Analogies, storytelling techniques, or hypothetical scenarios to make content relatable 265 | - Ways to make complex topics accessible to a general audience 266 | - Thought-provoking questions to explore during the podcast 267 | - Creative approaches to fill any gaps in the information 268 | 269 | 3. 
**Craft the Dialogue:** 270 | Develop a natural, conversational flow between the host (Jane) and the guest speaker (the author or an expert on the topic). Incorporate: 271 | - The best ideas from your brainstorming session 272 | - Clear explanations of complex topics 273 | - An engaging and lively tone to captivate listeners 274 | - A balance of information and entertainment 275 | 276 | Rules for the dialogue: 277 | - The host (Jane) always initiates the conversation and interviews the guest 278 | - Include thoughtful questions from the host to guide the discussion 279 | - Incorporate natural speech patterns, including occasional verbal fillers (e.g., "Uhh", "Hmmm", "um," "well," "you know") 280 | - Allow for natural interruptions and back-and-forth between host and guest - this is very important to make the conversation feel authentic 281 | - Ensure the guest's responses are substantiated by the input text, avoiding unsupported claims 282 | - Maintain a PG-rated conversation appropriate for all audiences 283 | - Avoid any marketing or self-promotional content from the guest 284 | - The host concludes the conversation 285 | 286 | 4. **Summarize Key Insights:** 287 | Naturally weave a summary of key points into the closing part of the dialogue. This should feel like a casual conversation rather than a formal recap, reinforcing the main takeaways before signing off. 288 | 289 | 5. **Maintain Authenticity:** 290 | Throughout the script, strive for authenticity in the conversation. Include: 291 | - Moments of genuine curiosity or surprise from the host 292 | - Instances where the guest might briefly struggle to articulate a complex idea 293 | - Light-hearted moments or humor when appropriate 294 | - Brief personal anecdotes or examples that relate to the topic (within the bounds of the input text) 295 | 296 | 6. **Consider Pacing and Structure:** 297 | Ensure the dialogue has a natural ebb and flow: 298 | - Start with a strong hook to grab the listener's attention 299 | - Gradually build complexity as the conversation progresses 300 | - Include brief "breather" moments for listeners to absorb complex information 301 | - For complicated concepts, reasking similar questions framed from a different perspective is recommended 302 | - End on a high note, perhaps with a thought-provoking question or a call-to-action for listeners 303 | 304 | IMPORTANT RULE: Each line of dialogue should be no more than 100 characters (e.g., can finish within 5-8 seconds) 305 | 306 | Remember: Always reply in valid JSON format, without code blocks. Begin directly with the JSON output. 
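
# Illustrative shape of the JSON reply that create_podcast_script_prompt asks for
# (the dialogue below is hypothetical; the schema itself comes from the Script model
# in src/libs/utils/podcast.py):
#
#   {"script_data": [
#       {"speaker": "Host", "text": "Welcome back to the show, I'm Jane."},
#       {"speaker": "Guest", "text": "Thanks, Jane. Happy to dig into this research."}
#   ]}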
-------------------------------------------------------------------------------- /src/together_open_deep_research.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import asyncio 3 | import hashlib 4 | import json 5 | import os 6 | import pickle 7 | import re 8 | from contextlib import contextmanager 9 | from pathlib import Path 10 | from typing import Callable, List 11 | 12 | import yaml 13 | from dotenv import load_dotenv 14 | from filelock import FileLock 15 | from libs.utils.data_types import DeepResearchResult, DeepResearchResults, ResearchPlan, SourceList, UserCommunication 16 | from libs.utils.generation import generate_pdf, save_and_generate_html 17 | from libs.utils.llms import asingle_shot_llm_call, generate_toc_image 18 | from libs.utils.log import AgentLogger 19 | from libs.utils.podcast import generate_podcast_audio, generate_podcast_script, get_base64_audio, save_podcast_to_disk 20 | from libs.utils.tavily_search import atavily_search_results 21 | 22 | logging = AgentLogger("together.open_deep_research") 23 | 24 | TIME_LIMIT_MULTIPLIER = 5 25 | 26 | 27 | class DeepResearcher: 28 | def __init__( 29 | self, 30 | budget: int = 6, 31 | remove_thinking_tags: bool = False, 32 | max_queries: int = -1, 33 | max_sources: int = -1, 34 | max_completion_tokens: int = 4096, 35 | user_timeout: float = 30.0, 36 | interactive: bool = False, 37 | planning_model: str = "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", 38 | summarization_model: str = "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", 39 | json_model: str = "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", 40 | answer_model: str = "together_ai/deepseek-ai/DeepSeek-R1-Distill-Llama-70B", 41 | debug_file_path: str | None = None, 42 | cache_dir: str | None = None, 43 | use_cache: bool = False, 44 | observer: Callable | None = None, 45 | ): 46 | self.budget = budget 47 | self.current_spending = 0 48 | self.remove_thinking_tags = remove_thinking_tags 49 | self.max_queries = max_queries 50 | self.max_sources = max_sources 51 | self.max_completion_tokens = max_completion_tokens 52 | self.user_timeout = user_timeout 53 | self.interactive = interactive 54 | self.planning_model = planning_model 55 | self.summarization_model = summarization_model 56 | self.json_model = json_model 57 | self.answer_model = answer_model 58 | self.debug_file_path = debug_file_path 59 | self.communication = UserCommunication() 60 | self.use_cache = use_cache 61 | 62 | # this is a little hack to make the observer optional 63 | self.observer = observer if observer is not None else lambda *args, **kwargs: None 64 | 65 | if self.use_cache: 66 | self.cache_dir = Path(cache_dir) if cache_dir else Path.home() / ".open_deep_research_cache" 67 | self.cache_dir.mkdir(parents=True, exist_ok=True) 68 | # Create a locks directory for the file locks 69 | self.locks_dir = self.cache_dir / ".locks" 70 | self.locks_dir.mkdir(parents=True, exist_ok=True) 71 | 72 | with open(os.path.join(os.path.dirname(__file__), "prompts.yaml"), "r") as f: 73 | self.prompts = yaml.safe_load(f) 74 | 75 | def __call__(self, topic: str) -> str: 76 | """ 77 | Makes the DeepResearcher instance callable. 78 | Runs research on the given topic and returns the answer. 
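
        Example (an illustrative sketch; the budget and topic are hypothetical, and the
        TOGETHER_API_KEY / TAVILY_API_KEY environment variables must already be set):

            researcher = DeepResearcher(budget=3, interactive=False)
            report_markdown = researcher("How do open-source deep research agents compare on benchmarks?")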
79 | 80 | Args: 81 | topic: The research topic or question 82 | 83 | Returns: 84 | The research answer as a string 85 | """ 86 | loop = asyncio.new_event_loop() 87 | try: 88 | answer = loop.run_until_complete(self.research_topic(topic)) 89 | 90 | pending = asyncio.all_tasks(loop) 91 | if pending: 92 | loop.run_until_complete(asyncio.wait(pending, timeout=10)) 93 | 94 | return answer 95 | finally: 96 | loop.close() 97 | 98 | async def research_topic(self, topic: str) -> str: 99 | """Main method to conduct research on a topic""" 100 | 101 | self.observer(0, "Starting research") 102 | 103 | # Step 0: Clarify the research topic 104 | if self.interactive: 105 | self.observer(0.05, "Clarifying research topic") 106 | clarified_topic = await self.clarify_topic(topic) 107 | self.observer(0.1, "Research topic clarified") 108 | else: 109 | clarified_topic = topic 110 | 111 | logging.info(f"Topic: {clarified_topic}") 112 | 113 | # Step 1: Generate initial queries 114 | self.observer(0.15, "Generating research queries") 115 | queries = await self.generate_research_queries(clarified_topic) 116 | queries = [clarified_topic] + queries[: self.max_queries - 1] 117 | all_queries = queries.copy() 118 | logging.info(f"Initial queries: {queries}") 119 | self.observer(0.2, "Research queries generated") 120 | 121 | if len(queries) == 0: 122 | logging.error("No initial queries generated") 123 | return "No initial queries generated" 124 | 125 | # Step 2: Perform initial search 126 | self.observer(0.25, "Performing initial search") 127 | results = await self.search_all_queries(queries) 128 | logging.info(f"Initial search complete, found {len(results.results)} results") 129 | self.observer(0.3, "Initial search complete") 130 | 131 | # Step 3: Conduct iterative research within budget 132 | total_iterations = self.budget - self.current_spending 133 | for iteration in range(self.current_spending, self.budget): 134 | current_iteration = iteration - self.current_spending + 1 135 | progress = 0.3 + (0.4 * (current_iteration / total_iterations)) 136 | self.observer(progress, f"Conducting research iteration {current_iteration}/{total_iterations}") 137 | 138 | # Evaluate if more research is needed 139 | additional_queries = await self.evaluate_research_completeness(clarified_topic, results, all_queries) 140 | 141 | # Filter out empty strings and check if any queries remain 142 | additional_queries = [q for q in additional_queries if q] 143 | if not additional_queries: 144 | logging.info("No need for additional research") 145 | self.observer(progress + 0.05, "Research complete - no additional queries needed") 146 | break 147 | 148 | # for debugging purposes we limit the number of queries 149 | additional_queries = additional_queries[: self.max_queries] 150 | logging.info(f"Additional queries: {additional_queries}") 151 | 152 | # Expand research with new queries 153 | self.observer(progress + 0.02, f"Searching {len(additional_queries)} additional queries") 154 | new_results = await self.search_all_queries(additional_queries) 155 | logging.info(f"Follow-up search complete, found {len(new_results.results)} results") 156 | self.observer(progress + 0.05, f"Found {len(new_results.results)} additional results") 157 | 158 | results = results + new_results 159 | all_queries.extend(additional_queries) 160 | 161 | # Step 4: Generate final answer with feedback loop 162 | self.observer(0.7, "Filtering and processing results") 163 | logging.info(f"Generating final answer for topic: {clarified_topic}") 164 | results = results.dedup() 165 | 
logging.info(f"Deduplication complete, kept {len(results.results)} results") 166 | filtered_results, sources = await self.filter_results(clarified_topic, results) 167 | logging.info(f"LLM Filtering complete, kept {len(filtered_results.results)} results") 168 | self.observer(0.8, f"Results filtered: kept {len(filtered_results.results)} sources") 169 | 170 | if self.debug_file_path: 171 | with open(self.debug_file_path, "w") as f: 172 | f.write(f"{results}\n\n\n\n{filtered_results}") 173 | logging.info(f"Debug file (web search results and sources) saved to {self.debug_file_path}") 174 | 175 | # Generate final answer 176 | self.observer(0.9, "Generating final research report") 177 | while True: 178 | answer = await self.generate_research_answer(clarified_topic, filtered_results, self.remove_thinking_tags) 179 | 180 | if not self.interactive or self.current_spending >= self.budget: 181 | self.observer(0.95, "Research complete") 182 | return answer 183 | 184 | logging.info(f"Answer: {answer}") 185 | user_feedback = await self.communication.get_input_with_timeout( 186 | "\nAre you satisfied with this answer? (yes/no) If no, please provide feedback: ", 187 | self.user_timeout * TIME_LIMIT_MULTIPLIER, 188 | ) 189 | 190 | if user_feedback.lower() == "yes" or not user_feedback or user_feedback == "": 191 | return answer 192 | 193 | # Regenerate answer with user feedback 194 | clarified_topic = f"{clarified_topic}\n\nReport:{answer}\n\nAdditional Feedback: {user_feedback}" 195 | logging.info(f"Regenerating answer with feedback: {user_feedback}") 196 | self.current_spending += 1 197 | 198 | async def clarify_topic(self, topic: str) -> str: 199 | """ 200 | Engage in a multi-turn conversation to clarify the research topic. 201 | Returns the clarified topic after user confirmation or timeout. 
202 | 203 | Args: 204 | topic: The research topic to clarify 205 | timeout: Number of seconds to wait for user input (default: 10) 206 | """ 207 | 208 | CLARIFICATION_PROMPT = self.prompts["clarification_prompt"] 209 | 210 | clarification = await asingle_shot_llm_call( 211 | model=self.planning_model, system_prompt=CLARIFICATION_PROMPT, message=f"Research Topic: {topic}" 212 | ) 213 | 214 | logging.info(f"\nTopic Clarification: {clarification}") 215 | 216 | while self.current_spending < self.budget: 217 | user_input = await self.communication.get_input_with_timeout( 218 | "\nPlease provide additional details or type 'continue' to proceed with the research: ", self.user_timeout 219 | ) 220 | 221 | if user_input.lower() == "continue" or not user_input or user_input == "": 222 | return ( 223 | topic if not hasattr(self, "_clarification_context") else f"{topic}\n\nContext: {self._clarification_context}" 224 | ) 225 | 226 | # Store the clarification context 227 | if not hasattr(self, "_clarification_context"): 228 | self._clarification_context = user_input 229 | else: 230 | self._clarification_context += f"\n{user_input}" 231 | 232 | # Get follow-up clarification if needed 233 | clarification = await asingle_shot_llm_call( 234 | model=self.planning_model, 235 | system_prompt=CLARIFICATION_PROMPT, 236 | message=f"Research Topic: {topic}\nPrevious Context: {self._clarification_context}", 237 | ) 238 | 239 | logging.info(f"\nFollow-up Clarification: {clarification}") 240 | self.current_spending += 1 241 | 242 | # helps typing 243 | return topic 244 | 245 | async def generate_research_queries(self, topic: str) -> list[str]: 246 | PLANNING_PROMPT = self.prompts["planning_prompt"] 247 | 248 | plan = await asingle_shot_llm_call( 249 | model=self.planning_model, system_prompt=PLANNING_PROMPT, message=f"Research Topic: {topic}" 250 | ) 251 | 252 | logging.info(f"\n\nGenerated deep research plan for topic: {topic}\n\nPlan: {plan}\n\n") 253 | 254 | SEARCH_PROMPT = self.prompts["plan_parsing_prompt"] 255 | 256 | response_json = await asingle_shot_llm_call( 257 | model=self.json_model, 258 | system_prompt=SEARCH_PROMPT, 259 | message=f"Plan to be parsed: {plan}", 260 | response_format={"type": "json_object", "schema": ResearchPlan.model_json_schema()}, 261 | ) 262 | 263 | plan = json.loads(response_json) 264 | 265 | return plan["queries"] 266 | 267 | def _get_cache_path(self, query: str) -> Path: 268 | """Generate a cache file path for a given query using its hash""" 269 | query_hash = hashlib.md5(query.encode()).hexdigest() 270 | return self.cache_dir / f"tavily_{query_hash}.pkl" 271 | 272 | def _get_lock_path(self, cache_path: Path) -> Path: 273 | """Generate a lock file path for a given cache file""" 274 | return self.locks_dir / f"{cache_path.name}.lock" 275 | 276 | @contextmanager 277 | def _cache_lock(self, query: str): 278 | """Context manager for thread-safe cache operations""" 279 | cache_path = self._get_cache_path(query) 280 | lock_path = self._get_lock_path(cache_path) 281 | lock = FileLock(str(lock_path)) 282 | try: 283 | with lock: 284 | yield cache_path 285 | finally: 286 | # Clean up lock file if it's stale 287 | if lock_path.exists() and not lock.is_locked: 288 | try: 289 | lock_path.unlink() 290 | except FileNotFoundError: 291 | pass 292 | 293 | def _save_to_cache(self, query: str, results: DeepResearchResults): 294 | """Save search results to cache in a thread-safe manner""" 295 | if not self.use_cache: 296 | return 297 | 298 | with self._cache_lock(query) as cache_path: 299 | with 
open(cache_path, "wb") as f: 300 | pickle.dump(results, f) 301 | 302 | def _load_from_cache(self, query: str) -> DeepResearchResults | None: 303 | """Load search results from cache if they exist in a thread-safe manner""" 304 | if not self.use_cache: 305 | return None 306 | 307 | try: 308 | with self._cache_lock(query) as cache_path: 309 | if cache_path.exists(): 310 | with open(cache_path, "rb") as f: 311 | return pickle.load(f) 312 | except Exception as e: 313 | logging.warning(f"Failed to load cache for query '{query}': {e}") 314 | return None 315 | 316 | async def search_all_queries(self, queries: List[str]) -> DeepResearchResults: 317 | """Execute searches for all queries in parallel, using thread-safe cache""" 318 | tasks = [] 319 | cached_results = [] 320 | results_list = [] 321 | 322 | for query in queries: 323 | # Try to load from cache first if caching is enabled 324 | cached_result = self._load_from_cache(query) 325 | if cached_result is not None: 326 | logging.info(f"Using cached results for query: {query}") 327 | cached_results.append(cached_result) 328 | else: 329 | # If not in cache, create search task 330 | tasks.append(self._search_and_cache(query)) 331 | 332 | results_list.extend(cached_results) 333 | 334 | # Execute remaining searches in parallel 335 | if tasks: 336 | res_list = await asyncio.gather(*tasks) 337 | results_list.extend(res_list) 338 | 339 | # Combine all results 340 | combined_results = DeepResearchResults(results=[]) 341 | for results in results_list: 342 | combined_results = combined_results + results 343 | 344 | return combined_results 345 | 346 | async def _search_and_cache(self, query: str) -> DeepResearchResults: 347 | """Perform a search and cache the results""" 348 | results = await self._search_engine_call(query) 349 | self._save_to_cache(query, results) 350 | return results 351 | 352 | async def _search_engine_call(self, query: str) -> DeepResearchResults: 353 | """Perform a single search""" 354 | 355 | if len(query) > 400: 356 | # NOTE: we are truncating the query to 400 characters to avoid Tavily Search issues 357 | query = query[:400] 358 | logging.info(f"Truncated query to 400 characters: {query}") 359 | 360 | response = await atavily_search_results(query) 361 | 362 | logging.info("Tavily Search Called.") 363 | 364 | RAW_CONTENT_SUMMARIZER_PROMPT = self.prompts["raw_content_summarizer_prompt"] 365 | 366 | # Create tasks for summarization 367 | summarization_tasks = [] 368 | result_info = [] 369 | for result in response.results: 370 | if result.raw_content is None: 371 | continue 372 | task = self._summarize_content_async(result.raw_content, query, RAW_CONTENT_SUMMARIZER_PROMPT) 373 | summarization_tasks.append(task) 374 | result_info.append(result) 375 | 376 | # Use return_exceptions=True to prevent exceptions from propagating 377 | summarized_contents = await asyncio.gather(*summarization_tasks, return_exceptions=True) 378 | # Filter out exceptions 379 | summarized_contents = [result for result in summarized_contents if not isinstance(result, Exception)] 380 | 381 | formatted_results = [] 382 | for result, summarized_content in zip(result_info, summarized_contents): 383 | formatted_results.append( 384 | DeepResearchResult( 385 | title=result.title, 386 | link=result.link, 387 | content=result.content, 388 | raw_content=result.raw_content, 389 | filtered_raw_content=summarized_content, 390 | ) 391 | ) 392 | return DeepResearchResults(results=formatted_results) 393 | 394 | async def _summarize_content_async(self, raw_content: str, query: str, 
prompt: str) -> str: 395 | """Summarize content asynchronously using the LLM""" 396 | logging.info("Summarizing content asynchronously using the LLM") 397 | 398 | result = await asingle_shot_llm_call( 399 | model=self.summarization_model, 400 | system_prompt=prompt, 401 | message=f"{raw_content}\n\n{query}", 402 | ) 403 | 404 | return result 405 | 406 | async def evaluate_research_completeness(self, topic: str, results: DeepResearchResults, queries: List[str]) -> list[str]: 407 | """ 408 | Evaluate if the current search results are sufficient or if more research is needed. 409 | Returns an empty list if research is complete, or a list of additional queries if more research is needed. 410 | """ 411 | 412 | # Format the search results for the LLM 413 | formatted_results = str(results) 414 | EVALUATION_PROMPT = self.prompts["evaluation_prompt"] 415 | 416 | evaluation = await asingle_shot_llm_call( 417 | model=self.planning_model, 418 | system_prompt=EVALUATION_PROMPT, 419 | message=( 420 | f"{topic}\n\n" 421 | f"{queries}\n\n" 422 | f"{formatted_results}" 423 | ), 424 | ) 425 | 426 | logging.info(f"Evaluation: {evaluation}") 427 | 428 | EVALUATION_PARSING_PROMPT = self.prompts["evaluation_parsing_prompt"] 429 | 430 | response_json = await asingle_shot_llm_call( 431 | model=self.json_model, 432 | system_prompt=EVALUATION_PARSING_PROMPT, 433 | message=f"Evaluation to be parsed: {evaluation}", 434 | response_format={"type": "json_object", "schema": ResearchPlan.model_json_schema()}, 435 | ) 436 | 437 | evaluation = json.loads(response_json) 438 | return evaluation["queries"] 439 | 440 | async def filter_results(self, topic: str, results: DeepResearchResults) -> tuple[DeepResearchResults, SourceList]: 441 | """Filter the search results based on the research plan""" 442 | 443 | # Format the search results for the LLM, without the raw content 444 | formatted_results = str(results) 445 | 446 | FILTER_PROMPT = self.prompts["filter_prompt"] 447 | 448 | filter_response = await asingle_shot_llm_call( 449 | model=self.planning_model, 450 | system_prompt=FILTER_PROMPT, 451 | message=( 452 | f"{topic}\n\n" 453 | f"{formatted_results}" 454 | ), 455 | # NOTE: This is the max_token parameter for the LLM call on Together AI, may need to be changed for other providers 456 | max_completion_tokens=4096, 457 | ) 458 | 459 | logging.info(f"Filter response: {filter_response}") 460 | 461 | FILTER_PARSING_PROMPT = self.prompts["filter_parsing_prompt"] 462 | 463 | response_json = await asingle_shot_llm_call( 464 | model=self.json_model, 465 | system_prompt=FILTER_PARSING_PROMPT, 466 | message=f"Filter response to be parsed: {filter_response}", 467 | response_format={"type": "json_object", "schema": SourceList.model_json_schema()}, 468 | ) 469 | 470 | sources = json.loads(response_json)["sources"] 471 | 472 | logging.info(f"Filtered sources: {sources}") 473 | 474 | if self.max_sources != -1: 475 | sources = sources[: self.max_sources] 476 | 477 | # Filter the results based on the source list 478 | filtered_results = [results.results[i - 1] for i in sources if i - 1 < len(results.results)] 479 | 480 | return DeepResearchResults(results=filtered_results), sources 481 | 482 | async def generate_research_answer(self, topic: str, results: DeepResearchResults, remove_thinking_tags: bool = False): 483 | """ 484 | Generate a comprehensive answer to the research topic based on the search results. 485 | Returns a detailed response that synthesizes information from all search results. 
486 | """ 487 | 488 | formatted_results = str(results) 489 | ANSWER_PROMPT = self.prompts["answer_prompt"] 490 | 491 | answer = await asingle_shot_llm_call( 492 | model=self.answer_model, 493 | system_prompt=ANSWER_PROMPT, 494 | message=f"Research Topic: {topic}\n\nSearch Results:\n{formatted_results}", 495 | # NOTE: This is the max_token parameter for the LLM call on Together AI, may need to be changed for other providers 496 | max_completion_tokens=self.max_completion_tokens, 497 | ) 498 | 499 | # this is just to avoid typing complaints 500 | if answer is None or not isinstance(answer, str): 501 | logging.error("No answer generated") 502 | return "No answer generated" 503 | 504 | if remove_thinking_tags: 505 | # Remove content within <think> tags 506 | answer = self._remove_thinking_tags(answer) 507 | 508 | # Remove markdown code block markers if they exist at the beginning 509 | if answer.lstrip().startswith("```"): 510 | # Find the first line break after the opening backticks 511 | first_linebreak = answer.find("\n", answer.find("```")) 512 | if first_linebreak != -1: 513 | # Remove everything up to and including the first line break 514 | answer = answer[first_linebreak + 1 :] 515 | 516 | # Remove closing code block if it exists 517 | if answer.rstrip().endswith("```"): 518 | answer = answer.rstrip()[:-3].rstrip() 519 | 520 | return answer.strip() 521 | 522 | def _remove_thinking_tags(self, answer: str) -> str: 523 | """Remove content within <think> tags""" 524 | while "<think>" in answer and "</think>" in answer: 525 | start = answer.find("<think>") 526 | end = answer.find("</think>") + len("</think>") 527 | answer = answer[:start] + answer[end:] 528 | return answer 529 | 530 | 531 | def main(): 532 | from libs.utils.agent_factory import create_agent 533 | 534 | # Set up argument parser 535 | parser = argparse.ArgumentParser(description="Deep Research Tool") 536 | parser.add_argument("--write-pdf", action="store_true", help="Generate a PDF report of the research") 537 | parser.add_argument("--write-html", action="store_true", help="Generate an HTML report of the research") 538 | parser.add_argument("--add-toc-image", action="store_true", help="Generate a table of contents image") 539 | parser.add_argument("--write-podcast", action="store_true", help="Generate and save a podcast of the research") 540 | parser.add_argument( 541 | "--config", 542 | type=str, 543 | default="configs/open_deep_researcher_config.yaml", 544 | help="Path to the configuration file", 545 | ) 546 | 547 | parser.add_argument("--output_file", type=str, default="", help="Path to the output file") 548 | args = parser.parse_args() 549 | 550 | load_dotenv() 551 | 552 | if os.environ.get("TOGETHER_API_KEY") is None or os.environ.get("TAVILY_API_KEY") is None: 553 | raise ValueError("TOGETHER_API_KEY and TAVILY_API_KEY must be set") 554 | 555 | # Use the agent factory with the config file 556 | logging.info(f"Using configuration from file: {args.config}") 557 | 558 | # Get the researcher instance directly instead of the callable function 559 | researcher_instance = create_agent(args.config, return_instance=True) 560 | 561 | # Get the callable function 562 | def researcher(topic): 563 | return researcher_instance(topic) 564 | 565 | topic = input("Enter your research topic: ") 566 | 567 | answer = researcher(topic) 568 | 569 | toc_image_url = None 570 | base64_audio = None 571 | audio_bytes = None 572 | 573 | if args.add_toc_image: 574 | # Use the instance's prompts 575 | toc_image_url = generate_toc_image( 576 | 
researcher_instance.prompts["data_visualization_prompt"], "together_ai/deepseek-ai/DeepSeek-V3", topic 577 | ) 578 | logging.info(f"Table of contents image generated: {toc_image_url}") 579 | 580 | sanitized_topic_name = re.sub(r'[\\/*?:"<>|]', "_", topic) 581 | 582 | if args.write_podcast: 583 | logging.info("Generating podcast") 584 | script = generate_podcast_script( 585 | system_prompt=researcher_instance.prompts["create_podcast_script_prompt"], 586 | input_text=answer, 587 | podcast_name=f"Research on {topic[:50]}" if len(topic) > 50 else f"Research on {topic}", 588 | ) 589 | audio_bytes = generate_podcast_audio(script) 590 | podcast_filename = f"podcast_{sanitized_topic_name}.mp3" if args.output_file == "" else f"{args.output_file}.mp3" 591 | save_podcast_to_disk(audio_bytes, podcast_filename) 592 | 593 | base64_audio = get_base64_audio(audio_bytes) 594 | 595 | if args.write_pdf: 596 | filename = f"research_report_{sanitized_topic_name}.pdf" if args.output_file == "" else f"{args.output_file}.pdf" 597 | generate_pdf(answer, filename) 598 | 599 | if args.write_html: 600 | filename = f"research_report_{sanitized_topic_name}.html" if args.output_file == "" else f"{args.output_file}.html" 601 | save_and_generate_html(answer, filename, toc_image_url, base64_audio=base64_audio) 602 | 603 | print(answer) 604 | 605 | 606 | if __name__ == "__main__": 607 | main() 608 | -------------------------------------------------------------------------------- /src/webapp.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gradio as gr 4 | 5 | from libs.utils.generation import generate_html 6 | from libs.utils.llms import generate_toc_image 7 | from libs.utils.podcast import full_podcast_generation 8 | from together_open_deep_research import DeepResearcher 9 | 10 | 11 | def wrap_in_iframe(html_content, width="100%", height="600px"): 12 | iframe_html = f""" 13 | 20 | """ 21 | return iframe_html 22 | 23 | def func(query, budget, max_queries, max_sources, 24 | planning_model, summarization_model, json_model, answer_model, 25 | generate_podcast=True, progress=gr.Progress()): 26 | 27 | if not os.environ.get("TOGETHER_API_KEY") or not os.environ.get("TAVILY_API_KEY"): 28 | missing_keys = [] 29 | if not os.environ.get("TOGETHER_API_KEY"): 30 | missing_keys.append("TOGETHER_API_KEY") 31 | if not os.environ.get("TAVILY_API_KEY"): 32 | missing_keys.append("TAVILY_API_KEY") 33 | 34 | error_message = f"Missing API keys in environment variables: {', '.join(missing_keys)}." 
35 | return gr.Warning(error_message) 36 | 37 | try: 38 | researcher = DeepResearcher( 39 | budget=budget, 40 | max_queries=max_queries if max_queries > 0 else -1, 41 | max_sources=max_sources if max_sources > 0 else -1, 42 | planning_model=planning_model, 43 | summarization_model=summarization_model, 44 | json_model=json_model, 45 | answer_model=answer_model, 46 | observer=progress 47 | ) 48 | 49 | answer = researcher(query) 50 | 51 | progress(0.98, "Generating Cover Image") 52 | 53 | toc_image_url = generate_toc_image( 54 | researcher.prompts["data_visualization_prompt"], answer_model, query 55 | ) 56 | 57 | base64_audio = None 58 | if generate_podcast: 59 | progress(0.99, "Generating Podcast") 60 | base64_audio = full_podcast_generation(system_prompt=researcher.prompts["create_podcast_script_prompt"], text=answer) 61 | 62 | html_content = generate_html(answer, toc_image_url, base64_audio=base64_audio) 63 | 64 | iframe_content = wrap_in_iframe(html_content) 65 | 66 | return iframe_content 67 | 68 | # this is a catch ALL for the gradio app 69 | except Exception as e: 70 | error_message = f"An error occurred: {str(e)}" 71 | return gr.Warning(error_message) 72 | 73 | with gr.Blocks(title="Together Open Deep Research") as demo: 74 | gr.Markdown("# Together Open Deep Research") 75 | 76 | with gr.Accordion("⚠️ DISCLAIMER ⚠️", open=False): 77 | gr.Markdown( 78 | """**Please be aware:** 79 | 80 | - AI research tools may produce content with bias, stereotypes, or hallucinations 81 | - Results may contain inaccurate or misleading information 82 | - Always verify and fact-check the information provided 83 | - Do not make important decisions based solely on these results without independent verification""" 84 | ) 85 | 86 | with gr.Row(): 87 | query_input = gr.Textbox(placeholder="Enter your research topic-..", label="Search Topic", scale=3) 88 | 89 | with gr.Accordion("Environment Variables Requirements", open=True): 90 | gr.Markdown("""**Required Environment Variables:** 91 | - TOGETHER_API_KEY: Get from [Together AI](https://together.ai/) 92 | - TAVILY_API_KEY: Get from [Tavily](https://tavily.com/) 93 | 94 | These must be set in your environment before running the application.""") 95 | 96 | with gr.Accordion("Advanced Settings", open=False): 97 | with gr.Row(): 98 | with gr.Column(scale=1): 99 | budget = gr.Slider(minimum=1, maximum=10, value=2, step=1, label="Research Budget (iterations)") 100 | 101 | with gr.Column(scale=1): 102 | max_queries = gr.Slider(minimum=-1, maximum=5, value=3, step=1, label="Max Queries (-1 for unlimited)") 103 | max_sources = gr.Slider(minimum=-1, maximum=10, value=10, step=1, label="Max Sources (-1 for unlimited)") 104 | 105 | with gr.Row(): 106 | planning_model = gr.Dropdown( 107 | choices=[ 108 | "together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo", 109 | "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", 110 | ], 111 | value="together_ai/Qwen/Qwen2.5-72B-Instruct-Turbo", 112 | label="Planning Model" 113 | ) 114 | 115 | summarization_model = gr.Dropdown( 116 | choices=[ 117 | "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo", 118 | "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", 119 | ], 120 | value="together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo", 121 | label="Summarization Model" 122 | ) 123 | 124 | with gr.Row(): 125 | json_model = gr.Dropdown( 126 | choices=[ 127 | "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", 128 | ], 129 | value="together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", 130 | label="JSON Model" 131 | ) 
132 | 133 | answer_model = gr.Dropdown( 134 | choices=[ 135 | "together_ai/deepseek-ai/DeepSeek-V3", 136 | "together_ai/deepseek-ai/DeepSeek-R1-Distill-Llama-70B", 137 | ], 138 | value="together_ai/deepseek-ai/DeepSeek-V3", 139 | label="Answer Generation Model" 140 | ) 141 | 142 | with gr.Row(): 143 | generate_podcast = gr.Checkbox(value=True, label="Generate Podcast") 144 | 145 | 146 | 147 | with gr.Row(): 148 | submit_btn = gr.Button("Submit", variant="primary") 149 | 150 | with gr.Row(): 151 | output = gr.HTML(value="

Enter a search query and click Submit to see results here") 152 | 153 | submit_btn.click( 154 | fn=func, 155 | inputs=[ 156 | query_input, budget, max_queries, max_sources, 157 | planning_model, summarization_model, json_model, answer_model, 158 | generate_podcast 159 | ], 160 | outputs=output, 161 | concurrency_limit=10) 162 | 163 | demo.launch() --------------------------------------------------------------------------------