├── assets
│   └── demo.mp4
├── src
│   ├── __init__.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── cache.py
│   │   ├── history.py
│   │   ├── citations.py
│   │   ├── exports.py
│   │   ├── credibility.py
│   │   ├── web_utils.py
│   │   └── tools.py
│   ├── state.py
│   ├── llm_tracker.py
│   ├── config.py
│   ├── graph.py
│   ├── callbacks.py
│   └── agents.py
├── .gitignore
├── requirements.txt
├── pyproject.toml
├── LICENSE
├── main.py
├── outputs
│   ├── small language models_20251113_230645.txt
│   └── small language models_20251113_230645.md
├── app.py
└── README.md
/assets/demo.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tarun7r/deep-research-agent/HEAD/assets/demo.mp4
--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
1 | """Deep Research Agent - An efficient research agent using LangGraph."""
2 |
3 | __version__ = "0.1.0"
4 |
5 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Python
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # Virtual environments
6 | .venv/
7 | venv/
8 |
9 | # Environment variables
10 | .env
11 |
12 | # IDE
13 | .vscode/
14 | .idea/
15 |
16 | # OS
17 | .DS_Store
18 |
19 | # Project
20 | .cache/
21 | .chainlit/
22 | .files/
23 | outputs/
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | langgraph>=0.2.57
2 | langchain>=0.3.13
3 | langchain-core>=0.3.13
4 | langchain-google-genai>=2.0.8
5 | langchain-ollama>=0.2.0
6 | langchain-openai>=0.2.0
7 | langchain-community>=0.3.13
8 | ddgs>=1.0.0
9 | python-dotenv>=1.0.1
10 | beautifulsoup4>=4.12.3
11 | requests>=2.32.3
12 | markdown>=3.7
13 | pydantic>=2.10.6
14 | aiohttp>=3.11.11
15 | chainlit>=1.0.0
16 |
17 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "deep-research-agent"
3 | version = "0.1.0"
4 | description = "An efficient deep research agent using LangGraph with free tools"
5 | requires-python = ">=3.11"
6 | dependencies = [
7 |     "langgraph>=0.2.57",
8 |     "langchain>=0.3.13",
9 |     "langchain-core>=0.3.13",
10 |     "langchain-google-genai>=2.0.8",
11 |     "langchain-ollama>=0.2.0",
12 |     "langchain-openai>=0.2.0",
13 |     "langchain-community>=0.3.13",
14 |     "ddgs>=1.0.0",
15 |     "python-dotenv>=1.0.1",
16 |     "beautifulsoup4>=4.12.3",
17 |     "requests>=2.32.3",
18 |     "markdown>=3.7",
19 |     "pydantic>=2.10.6",
20 |     "aiohttp>=3.11.11",
21 |     "chainlit>=1.0.0",
22 | ]
23 |
24 | [project.optional-dependencies]
25 | dev = [
26 |     "pytest>=8.3.4",
27 |     "black>=24.10.0",
28 |     "ruff>=0.8.4",
29 | ]
30 |
31 | [build-system]
32 | requires = ["setuptools>=61.0"]
33 | build-backend = "setuptools.build_meta"
34 |
35 | [tool.black]
36 | line-length = 100
37 | target-version = ['py311']
38 |
39 | [tool.ruff]
40 | line-length = 100
41 | target-version = "py311"
42 |
43 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 Deep Research Agent
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/src/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """Utility modules for the Deep Research Agent."""
2 |
3 | # LLM-invokable tools
4 | from src.utils.tools import (
5 | get_research_tools,
6 | web_search,
7 | extract_webpage_content,
8 | analyze_research_topic,
9 | extract_insights_from_text,
10 | format_citation,
11 | validate_section_quality,
12 | all_research_tools
13 | )
14 |
15 | # Web utilities (for internal use)
16 | from src.utils.web_utils import WebSearchTool, ContentExtractor, is_valid_url
17 |
18 | # Other utilities
19 | from src.utils.cache import ResearchCache
20 | from src.utils.exports import ReportExporter
21 | from src.utils.credibility import CredibilityScorer
22 | from src.utils.citations import CitationFormatter
23 | from src.utils.history import ResearchHistory
24 |
25 | __all__ = [
26 | # LLM Tools
27 |     'all_research_tools',
28 | 'get_research_tools',
29 | 'web_search',
30 | 'extract_webpage_content',
31 | # Web Utils
32 | 'WebSearchTool',
33 | 'ContentExtractor',
34 | 'is_valid_url',
35 | # Other Utils
36 | 'ResearchCache',
37 | 'ReportExporter',
38 | 'CredibilityScorer',
39 | 'CitationFormatter',
40 | 'ResearchHistory',
41 | ]
42 |
43 |
--------------------------------------------------------------------------------
/src/utils/cache.py:
--------------------------------------------------------------------------------
1 | """Caching layer for research results to avoid redundant searches."""
2 |
3 | import json
4 | import hashlib
5 | from pathlib import Path
6 | from typing import Optional, Dict, Any
7 | from datetime import datetime, timedelta
8 | import logging
9 |
10 | logger = logging.getLogger(__name__)
11 |
12 |
13 | class ResearchCache:
14 | """Simple file-based cache for research results."""
15 |
16 | def __init__(self, cache_dir: Path = Path(".cache/research")):
17 | self.cache_dir = cache_dir
18 | self.cache_dir.mkdir(parents=True, exist_ok=True)
19 | self.cache_file = self.cache_dir / "cache.json"
20 | self.cache_ttl_days = 7 # Cache expires after 7 days
21 |
22 | # Load existing cache
23 | self._cache: Dict[str, Dict[str, Any]] = self._load_cache()
24 |
25 | def _load_cache(self) -> Dict[str, Dict[str, Any]]:
26 | """Load cache from disk."""
27 | if self.cache_file.exists():
28 | try:
29 | with open(self.cache_file, 'r', encoding='utf-8') as f:
30 | cache = json.load(f)
31 | # Filter expired entries
32 | now = datetime.now()
33 | valid_cache = {}
34 | for key, value in cache.items():
35 | cached_time = datetime.fromisoformat(value.get('timestamp', '2000-01-01'))
36 | if (now - cached_time).days < self.cache_ttl_days:
37 | valid_cache[key] = value
38 | return valid_cache
39 | except Exception as e:
40 | logger.warning(f"Failed to load cache: {e}")
41 | return {}
42 | return {}
43 |
44 | def _save_cache(self):
45 | """Save cache to disk."""
46 | try:
47 | with open(self.cache_file, 'w', encoding='utf-8') as f:
48 | json.dump(self._cache, f, indent=2, default=str)
49 | except Exception as e:
50 | logger.warning(f"Failed to save cache: {e}")
51 |
52 | def _get_key(self, topic: str) -> str:
53 | """Generate cache key from topic."""
54 | # Normalize topic (lowercase, strip whitespace)
55 | normalized = topic.lower().strip()
56 | return hashlib.md5(normalized.encode()).hexdigest()
57 |
58 | def get(self, topic: str) -> Optional[Dict[str, Any]]:
59 | """Get cached research result for a topic."""
60 | key = self._get_key(topic)
61 | if key in self._cache:
62 | logger.info(f"Cache hit for topic: {topic}")
63 | return self._cache[key].get('data')
64 | logger.info(f"Cache miss for topic: {topic}")
65 | return None
66 |
67 | def set(self, topic: str, data: Dict[str, Any]):
68 | """Cache research result for a topic."""
69 | key = self._get_key(topic)
70 | self._cache[key] = {
71 | 'topic': topic,
72 | 'data': data,
73 | 'timestamp': datetime.now().isoformat()
74 | }
75 | self._save_cache()
76 | logger.info(f"Cached research result for topic: {topic}")
77 |
78 | def clear(self):
79 | """Clear all cached entries."""
80 | self._cache = {}
81 | self._save_cache()
82 | logger.info("Cache cleared")
83 |
84 | def get_stats(self) -> Dict[str, Any]:
85 | """Get cache statistics."""
86 | return {
87 | 'total_entries': len(self._cache),
88 | 'cache_dir': str(self.cache_dir),
89 | 'cache_file': str(self.cache_file)
90 | }
91 |
92 |
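93 | # Example usage (illustrative sketch; the topic string and payload are arbitrary):
94 | if __name__ == "__main__":
95 |     cache = ResearchCache()
96 |     cache.set("small language models", {"final_report": "..."})
97 |     print(cache.get("small language models"))  # cache hit -> {'final_report': '...'}
98 |     print(cache.get_stats())                   # entry count and cache file locations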
--------------------------------------------------------------------------------
/src/state.py:
--------------------------------------------------------------------------------
1 | """State management for the Deep Research Agent."""
2 |
3 | from typing import Annotated, List, Dict, Optional, Literal
4 | from pydantic import BaseModel, Field
5 | from langgraph.graph import MessagesState
6 | from langchain_core.messages import BaseMessage
7 |
8 |
9 | class SearchQuery(BaseModel):
10 | """A search query with metadata."""
11 | query: str = Field(description="The search query text")
12 | purpose: str = Field(description="Why this query is being made")
13 | completed: bool = Field(default=False)
14 |
15 |
16 | class SearchResult(BaseModel):
17 | """A search result with content."""
18 | query: str = Field(description="The original query")
19 | title: str = Field(description="Result title")
20 | url: str = Field(description="Result URL")
21 | snippet: str = Field(description="Result snippet/summary")
22 | content: Optional[str] = Field(default=None, description="Full scraped content if available")
23 |
24 |
25 | class ReportSection(BaseModel):
26 | """A section of the research report."""
27 | title: str = Field(description="Section title")
28 | content: str = Field(description="Section content in markdown")
29 | sources: List[str] = Field(default_factory=list, description="Source URLs used")
30 |
31 |
32 | class ResearchPlan(BaseModel):
33 | """Research plan with queries and outline."""
34 | topic: str = Field(description="The research topic")
35 | objectives: List[str] = Field(description="Research objectives")
36 | search_queries: List[SearchQuery] = Field(description="Search queries to execute")
37 | report_outline: List[str] = Field(description="Outline of report sections")
38 |
39 |
40 | class ResearchState(BaseModel):
41 | """State for the research workflow."""
42 |
43 | # User input
44 | research_topic: str = Field(description="The topic to research")
45 |
46 | # Planning phase
47 | plan: Optional[ResearchPlan] = Field(default=None, description="Research plan")
48 |
49 | # Search phase
50 | search_results: List[SearchResult] = Field(
51 | default_factory=list,
52 | description="All search results collected"
53 | )
54 |
55 | # Synthesis phase
56 | key_findings: List[str] = Field(
57 | default_factory=list,
58 | description="Key findings extracted from search results"
59 | )
60 |
61 | # Report generation phase
62 | report_sections: List[ReportSection] = Field(
63 | default_factory=list,
64 | description="Generated report sections"
65 | )
66 |
67 | final_report: Optional[str] = Field(
68 | default=None,
69 | description="Complete final report in markdown"
70 | )
71 |
72 | # Workflow control
73 | current_stage: Literal[
74 | "planning", "searching", "synthesizing", "reporting", "complete"
75 | ] = Field(default="planning")
76 |
77 | error: Optional[str] = Field(default=None, description="Error message if any")
78 |
79 | # Metadata
80 | iterations: int = Field(default=0, description="Number of iterations")
81 |
82 | # Quality and metrics
83 | quality_score: Optional[Dict] = Field(default=None, description="Report quality metrics")
84 | credibility_scores: List[Dict] = Field(default_factory=list, description="Source credibility scores")
85 |
86 | # LLM tracking
87 | llm_calls: int = Field(default=0, description="Total number of LLM API calls")
88 | total_input_tokens: int = Field(default=0, description="Total input tokens used")
89 | total_output_tokens: int = Field(default=0, description="Total output tokens generated")
90 | llm_call_details: List[Dict] = Field(default_factory=list, description="Details of each LLM call")
91 |
92 | class Config:
93 | arbitrary_types_allowed = True
94 |
95 |
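96 | # Example usage (illustrative sketch; field values below are placeholders):
97 | if __name__ == "__main__":
98 |     plan = ResearchPlan(
99 |         topic="small language models",
100 |         objectives=["Define SLMs and contrast them with LLMs"],
101 |         search_queries=[SearchQuery(query="what is a small language model", purpose="definition")],
102 |         report_outline=["Introduction", "Strategic Importance"],
103 |     )
104 |     state = ResearchState(research_topic=plan.topic, plan=plan)
105 |     print(state.current_stage)  # -> "planning" (the default stage)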
--------------------------------------------------------------------------------
/src/utils/history.py:
--------------------------------------------------------------------------------
1 | """Research history tracking and persistence."""
2 |
3 | import json
4 | from pathlib import Path
5 | from typing import List, Dict, Optional
6 | from datetime import datetime
7 | import logging
8 |
9 | logger = logging.getLogger(__name__)
10 |
11 |
12 | class ResearchHistory:
13 | """Track and manage research history."""
14 |
15 | def __init__(self, history_file: Path = Path(".cache/research_history.json")):
16 | self.history_file = history_file
17 | self.history_file.parent.mkdir(parents=True, exist_ok=True)
18 | self._history: List[Dict] = self._load_history()
19 |
20 | def _load_history(self) -> List[Dict]:
21 | """Load history from disk."""
22 | if self.history_file.exists():
23 | try:
24 | with open(self.history_file, 'r', encoding='utf-8') as f:
25 | return json.load(f)
26 | except Exception as e:
27 | logger.warning(f"Failed to load history: {e}")
28 | return []
29 | return []
30 |
31 | def _save_history(self):
32 | """Save history to disk."""
33 | try:
34 | with open(self.history_file, 'w', encoding='utf-8') as f:
35 | json.dump(self._history, f, indent=2, default=str)
36 | except Exception as e:
37 | logger.warning(f"Failed to save history: {e}")
38 |
39 | def add_research(
40 | self,
41 | topic: str,
42 | output_file: Optional[Path] = None,
43 | quality_score: Optional[Dict] = None,
44 | metadata: Optional[Dict] = None
45 | ):
46 | """Add a research entry to history."""
47 | entry = {
48 | 'topic': topic,
49 | 'timestamp': datetime.now().isoformat(),
50 | 'output_file': str(output_file) if output_file else None,
51 | 'quality_score': quality_score,
52 | 'metadata': metadata or {}
53 | }
54 |
55 | # Add to beginning of list (most recent first)
56 | self._history.insert(0, entry)
57 |
58 | # Keep only last 100 entries
59 | if len(self._history) > 100:
60 | self._history = self._history[:100]
61 |
62 | self._save_history()
63 | logger.info(f"Added research to history: {topic}")
64 |
65 | def get_recent(self, limit: int = 10) -> List[Dict]:
66 | """Get recent research entries."""
67 | return self._history[:limit]
68 |
69 | def search_history(self, query: str) -> List[Dict]:
70 | """Search history by topic."""
71 | query_lower = query.lower()
72 | return [
73 | entry for entry in self._history
74 | if query_lower in entry.get('topic', '').lower()
75 | ]
76 |
77 | def get_by_topic(self, topic: str) -> Optional[Dict]:
78 | """Get most recent research for a topic."""
79 | for entry in self._history:
80 | if entry.get('topic', '').lower() == topic.lower():
81 | return entry
82 | return None
83 |
84 | def clear_history(self):
85 | """Clear all history."""
86 | self._history = []
87 | self._save_history()
88 | logger.info("History cleared")
89 |
90 | def get_stats(self) -> Dict:
91 | """Get history statistics."""
92 | if not self._history:
93 | return {
94 | 'total_researches': 0,
95 | 'oldest': None,
96 | 'newest': None
97 | }
98 |
99 | timestamps = [datetime.fromisoformat(e['timestamp']) for e in self._history if 'timestamp' in e]
100 |
101 | return {
102 | 'total_researches': len(self._history),
103 | 'oldest': min(timestamps).isoformat() if timestamps else None,
104 | 'newest': max(timestamps).isoformat() if timestamps else None
105 | }
106 |
107 |
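108 | # Example usage (illustrative; the topic and score values are placeholders):
109 | if __name__ == "__main__":
110 |     history = ResearchHistory()
111 |     history.add_research("small language models", quality_score={"overall": 85})
112 |     print(history.get_recent(limit=5))  # most recent entries first
113 |     print(history.get_stats())          # total count plus oldest/newest timestamps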
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | """Main entry point for the Deep Research Agent."""
2 |
3 | import asyncio
4 | import sys
5 | from pathlib import Path
6 | import logging
7 |
8 | from src.config import config
9 | from src.graph import run_research
10 |
11 | logging.basicConfig(
12 | level=logging.INFO,
13 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
14 | )
15 | logger = logging.getLogger(__name__)
16 |
17 |
18 | async def main():
19 | """Main function to run the research agent."""
20 |
21 | # Validate configuration
22 | try:
23 | config.validate_config()
24 | except ValueError as e:
25 | logger.error(f"Configuration error: {e}")
26 | sys.exit(1)
27 |
28 | # Get research topic
29 | if len(sys.argv) > 1:
30 | topic = " ".join(sys.argv[1:])
31 | else:
32 | print("\nDeep Research Agent")
33 | print("=" * 50)
34 | topic = input("\nEnter your research topic: ").strip()
35 |
36 | if not topic:
37 | logger.error("No research topic provided")
38 | sys.exit(1)
39 |
40 | print(f"\n[INFO] Starting deep research on: {topic}\n")
41 | print("This may take several minutes. Please wait...\n")
42 |
43 | try:
44 | # Run the research workflow
45 | final_state = await run_research(topic, verbose=True)
46 |
47 | # LangGraph returns dict with state - access fields directly
48 | # Check for errors
49 | if final_state.get("error"):
50 | logger.error(f"Research failed: {final_state.get('error')}")
51 | sys.exit(1)
52 |
53 | # Display results
54 | print("\n" + "=" * 80)
55 | print("RESEARCH COMPLETE")
56 | print("=" * 80)
57 |
58 | if final_state.get("plan"):
59 | plan = final_state["plan"]
60 |             print("\nResearch Plan Summary:")
61 | print(f" - Objectives: {len(plan.objectives)}")
62 | print(f" - Search Queries: {len(plan.search_queries)}")
63 | print(f" - Report Sections: {len(plan.report_outline)}")
64 |
65 |         print("\nResearch Data Summary:")
66 | print(f" - Search Results: {len(final_state.get('search_results', []))}")
67 | print(f" - Key Findings: {len(final_state.get('key_findings', []))}")
68 | print(f" - Report Sections: {len(final_state.get('report_sections', []))}")
69 | print(f" - Iterations: {final_state.get('iterations', 0)}")
70 |
71 | # Save the report
72 | if final_state.get("final_report"):
73 | output_dir = Path("outputs")
74 | output_dir.mkdir(exist_ok=True)
75 |
76 | # Create safe filename
77 | safe_topic = "".join(c if c.isalnum() or c in (' ', '-', '_') else '_' for c in topic)
78 | safe_topic = safe_topic[:50].strip()
79 |
80 | output_file = output_dir / f"{safe_topic}.md"
81 | final_report = final_state["final_report"]
82 | output_file.write_text(final_report, encoding='utf-8')
83 |
84 | print(f"\n[SUCCESS] Report saved to: {output_file}")
85 | print(f" Report length: {len(final_report)} characters")
86 |
87 | # Display a preview
88 | print("\n" + "=" * 80)
89 | print("REPORT PREVIEW")
90 | print("=" * 80)
91 | print(final_report[:1500])
92 | if len(final_report) > 1500:
93 | print(f"\n... (showing first 1500 of {len(final_report)} characters)")
94 | print("\n" + "=" * 80)
95 |
96 | else:
97 | logger.warning("No report was generated")
98 |
99 | except KeyboardInterrupt:
100 | print("\n\n[WARNING] Research interrupted by user")
101 | sys.exit(0)
102 | except Exception as e:
103 | logger.error(f"[ERROR] Unexpected error: {e}", exc_info=True)
104 | sys.exit(1)
105 |
106 |
107 | if __name__ == "__main__":
108 | asyncio.run(main())
109 |
110 |
--------------------------------------------------------------------------------
/src/utils/citations.py:
--------------------------------------------------------------------------------
1 | """Citation formatting utilities for different citation styles."""
2 |
3 | from typing import List, Dict
4 | from datetime import datetime
5 | import re
6 | import logging
7 |
8 | logger = logging.getLogger(__name__)
9 |
10 |
11 | class CitationFormatter:
12 | """Format citations in different academic styles."""
13 |
14 | def __init__(self):
15 | self.styles = ['apa', 'mla', 'chicago', 'ieee']
16 |
17 | def format_apa(self, url: str, title: str = "", author: str = "", date: str = "") -> str:
18 | """Format citation in APA style."""
19 | if author and date:
20 | return f"{author} ({date}). {title}. Retrieved from {url}"
21 | elif title:
22 | return f"{title}. (n.d.). Retrieved from {url}"
23 | else:
24 | return f"Retrieved from {url}"
25 |
26 | def format_mla(self, url: str, title: str = "", author: str = "", date: str = "") -> str:
27 | """Format citation in MLA style."""
28 | parts = []
29 | if author:
30 | parts.append(author)
31 | if title:
32 | parts.append(f'"{title}"')
33 | if date:
34 | parts.append(date)
35 | parts.append(f"Web. {datetime.now().strftime('%d %b. %Y')}")
36 | parts.append(f"<{url}>")
37 | return ". ".join(parts)
38 |
39 | def format_chicago(self, url: str, title: str = "", author: str = "", date: str = "") -> str:
40 | """Format citation in Chicago style."""
41 | if author:
42 | return f"{author}. \"{title}.\" Accessed {datetime.now().strftime('%B %d, %Y')}. {url}."
43 | else:
44 | return f"\"{title}.\" Accessed {datetime.now().strftime('%B %d, %Y')}. {url}."
45 |
46 | def format_ieee(self, url: str, title: str = "", author: str = "", date: str = "") -> str:
47 | """Format citation in IEEE style."""
48 | if author:
49 | return f"{author}, \"{title},\" {url}, accessed {datetime.now().strftime('%B %d, %Y')}."
50 | else:
51 | return f"\"{title},\" {url}, accessed {datetime.now().strftime('%B %d, %Y')}."
52 |
53 | def format_references_section(
54 | self,
55 | urls: List[str],
56 | style: str = 'apa',
57 | search_results: List = None
58 | ) -> str:
59 | """Format a references section in the specified style.
60 |
61 | Args:
62 | urls: List of URLs to cite
63 | style: Citation style ('apa', 'mla', 'chicago', 'ieee')
64 | search_results: Optional search results to extract metadata
65 |
66 | Returns:
67 | Formatted references section
68 | """
69 | style = style.lower()
70 | if style not in self.styles:
71 |             logger.warning(f"Unknown style '{style}', defaulting to APA")
72 |             style = 'apa'
73 |
74 | # Create URL to metadata mapping
75 | url_metadata = {}
76 | if search_results:
77 | for result in search_results:
78 | if hasattr(result, 'url') and result.url:
79 | url_metadata[result.url] = {
80 | 'title': getattr(result, 'title', ''),
81 | 'snippet': getattr(result, 'snippet', '')
82 | }
83 |
84 | references = []
85 | for i, url in enumerate(urls, 1):
86 | metadata = url_metadata.get(url, {})
87 | title = metadata.get('title', '')
88 |
89 | if style == 'apa':
90 | citation = self.format_apa(url, title)
91 | elif style == 'mla':
92 | citation = self.format_mla(url, title)
93 | elif style == 'chicago':
94 | citation = self.format_chicago(url, title)
95 | elif style == 'ieee':
96 | citation = self.format_ieee(url, title)
97 | else:
98 | citation = url
99 |
100 | references.append(f"{i}. {citation}")
101 |
102 | return "\n".join(references)
103 |
104 | def update_report_citations(
105 | self,
106 | report_content: str,
107 | style: str = 'apa',
108 | search_results: List = None
109 | ) -> str:
110 | """Update citations in a report to use specified style.
111 |
112 | This updates the references section but keeps inline citations as [1], [2], etc.
113 | """
114 | # Extract URLs from references section
115 | references_match = re.search(
116 | r'## References\n\n(.*?)(?=\n##|\Z)',
117 | report_content,
118 | re.DOTALL
119 | )
120 |
121 | if not references_match:
122 | return report_content
123 |
124 | # Extract URLs from existing references
125 | url_pattern = r'https?://[^\s\)]+'
126 | existing_refs = references_match.group(1)
127 | urls = re.findall(url_pattern, existing_refs)
128 |
129 | if not urls:
130 | return report_content
131 |
132 | # Format new references section
133 | new_references = f"## References\n\n{self.format_references_section(urls, style, search_results)}"
134 |
135 | # Replace references section
136 | updated_report = re.sub(
137 | r'## References\n\n.*?(?=\n##|\Z)',
138 | new_references,
139 | report_content,
140 | flags=re.DOTALL
141 | )
142 |
143 | return updated_report
144 |
145 |
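146 | # Example usage (illustrative; the URL and title are taken from a sample report's references):
147 | if __name__ == "__main__":
148 |     formatter = CitationFormatter()
149 |     url = "https://arxiv.org/html/2402.05121v3"
150 |     title = "Large Language Model for Table Processing: A Survey"
151 |     print(formatter.format_apa(url, title))  # "<title>. (n.d.). Retrieved from <url>"
152 |     print(formatter.format_references_section([url], style='ieee'))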
--------------------------------------------------------------------------------
/src/llm_tracker.py:
--------------------------------------------------------------------------------
1 | """LLM call tracking and token usage monitoring."""
2 |
3 | from typing import Dict, Optional, Any
4 | import logging
5 | from functools import wraps
6 | import time
7 |
8 | logger = logging.getLogger(__name__)
9 |
10 |
11 | class LLMCallTracker:
12 | """Track LLM calls and token usage."""
13 |
14 | def __init__(self):
15 | self.calls = []
16 | self.total_input_tokens = 0
17 | self.total_output_tokens = 0
18 |
19 | def track_call(
20 | self,
21 | agent_name: str,
22 | operation: str,
23 | input_tokens: int = 0,
24 | output_tokens: int = 0,
25 | duration: float = 0.0,
26 | model: str = "",
27 | success: bool = True,
28 | error: Optional[str] = None
29 | ) -> Dict[str, Any]:
30 | """Track an LLM call."""
31 | call_info = {
32 | 'agent': agent_name,
33 | 'operation': operation,
34 | 'model': model,
35 | 'input_tokens': input_tokens,
36 | 'output_tokens': output_tokens,
37 | 'total_tokens': input_tokens + output_tokens,
38 | 'duration': round(duration, 2),
39 | 'success': success,
40 | 'error': error,
41 | 'timestamp': time.time()
42 | }
43 |
44 | self.calls.append(call_info)
45 | self.total_input_tokens += input_tokens
46 | self.total_output_tokens += output_tokens
47 |
48 | logger.info(
49 | f"LLM Call [{agent_name}/{operation}]: "
50 | f"{input_tokens} in + {output_tokens} out = {input_tokens + output_tokens} tokens "
51 | f"({duration:.2f}s)"
52 | )
53 |
54 | return call_info
55 |
56 | def get_summary(self) -> Dict[str, Any]:
57 | """Get summary of all LLM calls."""
58 | total_tokens = self.total_input_tokens + self.total_output_tokens
59 | total_duration = sum(call['duration'] for call in self.calls)
60 |
61 | # Group by agent
62 | by_agent = {}
63 | for call in self.calls:
64 | agent = call['agent']
65 | if agent not in by_agent:
66 | by_agent[agent] = {
67 | 'calls': 0,
68 | 'input_tokens': 0,
69 | 'output_tokens': 0,
70 | 'total_tokens': 0,
71 | 'duration': 0.0
72 | }
73 | by_agent[agent]['calls'] += 1
74 | by_agent[agent]['input_tokens'] += call['input_tokens']
75 | by_agent[agent]['output_tokens'] += call['output_tokens']
76 | by_agent[agent]['total_tokens'] += call['total_tokens']
77 | by_agent[agent]['duration'] += call['duration']
78 |
79 | return {
80 | 'total_calls': len(self.calls),
81 | 'total_input_tokens': self.total_input_tokens,
82 | 'total_output_tokens': self.total_output_tokens,
83 | 'total_tokens': total_tokens,
84 | 'total_duration': round(total_duration, 2),
85 | 'by_agent': by_agent,
86 | 'successful_calls': sum(1 for c in self.calls if c['success']),
87 | 'failed_calls': sum(1 for c in self.calls if not c['success'])
88 | }
89 |
90 | def get_calls(self) -> list:
91 | """Get all tracked calls."""
92 | return self.calls
93 |
94 |
95 | def estimate_tokens(text: str) -> int:
96 | """Estimate token count for text (rough approximation: 1 token ≈ 4 chars)."""
97 | return max(1, len(text) // 4)
98 |
99 |
100 | def track_llm_call(agent_name: str, operation: str, model: str = ""):
101 |     """Decorator to track LLM calls; the async wrapper returns a (result, call_info) tuple."""
102 | def decorator(func):
103 | @wraps(func)
104 | async def async_wrapper(*args, **kwargs):
105 | start_time = time.time()
106 | try:
107 | result = await func(*args, **kwargs)
108 | duration = time.time() - start_time
109 |
110 | # Try to extract token info from result if available
111 | input_tokens = kwargs.get('_input_tokens', 0)
112 | output_tokens = kwargs.get('_output_tokens', 0)
113 |
114 | # If not provided, estimate based on result
115 | if output_tokens == 0 and isinstance(result, str):
116 | output_tokens = estimate_tokens(result)
117 |
118 | return result, {
119 | 'agent': agent_name,
120 | 'operation': operation,
121 | 'model': model,
122 | 'input_tokens': input_tokens,
123 | 'output_tokens': output_tokens,
124 | 'duration': duration,
125 | 'success': True
126 | }
127 | except Exception as e:
128 | duration = time.time() - start_time
129 | logger.error(f"LLM call failed: {e}")
130 | raise
131 |
132 | @wraps(func)
133 | def sync_wrapper(*args, **kwargs):
134 | start_time = time.time()
135 | try:
136 | result = func(*args, **kwargs)
137 | duration = time.time() - start_time
138 | return result
139 | except Exception as e:
140 | duration = time.time() - start_time
141 | logger.error(f"LLM call failed: {e}")
142 | raise
143 |
144 | # Return appropriate wrapper based on function type
145 | import inspect
146 | if inspect.iscoroutinefunction(func):
147 | return async_wrapper
148 | else:
149 | return sync_wrapper
150 |
151 | return decorator
152 |
153 |
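154 | # Example usage (illustrative; token counts come from the rough 4-chars-per-token estimate):
155 | if __name__ == "__main__":
156 |     tracker = LLMCallTracker()
157 |     prompt, answer = "Summarize small language models", "SLMs are compact, task-focused models..."
158 |     tracker.track_call(
159 |         agent_name="planner",
160 |         operation="plan",
161 |         input_tokens=estimate_tokens(prompt),
162 |         output_tokens=estimate_tokens(answer),
163 |         duration=0.5,
164 |         model="gemini-2.5-flash",
165 |     )
166 |     print(tracker.get_summary()["total_tokens"])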
--------------------------------------------------------------------------------
/src/utils/exports.py:
--------------------------------------------------------------------------------
1 | """Export research reports to various formats."""
2 |
3 | from pathlib import Path
4 | from typing import Optional
5 | import logging
6 | import markdown
7 | from datetime import datetime
8 |
9 | logger = logging.getLogger(__name__)
10 |
11 |
12 | class ReportExporter:
13 | """Export reports to various formats."""
14 |
15 | def __init__(self):
16 | self.supported_formats = ['markdown', 'html', 'txt']
17 |
18 | def export_markdown(self, content: str, output_path: Path) -> Path:
19 | """Export as markdown (already in markdown format)."""
20 | output_path.write_text(content, encoding='utf-8')
21 | logger.info(f"Exported markdown to {output_path}")
22 | return output_path
23 |
24 | def export_html(self, content: str, output_path: Path) -> Path:
25 | """Export as HTML with styling."""
26 | # Convert markdown to HTML
27 | html_content = markdown.markdown(
28 | content,
29 | extensions=['extra', 'codehilite', 'tables']
30 | )
31 |
32 | # Wrap in styled HTML template
33 |         html_template = f"""<!DOCTYPE html>
34 | <html>
35 | <head>
36 | <meta charset="utf-8">
37 | <title>Research Report</title>
38 | <style>
39 | body {{ font-family: sans-serif; max-width: 800px; margin: 2rem auto; line-height: 1.6; padding: 0 1rem; }}
40 | pre, code {{ background: #f5f5f5; padding: 0.2em 0.4em; }}
41 | table {{ border-collapse: collapse; }}
42 | th, td {{ border: 1px solid #ddd; padding: 0.4rem; }}
43 | </style>
44 | </head>
45 | <body>
46 | {html_content}
47 | <footer><p><em>Generated on {datetime.now().strftime('%B %d, %Y')}</em></p></footer>
48 | </body>
49 | </html>
50 | """
51 |
52 |         output_path.write_text(html_template, encoding='utf-8')
53 |         logger.info(f"Exported HTML to {output_path}")
54 |         return output_path
55 |
56 |     def export_txt(self, content: str, output_path: Path) -> Path:
57 |         """Export as plain text (strip markdown)."""
58 |         import re
59 |         # Remove markdown formatting
60 |         text = content
61 |         # Remove headers
62 |         text = re.sub(r'^#+\s+', '', text, flags=re.MULTILINE)
63 |         # Remove bold/italic
64 |         text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text)
65 |         text = re.sub(r'\*([^*]+)\*', r'\1', text)
66 |         # Remove links but keep text
67 |         text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
68 |         # Remove code blocks
69 |         text = re.sub(r'```[^`]+```', '', text, flags=re.DOTALL)
70 |         text = re.sub(r'`([^`]+)`', r'\1', text)
71 |
72 |         output_path.write_text(text, encoding='utf-8')
73 |         logger.info(f"Exported text to {output_path}")
74 |         return output_path
75 |
76 |     def export(self, content: str, output_path: Path, format: str = 'markdown') -> Path:
77 |         """Export content to specified format.
78 |
79 |         Args:
80 |             content: Report content (markdown)
81 |             output_path: Output file path
82 |             format: Export format ('markdown', 'html', 'txt')
83 |
84 |         Returns:
85 |             Path to exported file
86 |         """
87 |         format = format.lower()
88 |
89 |         if format not in self.supported_formats:
90 |             raise ValueError(f"Unsupported format: {format}. Supported: {self.supported_formats}")
91 |
92 |         # Adjust file extension if needed
93 |         if format == 'html' and output_path.suffix != '.html':
94 |             output_path = output_path.with_suffix('.html')
95 |         elif format == 'txt' and output_path.suffix != '.txt':
96 |             output_path = output_path.with_suffix('.txt')
97 |         elif format == 'markdown' and output_path.suffix not in ['.md', '.markdown']:
98 |             output_path = output_path.with_suffix('.md')
99 |
100 |         if format == 'markdown':
101 |             return self.export_markdown(content, output_path)
102 |         elif format == 'html':
103 |             return self.export_html(content, output_path)
104 |         elif format == 'txt':
105 |             return self.export_txt(content, output_path)
106 |         else:
107 |             raise ValueError(f"Export not implemented for format: {format}")
108 |
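109 |
110 | # Example usage (illustrative; the output path below is arbitrary):
111 | if __name__ == "__main__":
112 |     exporter = ReportExporter()
113 |     report = "# Sample Report\n\nSome **markdown** content."
114 |     # export() swaps the suffix to match the requested format, so this writes report.html
115 |     exporter.export(report, Path("report.md"), format='html')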
--------------------------------------------------------------------------------
/src/config.py:
--------------------------------------------------------------------------------
1 | """Configuration management for the Deep Research Agent."""
2 |
3 | import os
4 | from typing import Optional
5 | from pathlib import Path
6 | from pydantic import BaseModel, Field
7 | from dotenv import load_dotenv
8 |
9 | # Load environment variables from .env file
10 | env_path = Path(__file__).parent.parent / ".env"
11 | load_dotenv(dotenv_path=env_path)
12 |
13 |
14 | class ResearchConfig(BaseModel):
15 | """Configuration for the research agent."""
16 |
17 | # Model Provider Configuration
18 | model_provider: str = Field(
19 | default=os.getenv("MODEL_PROVIDER", "gemini"),
20 | description="Model provider: 'gemini', 'ollama', 'openai', or 'llamacpp'"
21 | )
22 |
23 | # API Keys
24 | google_api_key: str = Field(
25 | default_factory=lambda: os.getenv("GEMINI_API_KEY", ""),
26 | description="Google/Gemini API key (required if using Gemini)"
27 | )
28 |
29 | openai_api_key: str = Field(
30 | default_factory=lambda: os.getenv("OPENAI_API_KEY", ""),
31 | description="OpenAI API key (required if using OpenAI)"
32 | )
33 |
34 | # Ollama Configuration
35 | ollama_base_url: str = Field(
36 | default=os.getenv("OLLAMA_BASE_URL", "http://localhost:11434"),
37 | description="Ollama server URL"
38 | )
39 |
40 | # llama.cpp Server Configuration
41 | llamacpp_base_url: str = Field(
42 | default=os.getenv("LLAMACPP_BASE_URL", "http://localhost:8080"),
43 | description="llama.cpp server URL (OpenAI-compatible API)"
44 | )
45 |
46 | # Model Configuration
47 | model_name: str = Field(
48 | default=os.getenv("MODEL_NAME", "gemini-2.5-flash"),
49 | description="Model to use for research and generation"
50 | )
51 |
52 | summarization_model: str = Field(
53 | default=os.getenv("SUMMARIZATION_MODEL", "gemini-2.5-flash"),
54 | description="Model for summarizing search results (faster/cheaper)"
55 | )
56 |
57 | # Search Configuration
58 | max_search_queries: int = Field(
59 | default=int(os.getenv("MAX_SEARCH_QUERIES", "3")),
60 | description="Maximum number of search queries to generate"
61 | )
62 |
63 | max_search_results_per_query: int = Field(
64 | default=int(os.getenv("MAX_SEARCH_RESULTS_PER_QUERY", "3")),
65 | description="Maximum results to fetch per search query"
66 | )
67 |
68 | max_parallel_searches: int = Field(
69 | default=int(os.getenv("MAX_PARALLEL_SEARCHES", "3")),
70 | description="Maximum number of parallel search operations"
71 | )
72 |
73 | # Credibility Configuration
74 | min_credibility_score: int = Field(
75 | default=int(os.getenv("MIN_CREDIBILITY_SCORE", "40")),
76 | description="Minimum credibility score (0-100) to filter low-quality sources"
77 | )
78 |
79 | # Report Configuration
80 | max_report_sections: int = Field(
81 | default=int(os.getenv("MAX_REPORT_SECTIONS", "8")),
82 | description="Maximum number of sections in the final report"
83 | )
84 |
85 | min_section_words: int = Field(
86 | default=200,
87 | description="Minimum words per section"
88 | )
89 |
90 | # Citation Configuration
91 | citation_style: str = Field(
92 | default=os.getenv("CITATION_STYLE", "apa"),
93 | description="Citation style (apa, mla, chicago, ieee)"
94 | )
95 |
96 | # LangSmith Configuration
97 | langsmith_tracing: bool = Field(
98 | default=os.getenv("LANGCHAIN_TRACING_V2", "false").lower() == "true",
99 | description="Enable LangSmith tracing"
100 | )
101 |
102 | langsmith_project: str = Field(
103 | default=os.getenv("LANGCHAIN_PROJECT", "deep-research-agent"),
104 | description="LangSmith project name"
105 | )
106 |
107 | def validate_config(self) -> bool:
108 | """Validate that required configuration is present."""
109 | if self.model_provider == "gemini":
110 | if not self.google_api_key:
111 | raise ValueError(
112 | "GEMINI_API_KEY is required when using Gemini. Get one from https://makersuite.google.com/app/apikey"
113 | )
114 | elif self.model_provider == "ollama":
115 | # Validate Ollama is accessible
116 | try:
117 | import requests
118 | response = requests.get(f"{self.ollama_base_url}/api/tags", timeout=5)
119 | if response.status_code != 200:
120 | raise ValueError(f"Ollama server not accessible at {self.ollama_base_url}")
121 | except requests.exceptions.RequestException as e:
122 | raise ValueError(f"Cannot connect to Ollama server at {self.ollama_base_url}: {e}")
123 | elif self.model_provider == "openai":
124 | if not self.openai_api_key:
125 | raise ValueError(
126 | "OPENAI_API_KEY is required when using OpenAI. Get one from https://platform.openai.com/api-keys"
127 | )
128 | elif self.model_provider == "llamacpp":
129 | # Validate llama.cpp server is accessible
130 | try:
131 | import requests
132 | response = requests.get(f"{self.llamacpp_base_url}/health", timeout=5)
133 | if response.status_code not in [200, 404]: # 404 is ok, means server is running but no health endpoint
134 | raise ValueError(f"llama.cpp server not accessible at {self.llamacpp_base_url}")
135 | except requests.exceptions.RequestException as e:
136 | raise ValueError(f"Cannot connect to llama.cpp server at {self.llamacpp_base_url}: {e}")
137 | else:
138 | raise ValueError(f"Invalid MODEL_PROVIDER: {self.model_provider}. Must be 'gemini', 'ollama', 'openai', or 'llamacpp'")
139 |
140 | return True
141 |
142 |
143 | # Global configuration instance
144 | config = ResearchConfig()
145 |
146 | # Log configuration for debugging
147 | import logging
148 | logging.basicConfig(level=logging.INFO)
149 | logger = logging.getLogger(__name__)
150 | logger.info(f"Configuration loaded - MAX_SEARCH_QUERIES: {config.max_search_queries}, "
151 | f"MAX_SEARCH_RESULTS_PER_QUERY: {config.max_search_results_per_query}, "
152 | f"MAX_REPORT_SECTIONS: {config.max_report_sections}")
153 |
154 |
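155 | # Example .env (illustrative values; only variables read above are listed):
156 | #   MODEL_PROVIDER=gemini
157 | #   GEMINI_API_KEY=<your-key>
158 | #   MODEL_NAME=gemini-2.5-flash
159 | #   MAX_SEARCH_QUERIES=3
160 | #   MAX_SEARCH_RESULTS_PER_QUERY=3
161 | #   MIN_CREDIBILITY_SCORE=40
162 | #   CITATION_STYLE=apa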
--------------------------------------------------------------------------------
/src/graph.py:
--------------------------------------------------------------------------------
1 | """LangGraph workflow for deep research following best practices.
2 |
3 | Nodes return dict updates that LangGraph automatically merges into state.
4 | This is the recommended pattern per LangGraph documentation.
5 | """
6 |
7 | from langgraph.graph import StateGraph, START, END
8 | from src.state import ResearchState
9 | from src.agents import ResearchPlanner, ResearchSearcher, ResearchSynthesizer, ReportWriter
10 | from src.utils.cache import ResearchCache
11 | from src.config import config
12 | import logging
13 |
14 | logging.basicConfig(level=logging.INFO)
15 | logger = logging.getLogger(__name__)
16 |
17 |
18 | def create_research_graph():
19 | """Create the research workflow graph with enhanced routing and error handling."""
20 |
21 | # Initialize agents
22 | planner = ResearchPlanner()
23 | searcher = ResearchSearcher()
24 | synthesizer = ResearchSynthesizer()
25 | writer = ReportWriter(citation_style=config.citation_style)
26 |
27 | # Define the graph
28 | workflow = StateGraph(ResearchState)
29 |
30 | # Add nodes - functions return dicts that LangGraph merges into state
31 | workflow.add_node("plan", planner.plan)
32 | workflow.add_node("search", searcher.search)
33 | workflow.add_node("synthesize", synthesizer.synthesize)
34 | workflow.add_node("write_report", writer.write_report)
35 |
36 | # Define entry point using START constant (v1.0 best practice)
37 | workflow.add_edge(START, "plan")
38 |
39 | def should_continue_after_plan(state: ResearchState) -> str:
40 | """Validate planning output and route appropriately."""
41 | if state.error:
42 | logger.error(f"Planning failed: {state.error}")
43 | return END
44 |
45 | if not state.plan or not state.plan.search_queries:
46 | logger.error("No search queries generated in plan")
47 | state.error = "Failed to generate valid research plan"
48 | return END
49 |
50 | logger.info(f"Plan validated: {len(state.plan.search_queries)} queries")
51 | return "search"
52 |
53 | def should_continue_after_search(state: ResearchState) -> str:
54 | """Validate search results and route appropriately."""
55 | if state.error:
56 | logger.error(f"Search failed: {state.error}")
57 | return END
58 |
59 | if not state.search_results:
60 | logger.warning("No search results found")
61 | state.error = "No search results available for synthesis"
62 | return END
63 |
64 | # Check minimum threshold
65 | if len(state.search_results) < 2:
66 | logger.warning(f"Insufficient search results: {len(state.search_results)}")
67 | state.error = "Insufficient data for comprehensive research"
68 | return END
69 |
70 | logger.info(f"Search validated: {len(state.search_results)} results")
71 | return "synthesize"
72 |
73 | def should_continue_after_synthesize(state: ResearchState) -> str:
74 | """Validate synthesis output and route appropriately."""
75 | if state.error:
76 | logger.error(f"Synthesis failed: {state.error}")
77 | return END
78 |
79 | if not state.key_findings:
80 | logger.warning("No key findings extracted")
81 | state.error = "Failed to extract findings from search results"
82 | return END
83 |
84 | logger.info(f"Synthesis validated: {len(state.key_findings)} findings")
85 | return "write_report"
86 |
87 | def should_continue_after_report(state: ResearchState) -> str:
88 | """Validate final report and complete workflow."""
89 | if state.error:
90 | logger.error(f"Report generation failed: {state.error}")
91 | elif not state.final_report:
92 | logger.error("No report generated")
93 | state.error = "Report generation produced no output"
94 | else:
95 | logger.info("Report generation complete")
96 |
97 | return END
98 |
99 | # Add conditional edges with validation
100 | workflow.add_conditional_edges(
101 | "plan",
102 | should_continue_after_plan,
103 | {
104 | "search": "search",
105 | END: END
106 | }
107 | )
108 |
109 | workflow.add_conditional_edges(
110 | "search",
111 | should_continue_after_search,
112 | {
113 | "synthesize": "synthesize",
114 | END: END
115 | }
116 | )
117 |
118 | workflow.add_conditional_edges(
119 | "synthesize",
120 | should_continue_after_synthesize,
121 | {
122 | "write_report": "write_report",
123 | END: END
124 | }
125 | )
126 |
127 | workflow.add_conditional_edges(
128 | "write_report",
129 | should_continue_after_report,
130 | {
131 | END: END
132 | }
133 | )
134 |
135 | # Compile the graph
136 | return workflow.compile()
137 |
138 |
139 | async def run_research(topic: str, verbose: bool = True, use_cache: bool = True) -> dict:
140 | """Run the research workflow for a given topic.
141 |
142 | Args:
143 | topic: Research topic
144 | verbose: Enable verbose logging
145 | use_cache: Use cached results if available
146 |
147 | Returns the complete accumulated state as a dict.
148 | """
149 | logger.info(f"Starting research on: {topic}")
150 |
151 | # Check cache first
152 | cache = ResearchCache()
153 | if use_cache:
154 | cached_result = cache.get(topic)
155 | if cached_result:
156 | logger.info("Using cached research result")
157 | return cached_result
158 |
159 | # Initialize state
160 | initial_state = ResearchState(research_topic=topic)
161 |
162 | # Create and run the graph
163 | graph = create_research_graph()
164 |
165 | # Execute the workflow using invoke to get complete final state
166 | # Note: invoke runs once and returns the complete accumulated state
167 | final_state = await graph.ainvoke(initial_state)
168 |
169 | # Cache the result
170 | if use_cache and not final_state.get("error"):
171 | cache.set(topic, final_state)
172 |
173 | if verbose:
174 | logger.info("Workflow completed")
175 | if final_state.get("final_report"):
176 | logger.info(f"Report generated: {len(final_state['final_report'])} characters")
177 |
178 | return final_state
179 |
180 |
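181 | # Example usage (illustrative sketch; mirrors the entry point in main.py):
182 | if __name__ == "__main__":
183 |     import asyncio
184 |     result = asyncio.run(run_research("small language models", use_cache=False))
185 |     print((result.get("final_report") or "")[:500])  # preview of the report, if one was produced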
--------------------------------------------------------------------------------
/src/utils/credibility.py:
--------------------------------------------------------------------------------
1 | """Source credibility scoring based on domain authority and other factors."""
2 |
3 | import re
4 | from typing import List, Dict, Any
5 | from urllib.parse import urlparse
6 | import logging
7 |
8 | logger = logging.getLogger(__name__)
9 |
10 |
11 | class CredibilityScorer:
12 | """Score sources based on domain authority and other credibility factors."""
13 |
14 | # Trusted domains
15 | TRUSTED_DOMAINS = {
16 | # Academic institutions (global)
17 | '.edu', '.ac.uk', '.ac.in', '.edu.in', '.edu.au', '.ac.jp',
18 |
19 | # Government (global)
20 | '.gov', '.gov.uk', '.gov.au', '.gov.ca', '.gov.in', '.europa.eu',
21 |
22 | # International news organizations
23 | 'bbc.com', 'bbc.co.uk', 'reuters.com', 'ap.org', 'npr.org',
24 | 'theguardian.com', 'nytimes.com', 'washingtonpost.com', 'wsj.com',
25 | 'ft.com', 'economist.com', 'bloomberg.com', 'cnbc.com',
26 | 'cnn.com', 'aljazeera.com', 'france24.com', 'dw.com',
27 |
28 | # Indian news organizations
29 | 'thehindu.com', 'indianexpress.com', 'timesofindia.com', 'indiatimes.com',
30 | 'economictimes.com', 'financialexpress.com', 'livemint.com',
31 | 'business-standard.com', 'moneycontrol.com', 'businessline.in',
32 | 'businesstoday.in', 'businessinsider.in',
33 |
34 | # Academic & Research platforms
35 | 'arxiv.org', 'scholar.google.com', 'researchgate.net', 'semanticscholar.org',
36 | 'pubmed.ncbi.nlm.nih.gov', 'ncbi.nlm.nih.gov', 'nih.gov', 'nature.com',
37 | 'sciencedirect.com', 'springer.com', 'wiley.com', 'ieee.org',
38 | 'jstor.org', 'plos.org', 'sciencemag.org', 'cell.com',
39 |
40 | # Medical & Health organizations
41 |         'who.int', 'cdc.gov', 'mayoclinic.org', 'webmd.com',
42 |
43 | # International organizations
44 | 'un.org', 'worldbank.org', 'imf.org', 'wto.org', 'oecd.org',
45 |
46 | # Tech & Science publications
47 |         'scientificamerican.com', 'newscientist.com',
48 | 'technologyreview.com', 'spectrum.ieee.org', 'arstechnica.com',
49 | 'wired.com', 'techcrunch.com', 'theverge.com',
50 |
51 | # Wikipedia & educational resources
52 | 'wikipedia.org', 'britannica.com', 'khanacademy.org',
53 |
54 | # Legal & policy
55 | 'supremecourt.gov', 'congress.gov', 'loc.gov',
56 |
57 | # Statistics & data
58 |         'census.gov', 'bls.gov', 'data.gov',
59 | 'statista.com', 'pewresearch.org', 'gallup.com'
60 | }
61 |
62 | # Suspicious patterns
63 | SUSPICIOUS_PATTERNS = [
64 | r'\.(xyz|tk|ml|ga|cf|gq)$', # Suspicious TLDs
65 | r'bit\.ly|tinyurl|t\.co', # URL shorteners
66 | r'blogspot|wordpress\.com', # Personal blogs (lower credibility)
67 | ]
68 |
69 | def score_url(self, url: str) -> Dict[str, Any]:
70 | """Score a URL's credibility.
71 |
72 | Returns:
73 | Dict with 'score' (0-100), 'factors', and 'level' (low/medium/high)
74 | """
75 | if not url:
76 | return {'score': 0, 'factors': ['No URL'], 'level': 'low'}
77 |
78 | score = 50 # Base score
79 | factors = []
80 |
81 | try:
82 | parsed = urlparse(url)
83 | domain = parsed.netloc.lower()
84 |
85 | # Check for trusted domains
86 | is_trusted = False
87 | for trusted in self.TRUSTED_DOMAINS:
88 | if trusted in domain:
89 | score += 30
90 | factors.append(f'Trusted domain: {trusted}')
91 | is_trusted = True
92 | break
93 |
94 | # Check for suspicious patterns
95 | is_suspicious = False
96 | for pattern in self.SUSPICIOUS_PATTERNS:
97 | if re.search(pattern, domain):
98 | score -= 20
99 | factors.append(f'Suspicious pattern: {pattern}')
100 | is_suspicious = True
101 | break
102 |
103 | # HTTPS bonus
104 | if parsed.scheme == 'https':
105 | score += 5
106 | factors.append('HTTPS enabled')
107 | else:
108 | score -= 10
109 | factors.append('No HTTPS')
110 |
111 | # Domain age indicators (heuristic based on domain structure)
112 | if not is_trusted and not is_suspicious:
113 | # Longer domains might be less credible (often spam)
114 | if len(domain.split('.')) > 3:
115 | score -= 5
116 | factors.append('Complex domain structure')
117 |
118 | # Academic paths
119 | if '/papers/' in parsed.path or '/research/' in parsed.path or '/publications/' in parsed.path:
120 | score += 10
121 | factors.append('Academic/research path')
122 |
123 | # Normalize score to 0-100
124 | score = max(0, min(100, score))
125 |
126 | # Determine level
127 | if score >= 70:
128 | level = 'high'
129 | elif score >= 40:
130 | level = 'medium'
131 | else:
132 | level = 'low'
133 |
134 | return {
135 | 'score': score,
136 | 'factors': factors if factors else ['Standard domain'],
137 | 'level': level,
138 | 'domain': domain
139 | }
140 |
141 | except Exception as e:
142 | logger.warning(f"Error scoring URL {url}: {e}")
143 | return {'score': 30, 'factors': ['Scoring error'], 'level': 'low'}
144 |
145 | def score_search_results(self, results: List) -> List[Dict]:
146 | """Score a list of search results."""
147 | scored = []
148 | for result in results:
149 | if hasattr(result, 'url'):
150 | url = result.url
151 | elif isinstance(result, dict):
152 | url = result.get('url', '')
153 | else:
154 | url = str(result)
155 |
156 | credibility = self.score_url(url)
157 | scored.append({
158 | 'result': result,
159 | 'credibility': credibility
160 | })
161 |
162 | # Sort by credibility score (highest first)
163 | scored.sort(key=lambda x: x['credibility']['score'], reverse=True)
164 | return scored
165 |
166 | def filter_by_credibility(self, results: List, min_score: int = 40) -> List:
167 | """Filter results by minimum credibility score."""
168 | scored = self.score_search_results(results)
169 | filtered = [
170 | item['result'] for item in scored
171 | if item['credibility']['score'] >= min_score
172 | ]
173 | logger.info(f"Filtered {len(results)} -> {len(filtered)} results (min_score={min_score})")
174 | return filtered
175 |
176 |
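177 | # Example usage (illustrative; the URLs are placeholders):
178 | if __name__ == "__main__":
179 |     scorer = CredibilityScorer()
180 |     print(scorer.score_url("https://arxiv.org/html/2404.13501v1"))  # trusted domain + HTTPS -> 'high'
181 |     print(scorer.score_url("http://example.xyz/article"))           # suspicious TLD, no HTTPS -> 'low'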
--------------------------------------------------------------------------------
/outputs/small language models_20251113_230645.txt:
--------------------------------------------------------------------------------
1 | small language models
2 | Deep Research Report
3 |
4 | Executive Summary
5 | This report provides a comprehensive analysis of small language models. The research was conducted across 7 sources and synthesized into 2 key sections.
6 |
7 | Research Objectives
8 | 1. To define small language models (SLMs), identify their key characteristics, and differentiate them from large language models (LLMs).
9 | 2. To analyze the primary advantages (e.g., cost-efficiency, speed, local deployment) and disadvantages (e.g., performance limitations, data requirements) of SLMs.
10 | 3. To explore current and emerging applications of SLMs across various industries and use cases, including edge computing and specialized tasks.
11 | 4. To investigate the technological advancements, optimization techniques (e.g., fine-tuning, quantization), and research trends driving the development and adoption of SLMs.
12 | 5. To assess the future outlook, potential societal impact, and strategic role of SLMs in the broader AI landscape.
13 |
14 | ---
15 |
16 | 1. Introduction to Small Language Models (SLMs)
17 |
18 | 1. Introduction to Small Language Models (SLMs)
19 |
20 | Language models are sophisticated computational frameworks designed to comprehend and generate human language, representing a cornerstone in the field of natural language processing (NLP) [1]. Over recent years, large language models (LLMs) have garnered significant attention due to their remarkable effectiveness and versatility across a myriad of domains. These models, characterized by their immense scale, vast knowledge bases, and deep reasoning capabilities, have demonstrated breakthroughs in applications ranging from complex table processing to advanced medical diagnostics, often leveraging cutting-edge generative techniques [2, 3, 4]. The success of LLMs in numerous domains has solidified their position as a leading technology in artificial intelligence [4].
21 |
22 | However, the rapid evolution of AI research and application has also brought forth the emergence of small language models (SLMs). SLMs are a class of AI models specifically engineered to process and generate human language, akin to LLMs, but with a critical emphasis on efficiency, specialized tasks, and a reduced computational footprint [5]. Unlike their larger counterparts, which are typically designed for broad, general-purpose applications and possess extensive knowledge, SLMs are often more constrained in their scope. This allows for significant optimization in terms of model size, the volume of training data required, and the operational resources necessary for deployment and inference [6].
23 |
24 | The distinction between LLMs and SLMs is increasingly pertinent as the practical deployment of language models expands across various industries and environments. While LLMs are widely recognized for their "vast knowledge and deep reasoning" capabilities, SLMs distinguish themselves through their efficiency and targeted specialization [7]. This fundamental difference highlights that while LLMs excel in handling broad, knowledge-intensive tasks, SLMs are strategically developed to address specific requirements with enhanced agility and frequently with substantially reduced infrastructure demands [7]. The growing interest in SLMs signifies a strategic shift towards more accessible, cost-effective, and deployable AI solutions, particularly advantageous for scenarios involving edge computing, resource-constrained environments, or highly specialized applications. This introductory section lays the groundwork for a comprehensive exploration of SLMs, delving into their architectural nuances, diverse applications, and their evolving role within the broader ecosystem of language AI.
25 |
26 | 2. Strategic Importance and Future Landscape of SLMs
27 |
28 | 2. Strategic Importance and Future Landscape of SLMs
29 |
30 | While Large Language Models (LLMs) have garnered significant attention for their extensive knowledge and deep reasoning capabilities across diverse applications, including table processing and medical advancements [4], [6], Small Language Models (SLMs) are emerging as strategically important due to their specialized nature and efficiency [1], [2], [3]. LLMs, characterized by their vast computational models and capacity to comprehend and generate human language, have demonstrated effectiveness in numerous domains, from general language understanding to complex memory mechanisms [5], [7]. However, their substantial resource requirements for training and deployment present challenges for certain applications and environments.
31 |
32 | SLMs are AI models specifically designed for processing and generating human language, much like LLMs, but with a focus on smaller scales and specialized tasks [2]. This specialization allows SLMs to offer distinct advantages. For instance, SLMs are often quicker to train and deploy, requiring fewer computational resources compared to their larger counterparts [1], [3]. This makes them particularly suitable for edge computing, mobile devices, and applications where latency and resource consumption are critical factors. Their smaller footprint also translates to reduced energy consumption, addressing growing concerns about the environmental impact of large-scale AI models. They excel in specific tasks where their targeted architecture can outperform a generalized LLM, offering a balance between performance and resource efficiency.
33 |
34 | The future landscape of SLMs is poised for significant growth, driven by the increasing demand for efficient, specialized, and accessible AI solutions. While LLMs excel in broad, general-purpose tasks, SLMs are well-suited for niche applications that require high performance within constrained environments. This includes tasks such as localized content moderation, personalized customer support, efficient data summarization on devices, and specialized translation services. The development of SLMs will likely focus on optimizing performance for specific tasks, potentially leading to highly accurate and reliable models for particular domains. The comparison between LLMs and SLMs highlights a complementary relationship rather than a purely competitive one, where each model type serves different strategic purposes within the evolving AI ecosystem [1], [2], [3]. As AI integration becomes more pervasive across various industries, the strategic importance of SLMs will continue to grow, offering scalable and sustainable solutions for a wider range of real-world applications.
35 |
36 | ---
37 |
38 | References
39 |
40 | 1. Large Language Model vs Small Language Model - ML Journey. (n.d.). Retrieved from https://mljourney.com/large-language-model-vs-small-language-model/
41 | 2. Large Language Model for Table Processing: A Survey. (n.d.). Retrieved from https://arxiv.org/html/2402.05121v3
42 | 3. A Survey on the Memory Mechanism of Large Language Model based. (n.d.). Retrieved from https://arxiv.org/html/2404.13501v1
43 | 4. Differences and Comparisons: Small LLMs vs Large Language Models. (n.d.). Retrieved from https://www.ema.co/additional-blogs/addition-blogs/small-llm-vs-large-language-models
44 | 5. Deconfusing ‘AI’ and ‘evolution’ - LessWrong 2.0 viewer. (n.d.). Retrieved from https://www.greaterwrong.com/posts/qvgEbZDcxwTSEBdwD/implicit-and-explicit-learning
45 | 6. Advances in Large Language Models for Medicine. (n.d.). Retrieved from https://arxiv.org/html/2509.18690v1
46 | 7. Fairness in Large Language Models: A Taxonomic Survey. (n.d.). Retrieved from https://arxiv.org/html/2404.01349v2
--------------------------------------------------------------------------------
/outputs/small language models_20251113_230645.md:
--------------------------------------------------------------------------------
1 | # small language models
2 | **Deep Research Report**
3 |
4 | ## Executive Summary
5 | This report provides a comprehensive analysis of small language models. The research was conducted across **7 sources** and synthesized into **2 key sections**.
6 |
7 | ## Research Objectives
8 | 1. To define small language models (SLMs), identify their key characteristics, and differentiate them from large language models (LLMs).
9 | 2. To analyze the primary advantages (e.g., cost-efficiency, speed, local deployment) and disadvantages (e.g., performance limitations, data requirements) of SLMs.
10 | 3. To explore current and emerging applications of SLMs across various industries and use cases, including edge computing and specialized tasks.
11 | 4. To investigate the technological advancements, optimization techniques (e.g., fine-tuning, quantization), and research trends driving the development and adoption of SLMs.
12 | 5. To assess the future outlook, potential societal impact, and strategic role of SLMs in the broader AI landscape.
13 |
14 | ---
15 |
16 | ## 1. Introduction to Small Language Models (SLMs)
17 |
20 | Language models are sophisticated computational frameworks designed to comprehend and generate human language, representing a cornerstone in the field of natural language processing (NLP) [1]. Over recent years, large language models (LLMs) have garnered significant attention due to their remarkable effectiveness and versatility across a myriad of domains. These models, characterized by their immense scale, vast knowledge bases, and deep reasoning capabilities, have demonstrated breakthroughs in applications ranging from complex table processing to advanced medical diagnostics, often leveraging cutting-edge generative techniques [2, 3, 4]. The success of LLMs in numerous domains has solidified their position as a leading technology in artificial intelligence [4].
21 |
22 | However, the rapid evolution of AI research and application has also brought forth the emergence of small language models (SLMs). SLMs are a class of AI models specifically engineered to process and generate human language, akin to LLMs, but with a critical emphasis on efficiency, specialized tasks, and a reduced computational footprint [5]. Unlike their larger counterparts, which are typically designed for broad, general-purpose applications and possess extensive knowledge, SLMs are often more constrained in their scope. This allows for significant optimization in terms of model size, the volume of training data required, and the operational resources necessary for deployment and inference [6].
23 |
24 | The distinction between LLMs and SLMs is increasingly pertinent as the practical deployment of language models expands across various industries and environments. While LLMs are widely recognized for their "vast knowledge and deep reasoning" capabilities, SLMs distinguish themselves through their efficiency and targeted specialization [7]. This fundamental difference highlights that while LLMs excel in handling broad, knowledge-intensive tasks, SLMs are strategically developed to address specific requirements with enhanced agility and frequently with substantially reduced infrastructure demands [7]. The growing interest in SLMs signifies a strategic shift towards more accessible, cost-effective, and deployable AI solutions, particularly advantageous for scenarios involving edge computing, resource-constrained environments, or highly specialized applications. This introductory section lays the groundwork for a comprehensive exploration of SLMs, delving into their architectural nuances, diverse applications, and their evolving role within the broader ecosystem of language AI.
25 |
26 | ## 2. Strategic Importance and Future Landscape of SLMs
27 |
30 | While Large Language Models (LLMs) have garnered significant attention for their extensive knowledge and deep reasoning capabilities across diverse applications, including table processing and medical advancements [4], [6], Small Language Models (SLMs) are emerging as strategically important due to their specialized nature and efficiency [1], [2], [3]. LLMs, characterized by their vast computational scale and capacity to comprehend and generate human language, have demonstrated effectiveness in numerous domains, from general language understanding to complex memory mechanisms [5], [7]. However, their substantial resource requirements for training and deployment present challenges for certain applications and environments.
31 |
32 | SLMs are AI models specifically designed for processing and generating human language, much like LLMs, but with a focus on smaller scales and specialized tasks [2]. This specialization allows SLMs to offer distinct advantages. For instance, SLMs are often quicker to train and deploy, requiring fewer computational resources compared to their larger counterparts [1], [3]. This makes them particularly suitable for edge computing, mobile devices, and applications where latency and resource consumption are critical factors. Their smaller footprint also translates to reduced energy consumption, addressing growing concerns about the environmental impact of large-scale AI models. They excel in specific tasks where their targeted architecture can outperform a generalized LLM, offering a balance between performance and resource efficiency.
33 |
34 | The future landscape of SLMs is poised for significant growth, driven by the increasing demand for efficient, specialized, and accessible AI solutions. While LLMs excel in broad, general-purpose tasks, SLMs are well-suited for niche applications that require high performance within constrained environments. This includes tasks such as localized content moderation, personalized customer support, efficient data summarization on devices, and specialized translation services. The development of SLMs will likely focus on optimizing performance for specific tasks, potentially leading to highly accurate and reliable models for particular domains. The comparison between LLMs and SLMs highlights a complementary relationship rather than a purely competitive one, where each model type serves different strategic purposes within the evolving AI ecosystem [1], [2], [3]. As AI integration becomes more pervasive across various industries, the strategic importance of SLMs will continue to grow, offering scalable and sustainable solutions for a wider range of real-world applications.
35 |
36 | ---
37 |
38 | ## References
39 |
40 | 1. Large Language Model vs Small Language Model - ML Journey. (n.d.). Retrieved from https://mljourney.com/large-language-model-vs-small-language-model/
41 | 2. Large Language Model for Table Processing: A Survey. (n.d.). Retrieved from https://arxiv.org/html/2402.05121v3
42 | 3. A Survey on the Memory Mechanism of Large Language Model based. (n.d.). Retrieved from https://arxiv.org/html/2404.13501v1
43 | 4. Differences and Comparisons: Small LLMs vs Large Language Models. (n.d.). Retrieved from https://www.ema.co/additional-blogs/addition-blogs/small-llm-vs-large-language-models
44 | 5. Deconfusing ‘AI’ and ‘evolution’ - LessWrong 2.0 viewer. (n.d.). Retrieved from https://www.greaterwrong.com/posts/qvgEbZDcxwTSEBdwD/implicit-and-explicit-learning
45 | 6. Advances in Large Language Models for Medicine. (n.d.). Retrieved from https://arxiv.org/html/2509.18690v1
46 | 7. Fairness in Large Language Models: A Taxonomic Survey. (n.d.). Retrieved from https://arxiv.org/html/2404.01349v2
--------------------------------------------------------------------------------
/src/utils/web_utils.py:
--------------------------------------------------------------------------------
1 | """Web search and content extraction utilities."""
2 |
3 | import asyncio
4 | import re
5 | import time
6 | from typing import List, Optional
7 | from ddgs import DDGS
8 | import requests
9 | from bs4 import BeautifulSoup
10 | from urllib.parse import urlparse
11 | import logging
12 |
13 | from src.state import SearchResult
14 |
15 | logging.basicConfig(level=logging.INFO)
16 | logger = logging.getLogger(__name__)
17 |
18 |
19 | def is_valid_url(url: str) -> bool:
20 | """Check if a URL is valid.
21 |
22 | Args:
23 | url: URL string to validate
24 |
25 | Returns:
26 | bool: True if URL is valid, False otherwise
27 | """
28 | try:
29 | result = urlparse(url)
30 | return all([result.scheme, result.netloc])
31 |     except Exception:
32 | return False
33 |
34 |
35 | class WebSearchTool:
36 | """DuckDuckGo web search tool with rate limiting."""
37 |
38 | def __init__(self, max_results: int = 5):
39 | self.max_results = max_results
40 | self.last_search_time = 0
41 | self.min_delay = 2.0 # Minimum 2 seconds between searches
42 |
43 | def search(self, query: str) -> List[SearchResult]:
44 | """Perform a web search using DuckDuckGo with rate limiting.
45 |
46 | Args:
47 | query: Search query string
48 |
49 | Returns:
50 | List[SearchResult]: List of search results
51 | """
52 | try:
53 | # Rate limiting: wait if needed
54 | elapsed = time.time() - self.last_search_time
55 | if elapsed < self.min_delay:
56 | wait_time = self.min_delay - elapsed
57 | logger.info(f"Rate limiting: waiting {wait_time:.1f}s")
58 | time.sleep(wait_time)
59 |
60 | logger.info(f"Searching for: {query}")
61 | results = []
62 |
63 | # Use DDGS with retry logic for rate limits
64 | max_retries = 3
65 | for attempt in range(max_retries):
66 | try:
67 | ddgs = DDGS()
68 | search_results = list(ddgs.text(
69 | query,
70 | max_results=self.max_results
71 | ))
72 |
73 | for result in search_results:
74 | results.append(SearchResult(
75 | query=query,
76 | title=result.get("title", ""),
77 | url=result.get("href", ""),
78 | snippet=result.get("body", "")
79 | ))
80 |
81 | self.last_search_time = time.time()
82 | logger.info(f"Found {len(results)} results for: {query}")
83 | return results
84 |
85 | except Exception as retry_error:
86 | error_str = str(retry_error).lower()
87 | if ("ratelimit" in error_str or "202" in error_str) and attempt < max_retries - 1:
88 | wait_time = (attempt + 1) * 5 # 5, 10, 15 seconds
89 | logger.warning(f"Rate limit hit, waiting {wait_time}s before retry {attempt + 2}/{max_retries}")
90 | time.sleep(wait_time)
91 | else:
92 | raise
93 |
94 | return results
95 |
96 | except Exception as e:
97 | logger.error(f"Search error for '{query}': {str(e)}")
98 | self.last_search_time = time.time()
99 | return []
100 |
101 | async def search_async(self, query: str) -> List[SearchResult]:
102 | """Async version of search.
103 |
104 | Args:
105 | query: Search query string
106 |
107 | Returns:
108 | List[SearchResult]: List of search results
109 | """
110 | return await asyncio.to_thread(self.search, query)
111 |
112 |
113 | class ContentExtractor:
114 | """Extract and clean content from web pages."""
115 |
116 | def __init__(self, timeout: int = 10):
117 | self.timeout = timeout
118 | self.headers = {
119 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
120 | }
121 |
122 | def extract_content(self, url: str) -> Optional[str]:
123 | """Extract main content from a URL.
124 |
125 | Args:
126 | url: URL to extract content from
127 |
128 | Returns:
129 | Optional[str]: Extracted content or None if extraction fails
130 | """
131 | try:
132 | logger.info(f"Extracting content from: {url}")
133 |
134 | response = requests.get(
135 | url,
136 | headers=self.headers,
137 | timeout=self.timeout,
138 | allow_redirects=True
139 | )
140 | response.raise_for_status()
141 |
142 | soup = BeautifulSoup(response.content, 'html.parser')
143 |
144 | # Remove unwanted elements
145 | for element in soup(['script', 'style', 'nav', 'footer', 'header', 'aside']):
146 | element.decompose()
147 |
148 | # Try to find main content
149 | main_content = None
150 | for selector in ['article', 'main', '[role="main"]', '.content', '#content']:
151 | main_content = soup.select_one(selector)
152 | if main_content:
153 | break
154 |
155 | if not main_content:
156 | main_content = soup.body
157 |
158 | if main_content:
159 | text = main_content.get_text(separator='\n', strip=True)
160 | # Clean up excessive whitespace
161 | text = re.sub(r'\n\s*\n', '\n\n', text)
162 | text = re.sub(r' +', ' ', text)
163 |
164 | # Limit to reasonable length (first 5000 chars)
165 |                 text = text[:5000]
166 |
167 | logger.info(f"Extracted {len(text)} characters from {url}")
168 | return text
169 |
170 | return None
171 |
172 | except Exception as e:
173 | logger.warning(f"Failed to extract content from {url}: {str(e)}")
174 | return None
175 |
176 | async def extract_content_async(self, url: str) -> Optional[str]:
177 | """Async version of content extraction.
178 |
179 | Args:
180 | url: URL to extract content from
181 |
182 | Returns:
183 | Optional[str]: Extracted content or None if extraction fails
184 | """
185 | return await asyncio.to_thread(self.extract_content, url)
186 |
187 | async def enhance_search_results_async(
188 | self,
189 | results: List[SearchResult],
190 | max_concurrent: int = 3
191 | ) -> List[SearchResult]:
192 | """Enhance search results with full content extraction (async).
193 |
194 | Args:
195 | results: List of search results to enhance
196 | max_concurrent: Maximum concurrent extraction tasks
197 |
198 | Returns:
199 | List[SearchResult]: Enhanced search results with content
200 | """
201 | semaphore = asyncio.Semaphore(max_concurrent)
202 |
203 | async def enhance_one(result: SearchResult) -> SearchResult:
204 | async with semaphore:
205 | if not result.content:
206 | try:
207 | content = await self.extract_content_async(result.url)
208 | if content:
209 | result.content = content
210 | except Exception as e:
211 | logger.warning(f"Failed to enhance {result.url}: {str(e)}")
212 | return result
213 |
214 | try:
215 | tasks = [enhance_one(result) for result in results]
216 | return await asyncio.gather(*tasks)
217 | except Exception as e:
218 | logger.error(f"Error enhancing results: {str(e)}")
219 | return results
220 |
221 |
--------------------------------------------------------------------------------
/src/callbacks.py:
--------------------------------------------------------------------------------
1 | """Callback system for real-time progress updates in the research workflow."""
2 |
3 | import asyncio
4 | from typing import Callable, Optional, Dict, Any, List
5 | from enum import Enum
6 | from dataclasses import dataclass, field
7 | from datetime import datetime
8 | import logging
9 |
10 | logger = logging.getLogger(__name__)
11 |
12 |
13 | class ResearchStage(Enum):
14 | """Research workflow stages."""
15 | INITIALIZING = "initializing"
16 | PLANNING = "planning"
17 | SEARCHING = "searching"
18 | EXTRACTING = "extracting"
19 | SYNTHESIZING = "synthesizing"
20 | WRITING = "writing"
21 | COMPLETE = "complete"
22 | ERROR = "error"
23 |
24 |
25 | @dataclass
26 | class ProgressUpdate:
27 | """A progress update event."""
28 | stage: ResearchStage
29 | message: str
30 | details: Optional[str] = None
31 | progress_pct: Optional[float] = None # 0-100
32 | metadata: Dict[str, Any] = field(default_factory=dict)
33 | timestamp: datetime = field(default_factory=datetime.now)
34 |
35 |
36 | class ProgressCallback:
37 | """Manages progress callbacks for research workflow."""
38 |
39 | _instance: Optional['ProgressCallback'] = None
40 | _callbacks: List[Callable[[ProgressUpdate], None]] = []
41 | _async_callbacks: List[Callable[[ProgressUpdate], Any]] = []
42 | _updates: List[ProgressUpdate] = []
43 | _current_stage: ResearchStage = ResearchStage.INITIALIZING
44 |
45 | def __new__(cls):
46 | """Singleton pattern to ensure one global callback manager."""
47 | if cls._instance is None:
48 | cls._instance = super().__new__(cls)
49 | cls._instance._callbacks = []
50 | cls._instance._async_callbacks = []
51 | cls._instance._updates = []
52 | cls._instance._current_stage = ResearchStage.INITIALIZING
53 | return cls._instance
54 |
55 | def reset(self):
56 | """Reset state for a new research session."""
57 | self._updates = []
58 | self._current_stage = ResearchStage.INITIALIZING
59 |
60 | def register(self, callback: Callable[[ProgressUpdate], None]):
61 | """Register a synchronous callback function."""
62 | if callback not in self._callbacks:
63 | self._callbacks.append(callback)
64 |
65 | def register_async(self, callback: Callable[[ProgressUpdate], Any]):
66 | """Register an async callback function."""
67 | if callback not in self._async_callbacks:
68 | self._async_callbacks.append(callback)
69 |
70 | def unregister(self, callback: Callable):
71 | """Unregister a callback function."""
72 | if callback in self._callbacks:
73 | self._callbacks.remove(callback)
74 | if callback in self._async_callbacks:
75 | self._async_callbacks.remove(callback)
76 |
77 | def clear_callbacks(self):
78 | """Clear all registered callbacks."""
79 | self._callbacks = []
80 | self._async_callbacks = []
81 |
82 | async def emit(self, update: ProgressUpdate):
83 | """Emit a progress update to all registered callbacks."""
84 | self._current_stage = update.stage
85 | self._updates.append(update)
86 |
87 | # Log the update
88 | logger.info(f"[{update.stage.value}] {update.message}" +
89 | (f" - {update.details}" if update.details else ""))
90 |
91 | # Call sync callbacks
92 | for callback in self._callbacks:
93 | try:
94 | callback(update)
95 | except Exception as e:
96 | logger.error(f"Error in sync callback: {e}")
97 |
98 | # Call async callbacks
99 | for callback in self._async_callbacks:
100 | try:
101 | await callback(update)
102 | except Exception as e:
103 | logger.error(f"Error in async callback: {e}")
104 |
105 | @property
106 | def current_stage(self) -> ResearchStage:
107 | return self._current_stage
108 |
109 | @property
110 | def updates(self) -> List[ProgressUpdate]:
111 | return self._updates.copy()
112 |
113 |
114 | # Global progress callback instance
115 | progress_callback = ProgressCallback()
116 |
117 |
118 | # Convenience functions for emitting progress
119 | async def emit_progress(
120 | stage: ResearchStage,
121 | message: str,
122 | details: Optional[str] = None,
123 | progress_pct: Optional[float] = None,
124 | **metadata
125 | ):
126 | """Emit a progress update."""
127 | update = ProgressUpdate(
128 | stage=stage,
129 | message=message,
130 | details=details,
131 | progress_pct=progress_pct,
132 | metadata=metadata
133 | )
134 | await progress_callback.emit(update)
135 |
136 |
137 | async def emit_planning_start(topic: str):
138 | """Emit planning stage start."""
139 | await emit_progress(
140 | ResearchStage.PLANNING,
141 | "Creating research plan",
142 | f"Topic: {topic}",
143 | progress_pct=5
144 | )
145 |
146 |
147 | async def emit_planning_complete(num_queries: int, num_sections: int):
148 | """Emit planning stage completion."""
149 | await emit_progress(
150 | ResearchStage.PLANNING,
151 | "Research plan created",
152 | f"{num_queries} search queries, {num_sections} report sections planned",
153 | progress_pct=15
154 | )
155 |
156 |
157 | async def emit_search_start(query: str, query_num: int, total_queries: int):
158 | """Emit search start."""
159 | base_progress = 15
160 | search_progress_range = 35 # 15% to 50%
161 | progress = base_progress + (query_num / total_queries) * search_progress_range
162 |
163 | await emit_progress(
164 | ResearchStage.SEARCHING,
165 | f"Searching ({query_num}/{total_queries})",
166 | f"Query: {query[:60]}..." if len(query) > 60 else f"Query: {query}",
167 | progress_pct=progress
168 | )
169 |
170 |
171 | async def emit_search_results(num_results: int, query_num: int, total_queries: int):
172 | """Emit search results found."""
173 | base_progress = 15
174 | search_progress_range = 35
175 | progress = base_progress + ((query_num + 0.5) / total_queries) * search_progress_range
176 |
177 | await emit_progress(
178 | ResearchStage.SEARCHING,
179 | f"Found {num_results} results",
180 | f"Query {query_num}/{total_queries} complete",
181 | progress_pct=progress
182 | )
183 |
184 |
185 | async def emit_extraction_start(url: str, current: int, total: int):
186 | """Emit content extraction start."""
187 | base_progress = 50
188 | extract_progress_range = 15 # 50% to 65%
189 | progress = base_progress + (current / total) * extract_progress_range
190 |
191 | # Extract domain from URL for cleaner display
192 | try:
193 | from urllib.parse import urlparse
194 | domain = urlparse(url).netloc
195 |     except Exception:
196 | domain = url[:40]
197 |
198 | await emit_progress(
199 | ResearchStage.EXTRACTING,
200 | f"Extracting content ({current}/{total})",
201 | f"Source: {domain}",
202 | progress_pct=progress
203 | )
204 |
205 |
206 | async def emit_extraction_complete(num_extracted: int, total_chars: int):
207 | """Emit extraction completion."""
208 | await emit_progress(
209 | ResearchStage.EXTRACTING,
210 | f"Content extraction complete",
211 | f"{num_extracted} pages, {total_chars:,} characters extracted",
212 | progress_pct=65
213 | )
214 |
215 |
216 | async def emit_synthesis_start(num_sources: int):
217 | """Emit synthesis stage start."""
218 | await emit_progress(
219 | ResearchStage.SYNTHESIZING,
220 | "Analyzing sources",
221 | f"Synthesizing {num_sources} sources into key findings",
222 | progress_pct=68
223 | )
224 |
225 |
226 | async def emit_synthesis_progress(message: str):
227 | """Emit synthesis progress."""
228 | await emit_progress(
229 | ResearchStage.SYNTHESIZING,
230 | message,
231 | progress_pct=72
232 | )
233 |
234 |
235 | async def emit_synthesis_complete(num_findings: int):
236 | """Emit synthesis completion."""
237 | await emit_progress(
238 | ResearchStage.SYNTHESIZING,
239 | "Synthesis complete",
240 | f"Extracted {num_findings} key findings",
241 | progress_pct=78
242 | )
243 |
244 |
245 | async def emit_writing_start(num_sections: int):
246 | """Emit writing stage start."""
247 | await emit_progress(
248 | ResearchStage.WRITING,
249 | "Writing report",
250 | f"Generating {num_sections} sections",
251 | progress_pct=80
252 | )
253 |
254 |
255 | async def emit_writing_section(section_title: str, section_num: int, total_sections: int):
256 | """Emit section writing progress."""
257 | base_progress = 80
258 | writing_progress_range = 18 # 80% to 98%
259 | progress = base_progress + (section_num / total_sections) * writing_progress_range
260 |
261 | await emit_progress(
262 | ResearchStage.WRITING,
263 | f"Writing section ({section_num}/{total_sections})",
264 | f"Section: {section_title[:50]}..." if len(section_title) > 50 else f"Section: {section_title}",
265 | progress_pct=progress
266 | )
267 |
268 |
269 | async def emit_writing_complete(report_length: int):
270 | """Emit writing completion."""
271 | await emit_progress(
272 | ResearchStage.WRITING,
273 | "Report writing complete",
274 | f"Generated {report_length:,} character report",
275 | progress_pct=98
276 | )
277 |
278 |
279 | async def emit_complete(topic: str, sources: int, findings: int):
280 | """Emit research completion."""
281 | await emit_progress(
282 | ResearchStage.COMPLETE,
283 | "Research complete!",
284 | f"{sources} sources analyzed, {findings} insights extracted",
285 | progress_pct=100
286 | )
287 |
288 |
289 | async def emit_error(error_message: str):
290 | """Emit error."""
291 | await emit_progress(
292 | ResearchStage.ERROR,
293 | "Error occurred",
294 | error_message,
295 | progress_pct=None
296 | )
297 |
298 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | """Interactive Chainlit interface for Deep Research Agent with real-time progress updates."""
2 |
3 | import asyncio
4 | import chainlit as cl
5 | from pathlib import Path
6 | from datetime import datetime
7 |
8 | from src.config import config
9 | from src.state import ResearchState
10 | from src.graph import create_research_graph
11 | from src.utils.exports import ReportExporter
12 | from src.utils.history import ResearchHistory
13 | from src.callbacks import (
14 | progress_callback,
15 | ProgressUpdate,
16 | ResearchStage,
17 | emit_complete
18 | )
19 |
20 |
21 | # Stage markers for visual display
22 | STAGE_ICONS = {
23 | ResearchStage.INITIALIZING: "[...]",
24 | ResearchStage.PLANNING: "[1/5]",
25 | ResearchStage.SEARCHING: "[2/5]",
26 | ResearchStage.EXTRACTING: "[3/5]",
27 | ResearchStage.SYNTHESIZING: "[4/5]",
28 | ResearchStage.WRITING: "[5/5]",
29 | ResearchStage.COMPLETE: "[OK]",
30 | ResearchStage.ERROR: "[ERR]"
31 | }
32 |
33 | STAGE_NAMES = {
34 | ResearchStage.INITIALIZING: "Initializing",
35 | ResearchStage.PLANNING: "Planning Research",
36 | ResearchStage.SEARCHING: "Searching Web",
37 | ResearchStage.EXTRACTING: "Extracting Content",
38 | ResearchStage.SYNTHESIZING: "Synthesizing Findings",
39 | ResearchStage.WRITING: "Writing Report",
40 | ResearchStage.COMPLETE: "Complete",
41 | ResearchStage.ERROR: "Error"
42 | }
43 |
44 |
45 | class ProgressDisplay:
46 | """Manages the progress display for a research session."""
47 |
48 | def __init__(self):
49 |         self.message: cl.Message | None = None
50 | self.updates: list[ProgressUpdate] = []
51 | self.current_stage: ResearchStage = ResearchStage.INITIALIZING
52 |         self.start_time: datetime | None = None
53 |
54 | async def initialize(self, topic: str):
55 | """Initialize the progress display."""
56 | self.start_time = datetime.now()
57 | self.updates = []
58 | self.current_stage = ResearchStage.INITIALIZING
59 |
60 | content = self._render_progress(topic)
61 | self.message = cl.Message(content=content)
62 | await self.message.send()
63 |
64 | async def update(self, progress_update: ProgressUpdate):
65 | """Update the progress display with a new update."""
66 | self.updates.append(progress_update)
67 | self.current_stage = progress_update.stage
68 |
69 | if self.message:
70 | self.message.content = self._render_progress()
71 | await self.message.update()
72 |
73 |     def _render_progress(self, topic: str | None = None) -> str:
74 | """Render the progress display as markdown."""
75 | # Calculate elapsed time
76 | elapsed = ""
77 | if self.start_time:
78 | delta = datetime.now() - self.start_time
79 |             elapsed = f" ({int(delta.total_seconds())}s)"
80 |
81 | # Build progress bar
82 | stages_order = [
83 | ResearchStage.PLANNING,
84 | ResearchStage.SEARCHING,
85 | ResearchStage.EXTRACTING,
86 | ResearchStage.SYNTHESIZING,
87 | ResearchStage.WRITING,
88 | ResearchStage.COMPLETE
89 | ]
90 |
91 | # Get current progress percentage
92 | current_pct = 0
93 | if self.updates:
94 | for update in reversed(self.updates):
95 | if update.progress_pct is not None:
96 | current_pct = update.progress_pct
97 | break
98 |
99 | # Build visual progress bar
100 | bar_length = 20
101 | filled = int(bar_length * current_pct / 100)
102 | bar = "#" * filled + "-" * (bar_length - filled)
103 |
104 | content = f"""## Research Progress{elapsed}
105 |
106 | **Progress:** [{bar}] {current_pct:.0f}%
107 |
108 | ---
109 |
110 | """
111 |
112 | # Show stage status
113 | current_stage_idx = -1
114 | if self.current_stage in stages_order:
115 | current_stage_idx = stages_order.index(self.current_stage)
116 |
117 | for idx, stage in enumerate(stages_order):
118 | icon = STAGE_ICONS.get(stage, "[...]")
119 | name = STAGE_NAMES.get(stage, stage.value)
120 |
121 | if idx < current_stage_idx:
122 | # Completed stage
123 | content += f"[DONE] ~~{name}~~\n"
124 | elif idx == current_stage_idx:
125 | # Current stage
126 | content += f"**{icon} {name}** <- *Current*\n"
127 | else:
128 | # Pending stage
129 | content += f"[ ] {name}\n"
130 |
131 | content += "\n---\n\n"
132 |
133 | # Show recent activity log (last 8 updates)
134 | content += "### Activity Log\n\n"
135 |
136 | if self.updates:
137 | recent_updates = self.updates[-8:]
138 | for update in reversed(recent_updates):
139 | icon = STAGE_ICONS.get(update.stage, "*")
140 | time_str = update.timestamp.strftime("%H:%M:%S")
141 |
142 | msg = f"`{time_str}` {icon} **{update.message}**"
143 | if update.details:
144 | msg += f"\n _{update.details}_"
145 | content += msg + "\n\n"
146 | else:
147 | content += "_Starting research..._\n"
148 |
149 | return content
150 |
151 |
152 | async def run_research_with_updates(topic: str, progress_display: ProgressDisplay):
153 | """Run research with real-time updates to the UI."""
154 |
155 | # Reset callback state
156 | progress_callback.reset()
157 |
158 | # Register async callback for UI updates
159 | async def on_progress(update: ProgressUpdate):
160 | await progress_display.update(update)
161 |
162 | progress_callback.register_async(on_progress)
163 |
164 | try:
165 | # Initialize state
166 | initial_state = ResearchState(research_topic=topic)
167 |
168 | # Create graph
169 | graph = create_research_graph()
170 |
171 | # Execute workflow and get final state
172 | final_state = await graph.ainvoke(initial_state)
173 |
174 | # Emit completion
175 | search_results = final_state.get('search_results', [])
176 | key_findings = final_state.get('key_findings', [])
177 | await emit_complete(topic, len(search_results), len(key_findings))
178 |
179 | return final_state
180 |
181 | finally:
182 | # Cleanup callback
183 | progress_callback.unregister(on_progress)
184 |
185 |
186 | @cl.on_chat_start
187 | async def start():
188 | """Initialize the chat session."""
189 | await cl.Message(
190 | content="""# Deep Research Agent
191 |
192 | Welcome! I'm your AI research assistant powered by **LangGraph** and your configured LLM provider (Ollama, llama.cpp, Gemini, or OpenAI).
193 |
194 | ## How it works:
195 | 1. **Tell me** what you want to research
196 | 2. I'll **search** the web for authoritative sources
197 | 3. **Synthesize** findings using AI
198 | 4. **Generate** a comprehensive report
199 |
200 | ## Features:
201 | - Real-time web search with DuckDuckGo
202 | - Source credibility scoring
203 | - Multiple export formats (MD, HTML, TXT)
204 | - Live progress tracking
205 |
206 | ---
207 |
208 | **What would you like to research today?**
209 |
210 | _Example topics:_
211 | - "Future of quantum computing in 2025"
212 | - "How does WebSocket streaming work?"
213 | - "Best practices for microservices architecture"
214 | """,
215 | author="Research Agent"
216 | ).send()
217 |
218 |
219 | @cl.on_message
220 | async def main(message: cl.Message):
221 | """Handle user messages."""
222 |
223 | topic = message.content.strip()
224 |
225 | if not topic:
226 | await cl.Message(
227 | content="WARNING: Please provide a research topic.",
228 | author="System"
229 | ).send()
230 | return
231 |
232 | # Validate config
233 | try:
234 | config.validate_config()
235 | except ValueError as e:
236 | await cl.Message(
237 | content=f"**Configuration Error:** {str(e)}\n\n"
238 | "Please set your API key in the `.env` file.",
239 | author="System"
240 | ).send()
241 | return
242 |
243 | # Show starting message
244 | await cl.Message(
245 | content=f"""## Starting Research
246 |
247 | **Topic:** _{topic}_
248 |
249 | **Configuration:**
250 | - Model: `{config.model_name}`
251 | - Max Queries: `{config.max_search_queries}`
252 | - Max Sections: `{config.max_report_sections}`
253 |
254 | _Research will begin shortly..._
255 | """,
256 | author="Research Agent"
257 | ).send()
258 |
259 | # Initialize progress display
260 | progress_display = ProgressDisplay()
261 | await progress_display.initialize(topic)
262 |
263 | try:
264 | # Run research with updates
265 | final_state = await run_research_with_updates(topic, progress_display)
266 |
267 | # Check for errors
268 | if final_state.get("error"):
269 | await cl.Message(
270 | content=f"## Research Failed\n\n{final_state.get('error')}",
271 | author="System"
272 | ).send()
273 | return
274 |
275 | # Display detailed summary with metrics
276 | search_results = final_state.get('search_results', [])
277 | key_findings = final_state.get('key_findings', [])
278 | report_sections = final_state.get('report_sections', [])
279 | credibility_scores = final_state.get('credibility_scores', [])
280 |
281 | # Count unique sources
282 | unique_sources = set()
283 | for result in search_results:
284 | if hasattr(result, 'url') and result.url:
285 | unique_sources.add(result.url)
286 |
287 | # Count high-credibility sources
288 | high_cred_count = sum(1 for score in credibility_scores if score.get('level') == 'high')
289 | medium_cred_count = sum(1 for score in credibility_scores if score.get('level') == 'medium')
290 |
291 | # Get LLM tracking info
292 | llm_calls = final_state.get('llm_calls', 0)
293 | total_input_tokens = final_state.get('total_input_tokens', 0)
294 | total_output_tokens = final_state.get('total_output_tokens', 0)
295 | total_tokens = total_input_tokens + total_output_tokens
296 |
297 | # Calculate elapsed time
298 | elapsed_seconds = 0
299 | if progress_display.start_time:
300 |             elapsed_seconds = int((datetime.now() - progress_display.start_time).total_seconds())
301 |
302 | summary_content = f"""## Research Complete!
303 |
304 | ### Data Collected
305 | | Metric | Value |
306 | |--------|-------|
307 | | Unique Sources | **{len(unique_sources)}** |
308 | | High Credibility | **{high_cred_count}** |
309 | | Medium Credibility | **{medium_cred_count}** |
310 | | Key Insights | **{len(key_findings)}** |
311 | | Report Sections | **{len(report_sections)}** |
312 |
313 | ### Performance
314 | | Metric | Value |
315 | |--------|-------|
316 | | Total Time | **{elapsed_seconds}s** |
317 | | LLM Calls | **{llm_calls}** |
318 | | Input Tokens | **{total_input_tokens:,}** |
319 | | Output Tokens | **{total_output_tokens:,}** |
320 | | Total Tokens | **{total_tokens:,}** |
321 | """
322 |
323 | await cl.Message(
324 | content=summary_content,
325 | author="Research Agent"
326 | ).send()
327 |
328 | # Save and display report
329 | if final_state.get("final_report"):
330 | report = final_state["final_report"]
331 |
332 | # Save to file
333 | output_dir = Path("outputs")
334 | output_dir.mkdir(exist_ok=True)
335 |
336 | timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
337 | safe_topic = "".join(c if c.isalnum() or c in (' ', '-', '_') else '_' for c in topic)
338 | safe_topic = safe_topic[:30].strip()
339 | filename = f"{safe_topic}_{timestamp}.md"
340 | output_file = output_dir / filename
341 | output_file.write_text(report, encoding='utf-8')
342 |
343 | # Add to history
344 | history = ResearchHistory()
345 | history.add_research(
346 | topic=topic,
347 | output_file=output_file,
348 | metadata={
349 | 'sources': len(unique_sources),
350 | 'sections': len(report_sections),
351 | 'findings': len(key_findings),
352 | 'elapsed_seconds': elapsed_seconds,
353 | 'total_tokens': total_tokens
354 | }
355 | )
356 |
357 | report_header = f"""## Final Report
358 |
359 | **Report Statistics:**
360 | - Length: **{len(report):,}** characters
361 | - Saved to: `{output_file}`
362 |
363 | ---
364 |
365 | {report}"""
366 |
367 | await cl.Message(
368 | content=report_header,
369 | author="Research Agent"
370 | ).send()
371 |
372 | # Export to multiple formats
373 | exporter = ReportExporter()
374 | base_path = output_file.with_suffix('')
375 |
376 | # Export HTML
377 | html_file = exporter.export(report, base_path, format='html')
378 |
379 | # Export TXT
380 | txt_file = exporter.export(report, base_path, format='txt')
381 |
382 | # Offer downloads
383 | elements = [
384 | cl.File(
385 | name=filename,
386 | path=str(output_file),
387 | display="inline"
388 | ),
389 | cl.File(
390 | name=html_file.name,
391 | path=str(html_file),
392 | display="inline"
393 | ),
394 | cl.File(
395 | name=txt_file.name,
396 | path=str(txt_file),
397 | display="inline"
398 | )
399 | ]
400 |
401 | await cl.Message(
402 | content=f"""## Download Report
403 |
404 | Download your report in multiple formats:
405 |
406 | | Format | File |
407 | |--------|------|
408 | | Markdown | `{filename}` |
409 | | HTML | `{html_file.name}` |
410 | | Plain Text | `{txt_file.name}` |
411 | """,
412 | elements=elements,
413 | author="Research Agent"
414 | ).send()
415 |
416 | # Ask for next research with suggestions
417 | await cl.Message(
418 | content="""---
419 |
420 | ## Ready for Another Research?
421 |
422 | Type your next research topic below, or try one of these:
423 |
424 | - *"Future trends in [your industry]"*
425 | - *"Comparative analysis of [topic A] vs [topic B]"*
426 | - *"Best practices for [specific challenge]"*
427 | - *"Impact of [technology/trend] on [domain]"*
428 |
429 | **What would you like to research next?**""",
430 | author="Research Agent"
431 | ).send()
432 | else:
433 | await cl.Message(
434 | content="WARNING: No report was generated. Please try again.",
435 | author="System"
436 | ).send()
437 |
438 | except Exception as e:
439 | import traceback
440 | error_details = traceback.format_exc()
441 | await cl.Message(
442 | content=f"""## Unexpected Error
443 |
444 | **Error:** {str(e)}
445 | <details>
446 | <summary>Technical Details</summary>
447 |
448 | ```
449 | {error_details}
450 | ```
451 |
452 | </details>
453 |
454 |
455 | Please check the logs and try again.
456 | """,
457 | author="System"
458 | ).send()
459 |
460 |
461 | if __name__ == "__main__":
462 | from chainlit.cli import run_chainlit
463 | run_chainlit(__file__)
464 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deep Research Agent
2 |
3 | [](https://www.python.org/downloads/)
4 | [](https://opensource.org/licenses/MIT)
5 | [](https://github.com/langchain-ai/langgraph)
6 |
7 | A production-ready multi-agent autonomous research system built with LangGraph and LangChain. Four specialized agents work together to conduct comprehensive research on any topic and generate detailed, citation-backed reports with credibility scoring and quality metrics. Supports both local models (Ollama) and cloud APIs (Gemini).
8 |
9 | **Actively seeking opportunities as an ML Engineer II / Data Scientist II / AI Engineer II**
10 |
11 | ## Demo
12 | https://github.com/user-attachments/assets/df8404c6-7423-4a49-864a-bd4d59885c1b
13 |
14 | *Watch the full demo video to see the Deep Research Agent in action, showcasing the multi-agent workflow, real-time progress updates, and comprehensive report generation.*
15 |
16 | ## Features
17 |
18 | ### Core Capabilities
19 |
20 | - **Multi-Agent Architecture**: Four specialized autonomous agents (ResearchPlanner, ResearchSearcher, ResearchSynthesizer, ReportWriter) orchestrated by LangGraph's StateGraph. Each agent operates independently with its own tools and decision-making logic.
21 |
22 | - **Autonomous Research**: The search agent dynamically decides when to search, which queries to execute, and which sources warrant deep content extraction. This adaptive approach ensures quality over quantity, typically targeting 5-8 high-quality sources.
23 |
24 | - **Credibility Scoring**: Automatic source evaluation using domain authority analysis. Sources are scored (0-100) based on trusted domains (.edu, .gov), HTTPS, suspicious patterns, and academic indicators. Low-credibility sources are automatically filtered before synthesis.
25 |
26 | - **Quality Validation**: Section-level validation ensures minimum length requirements (500+ characters) and quality standards. Retry logic with exponential backoff handles failures gracefully, with up to 3 attempts per operation.
27 |
28 | - **Multi-Format Export**: Reports are automatically exported in three formats: Markdown (original), HTML (styled for web), and plain text (markdown stripped).
29 |
30 | - **LLM Usage Tracking**: Real-time monitoring of API calls, input/output tokens, and estimated costs. Per-agent breakdowns help identify optimization opportunities.
31 |
32 | - **Research Caching**: Intelligent file-based caching with a 7-day TTL reduces redundant API calls, and MD5-based topic hashing ensures accurate cache lookups (a key-derivation sketch follows this list).
33 |
34 | - **Web Interface**: Interactive Chainlit-based UI provides real-time progress updates, quality metrics, and multiple format downloads.
35 |
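As a concrete illustration of the caching scheme above, here is a minimal sketch of MD5-keyed, TTL-bound lookups. It is illustrative only: the real implementation lives in `src/utils/cache.py`, and the helper names and JSON-on-disk layout here are assumptions.

```python
"""Minimal sketch of MD5-keyed, 7-day-TTL research caching (illustrative)."""

import hashlib
import json
import time
from pathlib import Path
from typing import Optional

CACHE_DIR = Path(".cache/research")  # matches the project structure shown below
TTL_SECONDS = 7 * 24 * 60 * 60       # 7-day TTL

def cache_path_for(topic: str) -> Path:
    # Normalize the topic so "LLMs" and " llms " hash identically,
    # then derive a stable filename from the MD5 digest.
    digest = hashlib.md5(topic.strip().lower().encode("utf-8")).hexdigest()
    return CACHE_DIR / f"{digest}.json"

def load_cached(topic: str) -> Optional[dict]:
    path = cache_path_for(topic)
    if not path.exists():
        return None
    if time.time() - path.stat().st_mtime > TTL_SECONDS:
        return None  # stale: older than the 7-day TTL
    return json.loads(path.read_text(encoding="utf-8"))

def store(topic: str, result: dict) -> None:
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    cache_path_for(topic).write_text(json.dumps(result), encoding="utf-8")
```
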
36 | ## Architecture
37 |
38 | The system implements a four-stage pipeline orchestrated by LangGraph's StateGraph:
39 |
40 | ```
41 | ResearchPlanner → ResearchSearcher → ResearchSynthesizer → ReportWriter
42 | ```
43 |
44 | ### Agent Responsibilities
45 |
46 | **ResearchPlanner**
47 | - Analyzes research topics and generates 3-5 research objectives
48 | - Creates 3 targeted search queries covering different aspects
49 | - Designs report outline with up to 8 sections
50 | - Provides strategic guidance for the autonomous search agent
51 |
52 | **ResearchSearcher** (Autonomous Agent)
53 | - LangChain-powered autonomous agent using `create_agent()`
54 | - Dynamically decides which queries to execute and when to extract content
55 | - Uses `web_search` and `extract_webpage_content` tools autonomously
56 | - Adapts research strategy based on intermediate findings
57 | - Targets 5-8 high-quality sources with deep content extraction
58 | - All sources are scored for credibility and filtered before synthesis
59 |
60 | **ResearchSynthesizer**
61 | - Analyzes aggregated search results with credibility awareness
62 | - Prioritizes HIGH-credibility sources (score ≥70) in findings
63 | - Resolves contradictions using credibility hierarchy
64 | - Extracts key insights and identifies patterns
65 | - Progressive truncation handles token limit errors gracefully
66 |
67 | **ReportWriter**
68 | - Generates structured report sections with consistent academic tone
69 | - Adds proper citations with configurable styles (APA, MLA, Chicago, IEEE); a formatting sketch follows this list
70 | - Validates section quality and re-generates on failures
71 | - Compiles final markdown document with reference section
72 |
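The snippet below sketches style-keyed citation formatting. Only the APA template is taken from the sample reports in `outputs/`; the function name and the other templates are simplified assumptions (the real formatter is `src/utils/citations.py`).

```python
"""Sketch of style-keyed citation formatting (illustrative; see src/utils/citations.py)."""

def format_citation(title: str, url: str, style: str = "apa") -> str:
    # Hypothetical templates; only the APA form mirrors the sample reports.
    templates = {
        "apa": f"{title}. (n.d.). Retrieved from {url}",
        "mla": f'"{title}." Web. {url}',
        "chicago": f"{title}. Accessed online. {url}.",
        "ieee": f"{title}. [Online]. Available: {url}",
    }
    return templates.get(style.lower(), templates["apa"])

# Example: reproduces an entry from the sample report's reference list.
print(format_citation(
    "Large Language Model for Table Processing: A Survey",
    "https://arxiv.org/html/2402.05121v3",
))
```
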
73 | ### Workflow
74 |
75 | 1. **Planning**: LLM generates research plan with objectives, queries, and outline
76 | 2. **Autonomous Search**: Agent executes searches and extracts content from promising sources
77 | 3. **Credibility Scoring**: All sources scored and filtered (default threshold: 40)
78 | 4. **Synthesis**: Findings extracted with credibility-aware prioritization
79 | 5. **Report Generation**: Structured sections written with citations
80 | 6. **Export**: Reports saved in multiple formats to `outputs/` directory
81 |
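As a rough sketch of how steps 1-5 are orchestrated, the four agents can be wired as a linear `StateGraph`. The node names and stub bodies below are assumptions; the project's actual graph, including caching and error handling, lives in `src/graph.py`.

```python
"""Sketch of the linear four-agent pipeline (illustrative; the real wiring is in src/graph.py)."""

from langgraph.graph import StateGraph, START, END

from src.state import ResearchState  # shared Pydantic state


def _stub(state: ResearchState) -> dict:
    """Placeholder node; the real agent nodes live in src/agents.py."""
    return {}


def build_graph():
    graph = StateGraph(ResearchState)

    # One node per agent, executed in order.
    graph.add_node("planner", _stub)
    graph.add_node("searcher", _stub)
    graph.add_node("synthesizer", _stub)
    graph.add_node("writer", _stub)

    graph.add_edge(START, "planner")
    graph.add_edge("planner", "searcher")
    graph.add_edge("searcher", "synthesizer")
    graph.add_edge("synthesizer", "writer")
    graph.add_edge("writer", END)

    return graph.compile()  # compiled graph exposes .invoke() / .ainvoke()
```
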
82 | ## Installation
83 |
84 | ### Prerequisites
85 |
86 | - Python 3.11+
87 | - pip or uv package manager
88 | - [Ollama](https://ollama.com/) (for local models) **OR** Google Gemini API key ([Get one free](https://makersuite.google.com/app/apikey)) **OR** OpenAI API key ([Get one here](https://platform.openai.com/api-keys))
89 |
90 | ### Setup
91 |
92 | ```bash
93 | # Clone the repository
94 | git clone https://github.com/tarun7r/deep-research-agent.git
95 | cd deep-research-agent
96 |
97 | # Create virtual environment
98 | python -m venv .venv
99 | source .venv/bin/activate # Windows: .venv\Scripts\activate
100 |
101 | # Install dependencies
102 | pip install -r requirements.txt
103 |
104 | # Configure (choose one):
105 | # Option A - Ollama: Install Ollama, pull a model (e.g., ollama pull qwen2.5:7b)
106 | # Option B - Gemini: Get API key from https://makersuite.google.com/app/apikey
107 | # Option C - OpenAI: Get API key from https://platform.openai.com/api-keys
108 |
109 | # Create .env file (see Configuration section below)
110 | ```
111 |
112 | ### Using Ollama (Local Models)
113 |
114 | Ollama allows you to run powerful LLMs locally on your machine without API costs or internet dependency.
115 |
116 | **Quick Start:**
117 |
118 | ```bash
119 | # Install Ollama (macOS/Linux)
120 | curl -fsSL https://ollama.com/install.sh | sh
121 |
122 | # Or download from https://ollama.com for other platforms
123 |
124 | # Pull a recommended model
125 | ollama pull qwen2.5:7b
126 |
127 | # Verify it's working
128 | ollama run qwen2.5:7b "Hello, test message"
129 | ```
130 |
131 | **Configuration:**
132 |
133 | Create a `.env` file:
134 | ```bash
135 | MODEL_PROVIDER=ollama
136 | MODEL_NAME=qwen2.5:7b
137 | SUMMARIZATION_MODEL=qwen2.5:7b
138 | ```
139 |
140 | > **Tip**: Ollama runs a local server at `http://localhost:11434` by default. The agent will automatically connect to it.
141 |
142 | ### Using llama.cpp
143 |
144 | llama.cpp provides direct control over model execution and delivers maximum performance on Apple M1/M2/M3 Macs via Metal acceleration.
145 |
146 | **Quick Start:**
147 |
148 | ```bash
149 |
150 | # 1. Download a GGUF model (e.g., Qwen2.5 7B, q4_k_m quantization)
151 | mkdir -p ~/models
152 | # Download from Hugging Face into the directory the server flag points at
153 | huggingface-cli download Qwen/Qwen2.5-7B-Instruct-GGUF qwen2.5-7b-instruct-q4_k_m.gguf --local-dir ~/models
154 |
155 | # 2. Start llama.cpp server with tool calling support
156 | # (assumes llama.cpp has already been cloned and built)
157 | cd llama.cpp/build/bin
158 | ./llama-server -m ~/models/qwen2.5-7b-instruct-q4_k_m.gguf \
159 | --host 0.0.0.0 \
160 | --port 8080 \
161 | -ngl 35 \
162 | --ctx-size 4096 \
163 | --jinja
164 | ```
165 |
166 | **Configuration:**
167 |
168 | Create a `.env` file:
169 | ```bash
170 | MODEL_PROVIDER=llamacpp
171 | MODEL_NAME=qwen2.5-7b-instruct-q4_k_m # Model name (can be anything)
172 | SUMMARIZATION_MODEL=qwen2.5-7b-instruct-q4_k_m
173 | LLAMACPP_BASE_URL=http://localhost:8080
174 | ```
175 |
176 | **Important Flags:**
177 | - `--jinja` - Required for tool/function calling support (used by research agents)
178 | - `-ngl 35` - Offload 35 layers to GPU (Metal acceleration)
179 | - `--ctx-size 4096` - Context window size
180 | - `--host 0.0.0.0` - Allow connections from any IP
181 | - `--port 8080` - Server port
182 |
183 | **Performance Tips:**
184 | - Metal acceleration provides ~2-3x speedup on M1/M2/M3
185 | - The server exposes an OpenAI-compatible API at `/v1/chat/completions`
186 | - Use `--n-gpu-layers` (or `-ngl`) to maximize GPU usage
187 |
188 | > **Note**: llama.cpp offers more control and can be faster than Ollama, but requires manual setup. Choose Ollama for simplicity or llama.cpp for maximum performance.
189 |
190 | ## Usage
191 |
192 | ### Command Line
193 |
194 | ```bash
195 | # Interactive mode
196 | python main.py
197 |
198 | # Direct topic
199 | python main.py "Impact of quantum computing on cryptography"
200 | ```
201 |
202 | ### Programmatic API
203 |
204 | ```python
205 | import asyncio
206 | from src.graph import run_research
207 |
208 | async def research():
209 | state = await run_research("Topic here", verbose=True, use_cache=True)
210 |
211 | # Access report
212 | print(state["final_report"])
213 |
214 | # Access LLM metrics
215 | print(f"LLM Calls: {state['llm_calls']}")
216 | print(f"Input Tokens: {state['total_input_tokens']:,}")
217 | print(f"Output Tokens: {state['total_output_tokens']:,}")
218 | print(f"Total Tokens: {state['total_input_tokens'] + state['total_output_tokens']:,}")
219 |
220 | # Access quality score
221 | if state.get("quality_score"):
222 | print(f"Quality: {state['quality_score']['total_score']}/100")
223 |
224 | asyncio.run(research())
225 | ```
226 |
227 | ### Web Interface
228 |
229 | ```bash
230 | # Start the web interface
231 | chainlit run app.py --host 127.0.0.1 --port 8000
232 | ```
233 |
234 | The web interface provides:
235 | - Interactive chat-based research
236 | - Real-time progress updates with stage indicators
237 | - Quality metrics and LLM usage statistics
238 | - Multiple format downloads (Markdown, HTML, TXT)
239 | - Research history tracking
240 |
241 | ## Configuration
242 |
243 | Environment variables in `.env`:
244 |
245 | ```bash
246 | # Model Provider (choose one)
247 | MODEL_PROVIDER=ollama # Options: ollama, llamacpp, gemini, openai
248 |
249 | # For Ollama
250 | MODEL_NAME=qwen2.5:7b # Recommended: qwen2.5:7b, llama3.1:8b, mistral:7b
251 | SUMMARIZATION_MODEL=qwen2.5:7b
252 | OLLAMA_BASE_URL=http://localhost:11434
253 |
254 | # For llama.cpp (alternative - requires --jinja flag on server)
255 | # MODEL_PROVIDER=llamacpp
256 | # MODEL_NAME=qwen2.5-7b-instruct-q4_k_m
257 | # SUMMARIZATION_MODEL=qwen2.5-7b-instruct-q4_k_m
258 | # LLAMACPP_BASE_URL=http://localhost:8080
259 |
260 | # For Gemini (alternative)
261 | # MODEL_PROVIDER=gemini
262 | # GEMINI_API_KEY=your_api_key_here
263 | # MODEL_NAME=gemini-2.5-flash
264 | # SUMMARIZATION_MODEL=gemini-2.5-flash
265 |
266 | # For OpenAI (alternative)
267 | # MODEL_PROVIDER=openai
268 | # OPENAI_API_KEY=your_api_key_here
269 | # MODEL_NAME=gpt-4o-mini # Recommended: gpt-4o-mini, gpt-4o, gpt-4-turbo
270 | # SUMMARIZATION_MODEL=gpt-4o-mini
271 |
272 | # Optional - Search Settings
273 | MAX_SEARCH_QUERIES=3
274 | MAX_SEARCH_RESULTS_PER_QUERY=3
275 | MIN_CREDIBILITY_SCORE=40
276 |
277 | # Optional - Report Settings
278 | MAX_REPORT_SECTIONS=8
279 | CITATION_STYLE=apa # Options: apa, mla, chicago, ieee
280 | ```
281 |
282 | ### Model Provider Comparison
283 |
284 | **Ollama (Local Models):**
285 | - Free, no API costs
286 | - Works offline, privacy-focused
287 | - Faster response times (no network latency)
288 | - No rate limits
289 | - Easy setup and model management
290 | - Requires ~5-8GB RAM for good models
291 | - Initial model download (~4-5GB per model)
292 |
293 | **llama.cpp (Local Models):**
294 | - Free, no API costs
295 | - Works offline, maximum privacy
296 | - Fastest local inference with Metal acceleration
297 | - No rate limits
298 | - Fine-grained control over model parameters
299 | - Lower memory usage with quantization
300 | - Requires manual setup and compilation
301 | - Requires ~4-8GB RAM depending on quantization
302 | - Best for: Maximum performance on M1/M2/M3 Macs
303 |
304 | **Gemini (Cloud API):**
305 | - No local resources needed
306 | - Latest cutting-edge models
307 | - Consistently fast across devices
308 | - Requires API key and internet
309 | - API costs (free tier available)
310 |
311 | **OpenAI (Cloud API):**
312 | - No local resources needed
313 | - Industry-leading models (GPT-4, GPT-4o)
314 | - Excellent performance and reliability
315 | - Requires API key and internet
316 | - Pay-per-use pricing (competitive rates)
317 | - Recommended models: `gpt-4o-mini` (cost-effective), `gpt-4o` (best quality)
318 |
319 | ## Output Format
320 |
321 | Generated reports follow this structure:
322 |
323 | ```markdown
324 | # [Research Topic]
325 |
326 | **Deep Research Report**
327 |
328 | ## Research Objectives
329 | 1. [Objective 1]
330 | 2. [Objective 2]
331 | ...
332 |
333 | ---
334 |
335 | ## [Section 1 Title]
336 | [Content with inline citations [1], [2]]
337 |
338 | ## [Section 2 Title]
339 | [Content with inline citations [3], [4]]
340 |
341 | ---
342 |
343 | ## References
344 | 1. [Formatted citation according to selected style]
345 | 2. [Formatted citation according to selected style]
346 | ...
347 | ```
348 |
349 | Reports are automatically exported in three formats:
350 | - **Markdown** (`.md`) - Original format with full markdown syntax
351 | - **HTML** (`.html`) - Styled web-ready format
352 | - **Plain Text** (`.txt`) - Markdown stripped, plain text version
353 |
354 | All reports are saved to the `outputs/` directory with timestamps.
355 |
356 | ## Project Structure
357 |
358 | ```
359 | deep-research-agent/
360 | ├── src/
361 | │ ├── __init__.py # Package initialization
362 | │ ├── config.py # Configuration management (Pydantic models)
363 | │ ├── state.py # State models (ResearchState, ResearchPlan, etc.)
364 | │ ├── agents.py # Agent implementations (Planner, Searcher, Synthesizer, Writer)
365 | │ ├── graph.py # LangGraph workflow orchestration
366 | │ ├── llm_tracker.py # LLM call and token tracking
367 | │ └── utils/
368 | │ ├── __init__.py # Utils package
369 | │ ├── tools.py # LangChain tools (@tool decorated for agents)
370 | │ ├── web_utils.py # Search & extraction implementations
371 | │ ├── cache.py # Research result caching (7-day TTL)
372 | │ ├── credibility.py # Source credibility scoring and filtering
373 | │ ├── exports.py # Multi-format export utilities
374 | │ ├── citations.py # Citation formatting (APA, MLA, Chicago, IEEE)
375 | │ └── history.py # Research history tracking
376 | ├── outputs/ # Generated reports (MD, HTML, TXT)
377 | ├── .cache/ # Cache and history storage
378 | │ ├── research/ # Cached research results
379 | │ └── research_history.json # Research history
380 | ├── main.py # CLI entry point
381 | ├── app.py # Chainlit web interface
382 | ├── requirements.txt # Python dependencies
383 | ├── pyproject.toml # Project metadata
384 | ├── LICENSE # MIT License
385 | └── README.md # This file
386 | ```
387 |
388 | ## Key Components
389 |
390 | ### State Management (`src/state.py`)
391 |
392 | Centralized state using Pydantic models tracks research progress, search results, findings, and LLM usage metrics throughout the workflow.
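
A trimmed-down sketch of that state is shown below. The field names follow those referenced in `app.py` and `src/utils/web_utils.py`, but the full model carries more fields, so treat this as illustrative rather than a copy of `src/state.py`.

```python
"""Trimmed-down sketch of the shared research state (see src/state.py for the full model)."""

from typing import List, Optional
from pydantic import BaseModel, Field

class SearchResult(BaseModel):
    query: str
    title: str
    url: str
    snippet: str
    content: Optional[str] = None  # populated by deep content extraction

class ResearchState(BaseModel):
    research_topic: str
    search_results: List[SearchResult] = Field(default_factory=list)
    key_findings: List[str] = Field(default_factory=list)
    final_report: Optional[str] = None
    error: Optional[str] = None
    # LLM usage metrics accumulated across the workflow
    llm_calls: int = 0
    total_input_tokens: int = 0
    total_output_tokens: int = 0
```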
393 |
394 | ### Tools Layer (`src/utils/tools.py`)
395 |
396 | LangChain tools decorated with `@tool` enable autonomous agent tool-calling:
397 | - `web_search`: DuckDuckGo integration for web searches
398 | - `extract_webpage_content`: BeautifulSoup4-based content extraction
399 |
400 | ### Credibility Scorer (`src/utils/credibility.py`)
401 |
402 | Evaluates sources based on:
403 | - Domain authority (trusted domains: +30 points)
404 | - HTTPS enabled (+5 points)
405 | - Academic/research paths (+10 points)
406 | - Suspicious patterns (-20 points)
407 |
408 | Sources are automatically filtered and sorted by credibility before synthesis.
409 |
410 | ## Development Note
411 |
412 | The core ideation, architecture design, and logic of this project are the result of original research and understanding. While AI tools were used to assist with code restructuring and implementation, the fundamental concepts, agent workflows, credibility scoring methodology, and overall system design reflect independent research and development.
413 |
414 | ## Contact
415 |
416 | For questions, issues, or collaboration:
417 |
418 | - **GitHub**: [tarun7r](https://github.com/tarun7r)
419 | - **LinkedIn**: [Tarun Sai Goddu](https://www.linkedin.com/in/tarunsaigoddu/)
420 | - **Hugging Face**: [tarun7r](https://huggingface.co/tarun7r)
421 | - **Email**: tarunsaiaa@gmail.com
422 |
423 | ## License
424 |
425 | MIT License - See [LICENSE](LICENSE) file for details.
426 |
427 | ## Acknowledgments
428 |
429 | Built with [LangGraph](https://github.com/langchain-ai/langgraph) and [LangChain](https://github.com/langchain-ai/langchain). Supports [Ollama](https://ollama.com/) and [llama.cpp](https://github.com/ggerganov/llama.cpp) for local models, [Google Gemini](https://ai.google.dev/) and [OpenAI](https://openai.com/) APIs. Web search via [DuckDuckGo](https://duckduckgo.com/).
430 |
--------------------------------------------------------------------------------
/src/utils/tools.py:
--------------------------------------------------------------------------------
1 | """LLM-invokable tools for research agents."""
2 |
3 | from typing import List, Optional, Dict
4 | from langchain_core.tools import tool
5 | import logging
6 | import json
7 |
8 | from src.utils.web_utils import WebSearchTool as WebSearchImpl, ContentExtractor as ContentExtractorImpl
9 | from src.state import SearchResult
10 | from src.utils.citations import CitationFormatter
11 | from src.config import config
12 |
13 | logging.basicConfig(level=logging.INFO)
14 | logger = logging.getLogger(__name__)
15 |
16 |
17 | # Initialize tool implementations with config values
18 | _search_impl = WebSearchImpl(max_results=config.max_search_results_per_query)
19 | _extractor_impl = ContentExtractorImpl(timeout=10)
20 | _citation_formatter = CitationFormatter()
21 |
22 |
23 | @tool
24 | async def web_search(query: str, max_results: Optional[int] = None) -> List[dict]:
25 | """Search the web for authoritative information using DuckDuckGo search engine.
26 |
27 | This tool executes web searches to find current, accurate information from diverse sources
28 | including academic papers, official documentation, news articles, and expert analyses.
29 |
30 | ## When to Use
31 | - Gathering factual information on any topic
32 | - Finding authoritative sources (academic, government, official docs)
33 | - Researching current events or recent developments
34 | - Verifying claims or finding supporting evidence
35 | - Discovering expert opinions and analyses
36 |
37 | ## Query Optimization Strategies
38 |
39 | ### For Maximum Accuracy:
40 | - Add "official" or "documentation" for technical topics
41 | - Include "research" or "study" for scientific topics
42 | - Add year (e.g., "2024") for time-sensitive information
43 | - Use site-specific queries: "site:edu" or "site:gov" for authoritative sources
44 |
45 | ### Query Formulation Best Practices:
46 | - Be specific: "Python async await tutorial" > "Python programming"
47 | - Use technical terms: "WebSocket protocol implementation" > "real-time web"
48 | - Include context: "Azure Speech SDK streaming architecture" > "Azure speech"
49 | - Combine concepts: "machine learning healthcare diagnosis 2024"
50 |
51 | ### Query Types for Comprehensive Research:
52 | - Definitional: "what is [topic]", "[topic] explained"
53 | - Technical: "[topic] architecture", "how [topic] works"
54 | - Comparative: "[topic] vs [alternative]", "[topic] comparison"
55 | - Practical: "[topic] best practices", "[topic] tutorial"
56 | - Current: "[topic] 2024", "latest [topic]"
57 |
58 | Args:
59 | query: A well-crafted search query string. Be specific and include relevant
60 | qualifiers. Maximum ~10 words for best results.
61 |
62 | Good examples:
63 | - "WebSocket vs HTTP streaming performance comparison"
64 | - "Azure cognitive services speech SDK documentation"
65 | - "transformer architecture deep learning explained 2024"
66 | - "site:arxiv.org large language models efficiency"
67 |
68 | Avoid:
69 | - Single words: "AI", "cloud", "programming"
70 | - Overly long queries (>15 words)
71 | - Ambiguous terms without context
72 |
73 | max_results: Maximum results to return (default: from config). Higher values
74 | give more sources but may include less relevant results.
75 |
76 | Returns:
77 | List of dictionaries, each containing:
78 | - query (str): The search query used
79 | - title (str): Page title (indicates content focus)
80 | - url (str): Full URL (check domain for credibility)
81 | - snippet (str): Preview text (~150 chars, helps assess relevance)
82 |
83 | Tips:
84 | - Check URL domains: .edu, .gov, .org often indicate credibility
85 | - Review snippets before extracting full content
86 | - If results are poor, try rephrasing with different terms
87 | """
88 | try:
89 | # Use config value if not specified
90 | if max_results is None:
91 | max_results = config.max_search_results_per_query
92 |
93 | # Update max_results if different
94 | if _search_impl.max_results != max_results:
95 | _search_impl.max_results = max_results
96 |
97 | results = await _search_impl.search_async(query)
98 |
99 | # Convert SearchResult objects to dicts for LLM consumption
100 | return [
101 | {
102 | "query": r.query,
103 | "title": r.title,
104 | "url": r.url,
105 | "snippet": r.snippet
106 | }
107 | for r in results
108 | ]
109 | except Exception as e:
110 | logger.error(f"Web search tool error: {str(e)}")
111 | return []
112 |
113 |
114 | @tool
115 | async def extract_webpage_content(url: str) -> Optional[str]:
116 | """Extract the main textual content from a webpage, removing boilerplate and noise.
117 |
118 | This tool fetches a webpage and uses intelligent content extraction to isolate the
119 | main article body, removing navigation, ads, sidebars, footers, and other non-content
120 | elements. Essential for getting the full context beyond search snippets.
121 |
122 | ## When to Use
123 | - After web_search identifies promising sources
124 | - To get full article text beyond the snippet preview
125 | - For in-depth analysis of specific sources
126 | - When you need to verify claims with full context
127 | - To extract technical details, examples, or data tables
128 |
129 | ## Source Prioritization Guide
130 |
131 | ### Extract First (High Value):
132 | - Official documentation pages (docs.*, developer.*)
133 | - Academic papers and research (arxiv.org, ieee.org, nature.com)
134 | - Government and institutional reports (.gov, .edu)
135 | - Detailed technical blog posts with code/examples
136 | - Industry whitepapers and case studies
137 |
138 | ### Extract If Needed (Medium Value):
139 | - News articles from reputable sources
140 | - Well-written tutorial and how-to guides
141 | - Expert commentary and analysis pieces
142 | - Wikipedia articles (good overviews)
143 |
144 | ### Usually Skip (Low Value):
145 | - Social media pages (limited extraction success)
146 | - Video-primary sites (YouTube, Vimeo) - no transcript extraction
147 | - Login-protected content
148 | - Heavily JavaScript-rendered single-page apps
149 | - Image galleries or portfolio sites
150 |
151 | ## What Gets Extracted
152 | - Main article/post body text
153 | - Headings and subheadings
154 | - Lists and bullet points
155 | - Code blocks and technical content
156 | - Tables (as text)
157 |
158 | ## What Gets Removed
159 | - Navigation menus and headers
160 | - Sidebar content and widgets
161 | - Footer links and copyright notices
162 | - Advertisements and promotions
163 | - Comment sections
164 | - Related article suggestions
165 |
166 | Args:
167 | url: Complete, valid HTTP/HTTPS URL to extract content from.
168 | Must be publicly accessible (no auth required).
169 |
170 | Good candidates:
171 | - "https://docs.microsoft.com/azure/cognitive-services/speech"
172 | - "https://arxiv.org/abs/2301.xxxxx"
173 | - "https://www.nature.com/articles/article-id"
174 | - "https://techblog.example.com/detailed-guide"
175 |
176 | Poor candidates:
177 | - "https://twitter.com/..." (social media)
178 | - "https://youtube.com/..." (video content)
179 | - URLs requiring login
180 |
181 | Returns:
182 | str: Extracted main text content (up to 5000 characters for efficiency).
183 | Content is cleaned and formatted with preserved paragraph breaks.
184 |
185 | None: If extraction fails due to:
186 | - Network/access errors (timeouts, 403/404)
187 | - Login/authentication requirements
188 | - JavaScript-heavy pages with no static content
189 | - Non-text content (PDFs, images, videos)
190 |
191 | Usage Pattern:
192 | 1. Run web_search to find relevant URLs
193 | 2. Review titles and domains for credibility
194 | 3. Extract content from top 3-5 most promising sources
195 | 4. Cross-reference extracted content for verification
196 | """
197 | try:
198 | content = await _extractor_impl.extract_content_async(url)
199 | return content
200 | except Exception as e:
201 | logger.error(f"Content extraction tool error: {str(e)}")
202 | return None
203 |
204 |
205 | @tool
206 | def analyze_research_topic(topic: str) -> Dict[str, List[str]]:
207 | """Decompose a research topic into structured dimensions for comprehensive coverage.
208 |
209 | This tool performs preliminary topic analysis to identify the key aspects,
210 | stakeholder perspectives, and essential questions that should be addressed
211 | in a thorough research investigation.
212 |
213 | ## Purpose
214 | Ensures research planning covers all important dimensions of a topic rather
215 | than focusing too narrowly on one aspect.
216 |
217 | ## Analysis Framework
218 |
219 | ### Aspects (What to cover)
220 | The fundamental dimensions or components of the topic:
221 | - Technical/Functional aspects (how it works)
222 | - Historical context (evolution, origins)
223 | - Current state (adoption, implementations)
224 | - Future outlook (trends, predictions)
225 | - Practical implications (real-world impact)
226 |
227 | ### Perspectives (Whose viewpoint)
228 | Different stakeholder or analytical lenses:
229 | - Technical perspective (engineers, developers)
230 | - Business perspective (costs, ROI, strategy)
231 | - User perspective (experience, benefits)
232 | - Ethical perspective (risks, implications)
233 | - Policy perspective (regulations, standards)
234 |
235 | ### Questions (What to answer)
236 | Core questions that comprehensive research should address:
237 | - Definitional: What is it?
238 | - Mechanistic: How does it work?
239 | - Evaluative: What are the pros/cons?
240 | - Comparative: How does it compare to alternatives?
241 | - Prospective: What's the future outlook?
242 |
243 | ## When to Use
244 | - At the start of research planning
245 | - When unsure how to structure research approach
246 | - To ensure comprehensive topic coverage
247 | - To generate diverse search query ideas
248 |
249 | Args:
250 | topic: The research topic or question to analyze.
251 | Can be a simple topic ("machine learning")
252 | or a complex question ("How does Azure Speech SDK streaming work?")
253 |
254 | Returns:
255 | Dictionary containing:
256 |
257 | - aspects (List[str]): Key dimensions to investigate
258 | Typically 3-5 core aspects of the topic
259 |
260 | - perspectives (List[str]): Stakeholder/analytical viewpoints
261 | Different angles from which to examine the topic
262 |
263 | - questions (List[str]): Essential questions to answer
264 | Core questions that research should address
265 |
266 | Usage:
267 | Use the returned analysis to:
268 | 1. Generate diverse search queries covering all aspects
269 | 2. Structure report outline to address all perspectives
270 | 3. Verify final report answers all essential questions
271 | """
272 | # This is a structured thinking tool for the planning agent
273 | # Returns structured breakdown to help with planning
274 | logger.info(f"Analyzing topic: {topic}")
275 |
276 | # Basic heuristic analysis
277 | aspects = []
278 | perspectives = []
279 | questions = []
280 |
281 | # Extract key concepts
282 | words = topic.lower().split()
283 | if "ai" in words or "artificial" in words or "intelligence" in words:
284 | aspects.extend(["applications", "technology", "impact"])
285 | perspectives.extend(["technical", "ethical", "societal"])
286 |
287 | if "healthcare" in words or "medical" in words or "health" in words:
288 | aspects.extend(["patient care", "diagnosis", "treatment"])
289 | perspectives.extend(["patients", "doctors", "researchers"])
290 |
291 | # Default structure
292 | if not aspects:
293 | aspects = ["overview", "current state", "future trends", "implications"]
294 | if not perspectives:
295 | perspectives = ["technical", "practical", "societal"]
296 |
297 | questions = [
298 | f"What is the current state of {topic}?",
299 |         "What are the key benefits and challenges?",
300 | f"What does the future hold for {topic}?"
301 | ]
302 |
303 | return {
304 | "aspects": aspects[:5],
305 | "perspectives": perspectives[:4],
306 | "questions": questions[:5]
307 | }
308 |
309 |
310 | @tool
311 | def extract_insights_from_text(text: str, focus: str = "key findings") -> List[str]:
312 | """Extract specific, targeted insights from text content based on a defined focus area.
313 |
314 | This tool performs focused extraction of relevant information from raw text,
315 | helping to isolate specific types of insights like findings, trends, challenges,
316 | benefits, technical details, or statistics.
317 |
318 | ## When to Use
319 | - Extracting specific categories of information from article content
320 | - Isolating technical details or specifications
321 | - Finding statistics, numbers, or quantitative data
322 | - Identifying challenges, limitations, or criticisms
323 | - Pulling out benefits, advantages, or positive outcomes
324 | - Discovering trends, patterns, or predictions
325 |
326 | ## Effective Focus Parameters
327 |
328 | ### For Technical Research:
329 | - "technical specifications" - Extract specs, requirements, parameters
330 | - "implementation details" - How it works, architecture, components
331 | - "performance metrics" - Speed, accuracy, benchmarks, comparisons
332 | - "limitations" - Constraints, edge cases, known issues
333 |
334 | ### For Analysis:
335 | - "key findings" - Main conclusions and discoveries (default)
336 | - "trends" - Patterns, trajectories, emerging developments
337 | - "challenges" - Problems, obstacles, difficulties
338 | - "benefits" - Advantages, positive outcomes, value propositions
339 | - "comparisons" - How things differ, trade-offs, alternatives
340 |
341 | ### For Practical Use:
342 | - "best practices" - Recommended approaches, guidelines
343 | - "use cases" - Applications, examples, scenarios
344 | - "requirements" - Prerequisites, dependencies, conditions
345 | - "steps" - Procedures, processes, workflows
346 |
347 | Args:
348 | text: The text content to analyze. Can be:
349 | - Extracted webpage content
350 | - Search result snippets
351 | - Combined content from multiple sources
352 | Longer texts (>1000 chars) yield better results.
353 |
354 | focus: The type of insight to extract. Be specific for better results.
355 | Default: "key findings"
356 |
357 | Examples:
358 | - "technical architecture"
359 | - "performance benchmarks"
360 | - "security considerations"
361 | - "cost implications"
362 | - "user benefits"
363 |
364 | Returns:
365 | List[str]: Extracted insights matching the focus area.
366 | Each insight is a complete, standalone statement.
367 | Returns ["No specific insights found..."] if none match.
368 |
369 | Tips:
370 | - Use specific focus terms for targeted extraction
371 | - Combine with multiple focus areas for comprehensive analysis
372 | - Review extracted insights for accuracy before including in reports
373 | """
374 | logger.info(f"Extracting insights with focus: {focus}")
375 |
376 | # Simple extraction: split by sentences and filter
377 | insights = []
378 | sentences = text.split('. ')
379 |
380 | focus_keywords = focus.lower().split()
381 | for sentence in sentences[:20]: # Limit to first 20 sentences
382 | sentence_lower = sentence.lower()
383 | # Check if sentence contains focus keywords
384 | if any(keyword in sentence_lower for keyword in focus_keywords):
385 | if len(sentence) > 20 and len(sentence) < 300:
386 | insights.append(sentence.strip() + '.')
387 |
388 | return insights[:10] if insights else ["No specific insights found for this focus."]
389 |
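# Example (illustrative) — since this is a @tool, direct calls go through .invoke:
#
#   extract_insights_from_text.invoke({"text": article_text, "focus": "performance metrics"})
#   # -> sentences from article_text that mention the focus keywords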
390 |
391 | @tool
392 | def format_citation(url: str, title: str = "", style: str = "apa") -> str:
393 | """Format a source citation in a standardized academic style.
394 |
395 | This tool generates properly formatted citations for the References section
396 | of research reports. Supports major academic citation styles used in
397 | scholarly writing.
398 |
399 | ## Supported Citation Styles
400 |
401 | ### APA (American Psychological Association) - Default
402 | - Common in: Social sciences, psychology, education, business
403 | - Format: Author. (Year). Title. Retrieved from URL
404 | - Example: Smith, J. (2024). Machine Learning Basics. Retrieved from https://...
405 |
406 | ### MLA (Modern Language Association)
407 | - Common in: Humanities, literature, arts
408 | - Format: Author. "Title." Date. Web. Access Date.
409 | - Example: Smith, John. "Machine Learning Basics." Web. 15 Dec. 2024.
410 |
411 | ### Chicago
412 | - Common in: History, some humanities, publishing
413 | - Format: Author. "Title." Accessed Date. URL.
414 | - Example: Smith, John. "Machine Learning Basics." Accessed December 15, 2024. https://...
415 |
416 | ### IEEE (Institute of Electrical and Electronics Engineers)
417 | - Common in: Engineering, computer science, technical fields
418 | - Format: Author, "Title," URL, accessed Date.
419 | - Example: J. Smith, "Machine Learning Basics," https://..., accessed December 15, 2024.
420 |
421 | ## When to Use
422 | - Building the References section of a report
423 | - Need consistent citation formatting
424 | - Converting URL + title into proper academic format
425 |
426 | Args:
427 | url: Complete URL of the source (required)
428 | Must be a valid HTTP/HTTPS URL
429 |
430 | title: Title of the article/page (recommended)
431 | Improves citation quality significantly
432 | If empty, citation will be URL-only
433 |
434 | style: Citation format to use (case-insensitive)
435 | Options: "apa" (default), "mla", "chicago", "ieee"
436 | Use the style appropriate for your field/audience
437 |
438 | Returns:
439 | str: Formatted citation string ready for inclusion in References
440 | Includes current date as access date where required by style
441 |
442 | Tips:
443 | - Always provide title when available for better citations
444 | - Use consistent style throughout a single report
445 | - APA is a safe default for most research contexts
446 | """
447 | logger.info(f"Formatting citation in {style} style")
448 |
449 | try:
450 | # Use the appropriate formatting method based on style
451 | if style.lower() == "apa":
452 | return _citation_formatter.format_apa(url, title)
453 | elif style.lower() == "mla":
454 | return _citation_formatter.format_mla(url, title)
455 | elif style.lower() == "chicago":
456 | return _citation_formatter.format_chicago(url, title)
457 | elif style.lower() == "ieee":
458 | return _citation_formatter.format_ieee(url, title)
459 | else:
460 | # Default to APA
461 | return _citation_formatter.format_apa(url, title)
462 | except Exception as e:
463 | logger.error(f"Citation formatting error: {e}")
464 | # Fallback to simple format
465 | if title:
466 | return f"{title}. Retrieved from {url}"
467 | return url
468 |
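# Example (illustrative):
#
#   format_citation.invoke({"url": "https://example.com/post",
#                           "title": "Machine Learning Basics",
#                           "style": "ieee"})
#   # Exact output depends on CitationFormatter; the access date is filled in automatically.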
469 |
470 | @tool
471 | def validate_section_quality(section_text: str, min_words: int = 150) -> Dict[str, Any]:
472 | """Validate a report section against quality standards before finalizing.
473 |
474 | This tool performs comprehensive quality checks on written sections to ensure
475 | they meet minimum standards for length, citation usage, structure, and
476 | overall readability. Use BEFORE submitting final section content.
477 |
478 | ## Quality Dimensions Checked
479 |
480 | ### 1. Length Requirements
481 | - Minimum word count enforcement
482 | - Flags sections that are too short for meaningful coverage
483 |
484 | ### 2. Citation Analysis
485 | - Presence of inline citations [1], [2], etc.
486 | - Academic writing requires citations for factual claims
487 |
488 | ### 3. Structural Elements
489 | - Use of markdown headings for organization
490 | - Appropriate for sections over 300 words
491 |
492 | ## When to Use
493 | - After drafting any report section
494 | - Before returning final section content
495 | - To identify areas needing improvement
496 | - To ensure minimum quality thresholds are met
497 |
498 | ## Interpreting Results
499 |
500 | ### is_valid = True
501 | - Section meets all minimum requirements
502 | - Safe to include in final report
503 |
504 | ### is_valid = False
505 | - One or more critical issues found
506 | - Review 'issues' list for specific problems
507 | - Follow 'suggestions' for improvements
508 | - Revise section before submitting
509 |
510 | Args:
511 | section_text: The complete section content to validate.
512 | Should be the full markdown text you plan to submit.
513 |
514 | min_words: Minimum acceptable word count. Default: 150
515 | For comprehensive sections, use 200-300
516 | For brief overviews, 100-150 may suffice
517 |
518 | Returns:
519 | Dictionary containing:
520 |
521 | - is_valid (bool): True if ALL quality checks pass
522 |
523 | - word_count (int): Actual word count of the section
524 | Compare against min_words to see the gap
525 |
526 | - has_citations (bool): True if [n] citation format detected
527 | FALSE = Major issue for factual content
528 |
529 | - issues (List[str]): Specific problems found
530 | Empty list = no issues
531 | Examples:
532 | - "Section too short: 89 words (minimum: 150)"
533 | - "No citations found"
534 |
535 | - suggestions (List[str]): Actionable improvement recommendations
536 | Examples:
537 | - "Add more detail and supporting information"
538 | - "Add inline citations [1], [2] to support claims"
539 | - "Consider adding subheadings for better structure"
540 |
541 | Usage Pattern:
542 | 1. Write your section content
543 | 2. Call validate_section_quality(your_content, min_words=200)
544 | 3. If is_valid is False, revise based on issues/suggestions
545 | 4. Repeat until is_valid is True
546 | 5. Submit the validated section
547 | """
548 | logger.info("Validating section quality")
549 |
550 | word_count = len(section_text.split())
551 |     has_citations = re.search(r'\[\d+\]', section_text) is not None  # matches inline citations like [1], [2]
552 | has_headers = '#' in section_text
553 |
554 | issues = []
555 | suggestions = []
556 |
557 | if word_count < min_words:
558 | issues.append(f"Section too short: {word_count} words (minimum: {min_words})")
559 | suggestions.append("Add more detail and supporting information")
560 |
561 | if not has_citations:
562 | issues.append("No citations found")
563 | suggestions.append("Add inline citations [1], [2] to support claims")
564 |
565 | if not has_headers and word_count > 300:
566 | suggestions.append("Consider adding subheadings for better structure")
567 |
568 | is_valid = len(issues) == 0
569 |
570 | return {
571 | "is_valid": is_valid,
572 | "word_count": word_count,
573 | "has_citations": has_citations,
574 | "issues": issues,
575 | "suggestions": suggestions
576 | }
577 |
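# Example (illustrative) of the validate-revise loop described in the docstring:
#
#   report = validate_section_quality.invoke({"section_text": draft, "min_words": 200})
#   if not report["is_valid"]:
#       ...  # revise the draft using report["suggestions"], then validate again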
578 |
579 | # Tool lists for different agents
580 | research_search_tools = [
581 | web_search,
582 | extract_webpage_content
583 | ]
584 |
585 | synthesis_tools = [
586 | extract_insights_from_text
587 | ]
588 |
589 | writing_tools = [
590 | format_citation,
591 | validate_section_quality
592 | ]
593 |
594 | planning_tools = [
595 | analyze_research_topic
596 | ]
597 |
598 | # All tools combined
599 | all_research_tools = [
600 | web_search,
601 | extract_webpage_content,
602 | analyze_research_topic,
603 | extract_insights_from_text,
604 | format_citation,
605 | validate_section_quality
606 | ]
607 |
608 |
609 | def get_research_tools(agent_type: str = "search") -> List:
610 | """Get research tools for a specific agent type.
611 |
612 | Args:
613 | agent_type: Type of agent ("search", "synthesis", "writing", "planning", "all")
614 |
615 | Returns:
616 | List of LangChain tool objects for that agent
617 | """
618 | tools_map = {
619 | "search": research_search_tools,
620 | "synthesis": synthesis_tools,
621 | "writing": writing_tools,
622 | "planning": planning_tools,
623 | "all": all_research_tools
624 | }
625 | return tools_map.get(agent_type, research_search_tools)
626 |
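# Usage sketch (illustrative): pick the toolset for a given agent node.
#
#   writing_toolset = get_research_tools("writing")
#   # -> [format_citation, validate_section_quality]
#
# Unknown agent types fall back to the search toolset via tools_map.get above.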
--------------------------------------------------------------------------------
/src/agents.py:
--------------------------------------------------------------------------------
1 | """Agent nodes for the research workflow."""
2 |
3 | import asyncio
4 | from typing import List, Optional
5 | import logging
6 |
7 | from langchain_google_genai import ChatGoogleGenerativeAI
8 | from langchain_ollama import ChatOllama
9 | from langchain_openai import ChatOpenAI
10 | from langchain_core.prompts import ChatPromptTemplate
11 | from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
12 | from langchain.agents import create_agent
13 |
14 | from src.state import ResearchState, ResearchPlan, SearchQuery, ReportSection
15 | from src.utils.tools import get_research_tools
16 | from src.config import config
17 | from src.utils.credibility import CredibilityScorer
18 | from src.utils.citations import CitationFormatter
19 | from src.llm_tracker import estimate_tokens
20 | from src.callbacks import (
21 | emit_planning_start, emit_planning_complete,
22 | emit_search_start, emit_search_results,
23 | emit_extraction_start, emit_extraction_complete,
24 | emit_synthesis_start, emit_synthesis_progress, emit_synthesis_complete,
25 | emit_writing_start, emit_writing_section, emit_writing_complete,
26 | emit_error
27 | )
28 | import time
29 |
30 | logging.basicConfig(level=logging.INFO)
31 | logger = logging.getLogger(__name__)
32 |
33 |
34 | def get_llm(temperature: float = 0.7, model_override: Optional[str] = None):
35 | """Get LLM instance based on configuration.
36 |
37 | Args:
38 | temperature: Temperature for the LLM
39 | model_override: Optional model name to override config.model_name
40 |
41 | Returns:
42 | LLM instance (ChatOllama, ChatGoogleGenerativeAI, or ChatOpenAI)
43 | """
44 | model_name = model_override or config.model_name
45 |
46 | if config.model_provider == "ollama":
47 | logger.info(f"Using Ollama model: {model_name}")
48 | return ChatOllama(
49 | model=model_name,
50 | base_url=config.ollama_base_url,
51 | temperature=temperature,
52 | num_ctx=8192, # Context window
53 | )
54 | elif config.model_provider == "openai":
55 | logger.info(f"Using OpenAI model: {model_name}")
56 | return ChatOpenAI(
57 | model=model_name,
58 | api_key=config.openai_api_key,
59 | temperature=temperature
60 | )
61 | elif config.model_provider == "llamacpp":
62 | logger.info(f"Using llama.cpp server model: {model_name}")
63 | # llama.cpp server exposes OpenAI-compatible API
64 | return ChatOpenAI(
65 | model=model_name,
66 | base_url=f"{config.llamacpp_base_url}/v1", # OpenAI-compatible endpoint
67 | api_key="not-needed", # llama.cpp doesn't require API key
68 | temperature=temperature
69 | )
70 | else: # gemini
71 | logger.info(f"Using Gemini model: {model_name}")
72 | return ChatGoogleGenerativeAI(
73 | model=model_name,
74 | google_api_key=config.google_api_key,
75 | temperature=temperature
76 | )
77 |
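# Usage sketch (illustrative) — the temperatures below mirror how the agents in this
# module call get_llm; provider selection comes entirely from config.model_provider:
#
#   planner_llm = get_llm(temperature=0.7)   # creative planning and report writing
#   search_llm  = get_llm(temperature=0.3)   # precise search and synthesis
#   synth_llm   = get_llm(temperature=0.3, model_override=config.summarization_model)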
78 |
79 | class ResearchPlanner:
80 | """Autonomous agent responsible for planning research strategy."""
81 |
82 | def __init__(self):
83 | self.llm = get_llm(temperature=0.7)
84 | # Note: Planning agent uses LLM directly with structured output for reliability
85 | # Tool calling works better for search/extraction tasks
86 | self.max_retries = 3
87 |
88 | async def plan(self, state: ResearchState) -> dict:
89 | """Create a research plan with structured LLM output.
90 |
91 | Returns dict with updates that LangGraph will merge into state.
92 | """
93 | logger.info(f"Planning research for: {state.research_topic}")
94 |
95 | # Emit progress update
96 | await emit_planning_start(state.research_topic)
97 |
98 | prompt = ChatPromptTemplate.from_messages([
99 | ("system", """You are an expert research strategist and information architect. Your role is to create comprehensive, methodical research plans that maximize accuracy and depth of coverage.
100 |
101 | ## Your Core Responsibilities
102 |
103 | ### 1. Define SMART Research Objectives (3-5 objectives)
104 | Create objectives that are:
105 | - **Specific**: Target concrete aspects of the topic, not vague generalities
106 | - **Measurable**: Can be verified as addressed in the final report
107 | - **Achievable**: Realistically answerable through web research
108 | - **Relevant**: Directly address the user's query and implied needs
109 | - **Time-aware**: Consider current state, recent developments, and future outlook
110 |
111 | ### 2. Design Strategic Search Queries (up to {max_queries} queries)
112 |
113 | **Query Diversity Matrix** - Ensure coverage across:
114 | - **Definitional queries**: "What is [topic]" / "[topic] explained"
115 | - **Mechanism queries**: "How does [topic] work" / "[topic] architecture"
116 | - **Comparison queries**: "[topic] vs alternatives" / "[topic] comparison"
117 | - **Expert/authoritative queries**: "[topic] research paper" / "[topic] official documentation"
118 | - **Practical queries**: "[topic] best practices" / "[topic] implementation guide"
119 | - **Trend queries**: "[topic] 2024" / "latest [topic] developments"
120 | - **Problem/solution queries**: "[topic] challenges" / "[topic] limitations"
121 |
122 | **Query Quality Guidelines**:
123 | - Use specific technical terms when appropriate
124 | - Include year markers for time-sensitive topics (e.g., "2024", "latest")
125 | - Add domain qualifiers for targeted results (e.g., "academic", "enterprise", "tutorial")
126 | - Avoid overly broad single-word queries
127 | - Consider alternative phrasings and synonyms
128 |
129 | ### 3. Structure the Report Outline (up to {max_sections} sections)
130 |
131 | Create a logical flow that:
132 | - Starts with context/background (helps readers understand the landscape)
133 | - Progresses from fundamentals to advanced topics
134 | - Groups related concepts together
135 | - Ends with practical implications, conclusions, or future outlook
136 | - Includes a dedicated section for technical details if applicable
137 |
138 | **Recommended Section Types**:
139 | - Executive Summary / Overview
140 | - Background & Context
141 | - Core Concepts / How It Works
142 | - Key Features / Components / Architecture
143 | - Benefits & Advantages
144 | - Challenges & Limitations
145 | - Use Cases / Applications
146 | - Comparison with Alternatives (if relevant)
147 | - Best Practices / Implementation Guidelines
148 | - Future Outlook / Trends
149 | - Conclusion & Recommendations
150 |
151 | ## Output Quality Standards
152 | - Every search query must have a clear, distinct purpose
153 | - No redundant or overlapping queries
154 | - Report sections should comprehensively cover all objectives
155 | - Consider the user's apparent expertise level when designing the plan"""),
156 | ("human", """Research Topic: {topic}
157 |
158 | Analyze this topic carefully. Consider:
159 | 1. What is the user really trying to understand?
160 | 2. What are the key dimensions of this topic?
161 | 3. What authoritative sources would have the best information?
162 | 4. What technical depth is appropriate?
163 |
164 | Create a detailed research plan in JSON format:
165 | {{
166 | "topic": "the research topic (refined if needed for clarity)",
167 | "objectives": [
168 | "Specific, measurable objective 1",
169 | "Specific, measurable objective 2",
170 | ...
171 | ],
172 | "search_queries": [
173 | {{"query": "well-crafted search query 1", "purpose": "specific reason this query helps achieve objectives"}},
174 | {{"query": "well-crafted search query 2", "purpose": "specific reason this query helps achieve objectives"}},
175 | ...
176 | ],
177 | "report_outline": [
178 | "Section 1: Logical starting point",
179 | "Section 2: Building on Section 1",
180 | ...
181 | ]
182 | }}
183 |
184 | Ensure each query targets different aspects and the outline tells a coherent story.""")
185 | ])
186 |
187 | for attempt in range(self.max_retries):
188 | try:
189 | start_time = time.time()
190 | chain = prompt | self.llm | JsonOutputParser()
191 |
192 | # Estimate input tokens
193 | input_text = f"{state.research_topic} {config.max_search_queries} {config.max_report_sections}"
194 | input_tokens = estimate_tokens(input_text)
195 |
196 | result = await chain.ainvoke({
197 | "topic": state.research_topic,
198 | "max_queries": config.max_search_queries,
199 | "max_sections": config.max_report_sections
200 | })
201 |
202 | # Track LLM call
203 | duration = time.time() - start_time
204 | output_tokens = estimate_tokens(str(result))
205 | call_detail = {
206 | 'agent': 'ResearchPlanner',
207 | 'operation': 'plan',
208 | 'model': config.model_name,
209 | 'input_tokens': input_tokens,
210 | 'output_tokens': output_tokens,
211 | 'duration': round(duration, 2),
212 | 'attempt': attempt + 1
213 | }
214 |
215 | # Validate result structure
216 | if not all(key in result for key in ["topic", "objectives", "search_queries", "report_outline"]):
217 | raise ValueError("Invalid plan structure returned")
218 |
219 | if not result["search_queries"]:
220 | raise ValueError("No search queries generated")
221 |
222 |                 plan_data = result
231 |
232 | # Convert to ResearchPlan with HARD LIMITS enforced
233 | plan = ResearchPlan(
234 | topic=plan_data["topic"],
235 | objectives=plan_data["objectives"][:5], # Max 5 objectives
236 | search_queries=[
237 | SearchQuery(query=sq["query"], purpose=sq["purpose"])
238 | for sq in plan_data["search_queries"][:config.max_search_queries]
239 | ],
240 | report_outline=plan_data["report_outline"][:config.max_report_sections]
241 | )
242 |
243 | logger.info(f"Created plan with {len(plan.search_queries)} queries (enforced max: {config.max_search_queries})")
244 | logger.info(f"Report outline has {len(plan.report_outline)} sections (enforced max: {config.max_report_sections})")
245 |
246 | # Emit progress update
247 | await emit_planning_complete(len(plan.search_queries), len(plan.report_outline))
248 |
249 | # Return dict updates - LangGraph merges into state
250 | return {
251 | "plan": plan,
252 | "current_stage": "searching",
253 | "iterations": state.iterations + 1,
254 | "llm_calls": state.llm_calls + 1,
255 | "total_input_tokens": state.total_input_tokens + input_tokens,
256 | "total_output_tokens": state.total_output_tokens + output_tokens,
257 | "llm_call_details": state.llm_call_details + [call_detail]
258 | }
259 |
260 | except Exception as e:
261 | logger.warning(f"Planning attempt {attempt + 1} failed: {str(e)}")
262 | if attempt == self.max_retries - 1:
263 | logger.error(f"Planning failed after {self.max_retries} attempts")
264 | return {
265 | "error": f"Planning failed: {str(e)}",
266 | "iterations": state.iterations + 1
267 | }
268 | else:
269 | await asyncio.sleep(2 ** attempt)
270 |
271 | # Fallback if all retries exhausted
272 | return {
273 | "error": "Planning failed: Maximum retries exceeded",
274 | "iterations": state.iterations + 1
275 | }
276 |
277 |
278 | class ResearchSearcher:
279 | """Autonomous agent responsible for executing research searches."""
280 |
281 | def __init__(self):
282 | self.llm = get_llm(temperature=0.3)
283 | self.tools = get_research_tools(agent_type="search")
284 | self.credibility_scorer = CredibilityScorer()
285 | self.max_retries = 3
286 |
287 | async def search(self, state: ResearchState) -> dict:
288 | """Autonomously execute research searches using tools.
289 |
290 | The agent will decide which searches to perform, when to extract content,
291 | and how to gather comprehensive information.
292 |
293 | Returns dict with search results that LangGraph will merge into state.
294 | """
295 | if not state.plan:
296 | await emit_error("No research plan available")
297 | return {"error": "No research plan available"}
298 |
299 | logger.info(f"Autonomous agent researching: {len(state.plan.search_queries)} planned queries")
300 |
301 | # Emit progress for each planned query
302 | total_queries = len(state.plan.search_queries)
303 | for i, query in enumerate(state.plan.search_queries, 1):
304 | await emit_search_start(query.query, i, total_queries)
305 |
306 | # Create system prompt for autonomous agent with config-based limits
307 | max_searches = config.max_search_queries
308 | max_results_per_search = config.max_search_results_per_query
309 | expected_total_results = max_searches * max_results_per_search
310 |
311 | system_prompt = f"""You are an elite research investigator with expertise in finding accurate, authoritative information. Your mission is to gather comprehensive, verified data from the most credible sources available.
312 |
313 | ## Your Available Tools
314 | 1. **web_search(query, max_results)**: Search the web for information
315 | 2. **extract_webpage_content(url)**: Extract full article content from a URL
316 |
317 | ## Research Protocol
318 |
319 | ### Phase 1: Strategic Searching
320 | Execute the planned search queries systematically:
321 | - Limit to **{max_searches} searches maximum**
322 | - Each search returns up to **{max_results_per_search} results**
323 | - If initial queries yield poor results, adapt with refined queries
324 |
325 | ### Phase 2: Source Evaluation & Content Extraction
326 | For each search result, quickly assess source quality:
327 |
328 | **HIGH-PRIORITY Sources (extract immediately):**
329 | - Government sites (.gov, .gov.uk, .europa.eu)
330 | - Academic institutions (.edu, .ac.uk, university domains)
331 | - Peer-reviewed journals (nature.com, sciencedirect.com, ieee.org)
332 | - Official documentation (docs.*, official product sites)
333 | - Established news organizations (reuters.com, bbc.com, nytimes.com)
334 | - Industry-recognized publications
335 |
336 | **MEDIUM-PRIORITY Sources (extract if needed):**
337 | - Well-known tech publications (techcrunch.com, wired.com, arstechnica.com)
338 | - Reputable blogs with author credentials
339 | - Company blogs from established organizations
340 | - Wikipedia (good for overview, verify claims elsewhere)
341 |
342 | **LOW-PRIORITY Sources (use cautiously):**
343 | - Personal blogs without credentials
344 | - User-generated content sites
345 | - Sites with excessive ads or clickbait titles
346 | - Sources without clear authorship
347 | - Outdated content (check publication dates)
348 |
349 | ### Phase 3: Content Gathering
350 | - Extract full content from the **top {expected_total_results} most promising URLs**
351 | - Prioritize sources that directly address the research objectives
352 | - Look for primary sources (original research, official docs) over secondary summaries
353 | - Note publication dates - prefer recent content for evolving topics
354 |
355 | ## Quality Checkpoints
356 | Before concluding, verify you have:
357 | [x] Multiple sources confirming key facts (cross-referencing)
358 | [x] At least some high-credibility sources in your collection
359 | [x] Coverage across different aspects of the research objectives
360 | [x] Both overview content and specific technical details
361 |
362 | ## Completion Signal
363 | When you have gathered sufficient high-quality information (aim for {expected_total_results} quality sources with extracted content), respond with:
364 |
365 | RESEARCH_COMPLETE: [Summary of what you found, including:
366 | - Number of sources gathered
367 | - Key themes discovered
368 | - Any notable gaps or areas needing more research
369 | - Confidence level in the gathered information]"""
370 |
371 | # Create autonomous agent using LangChain's create_agent
372 | agent_graph = create_agent(
373 | self.llm,
374 | self.tools,
375 | system_prompt=system_prompt
376 | )
377 |
378 | for attempt in range(self.max_retries):
379 | try:
380 | start_time = time.time()
381 |
382 | # Prepare input
383 | objectives_text = "\n".join(f"- {obj}" for obj in state.plan.objectives)
384 | queries_text = "\n".join(
385 | f"- {q.query} (Purpose: {q.purpose})"
386 | for q in state.plan.search_queries
387 | )
388 |
389 | # Estimate input tokens
390 | input_message = f"""## Research Mission Brief
391 |
392 | ### Topic Under Investigation:
393 | {state.research_topic}
394 |
395 | ### Research Objectives (All must be addressed):
396 | {objectives_text}
397 |
398 | ### Planned Search Queries (Execute strategically):
399 | {queries_text}
400 |
401 | ---
402 |
403 | ### Your Mission:
404 | 1. Execute the search queries above using the web_search tool
405 | 2. Evaluate results for credibility and relevance
406 | 3. Extract full content from the most authoritative sources using extract_webpage_content
407 | 4. Ensure you gather information that addresses ALL research objectives
408 | 5. Prioritize recent, authoritative sources over older or less credible ones
409 |
410 | ### Quality Targets:
411 | - Gather from at least {config.max_search_queries * config.max_search_results_per_query} different sources
412 | - Extract full content from the top 5-8 most relevant pages
413 | - Ensure coverage across all research objectives
414 | - Include at least some academic, government, or official documentation sources if available
415 |
416 | Begin your systematic research now. Execute searches and extract content until you have comprehensive coverage."""
417 |
418 | input_tokens = estimate_tokens(input_message)
419 |
420 | # Execute autonomous research
421 | result = await agent_graph.ainvoke({
422 | "messages": [{"role": "user", "content": input_message}]
423 | })
424 |
425 | # Track LLM call (approximation - agent may make multiple calls)
426 | duration = time.time() - start_time
427 |
428 | # Extract messages from result
429 | messages = result.get('messages', [])
430 | output_text = ""
431 | if messages:
432 | output_text = str(messages[-1].content if hasattr(messages[-1], 'content') else str(messages[-1]))
433 |
434 | output_tokens = estimate_tokens(output_text)
435 |
436 | # Extract search results from messages
437 | # We need to track tool calls and results within the messages
438 | search_results = []
439 | from src.state import SearchResult
440 |
441 | for msg in messages:
442 | # Check for tool calls in message
443 | if hasattr(msg, 'tool_calls') and msg.tool_calls:
444 | for tool_call in msg.tool_calls:
445 | if tool_call.get('name') == 'web_search':
446 | # This is a search request, we'll get results in next message
447 | pass
448 |
449 | # Check for tool responses
450 | if hasattr(msg, 'name') and msg.name == 'web_search':
451 | # Parse tool response
452 | try:
453 | content = msg.content
454 | if isinstance(content, str):
455 | import json
456 | tool_results = json.loads(content)
457 | else:
458 | tool_results = content
459 |
460 | if isinstance(tool_results, list):
461 | for item in tool_results:
462 | if isinstance(item, dict):
463 | search_results.append(SearchResult(
464 | query=item.get('query', ''),
465 | title=item.get('title', ''),
466 | url=item.get('url', ''),
467 | snippet=item.get('snippet', ''),
468 | content=None
469 | ))
470 | except Exception as e:
471 | logger.warning(f"Error parsing tool result: {e}")
472 |
473 | # Check for content extraction results
474 | if hasattr(msg, 'name') and msg.name == 'extract_webpage_content':
475 | try:
476 | content = msg.content
477 | # Find the corresponding search result and update it
478 | # Note: This is a simplified approach, might need refinement
479 | if search_results and content:
480 | # Update the most recent search result without content
481 | for sr in reversed(search_results):
482 | if not sr.content:
483 | sr.content = content
484 | break
485 | except Exception as e:
486 | logger.warning(f"Error updating content: {e}")
487 |
488 | logger.info(f"Autonomous agent collected {len(search_results)} results")
489 |
490 | # Calculate total extracted content
491 | total_extracted_chars = sum(
492 | len(r.content) if r.content else 0
493 | for r in search_results
494 | )
495 | extracted_count = sum(1 for r in search_results if r.content)
496 |
497 | # Emit extraction completion
498 | await emit_extraction_complete(extracted_count, total_extracted_chars)
499 |
500 | if not search_results:
501 | await emit_error("Agent did not collect any search results")
502 | raise ValueError("Agent did not collect any search results")
503 |
504 | # Score all results first
505 | scored_results = self.credibility_scorer.score_search_results(search_results)
506 |
507 | # Filter by minimum credibility score
508 | filtered_scored = [
509 | item for item in scored_results
510 | if item['credibility']['score'] >= config.min_credibility_score
511 | ]
512 |
513 | # Extract filtered results and scores (already sorted by score, highest first)
514 | credibility_scores = [item['credibility'] for item in filtered_scored]
515 | sorted_results = [item['result'] for item in filtered_scored]
516 |
517 | logger.info(f"Filtered {len(search_results)} -> {len(sorted_results)} results (min_credibility={config.min_credibility_score})")
518 |
519 | # Mark queries as completed
520 | for q in state.plan.search_queries:
521 | q.completed = True
522 |
523 | call_detail = {
524 | 'agent': 'ResearchSearcher',
525 | 'operation': 'autonomous_search',
526 | 'model': config.model_name,
527 | 'input_tokens': input_tokens,
528 | 'output_tokens': output_tokens,
529 | 'duration': round(duration, 2),
530 | 'results_count': len(sorted_results),
531 | 'original_results_count': len(search_results),
532 | 'min_credibility_score': config.min_credibility_score,
533 | 'attempt': attempt + 1
534 | }
535 |
536 | # Return dict updates - LangGraph merges into state
537 | return {
538 | "search_results": sorted_results,
539 | "credibility_scores": credibility_scores,
540 | "current_stage": "synthesizing",
541 | "iterations": state.iterations + 1,
542 | "llm_calls": state.llm_calls + 1,
543 | "total_input_tokens": state.total_input_tokens + input_tokens,
544 | "total_output_tokens": state.total_output_tokens + output_tokens,
545 | "llm_call_details": state.llm_call_details + [call_detail]
546 | }
547 |
548 | except Exception as e:
549 | logger.warning(f"Search attempt {attempt + 1} failed: {str(e)}")
550 | if attempt == self.max_retries - 1:
551 | logger.error(f"Search failed after {self.max_retries} attempts")
552 | return {
553 | "error": f"Search failed: {str(e)}",
554 | "iterations": state.iterations + 1
555 | }
556 | else:
557 | await asyncio.sleep(2 ** attempt)
558 |
559 | # Fallback if all retries exhausted
560 | return {
561 | "error": "Search failed: Maximum retries exceeded",
562 | "iterations": state.iterations + 1
563 | }
564 |
565 |
566 | class ResearchSynthesizer:
567 | """Autonomous agent responsible for synthesizing research findings."""
568 |
569 | def __init__(self):
570 | self.llm = get_llm(temperature=0.3, model_override=config.summarization_model)
571 | self.tools = get_research_tools(agent_type="synthesis")
572 | self.max_retries = 3
573 |
574 | async def synthesize(self, state: ResearchState) -> dict:
575 | """Autonomously synthesize key findings using tools and reasoning.
576 |
577 | Returns dict with key findings that LangGraph will merge into state.
578 | """
579 | logger.info(f"Synthesizing findings from {len(state.search_results)} results")
580 |
581 | if not state.search_results:
582 | await emit_error("No search results to synthesize")
583 | return {"error": "No search results to synthesize"}
584 |
585 | # Emit synthesis start
586 | await emit_synthesis_start(len(state.search_results))
587 |
588 | # Create system prompt for autonomous synthesis agent
589 | system_prompt = """You are a senior research analyst specializing in synthesizing complex information into accurate, actionable insights. Your task is to analyze search results and extract verified, well-supported findings.
590 |
591 | ## Your Available Tools
592 | - **extract_insights_from_text(text, focus)**: Extract specific insights from text content
593 |
594 | ## Source Credibility Framework
595 |
596 | Each source has a credibility rating. Apply this hierarchy strictly:
597 |
598 | ### HIGH Credibility (Score >=70) - Primary Sources
599 | - Government and institutional sources
600 | - Peer-reviewed research and academic papers
601 | - Official documentation and specifications
602 | - Established news organizations with editorial standards
603 | => **TRUST**: Use as primary basis for findings
604 |
605 | ### MEDIUM Credibility (Score 40-69) - Supporting Sources
606 | - Industry publications and tech blogs
607 | - Expert commentary and analysis
608 | - Well-maintained wikis and documentation
609 | => **VERIFY**: Cross-reference with HIGH sources; use to add context
610 |
611 | ### LOW Credibility (Score <40) - Supplementary Only
612 | - Personal blogs, forums, user comments
613 | - Sources without clear authorship
614 | - Outdated or unverified content
615 | => **CAUTION**: Only use if corroborated by higher-credibility sources
616 |
617 | ## Synthesis Methodology
618 |
619 | ### Step 1: Identify Core Facts
620 | - What claims appear in multiple HIGH-credibility sources?
621 | - What are the foundational facts that most sources agree on?
622 | - Extract specific data points: numbers, dates, names, technical specifications
623 |
624 | ### Step 2: Detect and Resolve Conflicts
625 | When sources contradict each other:
626 | 1. Check credibility scores - trust higher-rated sources
627 | 2. Check recency - newer information may supersede older
628 | 3. Check specificity - primary sources trump secondary summaries
629 | 4. If unresolvable, note the disagreement in findings
630 |
631 | ### Step 3: Synthesize Key Findings
632 | For each finding, ensure:
633 | - **Accuracy**: Only include information that appears in the sources
634 | - **Attribution**: Note which source numbers support the finding [1], [2], etc.
635 | - **Specificity**: Include concrete details, not vague generalities
636 | - **Balance**: Present multiple perspectives if sources differ
637 |
638 | ### Step 4: Quality Control
639 | Before finalizing, verify:
640 | [x] No claims are made without source support
641 | [x] HIGH-credibility sources are prioritized
642 | [x] Contradictions are acknowledged, not ignored
643 | [x] Findings directly address research objectives
644 | [x] Technical accuracy is maintained (don't oversimplify incorrectly)
645 |
646 | ## Output Format
647 |
648 | Return findings as a JSON array of strings. Each finding should:
649 | - Be a complete, standalone insight
650 | - Include source references where applicable
651 | - Be specific enough to be useful (avoid generic statements)
652 | - Focus on facts over opinions (unless opinion is from recognized experts)
653 |
654 | Example format:
655 | [
656 | "Finding 1: [Specific fact or insight] - supported by sources [1], [3]",
657 | "Finding 2: [Technical detail with specifics] - per official documentation [2]",
658 | "Finding 3: [Trend or development] - noted across multiple industry sources [4], [5], [6]"
659 | ]
660 |
661 | ## Anti-Hallucination Rules
662 | DO NOT invent statistics, dates, or specifics not in sources
663 | DO NOT make claims beyond what sources support
664 | DO NOT present speculation as fact
665 | DO NOT ignore source credibility ratings
666 | DO say "sources indicate" or "according to [source]" for less certain claims
667 | DO note when information is limited or conflicting"""
668 |
669 | # Create autonomous synthesis agent
670 | agent_graph = create_agent(
671 | self.llm,
672 | self.tools,
673 | system_prompt=system_prompt
674 | )
675 |
676 | # Progressive truncation strategy
677 | max_results = 20
678 |
679 | for attempt in range(self.max_retries):
680 | try:
681 | start_time = time.time()
682 |
683 | # Adjust result count based on attempt
684 | current_max = max(5, max_results - (attempt * 5))
685 |
686 | # Prepare search results text with credibility information
687 | results_to_use = state.search_results[:current_max]
688 | credibility_scores_to_use = state.credibility_scores[:current_max] if state.credibility_scores else []
689 |
690 | results_text = "\n\n".join([
691 | f"[{i+1}] {r.title}\n"
692 | f"URL: {r.url}\n"
693 | f"Credibility: {cred.get('level', 'unknown').upper()} (Score: {cred.get('score', 'N/A')}/100) - {', '.join(cred.get('factors', []))}\n"
694 | f"Snippet: {r.snippet}\n" +
695 | (f"Content: {r.content[:300]}..." if r.content else "")
696 | for i, (r, cred) in enumerate(zip(results_to_use, credibility_scores_to_use))
697 | ])
698 |
699 | # If credibility scores don't match (shouldn't happen, but handle gracefully)
700 | if len(results_to_use) != len(credibility_scores_to_use):
701 | # Fallback: format without credibility if mismatch
702 | results_text = "\n\n".join([
703 | f"[{i+1}] {r.title}\nURL: {r.url}\nSnippet: {r.snippet}\n" +
704 | (f"Content: {r.content[:300]}..." if r.content else "")
705 | for i, r in enumerate(results_to_use)
706 | ])
707 |
708 | # Prepare input message for the autonomous agent
709 | input_message = f"""## Research Synthesis Task
710 |
711 | ### Topic: {state.research_topic}
712 |
713 | ### Your Mission:
714 | Analyze the search results below and extract the most important, accurate, and well-supported findings.
715 |
716 | ---
717 |
718 | ### Search Results with Credibility Scores:
719 | {results_text}
720 |
721 | ---
722 |
723 | ### Synthesis Instructions:
724 |
725 | 1. **Extract Key Facts**: Identify the core factual claims across sources
726 | 2. **Cross-Reference**: Note which findings are supported by multiple sources
727 | 3. **Resolve Conflicts**: When sources disagree, trust higher-credibility sources
728 | 4. **Maintain Specificity**: Include specific details, numbers, and technical information
729 | 5. **Note Limitations**: Flag areas where information is sparse or contradictory
730 |
731 | ### Output Requirements:
732 | Return a JSON array of 10-15 key findings. Each finding should:
733 | - Be a complete, specific statement (not vague generalizations)
734 | - Reference source numbers when citing facts: "...according to [1]" or "...per [3], [5]"
735 | - Focus on facts that directly address the research topic
736 | - Prioritize findings from HIGH-credibility sources
737 |
738 | Example format:
739 | [
740 | "The technology uses [specific mechanism] to achieve [specific outcome], enabling [specific capability] [1]",
741 | "According to official documentation [2], the key components include: [list specific items]",
742 | "Industry adoption has grown to [specific metric], with major deployments at [specific examples] [3], [5]",
743 | "Experts note challenges including [specific challenge 1] and [specific challenge 2] [4]"
744 | ]
745 |
746 | Analyze the sources now and extract your findings:"""
747 |
748 | # Estimate input tokens
749 | input_tokens = estimate_tokens(input_message)
750 |
751 | # Execute autonomous synthesis
752 | result = await agent_graph.ainvoke({
753 | "messages": [{"role": "user", "content": input_message}]
754 | })
755 |
756 | # Track LLM call
757 | duration = time.time() - start_time
758 |
759 | # Extract final response
760 | messages = result.get('messages', [])
761 | output_text = ""
762 | if messages:
763 | last_msg = messages[-1]
764 | output_text = str(last_msg.content if hasattr(last_msg, 'content') else str(last_msg))
765 |
766 | output_tokens = estimate_tokens(output_text)
767 |
768 | call_detail = {
769 | 'agent': 'ResearchSynthesizer',
770 | 'operation': 'autonomous_synthesis',
771 | 'model': config.summarization_model,
772 | 'input_tokens': input_tokens,
773 | 'output_tokens': output_tokens,
774 | 'duration': round(duration, 2),
775 | 'attempt': attempt + 1
776 | }
777 |
778 | # Parse the JSON response
779 | import json
780 | import re
781 |
782 | # Try to extract JSON array from the response
783 |                 json_match = re.search(r'\[.*\]', output_text, re.DOTALL)  # greedy: spans first '[' to last ']' so inline "[1]" citations don't truncate the array
784 |
785 | key_findings = []
786 | if json_match:
787 | try:
788 | findings = json.loads(json_match.group(0))
789 | if isinstance(findings, list):
790 | key_findings = [
791 | str(f) # Convert all items to strings (handles int, dict, etc.)
792 | for f in findings
793 | ]
794 | else:
795 | key_findings = [str(findings)]
796 | except json.JSONDecodeError:
797 | pass
798 |
799 | # If JSON parsing failed or empty, use fallback extraction
800 | if not key_findings:
801 | # Look for bullet points or numbered items
802 | lines = output_text.split('\n')
803 | for line in lines:
804 | line = line.strip().lstrip('-').lstrip('*').lstrip('>').strip()
805 | # Remove numbering like "1.", "2.", etc.
806 | line = re.sub(r'^\d+\.\s*', '', line)
807 | if len(line) > 30 and not line.startswith('[') and not line.startswith(']'):
808 | key_findings.append(line)
809 |
810 | # Limit to reasonable number
811 | key_findings = key_findings[:15]
812 |
813 | # If still empty, create basic findings from search results
814 | if not key_findings and state.search_results:
815 | logger.warning("Agent produced no findings, creating basic ones from results")
816 | key_findings = [
817 | f"{r.title}: {r.snippet[:100]}..."
818 | for r in state.search_results[:10]
819 | if r.snippet
820 | ]
821 |
822 | logger.info(f"Extracted {len(key_findings)} key findings")
823 |
824 | # Emit synthesis completion
825 | await emit_synthesis_complete(len(key_findings))
826 |
827 | # Return dict updates - LangGraph merges into state
828 | return {
829 | "key_findings": key_findings,
830 | "current_stage": "reporting",
831 | "iterations": state.iterations + 1,
832 | "llm_calls": state.llm_calls + 1,
833 | "total_input_tokens": state.total_input_tokens + input_tokens,
834 | "total_output_tokens": state.total_output_tokens + output_tokens,
835 | "llm_call_details": state.llm_call_details + [call_detail]
836 | }
837 |
838 | except Exception as e:
839 | logger.warning(f"Synthesis attempt {attempt + 1} failed: {str(e)}")
840 | if attempt == self.max_retries - 1:
841 | logger.error(f"Synthesis failed after {self.max_retries} attempts")
842 | return {
843 | "error": f"Synthesis failed: {str(e)}",
844 | "iterations": state.iterations + 1
845 | }
846 | else:
847 | await asyncio.sleep(2 ** attempt)
848 |
849 | # Fallback if all retries exhausted
850 | return {
851 | "error": "Synthesis failed: Maximum retries exceeded",
852 | "iterations": state.iterations + 1
853 | }
854 |
855 |
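Both agents follow the same retry discipline: catch, warn, back off for `2 ** attempt` seconds (1 s, 2 s, 4 s across three attempts), and surface an error update only after the final attempt. Distilled into a reusable helper (a sketch, not part of the project code):

```python
import asyncio
import logging

logger = logging.getLogger(__name__)

async def with_retries(make_call, max_retries: int = 3):
    """Run an async operation with exponential backoff between failed attempts."""
    for attempt in range(max_retries):
        try:
            return await make_call()
        except Exception as e:
            logger.warning(f"Attempt {attempt + 1} failed: {e}")
            if attempt == max_retries - 1:
                raise
            await asyncio.sleep(2 ** attempt)  # 1s, 2s, 4s, ...

# e.g. result = await with_retries(lambda: agent_graph.ainvoke(payload))
```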
856 | class ReportWriter:
857 | """Autonomous agent responsible for writing research reports."""
858 |
859 | def __init__(self, citation_style: str = 'apa'):
860 | self.llm = get_llm(temperature=0.7)
861 | self.tools = get_research_tools(agent_type="writing")
862 | self.max_retries = 3
863 | self.citation_style = citation_style
864 | self.citation_formatter = CitationFormatter()
865 |
866 | async def write_report(self, state: ResearchState) -> dict:
867 | """Write the final research report with validation and retry.
868 |
869 | Returns dict with report data that LangGraph will merge into state.
870 | """
871 | logger.info("Writing final report")
872 |
873 | if not state.plan or not state.key_findings:
874 | await emit_error("Insufficient data for report generation")
875 | return {"error": "Insufficient data for report generation"}
876 |
877 | # Emit writing start
878 | await emit_writing_start(len(state.plan.report_outline))
879 |
880 | # Track total LLM calls for report generation
881 | report_llm_calls = 0
882 | report_input_tokens = 0
883 | report_output_tokens = 0
884 | report_call_details = []
885 |
886 | for attempt in range(self.max_retries):
887 | try:
888 | # Generate each section with retry
889 | report_sections = []
890 | total_sections = len(state.plan.report_outline)
891 |
892 | for section_idx, section_title in enumerate(state.plan.report_outline, 1):
893 | # Emit progress for each section
894 | await emit_writing_section(section_title, section_idx, total_sections)
895 |
896 | section, section_tokens = await self._write_section(
897 | state.research_topic,
898 | section_title,
899 | state.key_findings,
900 | state.search_results
901 | )
902 | if section:
903 | report_sections.append(section)
904 | if section_tokens:
905 | report_llm_calls += 1
906 | report_input_tokens += section_tokens['input_tokens']
907 | report_output_tokens += section_tokens['output_tokens']
908 | report_call_details.append(section_tokens)
909 |
910 | # Validate minimum quality
911 | if not report_sections:
912 | raise ValueError("No report sections generated")
913 |
914 | # Create temporary state for compilation
915 | temp_state = ResearchState(
916 | research_topic=state.research_topic,
917 | plan=state.plan,
918 | report_sections=report_sections
919 | )
920 |
921 | # Compile final report
922 | final_report = self._compile_report(temp_state)
923 |
924 | # Format citations in specified style
925 | if state.search_results:
926 | final_report = self.citation_formatter.update_report_citations(
927 | final_report,
928 | style=self.citation_style,
929 | search_results=state.search_results
930 | )
931 |
932 | # Add credibility information to report if available
933 | if state.credibility_scores:
934 | high_cred_sources = [
935 | i+1 for i, score in enumerate(state.credibility_scores)
936 | if score.get('level') == 'high'
937 | ]
938 | if high_cred_sources:
939 | final_report += f"\n\n---\n\n**Note:** {len(high_cred_sources)} high-credibility sources were prioritized in this research."
940 |
941 | # Validate report length
942 | if len(final_report) < 500:
943 | raise ValueError("Report too short - insufficient content")
944 |
945 | logger.info(f"Report generation complete: {len(final_report)} chars")
946 |
947 | # Emit writing completion
948 | await emit_writing_complete(len(final_report))
949 |
950 | # Return dict updates - LangGraph merges into state
951 | return {
952 | "report_sections": report_sections,
953 | "final_report": final_report,
954 | "current_stage": "complete",
955 | "iterations": state.iterations + 1,
956 | "llm_calls": state.llm_calls + report_llm_calls,
957 | "total_input_tokens": state.total_input_tokens + report_input_tokens,
958 | "total_output_tokens": state.total_output_tokens + report_output_tokens,
959 | "llm_call_details": state.llm_call_details + report_call_details
960 | }
961 |
962 | except Exception as e:
963 | logger.warning(f"Report attempt {attempt + 1} failed: {str(e)}")
964 | if attempt == self.max_retries - 1:
965 | logger.error(f"Report generation failed after {self.max_retries} attempts")
966 | return {
967 | "error": f"Report writing failed: {str(e)}",
968 | "iterations": state.iterations + 1
969 | }
970 | else:
971 | await asyncio.sleep(2 ** attempt)
972 |
973 | # Fallback if all retries exhausted
974 | return {
975 | "error": "Report generation failed: Maximum retries exceeded",
976 | "iterations": state.iterations + 1
977 | }
978 |
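Each `_write_section` call hands back a `(section, call_detail)` pair, and `write_report` folds the per-section numbers into report-level totals before returning them as LangGraph state deltas. The accumulation pattern in isolation (a sketch over plain dicts):

```python
def fold_call_details(details: list[dict]) -> dict:
    """Sum per-call token counts into report-level totals, as write_report does."""
    return {
        "llm_calls": len(details),
        "input_tokens": sum(d.get("input_tokens", 0) for d in details),
        "output_tokens": sum(d.get("output_tokens", 0) for d in details),
    }
```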
979 | async def _write_section(
980 | self,
981 | topic: str,
982 | section_title: str,
983 | findings: List[str],
984 | search_results: List
985 | ) -> tuple:
986 | """Autonomously write a single report section using tools."""
987 | logger.info(f"Writing section: {section_title}")
988 |
989 | # Create system prompt for section writing
990 | system_prompt = f"""You are a distinguished research writer and subject matter expert. Your task is to write authoritative, accurate, and well-structured report sections that inform and educate readers.
991 |
992 | ## Your Available Tools
993 | - **format_citation(url, title, style)**: Format citations in academic styles
994 | - **validate_section_quality(section_text, min_words)**: Verify section meets quality standards
995 |
996 | ## Writing Standards
997 |
998 | ### Content Quality Requirements
999 | 1. **Minimum Length**: {config.min_section_words} words (use validate_section_quality to verify)
1000 | 2. **Factual Accuracy**: Every claim must be grounded in the provided findings
1001 | 3. **Proper Citations**: Use inline citations [1], [2], etc. for all factual claims
1002 | 4. **Balanced Perspective**: Present multiple viewpoints when they exist
1003 | 5. **Technical Precision**: Use correct terminology; do not oversimplify at the expense of accuracy
1004 |
1005 | ### Structure & Formatting (Markdown)
1006 | - Use **bold** for key terms and important concepts
1007 | - Use bullet points or numbered lists for multiple items
1008 | - Use subheadings (### or ####) to organize complex sections
1009 | - Include specific examples, data points, or case studies when available
1010 | - Maintain logical flow from one paragraph to the next
1011 |
1012 | ### Writing Style Guidelines
1013 | - **Tone**: Professional, authoritative, but accessible
1014 | - **Voice**: Third-person academic style (avoid "I", "we", "you")
1015 | - **Clarity**: Explain complex concepts clearly; define technical terms
1016 | - **Conciseness**: Every sentence should add value; avoid filler
1017 | - **Precision**: Use specific language; avoid vague qualifiers like "very" or "many"
1018 |
1019 | ## Critical Accuracy Rules
1020 |
1021 | ### DO
1022 | - Base all claims on the provided key findings
1023 | - Cite sources for factual statements: "According to [1]..." or "Research indicates [2]..."
1024 | - Distinguish between established facts and emerging trends
1025 | - Note limitations or caveats when relevant
1026 | - Use specific numbers, dates, and names from sources
1027 | - Acknowledge when evidence is limited: "Available data suggests..."
1028 |
1029 | ### DO NOT
1030 | - Invent statistics, percentages, or specific numbers not in findings
1031 | - Make claims that go beyond the provided information
1032 | - Present opinions as facts without attribution
1033 | - Ignore contradictions between sources
1034 | - Use placeholder text or generic filler content
1035 | - Oversimplify to the point of inaccuracy
1036 |
1037 | ## Section Writing Process
1038 |
1039 | 1. **Analyze**: Review the findings relevant to this section's topic
1040 | 2. **Outline**: Mentally structure the key points to cover
1041 | 3. **Draft**: Write comprehensive content with proper citations
1042 | 4. **Verify**: Use validate_section_quality to check length and citations
1043 | 5. **Refine**: Ensure logical flow and accuracy
1044 |
1045 | ## Output Format
1046 | Write the section content directly in markdown format. Start with the content immediately (the section title will be added automatically). Ensure proper spacing between paragraphs.
1047 |
1048 | Example structure:
1049 | ```
1050 | [Opening paragraph introducing the section topic]
1051 |
1052 | [Main content paragraph with specific details and citations [1]]
1053 |
1054 | ### [Subheading if needed]
1055 |
1056 | [Additional content with more citations [2], [3]]
1057 |
1058 | [Concluding paragraph summarizing key points]
1059 | ```"""
1060 |
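The prompt advertises two tools whose real implementations live in the project's tools module (fetched via `get_research_tools` above, not shown here). Inferring only the signatures the prompt states, LangChain-style definitions might look like the following; the bodies and defaults are guesses, and the actual minimum word count comes from `config.min_section_words`:

```python
from langchain_core.tools import tool

@tool
def format_citation(url: str, title: str, style: str = "apa") -> str:
    """Format a source as a citation in the requested academic style."""
    # Hypothetical body; the project's real formatter is CitationFormatter.
    return f"{title}. Retrieved from {url}" if style == "apa" else f"{title} <{url}>"

@tool
def validate_section_quality(section_text: str, min_words: int = 300) -> str:
    """Check whether a drafted section meets the minimum word count."""
    words = len(section_text.split())
    return f"OK: {words} words" if words >= min_words else f"Too short: {words}/{min_words} words"
```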
1061 | # Create autonomous writing agent
1062 | agent_graph = create_agent(
1063 | self.llm,
1064 | self.tools,
1065 | system_prompt=system_prompt
1066 | )
1067 |
1068 | try:
1069 | start_time = time.time()
1070 |
1071 | # Prepare input message with source context
1072 | sources_context = ""
1073 | if search_results:
1074 | sources_context = "\n\nAvailable Sources for Citation:\n" + "\n".join(
1075 | f"[{i+1}] {r.title} ({r.url})"
1076 | for i, r in enumerate(search_results[:15]) # Top 15 sources
1077 | )
1078 |
1079 | input_message = f"""## Assignment: Write Report Section
1080 |
1081 | **Research Topic**: {topic}
1082 | **Section Title**: {section_title}
1083 | **Minimum Word Count**: {config.min_section_words} words
1084 |
1085 | ---
1086 |
1087 | ### Key Findings to Incorporate:
1088 | {chr(10).join(f"- {f}" for f in findings)}
1089 |
1090 | {sources_context}
1091 |
1092 | ---
1093 |
1094 | ### Instructions:
1095 | 1. Write a comprehensive section that covers the topic "{section_title}" thoroughly
1096 | 2. Incorporate the key findings above, adding context and explanation
1097 | 3. Use inline citations [1], [2], etc. when referencing specific facts from sources
1098 | 4. Maintain academic rigor while being accessible to general readers
1099 | 5. Use markdown formatting for structure (bold, lists, subheadings as needed)
1100 | 6. After writing, use validate_section_quality to ensure minimum word count is met
1101 |
1102 | Write the section content now:"""
1103 |
1104 | # Estimate input tokens
1105 | input_tokens = estimate_tokens(input_message)
1106 |
1107 | # Execute autonomous section writing
1108 | result = await agent_graph.ainvoke({
1109 | "messages": [{"role": "user", "content": input_message}]
1110 | })
1111 |
1112 | # Extract content from result
1113 | messages = result.get('messages', [])
1114 | content = ""
1115 | if messages:
1116 | last_msg = messages[-1]
1117 | # Handle different content formats
1118 | if hasattr(last_msg, 'content'):
1119 | msg_content = last_msg.content
1120 | # If content is a list (like from tool responses), extract text
1121 | if isinstance(msg_content, list):
1122 | content = ""
1123 | for item in msg_content:
1124 | if isinstance(item, dict) and 'text' in item:
1125 | content += item['text']
1126 | elif isinstance(item, dict) and 'type' in item and item['type'] == 'text':
1127 | content += item.get('text', '')
1128 | else:
1129 | content += str(item)
1130 | else:
1131 | content = str(msg_content)
1132 | else:
1133 | content = str(last_msg)
1134 |
1135 | # Track LLM call
1136 | duration = time.time() - start_time
1137 | output_tokens = estimate_tokens(content)
1138 | call_detail = {
1139 | 'agent': 'ReportWriter',
1140 | 'operation': f'write_section_{section_title[:30]}',
1141 | 'model': config.model_name,
1142 | 'input_tokens': input_tokens,
1143 | 'output_tokens': output_tokens,
1144 | 'duration': round(duration, 2)
1145 | }
1146 |
1147 | # Extract cited sources
1148 | import re
1149 | citations = re.findall(r'\[(\d+)\]', content)
1150 | source_urls = []
1151 | for cite_num in set(citations):
1152 | idx = int(cite_num) - 1
1153 | if 0 <= idx < len(search_results):
1154 | source_urls.append(search_results[idx].url)
1155 |
1156 | section = ReportSection(
1157 | title=section_title,
1158 | content=content,
1159 | sources=source_urls
1160 | )
1161 |
1162 | return section, call_detail
1163 |
1164 | except Exception as e:
1165 | logger.error(f"Error writing section '{section_title}': {str(e)}")
1166 | return None, None
1167 |
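`ReportSection` is imported from the project's state module rather than defined here. From its usage in this file (keyword construction plus `.title`, `.content`, and `.sources` access), its shape is roughly a Pydantic model along these lines (an inferred sketch, not the actual definition):

```python
from pydantic import BaseModel, Field

class ReportSection(BaseModel):
    title: str
    content: str
    sources: list[str] = Field(default_factory=list)
```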
1168 | def _compile_report(self, state: ResearchState) -> str:
1169 | """Compile all sections into final report."""
1170 | # Count actual sources from search results
1171 | search_results = getattr(state, 'search_results', []) or []
1172 | report_sections = getattr(state, 'report_sections', []) or []
1173 |
1174 | # Get unique URLs from search results
1175 | unique_sources = set()
1176 | for result in search_results:
1177 | if hasattr(result, 'url') and result.url:
1178 | unique_sources.add(result.url)
1179 |
1180 | # Also collect from report sections if they have sources
1181 | for section in report_sections:
1182 | if hasattr(section, 'sources'):
1183 | unique_sources.update(section.sources)
1184 |
1185 | source_count = len(unique_sources) if unique_sources else len(search_results)
1186 |
1187 |         report_parts = [
1188 |             f"# {state.research_topic}\n",
1189 |             "**Deep Research Report**\n",
1190 |             "\n## Executive Summary\n",
1191 |             f"This report provides a comprehensive analysis of {state.research_topic}. ",
1192 |             f"The research was conducted across **{source_count} sources** ",
1193 |             f"and synthesized into **{len(report_sections)} key sections**.\n",
1194 |             "\n## Research Objectives\n"
1195 |         ]
1196 |
1197 | if state.plan and hasattr(state.plan, 'objectives'):
1198 | for i, obj in enumerate(state.plan.objectives, 1):
1199 | report_parts.append(f"{i}. {obj}\n")
1200 |
1201 | report_parts.append("\n---\n")
1202 |
1203 | # Add all sections
1204 | has_references_section = False
1205 | for section in report_sections:
1206 | # Check if content already starts with the title as a heading
1207 | content = section.content.strip()
1208 |
1209 | # Check if this section contains References
1210 | if "## References" in content or section.title.lower() == "references":
1211 | has_references_section = True
1212 |
1213 | if content.startswith(f"## {section.title}"):
1214 | # Content already has heading, use as-is
1215 | report_parts.append(f"\n{content}\n\n")
1216 | else:
1217 | # Add heading before content
1218 | report_parts.append(f"\n## {section.title}\n\n")
1219 | report_parts.append(content)
1220 | report_parts.append("\n")
1221 |
1222 | # Only add references if not already present in sections
1223 | if not has_references_section:
1224 | # Add references from search results
1225 | report_parts.append("\n---\n\n## References\n\n")
1226 |
1227 | # Build a list of (url, title) tuples from search results
1228 | source_info = []
1229 | seen_urls = set()
1230 |
1231 | for result in search_results:
1232 | if hasattr(result, 'url') and result.url and result.url not in seen_urls:
1233 | seen_urls.add(result.url)
1234 | title = getattr(result, 'title', '')
1235 | source_info.append((result.url, title))
1236 |
1237 | # Add sources from sections if available (if not already included)
1238 | for section in report_sections:
1239 | if hasattr(section, 'sources'):
1240 | for url in section.sources:
1241 | if url not in seen_urls:
1242 | seen_urls.add(url)
1243 | source_info.append((url, ''))
1244 |
1245 |             # Append the formatted reference list (still inside the branch
1246 |             # above, so this runs only when no References section exists yet)
1247 | if source_info:
1248 | from src.utils.citations import CitationFormatter
1249 | formatter = CitationFormatter()
1250 | for i, (url, title) in enumerate(source_info[:30], 1): # Top 30 sources
1251 | # Format citation in APA style
1252 | citation = formatter.format_apa(url, title)
1253 | report_parts.append(f"{i}. {citation}\n")
1254 | else:
1255 | report_parts.append("*No sources were available for this research.*\n")
1256 |
1257 | return "".join(report_parts)
1258 |
1259 |
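`_compile_report` deduplicates sources with a `seen_urls` set while preserving discovery order, so the numbering stays stable between the inline citations and the reference list. The same pattern in isolation (duck-typed inputs, mirroring the attribute checks above):

```python
def dedup_sources(search_results, report_sections) -> list[tuple[str, str]]:
    """Order-preserving (url, title) de-duplication, as in _compile_report."""
    seen, ordered = set(), []
    for r in search_results:
        url = getattr(r, "url", "")
        if url and url not in seen:
            seen.add(url)
            ordered.append((url, getattr(r, "title", "")))
    for s in report_sections:
        for url in getattr(s, "sources", []) or []:
            if url not in seen:
                seen.add(url)
                ordered.append((url, ""))
    return ordered
```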
--------------------------------------------------------------------------------