├── src └── deep_research │ ├── core │ ├── exception.py │ ├── __init__.py │ ├── config.py │ ├── research.py │ └── report.py │ ├── cli │ ├── __init__.py │ └── main.py │ ├── utils │ ├── __init__.py │ ├── log_util.py │ └── research_helper.py │ └── services │ ├── __init__.py │ ├── search_template.py │ ├── search_service.py │ ├── persistence_service.py │ └── ai_service.py ├── output └── readme.md ├── demo └── China's Credit Card Consumer Market: Comprehensive Analysis Report (2015-2030).pdf ├── setup.py ├── requirements.txt ├── .gitignore ├── LICENSE ├── .env.example └── README.md /src/deep_research/core/exception.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /output/readme.md: -------------------------------------------------------------------------------- 1 | # All data will be generated to this output folder. -------------------------------------------------------------------------------- /src/deep_research/cli/__init__.py: -------------------------------------------------------------------------------- 1 | """Deep Research CLI - A tool for conducting deep research""" 2 | 3 | __version__ = "0.1.0" -------------------------------------------------------------------------------- /src/deep_research/core/__init__.py: -------------------------------------------------------------------------------- 1 | """Deep Research CLI - A tool for conducting deep research""" 2 | 3 | __version__ = "0.1.0" -------------------------------------------------------------------------------- /src/deep_research/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Deep Research CLI - A tool for conducting deep research""" 2 | 3 | __version__ = "0.1.0" -------------------------------------------------------------------------------- /src/deep_research/services/__init__.py: -------------------------------------------------------------------------------- 1 | """Deep Research CLI - A tool for conducting deep research""" 2 | 3 | __version__ = "0.1.0" -------------------------------------------------------------------------------- /demo/China's Credit Card Consumer Market: Comprehensive Analysis Report (2015-2030).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iamshaynez/deep-research-cli/HEAD/demo/China's Credit Card Consumer Market: Comprehensive Analysis Report (2015-2030).pdf -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="deep-research-cli", 5 | version="0.1.0", 6 | packages=find_packages(where="src"), 7 | package_dir={"":"src"}, 8 | install_requires=[ 9 | # Add your dependencies here 10 | ], 11 | ) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Core dependencies 2 | click>=8.0.0 3 | rich>=10.0.0 4 | python-dotenv>=1.0.0 5 | 6 | # AI and LLM Integration 7 | openai>=1.0.0 8 | tavily-python>=0.2.0 9 | 10 | # Utilities 11 | requests>=2.31.0 12 | pydantic>=2.0.0 13 | python-dateutil>=2.8.2 14 | tqdm>=4.66.0 15 | json_repair>=0.35.0 16 | 17 | # Logging and Error Handling 18 | loguru>=0.7.0 19 | 20 | 
-------------------------------------------------------------------------------- /.gitignore: --------------------------------------------------------------------------------
1 | output/
2 | 
3 | src/deep_research/cli/exec.py
4 | # Byte-compiled / optimized / DLL files
5 | __pycache__/
6 | *.py[cod]
7 | *$py.class
8 | 
9 | # Distribution / packaging
10 | dist/
11 | build/
12 | *.egg-info/
13 | 
14 | # Virtual environments
15 | venv/
16 | env/
17 | .env/
18 | .venv/
19 | 
20 | # IDE specific files
21 | .idea/
22 | .vscode/
23 | *.swp
24 | *.swo
25 | 
26 | # Testing
27 | .coverage
28 | htmlcov/
29 | .pytest_cache/
30 | .tox/
31 | 
32 | # Jupyter Notebook
33 | .ipynb_checkpoints
34 | 
35 | # Environment variables
36 | .env
37 | .env.local
38 | 
39 | # macOS specific
40 | .DS_Store
-------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2025 Xiaowen.Z
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
-------------------------------------------------------------------------------- /.env.example: --------------------------------------------------------------------------------
1 | # Mandatory Variables
2 | # 1. LLM model settings. OpenRouter is recommended since it integrates multiple model providers.
3 | OPENAI_KEY=
4 | OPENAI_BASE=https://openrouter.ai/api/v1
5 | 
6 | # 2. Tavily API Token
7 | TAVILY_API_KEY=
8 | 
9 | # Optional Variables
10 | 
11 | # a. LLM model choice; refer to config.py for the default settings.
12 | # SMART_MODEL = "deepseek/deepseek-r1"
13 | # NORMAL_MODEL = "deepseek/deepseek-r1-distill-llama-70b"
14 | # LONG_MODEL = "google/gemini-2.0-flash-001"
15 | # REPORT_MODEL = "google/gemini-2.0-pro-exp-02-05:free"
16 | 
17 | # b. Log level in the console, default INFO
18 | # DEEP_RESEARCH_LOG_LEVEL=INFO
19 | 
20 | # c. Language used to create reports. Default = Chinese. Injected into prompts, so use plain English words.
21 | # REPORT_LANG=Chinese
22 | 
23 | # d. RESEARCH_PLAN_PROMPT, an additional prompt injected into the research plan creation process
24 | # RESEARCH_PLAN_PROMPT=
25 | 
26 | # e. REPORT_PROMPT, an additional prompt injected into the report creation process to influence the report content
27 | # REPORT_PROMPT=
28 | 
29 | # f. 
NUMBER_OF_QUERIES_IN_CATEGORY, number of queries in each category, default 3 30 | # NUMBER_OF_QUERIES_IN_CATEGORY= -------------------------------------------------------------------------------- /src/deep_research/core/config.py: -------------------------------------------------------------------------------- 1 | """Configuration module for deep-research-cli""" 2 | 3 | import os 4 | from typing import Optional 5 | from dotenv import load_dotenv 6 | 7 | # Load environment variables 8 | load_dotenv() 9 | 10 | class Config: 11 | """Configuration class for model settings""" 12 | SMART_MODEL = os.getenv('SMART_MODEL', "deepseek/deepseek-r1") 13 | NORMAL_MODEL = os.getenv('NORMAL_MODEL', "deepseek/deepseek-r1-distill-llama-70b") 14 | LONG_MODEL = os.getenv('LONG_MODEL', "google/gemini-2.0-flash-001") 15 | REPORT_MODEL = os.getenv('REPORT_MODEL', "google/gemini-2.0-pro-exp-02-05:free") 16 | 17 | REPORT_LANG = os.getenv('REPORT_LANG', "Chinese") 18 | 19 | class LLMConfig: 20 | """Configuration class for LLM models""" 21 | def __init__(self): 22 | self.api_key = os.getenv('OPENAI_KEY') 23 | self.api_base = os.getenv('OPENAI_BASE') 24 | self.smart_model = os.getenv('SMART_MODEL', Config.SMART_MODEL) 25 | self.normal_model = os.getenv('NORMAL_MODEL', Config.NORMAL_MODEL) 26 | self.long_model = os.getenv('LONG_MODEL', Config.LONG_MODEL) 27 | 28 | if not self.api_key: 29 | raise ValueError('OPENAI_KEY environment variable is not set') 30 | 31 | class SearchConfig: 32 | """Configuration class for Tavily Search API""" 33 | def __init__(self): 34 | self.api_key = os.getenv('TAVILY_API_KEY') 35 | if not self.api_key: 36 | raise ValueError('TAVILY_API_KEY environment variable is not set') -------------------------------------------------------------------------------- /src/deep_research/services/search_template.py: -------------------------------------------------------------------------------- 1 | """Module for managing search parameter templates""" 2 | 3 | from typing import Dict, Any 4 | 5 | class SearchTemplate: 6 | """Class for managing search parameter templates with predefined configurations""" 7 | 8 | # Predefined search templates 9 | TEMPLATES = { 10 | "basic": { 11 | "search_depth": "basic", 12 | "include_raw_content": True, 13 | "max_results": 5 14 | }, 15 | "advanced": { 16 | "search_depth": "advanced", 17 | "max_results": 10, 18 | "include_domains": [], 19 | "exclude_domains": [], 20 | "include_raw_content": True, 21 | }, 22 | "news": { 23 | "search_depth": "advanced", 24 | "max_results": 8, 25 | "include_raw_content": True, 26 | "topic": "news" 27 | }, 28 | "academic": { 29 | "search_depth": "advanced", 30 | "max_results": 15, 31 | "include_raw_content": True, 32 | "include_domains": [".edu", ".org", "scholar.google.com"], 33 | "topic": "general" 34 | } 35 | } 36 | 37 | def load_template(self, name: str) -> Dict[str, Any]: 38 | """Load a predefined search parameter template 39 | 40 | Args: 41 | name: Name of the template to load 42 | 43 | Returns: 44 | Dictionary containing the template parameters or error message 45 | """ 46 | if name in self.TEMPLATES: 47 | return {"success": True, "data": self.TEMPLATES[name].copy()} 48 | return {"success": False, "error": f"Template not found: {name}"} 49 | 50 | def apply_template(self, name: str, **override_params: Any) -> Dict[str, Any]: 51 | """Load a template and optionally override specific parameters 52 | 53 | Args: 54 | name: Name of the template to load 55 | **override_params: Optional parameters to override in the template 56 | 57 | Returns: 58 | 
Dictionary containing the final parameters or error message
59 |         """
60 |         result = self.load_template(name)
61 |         if result["success"]:
62 |             params = result["data"]
63 |             params.update(override_params)
64 |             return {"success": True, "data": params}
65 |         return result
-------------------------------------------------------------------------------- /src/deep_research/cli/main.py: --------------------------------------------------------------------------------
1 | import argparse
2 | from deep_research.core.research import Research
3 | from deep_research.core.report import Report
4 | from deep_research.utils.log_util import LogUtil
5 | from dotenv import load_dotenv
6 | import traceback
7 | 
8 | # Load environment variables
9 | load_dotenv()
10 | 
11 | def main():
12 |     logger = LogUtil().logger
13 |     logger.info("Starting Deep Research CLI")
14 |     parser = argparse.ArgumentParser(
15 |         description="Deep Research CLI - A tool for conducting deep research",
16 |         formatter_class=argparse.RawDescriptionHelpFormatter,
17 |         epilog=\
18 | """
19 | Examples:
20 |   # Create a new research project from a topic
21 |   python -m deep_research.cli.main --topic "Impact of AI on healthcare in 2024"
22 | 
23 |   # Load an existing research project and regenerate its report
24 |   python -m deep_research.cli.main --research-id RS_20240214_123456
25 | """
26 |     )
27 | 
28 |     group = parser.add_mutually_exclusive_group(required=True)
29 |     group.add_argument('--topic', '-t', help='Research topic in any language')
30 |     group.add_argument('--research-id', '-r', help='ID of existing research to load')
31 |     parser.add_argument('--report-method', '-p', default='generate_research_report_detailed',
32 |                         choices=['generate_research_report', 'generate_research_report_detailed', 'generate_wechat_article'],
33 |                         help='Report generation method to use (default: generate_research_report_detailed)')
34 | 
35 |     args = parser.parse_args()
36 | 
37 |     try:
38 |         if args.topic:
39 |             logger.info(f"Creating new research with topic: {args.topic}")
40 |             research = Research(topic=args.topic)
41 |             logger.info(f"Created new research with ID: {research.id}")
42 |             logger.info("Starting research execution")
43 |             research.execute()
44 |             logger.info(f"Completed research execution.
All data saved to output directory [/output/{research.id}]") 45 | else: 46 | logger.info(f"Loading existing research with ID: {args.research_id}") 47 | research = Research(research_id=args.research_id) 48 | logger.info(f"Loaded existing research with ID: {research.id}") 49 | 50 | # Create report instance and generate report 51 | logger.info(f"Generating report using method: {args.report_method}") 52 | report = Report(research.id) 53 | report_method = getattr(report, args.report_method) 54 | report_content = report_method() 55 | logger.info(f"Successfully generated report using {args.report_method}") 56 | 57 | except Exception as e: 58 | logger.error(f"Error during research execution: {str(e)}") 59 | logger.error("Full error stack trace:") 60 | logger.error(traceback.format_exc()) 61 | exit(1) 62 | 63 | if __name__ == "__main__": 64 | main() -------------------------------------------------------------------------------- /src/deep_research/utils/log_util.py: -------------------------------------------------------------------------------- 1 | """Module for handling logging operations""" 2 | 3 | import logging 4 | import os 5 | from datetime import datetime 6 | from typing import Optional, Union 7 | from rich.logging import RichHandler 8 | from rich.console import Console 9 | from rich.theme import Theme 10 | 11 | # Environment variable name for log level 12 | LOG_LEVEL_ENV = "DEEP_RESEARCH_LOG_LEVEL" 13 | 14 | class LogUtil: 15 | """Class for managing logging operations""" 16 | 17 | _instance = None 18 | _initialized = False 19 | _console = None 20 | 21 | def __new__(cls): 22 | """Singleton pattern implementation""" 23 | if cls._instance is None: 24 | cls._instance = super(LogUtil, cls).__new__(cls) 25 | return cls._instance 26 | 27 | def __init__(self): 28 | """Initialize logging configuration""" 29 | if not LogUtil._initialized: 30 | # Initialize Rich console with custom theme 31 | self._console = Console(theme=Theme({ 32 | "info": "cyan", 33 | "warning": "yellow", 34 | "error": "red", 35 | "critical": "red bold", 36 | "debug": "dim cyan" 37 | })) 38 | 39 | # Get log level from environment variable or default to INFO 40 | log_level_str = os.getenv(LOG_LEVEL_ENV, 'INFO').upper() 41 | try: 42 | log_level = getattr(logging, log_level_str) 43 | except AttributeError: 44 | log_level = logging.INFO 45 | print(f"Invalid log level {log_level_str}, defaulting to INFO") 46 | 47 | self.logger = logging.getLogger('deep_research') 48 | self.logger.setLevel(log_level) 49 | LogUtil._initialized = True 50 | self._setup_logging() 51 | 52 | def _setup_logging(self): 53 | """Setup logging configuration with Rich handler""" 54 | # Create Rich handler with custom format 55 | rich_handler = RichHandler( 56 | console=self._console, 57 | rich_tracebacks=True, 58 | tracebacks_show_locals=True, 59 | show_time=True, 60 | show_path=True, 61 | enable_link_path=True 62 | ) 63 | rich_handler.setLevel(logging.DEBUG) 64 | 65 | # Set format to include minimal timestamp since Rich handler adds its own 66 | rich_handler.setFormatter(logging.Formatter('%(message)s', datefmt='[%X]')) 67 | 68 | # Remove any existing handlers and add Rich handler 69 | self.logger.handlers = [] 70 | self.logger.addHandler(rich_handler) 71 | 72 | def debug(self, message: str, *args, **kwargs): 73 | """Log debug message with Rich formatting 74 | 75 | Args: 76 | message: The message to log 77 | *args: Variable length argument list 78 | **kwargs: Arbitrary keyword arguments 79 | """ 80 | self.logger.debug(message, *args, **kwargs) 81 | 82 | def 
info(self, message: str, *args, **kwargs): 83 | """Log info message with Rich formatting 84 | 85 | Args: 86 | message: The message to log 87 | *args: Variable length argument list 88 | **kwargs: Arbitrary keyword arguments 89 | """ 90 | self.logger.info(message, *args, **kwargs) 91 | 92 | def warning(self, message: str, *args, **kwargs): 93 | """Log warning message with Rich formatting 94 | 95 | Args: 96 | message: The message to log 97 | *args: Variable length argument list 98 | **kwargs: Arbitrary keyword arguments 99 | """ 100 | self.logger.warning(message, *args, **kwargs) 101 | 102 | def error(self, message: str, *args, **kwargs): 103 | """Log error message with Rich formatting 104 | 105 | Args: 106 | message: The message to log 107 | *args: Variable length argument list 108 | **kwargs: Arbitrary keyword arguments 109 | """ 110 | self.logger.error(message, *args, **kwargs) 111 | 112 | def critical(self, message: str, *args, **kwargs): 113 | """Log critical message with Rich formatting 114 | 115 | Args: 116 | message: The message to log 117 | *args: Variable length argument list 118 | **kwargs: Arbitrary keyword arguments 119 | """ 120 | self.logger.critical(message, *args, **kwargs) 121 | 122 | @staticmethod 123 | def get_logger() -> 'LogUtil': 124 | """Get singleton instance of LogUtil 125 | 126 | Returns: 127 | LogUtil instance 128 | """ 129 | return LogUtil() 130 | 131 | if __name__ == "__main__": 132 | # Create logger instance 133 | logger = LogUtil.get_logger() 134 | 135 | # Example usage of different log levels 136 | logger.debug("This is a debug message") 137 | logger.info("This is an info message") 138 | logger.warning("This is a warning message") 139 | logger.error("This is an error message") 140 | logger.critical("This is a critical message") -------------------------------------------------------------------------------- /src/deep_research/services/search_service.py: -------------------------------------------------------------------------------- 1 | """Utility module for Tavily search API interactions 2 | 3 | This module provides a client interface for interacting with the Tavily Search API, 4 | with support for templated searches and configurable parameters. 5 | """ 6 | 7 | from typing import Optional, Dict, Any, List 8 | from deep_research.core.config import SearchConfig 9 | from deep_research.services.search_template import SearchTemplate 10 | from deep_research.utils.log_util import LogUtil 11 | 12 | class SearchClient: 13 | """Client for interacting with Tavily Search API 14 | 15 | This class provides methods to perform searches using the Tavily API, 16 | with support for template-based parameter configurations and error handling. 
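    A minimal usage sketch (illustrative, not part of the original source;
    it assumes TAVILY_API_KEY is configured and uses the predefined "news"
    template from SearchTemplate.TEMPLATES):

        client = SearchClient()
        results = client.search_with_template(
            query="latest developments in solid-state batteries",
            template_name="news",
            max_results=3,  # override the template's default of 8
        )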
17 | """ 18 | def __init__(self, config: Optional[SearchConfig] = None, template_dir: Optional[str] = None): 19 | """Initialize the SearchClient with configuration and templates 20 | 21 | Args: 22 | config: Optional SearchConfig instance for API configuration 23 | template_dir: Optional directory path for custom templates 24 | 25 | Raises: 26 | ImportError: If tavily-python package is not installed 27 | ValueError: If TAVILY_API_KEY is not set 28 | """ 29 | # Initialize logger 30 | self.logger = LogUtil.get_logger() 31 | self.logger.debug("Initializing SearchClient") 32 | 33 | # Set up configuration and template 34 | self.config = config or SearchConfig() 35 | self.template = SearchTemplate() 36 | 37 | try: 38 | from tavily import TavilyClient 39 | self.client = TavilyClient(api_key=self.config.api_key) 40 | self.logger.info("Successfully initialized Tavily client") 41 | except ImportError: 42 | self.logger.error("Failed to import tavily package") 43 | raise ImportError( 44 | 'Tavily package is not installed. ' 45 | 'Please install it with: pip install tavily-python' 46 | ) 47 | 48 | def search_with_template( 49 | self, 50 | query: str, 51 | template_name: str, 52 | **override_params: Any 53 | ) -> Dict[str, Any]: 54 | """Perform a search using a parameter template 55 | 56 | Args: 57 | query: The search query string 58 | template_name: Name of the template to use 59 | **override_params: Parameters to override from the template 60 | 61 | Returns: 62 | The search results as a dictionary 63 | """ 64 | self.logger.debug(f"Applying template '{template_name}' for query: {query}") 65 | 66 | # Apply template and get parameters 67 | template_result = self.template.apply_template(template_name, **override_params) 68 | if not template_result["success"]: 69 | error_msg = f"Template error: {template_result.get('error', 'Unknown error')}" 70 | self.logger.error(error_msg) 71 | return {"error": error_msg} 72 | 73 | # Prepare search parameters 74 | params = template_result["data"] 75 | params["query"] = query # Ensure query is included in params 76 | self.logger.debug(f"Search parameters prepared: {params}") 77 | 78 | return self.search(**params) 79 | 80 | def search( 81 | self, 82 | query: str, 83 | search_depth: str = "basic", 84 | max_results: int = 5, 85 | **kwargs: Any 86 | ) -> Dict[str, Any]: 87 | """Perform a search using the Tavily API 88 | 89 | Args: 90 | query: The search query string 91 | search_depth: The depth of search ('basic' or 'advanced') 92 | max_results: Maximum number of results to return 93 | **kwargs: Additional parameters to pass to the API 94 | 95 | Returns: 96 | The search results as a dictionary 97 | """ 98 | self.logger.info(f"Executing search query: {query} with depth: {search_depth}") 99 | 100 | try: 101 | # Execute search request 102 | response = self.client.search( 103 | query=query, 104 | search_depth=search_depth, 105 | max_results=max_results, 106 | **kwargs 107 | ) 108 | self.logger.debug(f"Search completed successfully with {len(response.get('results', []))} results") 109 | return response 110 | except Exception as e: 111 | error_msg = str(e) 112 | self.logger.error(f"Search failed: {error_msg}") 113 | return {"error": error_msg} 114 | 115 | 116 | if __name__ == '__main__': 117 | # Example usage of SearchClient 118 | logger = LogUtil.get_logger() 119 | try: 120 | logger.info("Starting example search") 121 | client = SearchClient() 122 | response = client.search_with_template( 123 | query="What is DeepSeek R1", 124 | template_name="advanced" 125 | ) 126 | 127 | 
print(response)
128 |         logger.info("Example search completed")
129 | 
130 |     except Exception as e:
131 |         logger.error(f"Example failed: {str(e)}")
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Deep Research CLI
2 | 
3 | A command-line Python tool for conducting comprehensive research on any topic using AI and advanced search capabilities. This tool automates the research process by generating structured research plans, executing targeted searches, and producing detailed reports in multiple formats, similar to 'ChatGPT Deep Research'.
4 | 
5 | > !!!!! THIS PROJECT IS AT A VERY EARLY STAGE OF DEVELOPMENT. NOTHING IS GUARANTEED TO WORK. :)
6 | 
7 | ## Features
8 | 
9 | ![](https://cdn.sa.net/2025/02/09/3UPtxEc6eDK4RvA.png)
10 | 
11 | - **Multi-language Support**: Research topics can be input in any language
12 | - **Automated Research Planning**: Generates comprehensive research plans with categorized queries
13 | - **Advanced Search Integration**: Utilizes the Tavily Search API for high-quality search results
14 | - **Multiple Report Formats**:
15 |   - Detailed Research Reports
16 |   - Concise Research Summaries
17 |   - WeChat Article Format
18 | - **Flexible Model Selection**: Supports various AI models for report generation
19 | - **Structured Output**: Organizes research results and reports in a clear directory structure
20 | 
21 | ## Installation
22 | 
23 | 1. Clone the repository:
24 | ```bash
25 | git clone https://github.com/yourusername/deep-research-cli.git
26 | cd deep-research-cli
27 | ```
28 | 
29 | 2. Install dependencies:
30 | ```bash
31 | pip install -r requirements.txt
32 | ```
33 | 
34 | 3. Install the package in editable mode:
35 | ```bash
36 | pip install -e .
37 | ```
38 | 
39 | 4. Configure environment variables:
40 | ```bash
41 | cp .env.example .env
42 | ```
43 | Edit the `.env` file and add your API keys:
44 | ```
45 | OPENAI_KEY=your_openai_api_key
46 | OPENAI_BASE=your_openai_api_base_url
47 | TAVILY_API_KEY=your_tavily_api_key
48 | ```
49 | 
50 | ## Usage
51 | 
52 | ### Starting New Research
53 | 
54 | ```bash
55 | # Create a new research project
56 | python -m deep_research.cli.main --topic "Impact of AI on healthcare in 2024"
57 | ```
58 | 
59 | ### Generating Reports from Existing Research
60 | 
61 | ```bash
62 | # Generate a detailed report from existing research
63 | python -m deep_research.cli.main --research-id RS_20240214_123456 --report-method generate_research_report_detailed
64 | 
65 | # Generate a WeChat article
66 | python -m deep_research.cli.main --research-id RS_20240214_123456 --report-method generate_wechat_article
67 | ```
68 | 
69 | ### Command Line Arguments
70 | 
71 | - `--topic, -t`: Research topic in any language
72 | - `--research-id, -r`: ID of existing research to load
73 | - `--report-method, -p`: Report generation method to use
74 |   (default: `generate_research_report_detailed`)
75 |   - `generate_research_report`: Basic research report
76 |   - `generate_research_report_detailed`: Detailed research report
77 |   - `generate_wechat_article`: WeChat article format
78 | 
79 | ## Research Process
80 | 
81 | 1. **Topic Analysis**: Translates and analyzes the research topic
82 | 2. **Plan Generation**: Creates a structured research plan with categories and queries
83 | 3. **Data Collection**: Executes searches for each query and category
84 | 4. **Report Generation**: Processes collected data into comprehensive reports
85 | 5. 
**Reference Management**: Generates reference links for all sources
86 | 
87 | ## Output Structure
88 | 
89 | All progress data is saved to the `output` folder, including search queries, search results, and generated reports.
90 | 
91 | ```
92 | output/
93 | └── RS_[DATE]_[TIME]/
94 |     ├── RS_[DATE]_[TIME]_meta.json          # Research metadata
95 |     ├── [CATEGORY]/                         # Category-specific results
96 |     │   └── [QUERY].json                    # Search results
97 |     ├── [CATEGORY]_report.json              # Category reports
98 |     ├── RS_[DATE]_[TIME]_reference.md       # Reference links
99 |     └── RS_[DATE]_[TIME]_[MODEL]_[TYPE].md  # Generated reports
100 | ```
101 | 
102 | ## Configuration (.env.example)
103 | 
104 | ```
105 | # Mandatory Variables
106 | # 1. LLM model settings. OpenRouter is recommended since it integrates multiple model providers.
107 | OPENAI_KEY=
108 | OPENAI_BASE=https://openrouter.ai/api/v1
109 | 
110 | # 2. Tavily API Token
111 | TAVILY_API_KEY=
112 | 
113 | # Optional Variables
114 | 
115 | # a. LLM model choice; refer to config.py for the default settings.
116 | # SMART_MODEL = "deepseek/deepseek-r1"
117 | # NORMAL_MODEL = "deepseek/deepseek-r1-distill-llama-70b"
118 | # LONG_MODEL = "google/gemini-2.0-flash-001"
119 | # REPORT_MODEL = "google/gemini-2.0-pro-exp-02-05:free"
120 | 
121 | # b. Log level in the console, default INFO
122 | # DEEP_RESEARCH_LOG_LEVEL=INFO
123 | 
124 | # c. Language used to create reports. Default = Chinese. Injected into prompts, so use plain English words.
125 | # REPORT_LANG=Chinese
126 | 
127 | # d. RESEARCH_PLAN_PROMPT, an additional prompt injected into the research plan creation process
128 | # RESEARCH_PLAN_PROMPT=
129 | 
130 | # e. REPORT_PROMPT, an additional prompt injected into the report creation process to influence the report content
131 | # REPORT_PROMPT=
132 | 
133 | # f. NUMBER_OF_QUERIES_IN_CATEGORY, number of queries in each category, default 3
134 | # NUMBER_OF_QUERIES_IN_CATEGORY=3
135 | 
136 | ```
137 | 
138 | ## Research Cost Estimation
139 | 
140 | > !!!!!! A LOT OF ROOM FOR OPTIMIZATION
141 | 
142 | - LLM cost: ~$0.1 / research (with the default models; you can always switch to free models)
143 | - Tavily Search: 100 credits / research = $0.8 (Tavily provides 1,000 free credits per month)
144 | 
145 | ## License
146 | 
147 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
-------------------------------------------------------------------------------- /src/deep_research/services/persistence_service.py: --------------------------------------------------------------------------------
1 | """Utility module for JSON data persistence operations
2 | 
3 | This module provides a client interface for handling JSON data persistence operations,
4 | including saving and loading JSON files, managing research metadata, and handling
5 | search results. It implements proper error handling and directory management.
6 | 
7 | Typical usage example:
8 |     client = PersistenceClient()
9 |     client.save_json({"key": "value"}, "data.json")
10 |     data = client.load_json("data.json")
11 | """
12 | 
13 | import json
14 | import os
15 | from typing import Any, Dict, Optional, List
16 | from ..utils.log_util import LogUtil
17 | 
18 | class PersistenceClient:
19 |     """Client for handling JSON data persistence operations
20 | 
21 |     This class provides methods to save and load JSON data, manage research metadata,
22 |     and handle search results. It includes proper error handling, directory creation,
23 |     and path sanitization.
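    A short sketch of the error-handling contract (illustrative; the file
    path is hypothetical):

        client = PersistenceClient()
        result = client.save_json({"key": "value"}, "demo/data.json")
        if not result["success"]:
            print(result["error"])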
24 | 25 | Attributes: 26 | base_dir: The base directory for all file operations 27 | logger: Logger instance for tracking operations 28 | """ 29 | 30 | def __init__(self, base_dir: Optional[str] = None): 31 | """Initialize the persistence client 32 | 33 | Args: 34 | base_dir: Optional base directory for storing files. If not provided, 35 | uses the current working directory 36 | """ 37 | self.base_dir = base_dir or os.getcwd() 38 | self.logger = LogUtil.get_logger() # Initialize logger 39 | self.logger.info(f"Initialized PersistenceClient with base directory: {self.base_dir}") 40 | 41 | def save_json(self, data: Any, file_path: str) -> Dict[str, Any]: 42 | """Save data to a JSON file 43 | 44 | This method ensures the target directory exists before writing and handles 45 | any potential errors during the save operation. 46 | 47 | Args: 48 | data: The data to save (must be JSON serializable) 49 | file_path: Path to the file where data will be saved 50 | 51 | Returns: 52 | A dictionary indicating success or error 53 | """ 54 | full_path = os.path.join(self.base_dir, file_path) 55 | self.logger.debug(f"Attempting to save JSON data to: {full_path}") 56 | 57 | try: 58 | # Ensure the directory exists 59 | os.makedirs(os.path.dirname(full_path), exist_ok=True) 60 | self.logger.debug(f"Ensured directory exists: {os.path.dirname(full_path)}") 61 | 62 | # Write data to file 63 | with open(full_path, 'w', encoding='utf-8') as f: 64 | json.dump(data, f, ensure_ascii=False, indent=2) 65 | 66 | self.logger.info(f"Successfully saved JSON data to: {file_path}") 67 | return {"success": True, "message": f"Data successfully saved to {file_path}"} 68 | except Exception as e: 69 | self.logger.error(f"Failed to save JSON data to {file_path}: {str(e)}") 70 | return {"success": False, "error": str(e)} 71 | 72 | 73 | def load_json(self, file_path: str) -> Dict[str, Any]: 74 | """Load data from a JSON file 75 | 76 | This method attempts to read and parse a JSON file, handling any potential 77 | file access or JSON parsing errors. 78 | 79 | Args: 80 | file_path: Path to the file to load data from 81 | 82 | Returns: 83 | A dictionary containing the loaded data 84 | 85 | Raises: 86 | ValueError: If the file cannot be read or parsed 87 | """ 88 | full_path = os.path.join(self.base_dir, file_path) 89 | self.logger.debug(f"Attempting to load JSON data from: {full_path}") 90 | 91 | try: 92 | with open(full_path, 'r', encoding='utf-8') as f: 93 | data = json.load(f) 94 | self.logger.info(f"Successfully loaded JSON data from: {file_path}") 95 | return data 96 | except FileNotFoundError: 97 | self.logger.error(f"File not found: {file_path}") 98 | raise ValueError(f"File not found: {file_path}") 99 | except json.JSONDecodeError as e: 100 | self.logger.error(f"Invalid JSON format in {file_path}: {str(e)}") 101 | raise ValueError(f"Invalid JSON format in {file_path}: {str(e)}") 102 | except Exception as e: 103 | self.logger.error(f"Failed to load JSON data from {file_path}: {str(e)}") 104 | raise ValueError(f"Failed to load JSON data from {file_path}: {str(e)}") 105 | 106 | 107 | 108 | def save_file(self, file_path: str, content: str) -> Dict[str, Any]: 109 | """Save content to a file 110 | 111 | This method ensures the target directory exists before writing and handles 112 | any potential errors during the save operation. It's useful for saving 113 | non-JSON content like text files or configuration files. 
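        For instance (an illustrative call; the path and content are
        hypothetical):

            client.save_file("output/RS_example/notes.md", "# Notes\n")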
114 | 115 | Args: 116 | file_path: Path to the file where content will be saved 117 | content: The content to save to the file 118 | 119 | Returns: 120 | A dictionary indicating success or error 121 | """ 122 | full_path = os.path.join(self.base_dir, file_path) 123 | self.logger.debug(f"Attempting to save content to: {full_path}") 124 | 125 | try: 126 | # Ensure the directory exists 127 | os.makedirs(os.path.dirname(full_path), exist_ok=True) 128 | self.logger.debug(f"Ensured directory exists: {os.path.dirname(full_path)}") 129 | 130 | # Write content to file 131 | with open(full_path, 'w', encoding='utf-8') as f: 132 | f.write(content) 133 | 134 | self.logger.info(f"Successfully saved content to: {file_path}") 135 | return {"success": True, "message": f"Content successfully saved to {file_path}"} 136 | except Exception as e: 137 | self.logger.error(f"Failed to save content to {file_path}: {str(e)}") 138 | return {"success": False, "error": str(e)} 139 | 140 | if __name__ == '__main__': 141 | # Example usage of PersistenceClient 142 | client = PersistenceClient() 143 | 144 | # Example data 145 | test_data = { 146 | "name": "Test User", 147 | "age": 30, 148 | "interests": ["programming", "reading"] 149 | } 150 | 151 | # Save data 152 | save_result = client.save_json(test_data, "test_data.json") 153 | print("Save result:", save_result) 154 | 155 | # Load data 156 | load_result = client.load_json("test_data.json") 157 | print("\nLoad result:", load_result) -------------------------------------------------------------------------------- /src/deep_research/services/ai_service.py: -------------------------------------------------------------------------------- 1 | """Utility module for OpenAI LLM model interactions 2 | 3 | This module provides a client interface for interacting with OpenAI-compatible LLM models. 4 | It handles different types of completions (normal, smart, long) and supports various response formats. 5 | """ 6 | 7 | import os 8 | import json 9 | import json_repair 10 | from typing import Optional, Dict, Any 11 | from dotenv import load_dotenv 12 | 13 | # Load environment variables 14 | load_dotenv() 15 | 16 | from ..core.config import LLMConfig 17 | from ..utils.log_util import LogUtil 18 | 19 | class LLMClient: 20 | """Client for interacting with OpenAI LLM models 21 | 22 | This class provides methods to interact with different types of LLM models, 23 | handling authentication, API calls, and response processing. 24 | """ 25 | def __init__(self, config: Optional[LLMConfig] = None): 26 | self.config = config or LLMConfig() 27 | self.logger = LogUtil.get_logger() 28 | 29 | try: 30 | import openai 31 | self.client = openai.OpenAI( 32 | api_key=self.config.api_key, 33 | base_url=self.config.api_base 34 | ) 35 | self.logger.info("Successfully initialized OpenAI client") 36 | except ImportError: 37 | self.logger.critical("OpenAI package is not installed") 38 | raise ImportError( 39 | 'OpenAI package is not installed. ' 40 | 'Please install it with: pip install openai' 41 | ) 42 | 43 | def chat_completion( 44 | self, 45 | messages: list[Dict[str, str]], 46 | model: Optional[str] = None, 47 | response_format: str = 'json', 48 | **kwargs: Any 49 | ) -> Dict[str, Any]: 50 | """Send a chat completion request to the OpenAI API 51 | 52 | This method handles the core interaction with the OpenAI API, including 53 | response format handling and token limit management for certain models. 
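        A minimal call sketch (illustrative; the model name is an assumption
        and depends on what your provider exposes):

            client = LLMClient()
            answer = client.chat_completion(
                messages=[{"role": "user", "content": "Summarize RAG in one line."}],
                model="deepseek/deepseek-r1-distill-llama-70b",
                response_format='markdown',
            )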
54 | 
55 |         Args:
56 |             messages: List of message dictionaries with 'role' and 'content'
57 |             model: Optional model override, defaults to normal_model
58 |             response_format: Response format, either 'json' (default) or 'markdown'
59 |             **kwargs: Additional parameters to pass to the API
60 | 
61 |         Returns:
62 |             The response content as a JSON object or markdown string
63 |         """
64 |         # Prepare request parameters
65 |         params = {
66 |             "model": model or self.config.normal_model,
67 |             "messages": messages
68 |         }
69 | 
70 |         self.logger.debug(f"Sending chat completion request with model: {params['model']}")
71 | 
72 |         try:
73 |             response = self.client.chat.completions.create(
74 |                 **params,
75 |                 **kwargs
76 |             )
77 |             self.logger.debug("Successfully received response from API")
78 |             content = response.choices[0].message.content
79 | 
80 |             # Handle markdown format with token-limit continuation (use the resolved model name, since `model` may be None)
81 |             if response_format == 'markdown' and 'google' in params['model'].lower():
82 |                 # Check whether the response was truncated; native_finish_reason is an OpenRouter-specific field
83 |                 if getattr(response.choices[0], 'native_finish_reason', None) == 'MAX_TOKENS':
84 |                     self.logger.info("Response truncated due to token limit, continuing conversation")
85 |                     # Append the partial response to messages and continue the conversation
86 |                     messages.append({"role": "assistant", "content": content})
87 |                     messages.append({"role": "user", "content": "Please continue from where you left off."})
88 |                     # Recursively get the rest of the response
89 |                     continuation = self.chat_completion(
90 |                         messages=messages,
91 |                         model=params['model'],
92 |                         response_format='markdown',
93 |                         **kwargs
94 |                     )
95 |                     # Combine the current content with the continuation
96 |                     return content + continuation
97 |                 return content
98 | 
99 |             # Handle JSON format
100 |             if response_format == 'json':
101 |                 try:
102 |                     repaired_json = json_repair.loads(content)
103 |                     return repaired_json
104 |                 except Exception as e:
105 |                     self.logger.error(f"Failed to parse JSON response: {str(e)}")
106 |                     raise
107 | 
108 |             return content
109 |         except Exception as e:
110 |             self.logger.error(f"Error in chat completion: {str(e)}")
111 |             raise
112 | 
113 |     def smart_completion(
114 |         self,
115 |         messages: list[Dict[str, str]],
116 |         response_format: str = 'json',
117 |         **kwargs: Any
118 |     ) -> Dict[str, Any]:
119 |         """Use the smart model (e.g. GPT-4) for chat completion
120 | 
121 |         This method is optimized for tasks requiring higher intelligence and reasoning.
122 | 
123 |         Args:
124 |             messages: List of message dictionaries
125 |             response_format: Response format, either 'json' (default) or 'markdown'
126 |             **kwargs: Additional parameters to pass to the API
127 | 
128 |         Returns:
129 |             The response content as a JSON object
130 |         """
131 |         self.logger.info(f"Using smart model: {self.config.smart_model}")
132 |         return self.chat_completion(
133 |             messages=messages,
134 |             model=self.config.smart_model,
135 |             response_format=response_format,
136 |             stream=False,
137 |             **kwargs
138 |         )
139 | 
140 |     def long_completion(
141 |         self,
142 |         messages: list[Dict[str, str]],
143 |         response_format: str = 'json',
144 |         **kwargs: Any
145 |     ) -> Dict[str, Any]:
146 |         """Use the long context model for chat completion
147 | 
148 |         This method is designed for handling longer conversations or inputs
149 |         that require more context window.
150 | 
151 |         Args:
152 |             messages: List of message dictionaries
153 |             response_format: Response format, either 'json' (default) or
'markdown'
154 |             **kwargs: Additional parameters to pass to the API
155 | 
156 |         Returns:
157 |             The response content as a JSON object
158 |         """
159 |         self.logger.info(f"Using long context model: {self.config.long_model}")
160 |         return self.chat_completion(
161 |             messages=messages,
162 |             model=self.config.long_model,
163 |             response_format=response_format,
164 |             stream=False,
165 |             **kwargs
166 |         )
167 | 
168 | 
169 | if __name__ == '__main__':
170 |     # Example usage of LLMClient
171 |     try:
172 |         client = LLMClient()
173 | 
174 |         # Test normal completion
175 |         messages = [
176 |             {"role": "user", "content": "What is Python?"}
177 |         ]
178 |         print("\nNormal Completion Test:")
179 |         response = client.chat_completion(messages)
180 |         print(response)
181 | 
182 |     except Exception as e:
183 |         print(f"Error: {str(e)}")
-------------------------------------------------------------------------------- /src/deep_research/core/research.py: --------------------------------------------------------------------------------
1 | """Module for handling research operations
2 | 
3 | This module provides the core functionality for managing research operations, including:
4 | - Research initialization and metadata management
5 | - Topic translation and research plan generation
6 | - Search execution and result management
7 | - Report generation and link compilation
8 | 
9 | The Research class serves as the main entry point for conducting research tasks,
10 | handling both new research topics and loading existing research data.
11 | """
12 | 
13 | from datetime import datetime
14 | from typing import List, Dict, Any
15 | from deep_research.utils.research_helper import ResearchHelper
16 | from deep_research.core.config import Config
17 | from deep_research.utils.log_util import LogUtil
18 | 
19 | class Research:
20 |     """Class for managing research operations
21 | 
22 |     This class orchestrates the entire research process, from initialization to execution.
23 |     It handles topic translation, research plan generation, search execution, and report
24 |     generation. The class can either start a new research project from a topic or load
25 |     an existing research project using its ID.
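    Example (an illustrative sketch; it requires OPENAI_KEY and
    TAVILY_API_KEY to be configured, and the research ID below is
    hypothetical):

        research = Research(topic="Impact of AI on healthcare")
        research.execute()

        # ...or reload a previous run by its ID:
        research = Research(research_id="RS_20240214_123456")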
26 |     """
27 | 
28 |     def __init__(self, topic: str = None, research_id: str = None):
29 |         """Initialize a new research instance
30 | 
31 |         Args:
32 |             topic: The research topic in any language (optional if research_id is provided)
33 |             research_id: Existing research ID to load data from (optional)
34 | 
35 |         Raises:
36 |             ValueError: If neither topic nor research_id is provided, or if both are provided
37 |         """
38 |         self._logger = LogUtil().logger
39 |         self._logger.info(f"Initializing Research instance with topic='{topic}', research_id='{research_id}'")
40 | 
41 |         if topic and research_id:
42 |             error_msg = "Cannot provide both topic and research_id"
43 |             self._logger.error(error_msg)
44 |             raise ValueError(error_msg)
45 |         elif not topic and not research_id:
46 |             error_msg = "Must provide either topic or research_id"
47 |             self._logger.error(error_msg)
48 |             raise ValueError(error_msg)
49 | 
50 |         # Generate new research ID if not provided
51 |         self.research_id = research_id or 'RS_' + datetime.now().strftime("%Y%m%d_%H%M%S")
52 |         self._helper = ResearchHelper(self.research_id)
53 |         self._logger.debug(f"Created research instance with ID: {self.research_id}")
54 | 
55 |         if topic:
56 |             self._init_from_topic(topic)
57 |         else:
58 |             self._init_from_id()
59 | 
60 |     def _init_from_topic(self, topic: str):
61 |         """Initialize research instance with a new topic
62 | 
63 |         Args:
64 |             topic: The research topic in any language
65 |         """
66 |         self._logger.info(f"Initializing new research from topic: {topic}")
67 |         self.topic = topic
68 |         self.english_topic = self._translate_topic()
69 |         self.research_content = None
70 |         self.research_plan = None
71 | 
72 |         self.create_research_detail(self.english_topic)
73 | 
74 |     def _init_from_id(self):
75 |         """Initialize research instance from existing research ID"""
76 |         self._logger.info(f"Loading existing research from ID: {self.research_id}")
77 |         try:
78 |             data = self._helper.load_research_metadata()
79 |             self.topic = data['topic']
80 |             self.english_topic = data['english_topic']
81 |             self.research_content = data['research_content']
82 |             self.research_plan = data['research_plan']
83 |         except Exception as e:
84 |             error_msg = f"Failed to load research data for ID {self.research_id}: {str(e)}"
85 |             self._logger.error(error_msg)
86 |             raise ValueError(error_msg)
87 | 
88 |     def _translate_topic(self) -> str:
89 |         """Translate the research topic to English
90 | 
91 |         Returns:
92 |             The translated topic in English
93 |         """
94 |         self._logger.debug(f"Translating topic to English: {self.topic}")
95 |         return self._helper.translate_to_english(self.topic)
96 | 
97 |     def create_research_detail(self, english_topic):
98 |         """Generate research content and a research plan for the topic
99 | 
100 |         Generates the research content and plan, stores them on the
101 |         instance, and saves the research metadata.
102 |         """
103 |         self._logger.info(f"Generating research details for topic: {english_topic}")
104 |         self.research_content = self._helper.generate_research_content(english_topic)
105 |         json_plan = self._helper.generate_research_plan(self.research_content)
106 |         self.research_plan = json_plan['research_plan']
107 |         self.save()
108 | 
109 |     def save(self) -> None:
110 |         """Save research metadata to a JSON file in the output directory
111 | 
112 |         Creates a new folder with the research ID and saves metadata as JSON.
113 |         The directory will be created automatically if it doesn't exist.
114 |         """
115 |         self._logger.debug(f"Saving research metadata for ID: {self.research_id}")
116 |         metadata = {
117 |             'topic': self.topic,
118 |             'research_id': self.research_id,
119 |             'english_topic': self.english_topic,
120 |             'research_content': self.research_content,
121 |             'research_plan': self.research_plan
122 |         }
123 |         self._helper.save_research_metadata(metadata)
124 | 
125 |     @property
126 |     def id(self) -> str:
127 |         """Get the research's unique ID
128 | 
129 |         Returns:
130 |             The research's unique ID string
131 |         """
132 |         return self.research_id
133 | 
134 |     def execute_search(self) -> None:
135 |         """Execute searches for all queries in the research plan
136 | 
137 |         Iterates through the research plan, performs advanced searches for each query,
138 |         and saves results in category-specific directories.
139 |         """
140 |         self._logger.info("Starting search execution for all research categories")
141 |         if not self.research_plan:
142 |             error_msg = "No research plan available"
143 |             self._logger.error(error_msg)
144 |             raise ValueError(error_msg)
145 | 
146 |         self._logger.debug(f'Research plan: {self.research_plan}')
147 |         for category_data in self.research_plan:
148 |             category = category_data.get('category')
149 |             if not category:
150 |                 continue
151 | 
152 |             for query in category_data.get('queries_list', []):
153 |                 if not query:
154 |                     continue
155 | 
156 |                 search_results = self._helper.search_advanced(query)
157 |                 self._helper.save_search_results(category, query, search_results)
158 | 
159 |     def get_category_reports(self) -> List[Dict[str, str]]:
160 |         """Retrieve all generated category reports
161 | 
162 |         Returns:
163 |             A list of dictionaries containing the report content for each category
164 |         """
165 |         self._logger.debug("Retrieving all category reports")
166 |         return self._helper.read_category_reports()
167 | 
168 |     def generate_category_report(self, category: str):
169 |         """Generate and save a research report for a specific category
170 | 
171 |         Args:
172 |             category: The category name to generate the report for
173 | 
174 |         Returns:
175 |             A dictionary containing the generated report
176 |         """
177 |         self._logger.info(f"Generating report for category: {category}")
178 |         category_results = self._helper.read_category_results(category)
179 |         report = self._helper.generate_category_report(
180 |             research_content=self.research_content,
181 |             category=category,
182 |             category_resources=category_results
183 |         )
184 |         self._helper.save_category_report(category, report)
185 |         return report
186 | 
187 |     def generate_all_category_links(self) -> List[str]:
188 |         """Generate reference links for all categories in the research plan
189 |         Iterates through each category in the research plan and generates a link for each one.
190 |         Returns:
191 |             A list of strings containing the generated links for each category
192 |         """
193 |         self._logger.info("Generating reference links for all categories")
194 |         if not self.research_plan:
195 |             error_msg = "No research plan available"
196 |             self._logger.error(error_msg)
197 |             raise ValueError(error_msg)
198 | 
199 |         links = []
200 |         for category_data in self.research_plan:
201 |             category = category_data.get('category')
202 |             if not category:
203 |                 continue
204 | 
205 |             try:
206 |                 category_results = self._helper.read_category_results(category)
207 |                 for result in category_results:
208 |                     title = result['title']
209 |                     url = result['url']
210 |                     link = f"[{title}]({url})"
211 |                     links.append(link)
212 |             except Exception as e:
213 |                 self._logger.error(f"Error generating link for category {category}: {str(e)}")
214 |                 continue
215 | 
216 |         reference_content = "## Reference\n\n"
217 |         reference_content += '\n'.join(f'- {item}' for item in links)
218 |         file_path = f'output/{self.research_id}/{self.research_id}_reference.md'
219 | 
220 |         from deep_research.services.persistence_service import PersistenceClient
221 |         persistence_client = PersistenceClient()
222 |         persistence_client.save_file(file_path, reference_content)
223 | 
224 |         return links
225 | 
226 |     def generate_all_category_reports(self) -> List[Dict[str, Any]]:
227 |         """Generate reports for all categories in the research plan
228 | 
229 |         Iterates through each category in the research plan and generates a report for each one.
230 | 
231 |         Returns:
232 |             A list of dictionaries containing the generated reports for each category
233 |         """
234 |         self._logger.info("Generating reports for all research categories")
235 |         if not self.research_plan:
236 |             error_msg = "No research plan available"
237 |             self._logger.error(error_msg)
238 |             raise ValueError(error_msg)
239 | 
240 |         reports = []
241 |         for category_data in self.research_plan:
242 |             category = category_data.get('category')
243 |             if not category:
244 |                 continue
245 | 
246 |             try:
247 |                 report = self.generate_category_report(category)
248 |                 reports.append(report)
249 |             except Exception as e:
250 |                 self._logger.error(f"Error generating report for category {category}: {str(e)}")
251 |                 continue
252 | 
253 |         return reports
254 | 
255 |     def execute(self):
256 |         """Execute the research process
257 |         This method orchestrates the entire research process, including search, report generation, and saving.
258 |         """
259 |         self._logger.info(f"Starting complete research execution for ID: {self.research_id}")
260 |         # Step 1: Execute searches
261 |         self.execute_search()
262 |         # Step 2: Generate reports
263 |         self.generate_all_category_reports()
264 |         # Step 3: Generate links
265 |         self.generate_all_category_links()
266 | 
267 | 
268 | if __name__ == "__main__":
269 |     research = Research(research_id="RS_20250210_175342")
270 | 
271 | 
272 | 
-------------------------------------------------------------------------------- /src/deep_research/core/report.py: --------------------------------------------------------------------------------
1 | """Module for handling report generation operations
2 | 
3 | This module provides functionality for generating various types of research reports,
4 | including WeChat articles, research reports, and detailed research reports.
5 | It uses LLM models for content generation and handles file operations for saving reports.
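Typical usage example (illustrative; the research ID is hypothetical and
must point to an existing run under output/):
    report = Report("RS_20240214_123456")
    content = report.generate_research_report_detailed()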
6 | """ 7 | 8 | from typing import Dict, Any, List 9 | from deep_research.core.config import Config 10 | from deep_research.services.persistence_service import PersistenceClient 11 | from deep_research.utils.research_helper import ResearchHelper 12 | from deep_research.services.ai_service import LLMClient 13 | from deep_research.utils.log_util import LogUtil 14 | import time 15 | import os 16 | from datetime import datetime 17 | 18 | class Report: 19 | """Class for managing report generation operations 20 | 21 | This class handles the generation of different types of research reports, 22 | including WeChat articles and comprehensive research reports. It manages 23 | the loading of research data, report generation using LLM models, and 24 | saving of generated reports. 25 | """ 26 | 27 | def __init__(self, research_id: str): 28 | """Initialize a new report instance 29 | 30 | Args: 31 | research_id: The ID of the research to generate reports for 32 | 33 | Raises: 34 | ValueError: If research data cannot be loaded for the given ID 35 | """ 36 | self.research_id = research_id 37 | self._persistence_client = PersistenceClient() 38 | self._research_helper = ResearchHelper(research_id) 39 | self._logger = LogUtil().logger 40 | self._logger.info(f"Initializing Report instance for research ID: {research_id}") 41 | self._load_research_data() 42 | 43 | def _load_research_data(self) -> None: 44 | """Load research metadata from local storage 45 | 46 | Raises: 47 | ValueError: If research data cannot be loaded or is invalid 48 | """ 49 | file_path = f'output/{self.research_id}/{self.research_id}_meta.json' 50 | try: 51 | self._logger.debug(f"Loading research data from {file_path}") 52 | data = self._persistence_client.load_json(file_path) 53 | self.research_content = data['research_content'] 54 | self.research_plan = data['research_plan'] 55 | self._logger.info("Successfully loaded research data") 56 | except Exception as e: 57 | error_msg = f"Failed to load research data for ID {self.research_id}: {str(e)}" 58 | self._logger.error(error_msg) 59 | raise ValueError(error_msg) 60 | 61 | 62 | def generate_wechat_article(self, model: str = Config.SMART_MODEL) -> str: 63 | """Generate a WeChat article based on research data 64 | 65 | This method generates a user-friendly article suitable for WeChat platform, 66 | incorporating research findings in an engaging and accessible format. 
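        For example (an illustrative call, mirroring this module's __main__
        usage):

            article = report.generate_wechat_article(model="deepseek/deepseek-r1")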
67 | 
68 |         Args:
69 |             model: The LLM model to use for report generation
70 | 
71 |         Returns:
72 |             The generated article content as a string
73 | 
74 |         Raises:
75 |             ValueError: If no research plan is available
76 |         """
77 |         if not self.research_plan:
78 |             self._logger.error("No research plan available for WeChat article generation")
79 |             raise ValueError("No research plan available")
80 | 
81 |         self._logger.info(f"Starting WeChat article generation using model: {model}")
82 |         reports = self._research_helper.read_category_reports()
83 | 
84 | 
85 |         client = LLMClient()
86 |         messages = [
87 |             {"role": "system", "content": f'''你的任务是根据课题和收集到的资料,编写一篇内容详实的微信公众号文章。
88 | 
89 | 要求:
90 | 
91 | - {os.getenv('REPORT_WECHAT_PROMPT', "")}
92 | - {os.getenv('REPORT_PROMPT', "")}
93 | - 要有非常明确的态度和观点。
94 | - 要有一个吸引眼球但不要太肤浅的标题
95 | - 文本要具有亲和力,每个段落要有感情,要有深度,并添加你非常详细的解释涉及到的原理或者知识的背景。
96 | - 不要虚构任何案例和内容,遵照文献里的内容进行交叉比对,思考深度含义。需要非常详细的观点称述,逻辑过程推演和讲解。
97 | - 不要有太多奇怪的比喻。
98 | - 内容有情绪,有代入感,文字通俗,偶尔使用 emoji。
99 | - 文章最后要把所有相关的科学的依据罗列出来。
100 | 
101 | 
102 | ---
103 | 
104 | 话题和相关信息:
105 | {self.research_content}
106 | 
107 | 
108 | 收集的资料和全文报告:
109 | {reports}
110 | 
111 | ---
112 | 输出文章格式:
113 | 
114 | - Use [{Config.REPORT_LANG}]
115 | - Markdown format
116 | - with Key highlighted using ** bold
117 | - Title #
118 | - Section ## with insights
119 | - Subsection ### with lengthy explanation on each section
120 | 
121 | Provide your output in markdown format. 简体中文编写。
122 | 
123 | '''}
124 |         ]
125 |         try:
126 |             report_content = client.chat_completion(messages, model=model, response_format='markdown')
127 |             model_name = self._research_helper.sanitize_filename(model)
128 |             file_path = f'output/{self.research_id}/{self.research_id}_{model_name}_wechat.md'
129 |             self._persistence_client.save_file(file_path, report_content)
130 |             self._logger.info(f"Successfully generated and saved WeChat article to {file_path}")
131 |             return report_content
132 |         except Exception as e:
133 |             error_msg = f"Failed to generate WeChat article: {str(e)}"
134 |             self._logger.error(error_msg)
135 |             raise
136 | 
137 | 
138 |     def generate_research_report(self, model=Config.SMART_MODEL) -> Dict[str, Any]:
139 |         """Generate a comprehensive research report
140 | 
141 |         This method creates a detailed research report that includes analysis,
142 |         insights, and explanations based on the collected literature.
143 | 
144 |         Args:
145 |             model: The LLM model to use for report generation
146 | 
147 |         Returns:
148 |             The generated report content
149 | 
150 |         Raises:
151 |             ValueError: If no research plan is available
152 |         """
153 |         if not self.research_plan:
154 |             self._logger.error("No research plan available for research report generation")
155 |             raise ValueError("No research plan available")
156 | 
157 |         self._logger.info(f"Starting research report generation using model: {model}")
158 |         reports = self._research_helper.read_category_reports()
159 | 
160 | 
161 |         client = LLMClient()
162 |         messages = [
163 |             {"role": "system", "content": f'''Your Task: Based on the provided literature and materials, your goal is to compile a comprehensive and detailed investigative report.
164 | The report should provide extensive analysis, insights, and explanations to ensure sufficient length and depth.
165 | 
166 | Instructions:
167 | 
168 | - {os.getenv('REPORT_PROMPT', "")}
169 | - Always focus on the research goal.
170 | - Integrate the Literature: First, you need to integrate all the content from the provided literature.
Avoid deleting or simplifying the information; instead, reorganize it logically with explain content for each. 171 | - Numbers and Statistics: Always leave the reference source together. 172 | - Develop Insights: carefully analyze the content and the research topic to develop meaningful insights. These insights should go beyond what is explicitly mentioned in the literature and uncover new perspectives or implications. 173 | - Do not mention numbers you don't have evidence to support or skip sections without actual numbers from literature. 174 | - Use Tables or Mermaid Graphs to illustrate but only if needed. 175 | 176 | --- 177 | 178 | Research Topic: 179 | {self.research_content} 180 | 181 | 182 | Collected Literatures: 183 | {reports} 184 | 185 | --- 186 | Report format: 187 | 188 | - Use [{Config.REPORT_LANG}] 189 | - Markdown format 190 | - with Key highlighted using ** bold 191 | - Title # 192 | - Section ## with insights 193 | - Subsection ### with lengthy explaination on each section 194 | 195 | 196 | Provide your output in markdown format. 197 | 198 | '''} 199 | ] 200 | try: 201 | report_content = client.chat_completion(messages, model=model, response_format='markdown') 202 | model_name = self._research_helper.sanitize_filename(model) 203 | file_path = f'output/{self.research_id}/{self.research_id}_{model_name}_research.md' 204 | persistence_client = PersistenceClient() 205 | persistence_client.save_file(file_path, report_content) 206 | self._logger.info(f"Successfully generated and saved research report to {file_path}") 207 | return report_content 208 | except Exception as e: 209 | error_msg = f"Failed to generate research report: {str(e)}" 210 | self._logger.error(error_msg) 211 | raise 212 | 213 | 214 | def generate_research_report_detailed(self, model=Config.REPORT_MODEL) -> Dict[str, Any]: 215 | """Generate a detailed research report 216 | 217 | This method creates a comprehensive and detailed research report with 218 | extensive analysis and insights from the collected literature. 219 | 220 | Args: 221 | model: The LLM model to use for report generation 222 | 223 | Returns: 224 | The generated detailed report content 225 | 226 | Raises: 227 | ValueError: If no research plan is available 228 | """ 229 | if not self.research_plan: 230 | self._logger.error("No research plan available for detailed research report generation") 231 | raise ValueError("No research plan available") 232 | 233 | self._logger.info(f"Starting detailed research report generation using model: {model}") 234 | reports = self._research_helper.read_category_reports() 235 | 236 | 237 | client = LLMClient() 238 | messages = [ 239 | {"role": "system", "content": f'''Your Task: Based on the provided literature and materials, your goal is to compile a comprehensive and detailed investigative report. 240 | 241 | Instructions: 242 | 243 | - {os.getenv('REPORT_PROMPT', "")} 244 | - Always Focus on the research goal. 245 | - Integrate the Literature: First, you need to integrate all the content from the provided literature. 246 | - Numbers and Statistics: Always leave the reference source together. 247 | - Comprehensive and detail, organized structured report with logical section order, do not summarize. 248 | - Conclusion with deepen insights 249 | - Use Tables or Mermaid Graphs to illustrate but if needed. 
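The detailed variant below follows the same call shape as the two generators above. As a hedged illustration of how a caller is meant to drive them (the research ID is hypothetical and presumes a completed run under `output/`):

```python
from deep_research.core.report import Report

# Hypothetical ID: output/RS_20250210_214128/RS_20250210_214128_meta.json must already exist
report = Report("RS_20250210_214128")

article_md = report.generate_wechat_article()   # persisted as <id>_<model>_wechat.md
report_md = report.generate_research_report()   # persisted as <id>_<model>_research.md
```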
214 |     def generate_research_report_detailed(self, model: str = Config.REPORT_MODEL) -> str:
215 |         """Generate a detailed research report
216 | 
217 |         This method creates a comprehensive and detailed research report with
218 |         extensive analysis and insights from the collected literature.
219 | 
220 |         Args:
221 |             model: The LLM model to use for report generation
222 | 
223 |         Returns:
224 |             The generated detailed report content as a markdown string
225 | 
226 |         Raises:
227 |             ValueError: If no research plan is available
228 |         """
229 |         if not self.research_plan:
230 |             self._logger.error("No research plan available for detailed research report generation")
231 |             raise ValueError("No research plan available")
232 | 
233 |         self._logger.info(f"Starting detailed research report generation using model: {model}")
234 |         reports = self._research_helper.read_category_reports()
235 | 
236 | 
237 |         client = LLMClient()
238 |         messages = [
239 |             {"role": "system", "content": f'''Your Task: Based on the provided literature and materials, your goal is to compile a comprehensive and detailed investigative report.
240 | 
241 | Instructions:
242 | 
243 | - {os.getenv('REPORT_PROMPT', "")}
244 | - Always focus on the research goal.
245 | - Integrate the Literature: First, integrate all the content from the provided literature.
246 | - Numbers and Statistics: Always include the reference source alongside them.
247 | - Be comprehensive and detailed: produce an organized, structured report with a logical section order; do not summarize.
248 | - Conclude with deepened insights.
249 | - Use tables or Mermaid graphs to illustrate, but only if needed.
250 | 
251 | ---
252 | 
253 | Research Topic:
254 | {self.research_content}
255 | 
256 | 
257 | Collected Literature:
258 | {reports}
259 | 
260 | ---
261 | Report format:
262 | 
263 | - Use [{Config.REPORT_LANG}]
264 | - Markdown format
265 | - with key points highlighted using ** bold
266 | - Title #
267 | - Section ## with insights
268 | - Subsection ### with detailed content
269 | 
270 | Provide your output in markdown format.
271 | 
272 | '''}
273 |         ]
274 |         try:
275 |             report_content = client.chat_completion(messages, model=model, response_format='markdown')
276 |             model_name = self._research_helper.sanitize_filename(model)
277 |             file_path = f'output/{self.research_id}/{self.research_id}_{model_name}_detail_research.md'
278 |             # Reuse the persistence client created in __init__
279 |             self._persistence_client.save_file(file_path, report_content)
280 |             self._logger.info(f"Successfully generated and saved detailed research report to {file_path}")
281 |             return report_content
282 |         except Exception as e:
283 |             error_msg = f"Failed to generate detailed research report: {str(e)}"
284 |             self._logger.error(error_msg)
285 |             raise
286 | 
287 | 
288 | if __name__ == "__main__":
289 |     research_id = "RS_20250210_214128"  # Example research ID
290 |     report = Report(research_id)
291 |     article_content = report.generate_wechat_article(model='deepseek/deepseek-r1')
292 | 
--------------------------------------------------------------------------------
/src/deep_research/utils/research_helper.py:
--------------------------------------------------------------------------------
1 | 
2 | from typing import Any, Dict, List, Optional
3 | import os
4 | import glob
5 | from deep_research.services.ai_service import LLMClient
6 | from deep_research.services.search_service import SearchClient
7 | from deep_research.services.persistence_service import PersistenceClient
8 | from deep_research.core.config import Config
9 | from deep_research.utils.log_util import LogUtil
10 | from dotenv import load_dotenv
11 | # Load environment variables
12 | load_dotenv()
13 | 
14 | class ResearchHelper:
15 |     """Class for managing research helper operations
16 | 
17 |     This class provides utility methods for handling various research operations,
18 |     including translation, content generation, search execution, and result management.
19 |     It integrates with several services (LLM, Search, Persistence) to provide
20 |     comprehensive research assistance functionality.
21 |     """
22 | 
23 |     def __init__(self, research_id: Optional[str] = None):
24 |         """Initialize a new research helper instance
25 | 
26 |         Args:
27 |             research_id: The ID of the research to work with
28 |         """
29 |         self._logger = LogUtil().logger
30 |         self._logger.info(f"Initializing ResearchHelper with research_id: {research_id}")
31 |         self.research_id = research_id
32 |         self._llm_client = LLMClient()
33 |         self._search_client = SearchClient()
34 |         self._persistence_client = PersistenceClient()
35 | 
36 |     def translate_to_english(self, text: str, **kwargs: Any) -> str:
37 |         """Translate any text to English using the LLM model
38 | 
39 |         Args:
40 |             text: The text to translate to English
41 |             **kwargs: Additional parameters to pass to the API
42 | 
43 |         Returns:
44 |             The English translation as a plain string
45 |         """
46 |         self._logger.debug(f"Translating text to English: {text}")
47 |         messages = [
48 |             {"role": "user", "content": f"""You are a professional translator. Translate the [{text}] to English. Only return the translated text in JSON format: {{"response": ""}}"""}
49 |         ]
50 |         try:
51 |             result = self._llm_client.chat_completion(messages, **kwargs)['response']
52 |             self._logger.info("Successfully translated text to English")
53 |             return result
54 |         except Exception as e:
55 |             self._logger.error(f"Failed to translate text: {str(e)}")
56 |             raise
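Because the prompt above asks the model for a `{"response": ""}` JSON object and the helper unwraps it, callers receive a bare string. A minimal sketch (the input text is illustrative):

```python
helper = ResearchHelper("RS_20250210_214128")  # hypothetical research ID

# Non-English text in, plain English string out
english = helper.translate_to_english("中国信用卡消费市场分析")
print(english)  # e.g. "Analysis of China's credit card consumer market"
```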
57 | 
58 |     def generate_research_content(self, text: str, **kwargs: Any) -> Dict[str, str]:
59 |         """Generate research content based on the topic
60 | 
61 |         Args:
62 |             text: The research topic
63 |             **kwargs: Additional parameters to pass to the API
64 | 
65 |         Returns:
66 |             A dictionary containing the research content
67 |         """
68 |         self._logger.debug(f"Generating research content for topic: {text}")
69 |         messages = [
70 |             {"role": "user", "content": f'''You are a research expert who provides a comprehensive framework of search keywords so the user can gather information for research purposes.
71 | 
72 | The user will provide a topic or target for research.
73 | 
74 | You will think about the topic or target, dive deep into the core question, and define the scope, goal, and significance of the research to establish a solid background for the whole research.
75 | 
76 | Return your result in JSON format:
77 | 
78 | ```
79 | {{
80 |   "original_topic": "",
81 |   "core_research_topic": "",
82 |   "research_scope": "",
83 |   "research_target": ""
84 | }}
85 | ```
86 | Research Topic: [{text}]'''}
87 |         ]
88 |         try:
89 |             result = self._llm_client.smart_completion(messages, **kwargs)
90 |             self._logger.info("Successfully generated research content")
91 |             return result
92 |         except Exception as e:
93 |             self._logger.error(f"Failed to generate research content: {str(e)}")
94 |             raise
95 | 
96 |     def generate_research_plan(self, research_content: Dict[str, str], **kwargs: Any) -> Dict[str, str]:
97 |         """Generate a research plan based on the research content
98 | 
99 |         Args:
100 |             research_content: The research content to base the plan on
101 |             **kwargs: Additional parameters to pass to the API
102 | 
103 |         Returns:
104 |             A dictionary containing the research plan
105 |         """
106 |         self._logger.debug("Generating research plan")
107 |         messages = [
108 |             {"role": "user", "content": f'''You are a research planner who provides a comprehensive framework of search queries so the user can gather information for research purposes.
109 | 
110 | Based on the research information below:
111 | 
112 | - {os.getenv('RESEARCH_PLAN_PROMPT', "")}
113 | - Work out a comprehensive list of queries for collecting information from search engines, covering every aspect of the research goal.
114 | - Each query needs to be specific to the research topic and category, with descriptive phrasing and keywords that narrow the results.
115 | - No more than {os.getenv('NUMBER_OF_QUERIES_IN_CATEGORY', "3")} queries per list.
116 | 
117 | ```
118 | {research_content}
119 | ```
120 | Provide the research plan in the following JSON format:
121 | 
122 | ```
123 | {{
124 |   "research_plan": [
125 |     {{
126 |       "category": "",
127 |       "category_research_goal": "",
128 |       "queries_list": ["", ""]
129 |     }},
130 |     {{
131 |       "category": "",
132 |       "category_research_goal": "",
133 |       "queries_list": ["", ""]
134 |     }}
135 |   ]
136 | }}
137 | ```
138 | 
139 | Rethink until you consider the plan comprehensive enough to find the answer or support the research; adjust or append if anything is still missing.
140 | Provide output in pure JSON format.
141 | '''}
142 |         ]
143 |         try:
144 |             result = self._llm_client.smart_completion(messages, **kwargs)
145 |             self._logger.info("Successfully generated research plan")
146 |             return result
147 |         except Exception as e:
148 |             self._logger.error(f"Failed to generate research plan: {str(e)}")
149 |             raise
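The two prompt-driven methods above compose: the content dict feeds the planner, and the planner's JSON (whose shape is pinned by the schema in the prompt) drives the search loop. A sketch assuming, as the surrounding code implies, that `smart_completion` returns the parsed JSON as a dict:

```python
helper = ResearchHelper("RS_20250210_214128")  # hypothetical ID

content = helper.generate_research_content("China's credit card consumer market")
plan = helper.generate_research_plan(content)

# Walk the structure defined by the prompt's JSON schema
for cat in plan["research_plan"]:
    print(cat["category"], "->", cat["queries_list"])
```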
150 | 
151 |     def save_research_metadata(self, metadata: Dict[str, Any]) -> None:
152 |         """Save research metadata to a JSON file
153 | 
154 |         Args:
155 |             metadata: The metadata to save
156 |         """
157 |         output_file = f'output/{self.research_id}/{self.research_id}_meta.json'
158 |         self._logger.debug(f"Saving research metadata to {output_file}")
159 |         try:
160 |             # save_json persists the metadata; this method intentionally returns None
161 |             self._persistence_client.save_json(metadata, output_file)
162 |             self._logger.info("Successfully saved research metadata")
163 |         except Exception as e:
164 |             self._logger.error(f"Failed to save research metadata: {str(e)}")
165 |             raise
166 | 
167 |     def load_research_metadata(self) -> Dict[str, Any]:
168 |         """Load research metadata from a JSON file
169 | 
170 |         Returns:
171 |             A dictionary containing the research metadata
172 |         """
173 |         file_path = f'output/{self.research_id}/{self.research_id}_meta.json'
174 |         self._logger.debug(f"Loading research metadata from {file_path}")
175 |         try:
176 |             result = self._persistence_client.load_json(file_path)
177 |             self._logger.info("Successfully loaded research metadata")
178 |             return result
179 |         except Exception as e:
180 |             self._logger.error(f"Failed to load research metadata: {str(e)}")
181 |             raise
182 | 
183 |     def search_advanced(self, query: str, **kwargs: Any) -> Dict[str, Any]:
184 |         """Perform an advanced search
185 | 
186 |         Args:
187 |             query: The search query
188 |             **kwargs: Accepted for interface consistency; not currently forwarded to the search client
189 | 
190 |         Returns:
191 |             The search results
192 |         """
193 |         self._logger.debug(f"Executing advanced search with query: {query}")
194 |         try:
195 |             response = self._search_client.search_with_template(
196 |                 query=query,
197 |                 template_name="advanced"
198 |             )
199 |             self._logger.info("Successfully executed advanced search")
200 |             return response
201 |         except Exception as e:
202 |             self._logger.error(f"Failed to execute advanced search: {str(e)}")
203 |             raise
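Putting the helpers together, one plausible per-category collection loop looks like this (the category and query are illustrative; `save_search_results`, `read_category_results`, and the report methods are defined just below):

```python
helper = ResearchHelper("RS_20250210_214128")  # hypothetical ID
content = helper.load_research_metadata()["research_content"]  # written earlier by the pipeline

category = "Market Size"                                   # illustrative plan category
query = "china credit card transaction volume statistics"  # illustrative query

results = helper.search_advanced(query)
helper.save_search_results(category, query, results)

# Fold everything collected for this category into one markdown report
resources = helper.read_category_results(category)
report_md = helper.generate_category_report(content, category, resources)
helper.save_category_report(category, report_md)
```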
204 | 
205 |     def generate_category_report(self, research_content: Dict[str, str], category: str, category_resources: List[Dict[str, str]]) -> str:
206 |         """Generate a report for a specific category
207 | 
208 |         Args:
209 |             research_content: The research content
210 |             category: The category to generate a report for
211 |             category_resources: The resources for the category
212 | 
213 |         Returns:
214 |             The generated report
215 |         """
216 |         self._logger.info(f"Generating summary report for category: {category}")
217 |         messages = [
218 |             {"role": "user", "content": f'''You are a pro researcher. The current research topic is:
219 | 
220 | {research_content}
221 | 
222 | Under the sub research category [{category}]
223 | 
224 | Please read all the collected resources and integrate them into one comprehensive report.
225 | 
226 | Follow the points below:
227 | 
228 | - Based on the specified theme, integrate the relevant literature and materials into a comprehensive report.
229 | - The report should be lengthy and thorough, with each section fully elaborated to ensure no detail from the literature is overlooked.
230 | - Every section is written in a detailed and elaborate manner, with no omission of information.
231 | - The analysis integrates all relevant literature, avoiding any gaps or oversights.
232 | - Structure the report into clear sections.
233 | 
234 | Collected resources to read:
235 | ```
236 | {category_resources}
237 | ```
238 | 
239 | Provide output in Markdown format.
240 | '''}
241 |         ]
242 |         try:
243 |             result = self._llm_client.long_completion(messages, response_format='markdown')
244 |             self._logger.info(f"Successfully generated report for category: {category}")
245 |             return result
246 |         except Exception as e:
247 |             self._logger.error(f"Failed to generate category report: {str(e)}")
248 |             raise
249 | 
250 |     @staticmethod
251 |     def sanitize_filename(name: str) -> str:
252 |         """Sanitize a string to be used as a filename
253 | 
254 |         Args:
255 |             name: The string to sanitize
256 | 
257 |         Returns:
258 |             A sanitized string safe for use as a filename
259 |         """
260 |         import re
261 |         sanitized = re.sub(r'[^\w\s-]', '', name)  # drop anything but word chars, spaces, hyphens
262 |         sanitized = re.sub(r'[-\s]+', '_', sanitized)  # collapse runs of spaces/hyphens to "_"
263 |         return sanitized
264 | 
265 |     def save_search_results(self, category: str, query: str, results: Dict[str, Any]) -> Dict[str, Any]:
266 |         """Save search results to a JSON file
267 | 
268 |         Args:
269 |             category: The category name
270 |             query: The search query
271 |             results: The search results to save
272 | 
273 |         Returns:
274 |             A dictionary indicating success or error
275 |         """
276 |         sanitized_category = self.sanitize_filename(category)
277 |         sanitized_query = self.sanitize_filename(query)
278 |         output_path = f'output/{self.research_id}/{sanitized_category}'
279 |         output_file = f'{output_path}/{sanitized_query}.json'
280 | 
281 |         self._logger.debug(f"Saving search results to {output_file}")
282 |         try:
283 |             os.makedirs(output_path, exist_ok=True)
284 |             result = self._persistence_client.save_json(results, output_file)
285 |             self._logger.info("Successfully saved search results")
286 |             return result
287 |         except Exception as e:
288 |             self._logger.error(f"Failed to save search results: {str(e)}")
289 |             raise
290 | 
291 |     def save_category_report(self, category: str, report: str) -> None:
292 |         """Save a category report to a JSON file
293 | 
294 |         Args:
295 |             category: The category name
296 |             report: The report content
297 |         """
298 |         report_json = {
299 |             "category": category,
300 |             "report": report
301 |         }
302 |         sanitized_category = self.sanitize_filename(category)
303 |         file_path = f'output/{self.research_id}/{sanitized_category}_report.json'
304 | 
305 |         self._logger.debug(f"Saving category report to {file_path}")
306 |         try:
307 |             self._persistence_client.save_json(report_json, file_path)
308 |             self._logger.info(f"Successfully saved report for category: {category}")
309 |         except Exception as e:
310 |             self._logger.error(f"Failed to save category report: {str(e)}")
311 |             raise
312 | 
313 |     def read_category_results(self, category: str) -> List[Dict[str, str]]:
314 |         """Read results for a specific category
315 | 
316 |         Args:
317 |             category: The category name
318 | 
319 |         Returns:
320 |             A list of results for the category
321 |         """
322 |         sanitized_category = self.sanitize_filename(category)
323 |         category_path = f'output/{self.research_id}/{sanitized_category}'
324 | 
325 |         self._logger.debug(f"Reading category results from {category_path}")
326 |         results = []
327 | 
328 |         if not os.path.exists(category_path):
329 |             self._logger.warning(f"Category path does not exist: {category_path}")
330 |             return results
331 | 
332 |         json_files = glob.glob(os.path.join(category_path, '*.json'))
333 |         for json_file in json_files:
334 |             try:
335 |                 data = self._persistence_client.load_json(os.path.relpath(json_file))
336 |                 for result in data.get('results', []):
337 | title = result.get('title', '') 338 | content = result.get('raw_content') or result.get('content', '') 339 | url = result.get('url', '') 340 | if title and content and result.get('score', 0) > 0.6: 341 | results.append({ 342 | 'title': title, 343 | 'url': url, 344 | 'content': content 345 | }) 346 | except Exception as e: 347 | self._logger.error(f"Error processing file {json_file}: {str(e)}") 348 | continue 349 | 350 | self._logger.info(f"Successfully read {len(results)} results for category: {category}") 351 | return results 352 | 353 | def read_category_reports(self) -> List[Dict[str, str]]: 354 | """Read all category reports 355 | 356 | Returns: 357 | A list of category reports 358 | """ 359 | report_path = f'output/{self.research_id}' 360 | self._logger.debug(f"Reading category reports from {report_path}") 361 | reports = [] 362 | 363 | if not os.path.exists(report_path): 364 | self._logger.warning(f"Report path does not exist: {report_path}") 365 | return reports 366 | 367 | json_files = glob.glob(os.path.join(report_path, '*_report.json')) 368 | for json_file in json_files: 369 | try: 370 | data = self._persistence_client.load_json(os.path.relpath(json_file)) 371 | reports.append(data) 372 | except Exception as e: 373 | self._logger.error(f"Error processing file {json_file}: {str(e)}") 374 | continue 375 | 376 | self._logger.info(f"Successfully read {len(reports)} category reports") 377 | return reports 378 | 379 | --------------------------------------------------------------------------------
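Taken together, a hedged end-to-end sketch of how `ResearchHelper` and `Report` compose once a run's artifacts exist under `output/<research_id>/` (the ID is illustrative):

```python
from deep_research.utils.research_helper import ResearchHelper
from deep_research.core.report import Report

research_id = "RS_20250210_214128"  # hypothetical completed run

helper = ResearchHelper(research_id)
print(f"{len(helper.read_category_reports())} category reports on disk")

report = Report(research_id)                 # loads <id>_meta.json or raises ValueError
report.generate_research_report_detailed()   # writes <id>_<model>_detail_research.md
```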