├── requirements.txt ├── LICENSE ├── llm_config.py ├── llm_wrapper.py ├── web_scraper.py ├── strategic_analysis_parser.py ├── README.md ├── llm_response_parser.py ├── Web-LLM.py ├── Self_Improving_Search.py └── research_manager.py /requirements.txt: -------------------------------------------------------------------------------- 1 | requests>=2.31.0 2 | beautifulsoup4>=4.12.2 3 | colorama>=0.4.6 4 | python-dotenv>=1.0.0 5 | tenacity>=8.2.3 6 | tiktoken>=0.5.1 7 | urllib3>=2.1.0 8 | duckduckgo-search>=3.9.3 9 | selenium>=4.15.2 10 | webdriver-manager>=4.0.1 11 | fake-useragent>=1.4.0 12 | html2text>=2020.1.16 13 | markdownify>=0.11.6 14 | readability-lxml>=0.8.1 15 | pyyaml>=6.0.1 16 | rich>=13.7.0 17 | httpx>=0.25.1 18 | pytest>=7.4.3 19 | black>=23.11.0 20 | isort>=5.12.0 21 | flake8>=6.1.0 22 | mypy>=1.7.0 23 | llama-cpp-python 24 | windows-curses>=2.3.1 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 James Warburton 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /llm_config.py: -------------------------------------------------------------------------------- 1 | # llm_config.py 2 | 3 | LLM_TYPE = "ollama" # Options: 'llama_cpp', 'ollama' 4 | 5 | # LLM settings for llama_cpp 6 | MODEL_PATH = "/home/james/llama.cpp/models/gemma-2-9b-it-Q6_K.gguf" # Replace with your llama.cpp models filepath 7 | 8 | LLM_CONFIG_LLAMA_CPP = { 9 | "llm_type": "llama_cpp", 10 | "model_path": MODEL_PATH, 11 | "n_ctx": 20000, # context size 12 | "n_gpu_layers": 0, # number of layers to offload to GPU (-1 for all, 0 for none) 13 | "n_threads": 8, # number of threads to use 14 | "temperature": 0.7, # temperature for sampling 15 | "top_p": 0.9, # top p for sampling 16 | "top_k": 40, # top k for sampling 17 | "repeat_penalty": 1.1, # repeat penalty 18 | "max_tokens": 1024, # max tokens to generate 19 | "stop": ["User:", "\n\n"] # stop sequences 20 | } 21 | 22 | # LLM settings for Ollama 23 | LLM_CONFIG_OLLAMA = { 24 | "llm_type": "ollama", 25 | "base_url": "http://localhost:11434", # default Ollama server URL 26 | "model_name": "custom-phi3-32k-Q4_K_M", # Replace with your Ollama model name 27 | "temperature": 0.7, 28 | "top_p": 0.9, 29 | "n_ctx": 55000, 30 | "context_length": 55000, 31 | "stop": ["User:", "\n\n"] 32 | } 33 | 34 | def get_llm_config(): 35 | if LLM_TYPE == "llama_cpp": 36 | return LLM_CONFIG_LLAMA_CPP 37 | elif LLM_TYPE == "ollama": 38 | return LLM_CONFIG_OLLAMA 39 | else: 40 | raise ValueError(f"Invalid LLM_TYPE: {LLM_TYPE}") 41 | -------------------------------------------------------------------------------- /llm_wrapper.py: -------------------------------------------------------------------------------- 1 | from llama_cpp import Llama 2 | import requests 3 | import json 4 | from llm_config import get_llm_config 5 | 6 | class LLMWrapper: 7 | def __init__(self): 8 | self.llm_config = get_llm_config() 9 | self.llm_type = self.llm_config.get('llm_type', 'llama_cpp') 10 | if self.llm_type == 'llama_cpp': 11 | self.llm = self._initialize_llama_cpp() 12 | elif self.llm_type == 'ollama': 13 | self.base_url = self.llm_config.get('base_url', 'http://localhost:11434') 14 | self.model_name = self.llm_config.get('model_name', 'your_model_name') 15 | else: 16 | raise ValueError(f"Unsupported LLM type: {self.llm_type}") 17 | 18 | def _initialize_llama_cpp(self): 19 | return Llama( 20 | model_path=self.llm_config.get('model_path'), 21 | n_ctx=self.llm_config.get('n_ctx', 55000), 22 | n_gpu_layers=self.llm_config.get('n_gpu_layers', 0), 23 | n_threads=self.llm_config.get('n_threads', 8), 24 | verbose=False 25 | ) 26 | 27 | def generate(self, prompt, **kwargs): 28 | if self.llm_type == 'llama_cpp': 29 | llama_kwargs = self._prepare_llama_kwargs(kwargs) 30 | response = self.llm(prompt, **llama_kwargs) 31 | return response['choices'][0]['text'].strip() 32 | elif self.llm_type == 'ollama': 33 | return self._ollama_generate(prompt, **kwargs) 34 | else: 35 | raise ValueError(f"Unsupported LLM type: {self.llm_type}") 36 | 37 | def _ollama_generate(self, prompt, **kwargs): 38 | url = f"{self.base_url}/api/generate" 39 | data = { 40 | 'model': self.model_name, 41 | 'prompt': prompt, 42 | 'options': { 43 | 'temperature': kwargs.get('temperature', self.llm_config.get('temperature', 0.7)), 44 | 'top_p': kwargs.get('top_p', self.llm_config.get('top_p', 0.9)), 45 | 'stop': kwargs.get('stop', self.llm_config.get('stop', [])), 46 | 'num_predict': kwargs.get('max_tokens', self.llm_config.get('max_tokens', 
55000)), 47 | 'context_length': self.llm_config.get('n_ctx', 55000) 48 | } 49 | } 50 | response = requests.post(url, json=data, stream=True) 51 | if response.status_code != 200: 52 | raise Exception(f"Ollama API request failed with status {response.status_code}: {response.text}") 53 | text = ''.join(json.loads(line)['response'] for line in response.iter_lines() if line) 54 | return text.strip() 55 | 56 | def _cleanup(self): 57 | """Force terminate any running LLM processes""" 58 | if self.llm_type == 'ollama': 59 | try: 60 | # Force terminate Ollama process 61 | requests.post(f"{self.base_url}/api/terminate") 62 | except: 63 | pass 64 | 65 | try: 66 | # Also try to terminate via subprocess if needed 67 | import subprocess 68 | subprocess.run(['pkill', '-f', 'ollama'], capture_output=True) 69 | except: 70 | pass 71 | 72 | def _prepare_llama_kwargs(self, kwargs): 73 | llama_kwargs = { 74 | 'max_tokens': kwargs.get('max_tokens', self.llm_config.get('max_tokens', 55000)), 75 | 'temperature': kwargs.get('temperature', self.llm_config.get('temperature', 0.7)), 76 | 'top_p': kwargs.get('top_p', self.llm_config.get('top_p', 0.9)), 77 | 'stop': kwargs.get('stop', self.llm_config.get('stop', [])), 78 | 'echo': False, 79 | } 80 | return llama_kwargs 81 | -------------------------------------------------------------------------------- /web_scraper.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | from urllib.robotparser import RobotFileParser 4 | from urllib.parse import urlparse, urljoin 5 | import time 6 | import logging 7 | from concurrent.futures import ThreadPoolExecutor, as_completed 8 | import re 9 | 10 | # Set up logging 11 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 12 | logger = logging.getLogger(__name__) 13 | 14 | class WebScraper: 15 | def __init__(self, user_agent="WebLLMAssistant/1.0 (+https://github.com/YourUsername/Web-LLM-Assistant-Llama-cpp)", 16 | rate_limit=1, timeout=10, max_retries=3): 17 | self.session = requests.Session() 18 | self.session.headers.update({"User-Agent": user_agent}) 19 | self.robot_parser = RobotFileParser() 20 | self.rate_limit = rate_limit 21 | self.timeout = timeout 22 | self.max_retries = max_retries 23 | self.last_request_time = {} 24 | 25 | def can_fetch(self, url): 26 | parsed_url = urlparse(url) 27 | robots_url = f"{parsed_url.scheme}://{parsed_url.netloc}/robots.txt" 28 | self.robot_parser.set_url(robots_url) 29 | try: 30 | self.robot_parser.read() 31 | return self.robot_parser.can_fetch(self.session.headers["User-Agent"], url) 32 | except Exception as e: 33 | logger.warning(f"Error reading robots.txt for {url}: {e}") 34 | return True # Assume allowed if robots.txt can't be read 35 | 36 | def respect_rate_limit(self, url): 37 | domain = urlparse(url).netloc 38 | current_time = time.time() 39 | if domain in self.last_request_time: 40 | time_since_last_request = current_time - self.last_request_time[domain] 41 | if time_since_last_request < self.rate_limit: 42 | time.sleep(self.rate_limit - time_since_last_request) 43 | self.last_request_time[domain] = time.time() 44 | 45 | def scrape_page(self, url): 46 | if not self.can_fetch(url): 47 | logger.info(f"Robots.txt disallows scraping: {url}") 48 | return None 49 | 50 | for attempt in range(self.max_retries): 51 | try: 52 | self.respect_rate_limit(url) 53 | response = self.session.get(url, timeout=self.timeout) 54 | response.raise_for_status() 55 | return 
self.extract_content(response.text, url) 56 | except requests.RequestException as e: 57 | logger.warning(f"Error scraping {url} (attempt {attempt + 1}/{self.max_retries}): {e}") 58 | if attempt == self.max_retries - 1: 59 | logger.error(f"Failed to scrape {url} after {self.max_retries} attempts") 60 | return None 61 | time.sleep(2 ** attempt) # Exponential backoff 62 | 63 | def extract_content(self, html, url): 64 | soup = BeautifulSoup(html, 'html.parser') 65 | 66 | # Remove unwanted elements 67 | for element in soup(["script", "style", "nav", "footer", "header"]): 68 | element.decompose() 69 | 70 | # Extract title 71 | title = soup.title.string if soup.title else "" 72 | 73 | # Try to find main content 74 | main_content = soup.find('main') or soup.find('article') or soup.find('div', class_='content') 75 | 76 | if main_content: 77 | paragraphs = main_content.find_all('p') 78 | else: 79 | paragraphs = soup.find_all('p') 80 | 81 | # Extract text from paragraphs 82 | text = ' '.join([p.get_text().strip() for p in paragraphs]) 83 | 84 | # If no paragraphs found, get all text 85 | if not text: 86 | text = soup.get_text() 87 | 88 | # Clean up whitespace 89 | text = re.sub(r'\s+', ' ', text).strip() 90 | 91 | # Extract and resolve links 92 | links = [urljoin(url, a['href']) for a in soup.find_all('a', href=True)] 93 | 94 | return { 95 | "url": url, 96 | "title": title, 97 | "content": text[:2400], # Limit to first 2400 characters 98 | "links": links[:10] # Limit to first 10 links 99 | } 100 | 101 | def scrape_multiple_pages(urls, max_workers=5): 102 | scraper = WebScraper() 103 | results = {} 104 | 105 | with ThreadPoolExecutor(max_workers=max_workers) as executor: 106 | future_to_url = {executor.submit(scraper.scrape_page, url): url for url in urls} 107 | for future in as_completed(future_to_url): 108 | url = future_to_url[future] 109 | try: 110 | data = future.result() 111 | if data: 112 | results[url] = data 113 | logger.info(f"Successfully scraped: {url}") 114 | else: 115 | logger.warning(f"Failed to scrape: {url}") 116 | except Exception as exc: 117 | logger.error(f"{url} generated an exception: {exc}") 118 | 119 | return results 120 | 121 | # Function to integrate with your main system 122 | def get_web_content(urls): 123 | scraped_data = scrape_multiple_pages(urls) 124 | return {url: data['content'] for url, data in scraped_data.items() if data} 125 | 126 | # Standalone can_fetch function 127 | def can_fetch(url): 128 | parsed_url = urlparse(url) 129 | robots_url = f"{parsed_url.scheme}://{parsed_url.netloc}/robots.txt" 130 | rp = RobotFileParser() 131 | rp.set_url(robots_url) 132 | try: 133 | rp.read() 134 | return rp.can_fetch("*", url) 135 | except Exception as e: 136 | logger.warning(f"Error reading robots.txt for {url}: {e}") 137 | return True # Assume allowed if robots.txt can't be read 138 | 139 | if __name__ == "__main__": 140 | test_urls = [ 141 | "https://en.wikipedia.org/wiki/Web_scraping", 142 | "https://example.com", 143 | "https://www.python.org" 144 | ] 145 | scraped_content = get_web_content(test_urls) 146 | for url, content in scraped_content.items(): 147 | print(f"Content from {url}:") 148 | print(content[:500]) # Print first 500 characters 149 | print("\n---\n") 150 | -------------------------------------------------------------------------------- /strategic_analysis_parser.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Optional, Union 2 | import re 3 | import logging 4 | from dataclasses import 
dataclass 5 | from datetime import datetime 6 | 7 | @dataclass 8 | class ResearchFocus: 9 | """Represents a specific area of research focus""" 10 | area: str 11 | priority: int 12 | source_query: str = "" 13 | timestamp: str = "" 14 | search_queries: List[str] = None 15 | 16 | def __post_init__(self): 17 | if not self.timestamp: 18 | self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 19 | if self.search_queries is None: 20 | self.search_queries = [] 21 | 22 | @dataclass 23 | class AnalysisResult: 24 | """Contains the complete analysis result""" 25 | original_question: str 26 | focus_areas: List[ResearchFocus] 27 | raw_response: str 28 | timestamp: str = "" 29 | confidence_score: float = 0.0 30 | 31 | def __post_init__(self): 32 | if not self.timestamp: 33 | self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 34 | 35 | # Set up logging 36 | logger = logging.getLogger(__name__) 37 | 38 | class StrategicAnalysisParser: 39 | """Enhanced parser with improved pattern matching and validation""" 40 | def __init__(self): 41 | self.patterns = { 42 | 'original_question': [ 43 | r"(?i)original question analysis:\s*(.*?)(?=research gap|$)", 44 | r"(?i)original query:\s*(.*?)(?=research gap|$)", 45 | r"(?i)research question:\s*(.*?)(?=research gap|$)", 46 | r"(?i)topic analysis:\s*(.*?)(?=research gap|$)" 47 | ], 48 | 'research_gaps': [ 49 | r"(?i)research gaps?:\s*", 50 | r"(?i)gaps identified:\s*", 51 | r"(?i)areas for research:\s*", 52 | r"(?i)investigation areas:\s*" 53 | ], 54 | 'priority': [ 55 | r"(?i)priority:\s*(\d+)", 56 | r"(?i)priority level:\s*(\d+)", 57 | r"(?i)\(priority:\s*(\d+)\)", 58 | r"(?i)importance:\s*(\d+)" 59 | ] 60 | } 61 | self.logger = logging.getLogger(__name__) 62 | 63 | def parse_analysis(self, llm_response: str) -> Optional[AnalysisResult]: 64 | """Main parsing method with improved validation""" 65 | try: 66 | # Clean and normalize the response 67 | cleaned_response = self._clean_text(llm_response) 68 | 69 | # Extract original question with validation 70 | original_question = self._extract_original_question(cleaned_response) 71 | if not original_question: 72 | self.logger.warning("Failed to extract original question") 73 | original_question = "Original question extraction failed" 74 | 75 | # Extract and validate research areas 76 | focus_areas = self._extract_research_areas(cleaned_response) 77 | focus_areas = self._normalize_focus_areas(focus_areas) 78 | 79 | # Calculate confidence score 80 | confidence_score = self._calculate_confidence_score(original_question, focus_areas) 81 | 82 | return AnalysisResult( 83 | original_question=original_question, 84 | focus_areas=focus_areas, 85 | raw_response=llm_response, 86 | confidence_score=confidence_score 87 | ) 88 | 89 | except Exception as e: 90 | self.logger.error(f"Error in parse_analysis: {str(e)}") 91 | return None 92 | 93 | def _clean_text(self, text: str) -> str: 94 | """Clean and normalize text for parsing""" 95 | text = re.sub(r'\n{3,}', '\n\n', text) 96 | text = re.sub(r'\s{2,}', ' ', text) 97 | text = re.sub(r'(\d+\))', r'\1.', text) 98 | return text.strip() 99 | 100 | def _extract_original_question(self, text: str) -> str: 101 | """Extract original question with improved matching""" 102 | for pattern in self.patterns['original_question']: 103 | match = re.search(pattern, text, re.DOTALL) 104 | if match: 105 | return self._clean_text(match.group(1)) 106 | return "" 107 | 108 | def _extract_research_areas(self, text: str) -> List[ResearchFocus]: 109 | """Extract research areas with enhanced 
validation""" 110 | areas = [] 111 | for pattern in self.patterns['research_gaps']: 112 | gap_match = re.search(pattern, text) 113 | if gap_match: 114 | sections = re.split(r'\n\s*\d+[\.)]\s+', text[gap_match.end():]) 115 | sections = [s for s in sections if s.strip()] 116 | 117 | for section in sections: 118 | focus = self._parse_research_focus(section) 119 | if focus and self._is_valid_focus(focus): 120 | areas.append(focus) 121 | break 122 | return areas 123 | 124 | def _parse_research_focus(self, text: str) -> Optional[ResearchFocus]: 125 | """Parse research focus with improved validation without reasoning.""" 126 | try: 127 | # Extract area 128 | area = text.split('\n')[0].strip() 129 | 130 | # Extract and validate priority 131 | priority = self._extract_priority(text) 132 | 133 | # Return ResearchFocus without reasoning 134 | return ResearchFocus( 135 | area=area, 136 | priority=priority 137 | ) 138 | 139 | except Exception as e: 140 | self.logger.error(f"Error parsing research focus: {str(e)}") 141 | return None 142 | 143 | def _extract_priority(self, text: str) -> int: 144 | """Extract priority with validation""" 145 | for pattern in self.patterns['priority']: 146 | priority_match = re.search(pattern, text) 147 | if priority_match: 148 | try: 149 | priority = int(priority_match.group(1)) 150 | return max(1, min(5, priority)) 151 | except ValueError: 152 | continue 153 | return 3 # Default priority 154 | 155 | def _is_valid_focus(self, focus: ResearchFocus) -> bool: 156 | """Validate research focus completeness and quality""" 157 | if not focus.area: # Only check if area exists and isn't empty 158 | return False 159 | if focus.priority < 1 or focus.priority > 5: 160 | return False 161 | return True 162 | 163 | def _normalize_focus_areas(self, areas: List[ResearchFocus]) -> List[ResearchFocus]: 164 | """Normalize and validate focus areas""" 165 | normalized = [] 166 | for area in areas: 167 | if not area.area.strip(): 168 | continue 169 | 170 | area.priority = max(1, min(5, area.priority)) 171 | 172 | if self._is_valid_focus(area): 173 | normalized.append(area) 174 | 175 | # Sort by priority (highest first) but don't add any filler areas 176 | normalized.sort(key=lambda x: x.priority, reverse=True) 177 | 178 | return normalized 179 | 180 | def _calculate_confidence_score(self, question: str, areas: List[ResearchFocus]) -> float: 181 | """Calculate confidence score for analysis quality""" 182 | score = 0.0 183 | 184 | # Question quality (0.3) 185 | if question and len(question.split()) >= 3: 186 | score += 0.3 187 | 188 | # Areas quality (0.7) 189 | if areas: 190 | # Valid areas ratio (0.35) - now based on proportion that are valid vs total 191 | num_areas = len(areas) 192 | if num_areas > 0: # Avoid division by zero 193 | valid_areas = sum(1 for a in areas if self._is_valid_focus(a)) 194 | score += 0.35 * (valid_areas / num_areas) 195 | 196 | # Priority distribution (0.35) - now based on having different priorities 197 | if num_areas > 0: # Avoid division by zero 198 | unique_priorities = len(set(a.priority for a in areas)) 199 | score += 0.35 * (unique_priorities / num_areas) 200 | 201 | return round(score, 2) 202 | 203 | def format_analysis_result(self, result: AnalysisResult) -> str: 204 | """Format analysis result for display without reasoning.""" 205 | formatted = [ 206 | "Strategic Analysis Result", 207 | "=" * 80, 208 | f"\nOriginal Question Analysis:\n{result.original_question}\n", 209 | f"Analysis Confidence Score: {result.confidence_score}", 210 | "\nResearch Focus Areas:" 
211 | ] 212 | 213 | for i, focus in enumerate(result.focus_areas, 1): 214 | formatted.extend([ 215 | f"\n{i}. {focus.area}", 216 | f" Priority: {focus.priority}" 217 | ]) 218 | 219 | return "\n".join(formatted) 220 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Automated-AI-Web-Researcher-Ollama (Windows) 2 | A Windows-optimized version of Automated-AI-Web-Researcher-Ollama, adapted from the original Unix-based implementation to provide full Windows compatibility. 3 | 4 | 5 | ## Description 6 | Automated-AI-Web-Researcher is an innovative research assistant that leverages locally run large language models through Ollama to conduct thorough, automated online research on any topic or question. Unlike a traditional LLM chat, this tool performs structured research: it breaks your query into focused research areas, systematically investigates each area by searching the web and scraping relevant websites, and automatically saves its findings to a text document along with a source link for every piece of content. Whenever you want the research to stop, you can enter a command that terminates the session; the LLM then reviews all of the content it found and provides a comprehensive final summary of your original topic or question, after which you can ask the LLM follow-up questions about its findings. 7 | 8 | ## Project Demonstration 9 | 10 | [![My Project Demo](https://img.youtube.com/vi/hS7Q1B8N1mQ/0.jpg)](https://youtu.be/hS7Q1B8N1mQ "My Project Demo") 11 | 12 | Click the image above to watch the demonstration of My Project. 13 | 14 | ## Here's how it works: 15 | 16 | 1. You provide a research query (e.g., "What year will global population begin to decrease rather than increase according to research?") 17 | 2. The LLM analyzes your query and generates 5 specific research focus areas, each assigned a priority based on its relevance to the topic or question. 18 | 3. Starting with the highest-priority area, the LLM: 19 | - Formulates targeted search queries 20 | - Performs web searches 21 | - Analyzes the search results and selects the most relevant web pages 22 | - Scrapes and extracts relevant information from the selected web pages 23 | - Documents all content found during the research session in a research text file, including links to the websites the content was retrieved from 24 | 4. After investigating all focus areas, the LLM generates new focus areas based on the information gathered so far and repeats its research cycle. Earlier findings often suggest new, relevant focus areas, which can lead to interesting and novel lines of research. 25 | 5. You can let it research for as long as you like; at any time you can enter the quit command, which stops the research and causes the LLM to review all of the content collected so far in full and generate a comprehensive summary responding to your original query or topic. 26 | 6. The LLM then enters a conversation mode in which you can ask specific questions about the research findings. (A sketch of the Ollama call that powers each of these steps follows below.)
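Under the hood, every step above is an HTTP call to your local Ollama server. The following minimal sketch mirrors what `llm_wrapper.py` in this repo does (the model name shown is an example; use whatever you configure in `llm_config.py`):

```python
# Minimal sketch of the Ollama call used throughout the research loop.
# Mirrors llm_wrapper.py: POST to /api/generate and stitch the streamed JSON lines together.
import json
import requests

def ollama_generate(prompt: str,
                    model: str = "research-phi3",          # example name; substitute your own model
                    base_url: str = "http://localhost:11434") -> str:
    response = requests.post(
        f"{base_url}/api/generate",
        json={"model": model, "prompt": prompt, "options": {"temperature": 0.7}},
        stream=True,
    )
    response.raise_for_status()
    # Ollama streams one JSON object per line; concatenate their 'response' fields.
    return "".join(json.loads(line)["response"]
                   for line in response.iter_lines() if line).strip()
```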
27 | 28 | The key distinction is that this isn't just a chatbot - it's an automated research assistant that methodically investigates topics and maintains a documented research trail, all from a single question or topic of your choosing. Depending on your system and model, it can perform over a hundred searches and content retrievals in a relatively short amount of time. You can leave it running and come back to a full text document containing over a hundred pieces of content from relevant websites, then have it summarise the findings and even ask it questions about what it found. 29 | 30 | ## Features 31 | - Automated research planning with prioritized focus areas 32 | - Systematic web searching and content analysis 33 | - All research content and source URLs saved into a detailed text document 34 | - Research summary generation 35 | - Post-research Q&A capability about findings 36 | - Self-improving search mechanism 37 | - Rich console output with status indicators 38 | - Comprehensive answer synthesis using web-sourced information 39 | - Research conversation mode for exploring findings 40 | 41 | ## Installation on Windows 42 | 43 | Python 3.11 with a virtual environment is recommended. 44 | 45 | 1. Clone the repository: 46 | 47 | ```sh 48 | git clone https://github.com/hafeezhmha/Automated-AI-Web-Researcher-Ollama.git 49 | cd Automated-AI-Web-Researcher-Ollama 50 | ``` 51 | 52 | 2. Create and activate a virtual environment: 53 | 54 | ```sh 55 | python -m venv venv 56 | venv\Scripts\activate 57 | ``` 58 | 59 | 3. Install dependencies: 60 | 61 | ```sh 62 | pip install -r requirements.txt 63 | ``` 64 | 65 | 4. Install and Configure Ollama: 66 | - Install Ollama following instructions at https://ollama.ai 67 | - Using your selected model, create a custom model variant with the required context length 68 | (phi3:3.8b-mini-128k-instruct or phi3:14b-medium-128k-instruct are recommended) 69 | 70 | Create a file named `modelfile` with these exact contents: 71 | 72 | ``` 73 | FROM your-model-name 74 | 75 | PARAMETER num_ctx 38000 76 | ``` 77 | 78 | Replace "your-model-name" with your chosen model (e.g., phi3:medium-128k or phi3:medium). 79 | 80 | Then create the model: 81 | 82 | ```sh 83 | ollama create research-phi3 -f modelfile 84 | ``` 85 | 86 | Note: This specific configuration is necessary because recent Ollama versions have reduced the context windows of models like phi3:3.8b-mini-128k-instruct, despite names suggesting a high context length. Since the research process works with a large amount of information, the modelfile step above is required to restore a sufficiently large context window. 87 | 88 | ## Usage 89 | 90 | 1. Start Ollama: 91 | 92 | ```sh 93 | ollama serve 94 | ``` 95 | 96 | 2. Run the researcher: 97 | 98 | ```sh 99 | python Web-LLM.py 100 | ``` 101 | 102 | 3. Start a research session: 103 | - Type `@` followed by your research query 104 | - Press CTRL+Z to submit 105 | - Example: `@What year is global population projected to start declining?` 106 | 107 | 4. During research, you can use the following commands by typing the corresponding letter and submitting it with CTRL+Z: 108 | - Use `s` to show status. 109 | - Use `f` to show current focus.
110 | - Use `p` to pause and assess research progress. The LLM reviews the entire research content collected so far and tells you whether it can already answer your query with it. It then waits for one of two commands: `c` to continue the research, or `q` to terminate it, which produces a summary just as if you had quit without using the pause feature. 111 | - Use `q` to quit research. 112 | 113 | 5. After research completes: 114 | - Wait for the summary to be generated, and review the LLM's findings. 115 | - Enter conversation mode to ask specific questions about the findings. 116 | - Access the detailed research content, available in a research-session text file that appears in the program's directory and includes: 117 | * All retrieved content 118 | * Source URLs for all information 119 | * Focus areas investigated 120 | * Generated summary 121 | 122 | ## Configuration 123 | 124 | The LLM settings can be modified in `llm_config.py`. You must specify your model name in the configuration for the researcher to function. The default configuration is optimized for research tasks with the specified Phi-3 model. (An example configuration is shown at the end of this README.) 125 | 126 | ## Current Status 127 | This is a prototype that demonstrates functional automated research capabilities. While still in development, it successfully performs structured research tasks. It is currently tested and works well with the phi3:3.8b-mini-128k-instruct model when the context is set as advised above. 128 | 129 | ## Dependencies 130 | - Ollama 131 | - Python packages listed in requirements.txt 132 | - Recommended model: phi3:3.8b-mini-128k-instruct or phi3:14b-medium-128k-instruct (with custom context length as specified) 133 | 134 | ## Contributing 135 | Contributions are welcome! This is a prototype with room for improvements and new features. 136 | 137 | ## License 138 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 139 | 140 | ## Acknowledgments 141 | - Ollama team for their local LLM runtime 142 | - DuckDuckGo for their search API 143 | 144 | ## Personal Note 145 | This tool represents an attempt to bridge the gap between simple LLM interactions and genuine research capabilities. By structuring the research process and maintaining documentation, it aims to provide more thorough and verifiable results than traditional LLM conversations. It also represents an attempt to improve on my previous project, 'Web-LLM-Assistant-Llamacpp-Ollama', which simply gave LLMs the ability to search and scrape websites to answer questions. Unlike its predecessor, this program takes that capability and uses it in a novel and genuinely useful way; it is the most advanced and useful way I could conceive of building on my previous program. As a very new programmer, this being only my second program, I feel very good about the result, and I hope that it hits the mark! 146 | I have now been using it a great deal myself, and unlike the previous program, which felt more like a novelty than an actual tool, this one is actually quite useful and unique, but I am quite biased! 147 | 148 | Please enjoy, and feel free to submit any suggestions for improvements so that we can make this automated AI researcher even more capable. 149 | 150 | ## Disclaimer 151 | This project is for educational purposes only. Ensure you comply with the terms of service of all APIs and services used.
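## Appendix: Example Configuration

As a quick reference for the Configuration section above, a minimal Ollama setup in `llm_config.py` might look like the following sketch (the model name is an example; substitute your own):

```python
# llm_config.py - example values for an Ollama setup (a sketch, not the only valid configuration)
LLM_TYPE = "ollama"  # Options: 'llama_cpp', 'ollama'

LLM_CONFIG_OLLAMA = {
    "llm_type": "ollama",
    "base_url": "http://localhost:11434",  # default Ollama server URL
    "model_name": "research-phi3",         # the custom model created with the modelfile above
    "temperature": 0.7,
    "top_p": 0.9,
    "n_ctx": 38000,                        # match the num_ctx value in your modelfile
    "context_length": 38000,
    "stop": ["User:", "\n\n"],
}
```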
152 | -------------------------------------------------------------------------------- /llm_response_parser.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Dict, List, Union, Optional 3 | import logging 4 | import json 5 | from strategic_analysis_parser import StrategicAnalysisParser, AnalysisResult, ResearchFocus 6 | 7 | # Set up logging 8 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 9 | logger = logging.getLogger(__name__) 10 | 11 | class UltimateLLMResponseParser: 12 | def __init__(self): 13 | self.decision_keywords = { 14 | 'refine': ['refine', 'need more info', 'insufficient', 'unclear', 'more research', 'additional search'], 15 | 'answer': ['answer', 'sufficient', 'enough info', 'can respond', 'adequate', 'comprehensive'] 16 | } 17 | self.section_identifiers = [ 18 | ('decision', r'(?i)decision\s*:'), 19 | ('reasoning', r'(?i)reasoning\s*:'), 20 | ('selected_results', r'(?i)selected results\s*:'), 21 | ('response', r'(?i)response\s*:') 22 | ] 23 | # Initialize strategic analysis parser 24 | self.strategic_parser = StrategicAnalysisParser() 25 | 26 | def parse_llm_response(self, response: str, mode: str = 'search') -> Dict[str, Union[str, List[int], AnalysisResult]]: 27 | """ 28 | Parse LLM response based on mode 29 | 30 | Args: 31 | response (str): The LLM's response text 32 | mode (str): 'search' for web search, 'research' for strategic analysis 33 | 34 | Returns: 35 | Dict containing parsed response 36 | """ 37 | logger.info(f"Starting to parse LLM response in {mode} mode") 38 | 39 | if mode == 'research': 40 | return self._parse_research_response(response) 41 | 42 | # Original search mode parsing 43 | result = { 44 | 'decision': None, 45 | 'reasoning': None, 46 | 'selected_results': [], 47 | 'response': None 48 | } 49 | 50 | parsing_strategies = [ 51 | self._parse_structured_response, 52 | self._parse_json_response, 53 | self._parse_unstructured_response, 54 | self._parse_implicit_response 55 | ] 56 | 57 | for strategy in parsing_strategies: 58 | try: 59 | parsed_result = strategy(response) 60 | if self._is_valid_result(parsed_result): 61 | result.update(parsed_result) 62 | logger.info(f"Successfully parsed using strategy: {strategy.__name__}") 63 | break 64 | except Exception as e: 65 | logger.warning(f"Error in parsing strategy {strategy.__name__}: {str(e)}") 66 | 67 | if not self._is_valid_result(result): 68 | logger.warning("All parsing strategies failed. 
Using fallback parsing.") 69 | result = self._fallback_parsing(response) 70 | 71 | result = self._post_process_result(result) 72 | 73 | logger.info("Finished parsing LLM response") 74 | return result 75 | 76 | def _parse_research_response(self, response: str) -> Dict[str, Union[str, AnalysisResult]]: 77 | """Handle research mode specific parsing""" 78 | try: 79 | analysis_result = self.strategic_parser.parse_analysis(response) 80 | if analysis_result: 81 | return { 82 | 'mode': 'research', 83 | 'analysis_result': analysis_result, 84 | 'error': None 85 | } 86 | else: 87 | logger.error("Failed to parse strategic analysis") 88 | return { 89 | 'mode': 'research', 90 | 'analysis_result': None, 91 | 'error': 'Failed to parse strategic analysis' 92 | } 93 | except Exception as e: 94 | logger.error(f"Error in research response parsing: {str(e)}") 95 | return { 96 | 'mode': 'research', 97 | 'analysis_result': None, 98 | 'error': str(e) 99 | } 100 | 101 | def parse_search_query(self, query_response: str) -> Dict[str, str]: 102 | """Parse search query formulation response""" 103 | try: 104 | lines = query_response.strip().split('\n') 105 | result = { 106 | 'query': '', 107 | 'time_range': 'none' 108 | } 109 | 110 | for line in lines: 111 | if ':' in line: 112 | key, value = line.split(':', 1) 113 | key = key.strip().lower() 114 | value = value.strip() 115 | 116 | if 'query' in key: 117 | result['query'] = self._clean_query(value) 118 | elif 'time' in key or 'range' in key: 119 | result['time_range'] = self._validate_time_range(value) 120 | 121 | return result 122 | except Exception as e: 123 | logger.error(f"Error parsing search query: {str(e)}") 124 | return {'query': '', 'time_range': 'none'} 125 | 126 | def _parse_structured_response(self, response: str) -> Dict[str, Union[str, List[int]]]: 127 | result = {} 128 | for key, pattern in self.section_identifiers: 129 | match = re.search(f'{pattern}(.*?)(?={"|".join([p for k, p in self.section_identifiers if k != key])}|$)', 130 | response, re.IGNORECASE | re.DOTALL) 131 | if match: 132 | result[key] = match.group(1).strip() 133 | 134 | if 'selected_results' in result: 135 | result['selected_results'] = self._extract_numbers(result['selected_results']) 136 | 137 | return result 138 | 139 | def _parse_json_response(self, response: str) -> Dict[str, Union[str, List[int]]]: 140 | try: 141 | json_match = re.search(r'\{.*\}', response, re.DOTALL) 142 | if json_match: 143 | json_str = json_match.group(0) 144 | parsed_json = json.loads(json_str) 145 | return {k: v for k, v in parsed_json.items() 146 | if k in ['decision', 'reasoning', 'selected_results', 'response']} 147 | except json.JSONDecodeError: 148 | pass 149 | return {} 150 | 151 | def _parse_unstructured_response(self, response: str) -> Dict[str, Union[str, List[int]]]: 152 | result = {} 153 | lines = response.split('\n') 154 | current_section = None 155 | 156 | for line in lines: 157 | section_match = re.match(r'(.+?)[:.-](.+)', line) 158 | if section_match: 159 | key = self._match_section_to_key(section_match.group(1)) 160 | if key: 161 | current_section = key 162 | result[key] = section_match.group(2).strip() 163 | elif current_section: 164 | result[current_section] += ' ' + line.strip() 165 | 166 | if 'selected_results' in result: 167 | result['selected_results'] = self._extract_numbers(result['selected_results']) 168 | 169 | return result 170 | 171 | def _parse_implicit_response(self, response: str) -> Dict[str, Union[str, List[int]]]: 172 | result = {} 173 | 174 | decision = 
self._infer_decision(response) 175 | if decision: 176 | result['decision'] = decision 177 | 178 | numbers = self._extract_numbers(response) 179 | if numbers: 180 | result['selected_results'] = numbers 181 | 182 | if not result: 183 | result['response'] = response.strip() 184 | 185 | return result 186 | 187 | def _fallback_parsing(self, response: str) -> Dict[str, Union[str, List[int]]]: 188 | return { 189 | 'decision': self._infer_decision(response), 190 | 'reasoning': None, 191 | 'selected_results': self._extract_numbers(response), 192 | 'response': response.strip() 193 | } 194 | 195 | def _post_process_result(self, result: Dict[str, Union[str, List[int]]]) -> Dict[str, Union[str, List[int]]]: 196 | if result['decision'] not in ['refine', 'answer']: 197 | result['decision'] = self._infer_decision(str(result)) 198 | 199 | if not isinstance(result['selected_results'], list): 200 | result['selected_results'] = self._extract_numbers(str(result['selected_results'])) 201 | 202 | result['selected_results'] = result['selected_results'][:2] 203 | 204 | if not result['reasoning']: 205 | result['reasoning'] = f"Based on the {'presence' if result['selected_results'] else 'absence'} of selected results and the overall content." 206 | 207 | if not result['response']: 208 | result['response'] = result.get('reasoning', 'No clear response found.') 209 | 210 | return result 211 | 212 | def _match_section_to_key(self, section: str) -> Optional[str]: 213 | for key, pattern in self.section_identifiers: 214 | if re.search(pattern, section, re.IGNORECASE): 215 | return key 216 | return None 217 | 218 | def _extract_numbers(self, text: str) -> List[int]: 219 | return [int(num) for num in re.findall(r'\b(?:10|[1-9])\b', text)] 220 | 221 | def _infer_decision(self, text: str) -> str: 222 | text = text.lower() 223 | refine_score = sum(text.count(keyword) for keyword in self.decision_keywords['refine']) 224 | answer_score = sum(text.count(keyword) for keyword in self.decision_keywords['answer']) 225 | return 'refine' if refine_score > answer_score else 'answer' 226 | 227 | def _is_valid_result(self, result: Dict[str, Union[str, List[int]]]) -> bool: 228 | return bool(result.get('decision') or result.get('response') or result.get('selected_results')) 229 | 230 | def _clean_query(self, query: str) -> str: 231 | """Clean and validate search query""" 232 | query = re.sub(r'["\'\[\]]', '', query) 233 | query = re.sub(r'\s+', ' ', query) 234 | return query.strip()[:100] 235 | 236 | def _validate_time_range(self, time_range: str) -> str: 237 | """Validate time range value""" 238 | valid_ranges = ['d', 'w', 'm', 'y', 'none'] 239 | time_range = time_range.lower() 240 | return time_range if time_range in valid_ranges else 'none' 241 | -------------------------------------------------------------------------------- /Web-LLM.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from colorama import init, Fore, Style 4 | import logging 5 | import time 6 | from io import StringIO 7 | from Self_Improving_Search import EnhancedSelfImprovingSearch 8 | from llm_config import get_llm_config 9 | from llm_response_parser import UltimateLLMResponseParser 10 | from llm_wrapper import LLMWrapper 11 | from strategic_analysis_parser import StrategicAnalysisParser 12 | from research_manager import ResearchManager 13 | 14 | # Initialize colorama 15 | if os.name == 'nt': # Windows-specific initialization 16 | init(convert=True, strip=False, wrap=True) 17 | else: 18 | init() 19 | 20 
| # Set up logging 21 | log_directory = 'logs' 22 | if not os.path.exists(log_directory): 23 | os.makedirs(log_directory) 24 | 25 | logger = logging.getLogger(__name__) 26 | logger.setLevel(logging.INFO) 27 | log_file = os.path.join(log_directory, 'web_llm.log') 28 | file_handler = logging.FileHandler(log_file) 29 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 30 | file_handler.setFormatter(formatter) 31 | logger.handlers = [] 32 | logger.addHandler(file_handler) 33 | logger.propagate = False 34 | 35 | # Disable other loggers 36 | for name in logging.root.manager.loggerDict: 37 | if name != __name__: 38 | logging.getLogger(name).disabled = True 39 | 40 | class OutputRedirector: 41 | def __init__(self, stream=None): 42 | self.stream = stream or StringIO() 43 | self.original_stdout = sys.stdout 44 | self.original_stderr = sys.stderr 45 | 46 | def __enter__(self): 47 | sys.stdout = self.stream 48 | sys.stderr = self.stream 49 | return self.stream 50 | 51 | def __exit__(self, exc_type, exc_val, exc_tb): 52 | sys.stdout = self.original_stdout 53 | sys.stderr = self.original_stderr 54 | 55 | def print_header(): 56 | print(Fore.CYAN + Style.BRIGHT + """ 57 | ╔══════════════════════════════════════════════════════════╗ 58 | ║ 🌐 Advanced Research Assistant 🤖 ║ 59 | ╚══════════════════════════════════════════════════════════╝ 60 | """ + Style.RESET_ALL) 61 | print(Fore.YELLOW + """ 62 | Welcome to the Advanced Research Assistant! 63 | 64 | Usage: 65 | - Start your research query with '@' 66 | Example: "@analyze the impact of AI on healthcare" 67 | 68 | Press CTRL+Z (Windows) to submit input. 69 | """ + Style.RESET_ALL) 70 | 71 | def get_multiline_input() -> str: 72 | """Get multiline input using msvcrt for Windows""" 73 | print(f"{Fore.GREEN}📝 Enter your message (Press CTRL+Z to submit):{Style.RESET_ALL}") 74 | lines = [] 75 | current_line = [] 76 | 77 | import msvcrt 78 | 79 | try: 80 | while True: 81 | if msvcrt.kbhit(): 82 | char = msvcrt.getch() 83 | 84 | # CTRL+Z detection (Windows EOF) 85 | if char == b'\x1a': # ASCII code for CTRL+Z 86 | sys.stdout.write('\n') # New line for clean display 87 | if current_line: 88 | lines.append(''.join(current_line)) 89 | result = ''.join(lines).strip() if lines else ''.join(current_line).strip() 90 | return result if result else '' # Return empty string instead of None 91 | 92 | # Handle special characters 93 | elif char in [b'\r', b'\n']: # Enter 94 | sys.stdout.write('\n') 95 | if current_line: # Only append if there's content 96 | lines.append(''.join(current_line)) 97 | current_line = [] 98 | 99 | elif char == b'\x08': # Backspace 100 | if current_line: 101 | current_line.pop() 102 | sys.stdout.write('\b \b') # Erase character 103 | 104 | elif char == b'\x03': # CTRL+C 105 | sys.stdout.write('\n') 106 | return 'q' 107 | 108 | # Normal character 109 | elif 32 <= ord(char) <= 126: # Printable characters 110 | current_line.append(char.decode('utf-8')) 111 | sys.stdout.write(char.decode('utf-8')) 112 | 113 | # Flush output 114 | sys.stdout.flush() 115 | 116 | except Exception as e: 117 | logger.error(f"Error in multiline input: {str(e)}") 118 | return 'q' 119 | 120 | def initialize_system(): 121 | """Initialize system with proper error checking""" 122 | try: 123 | print(Fore.YELLOW + "Initializing system..." 
+ Style.RESET_ALL) 124 | 125 | llm_config = get_llm_config() 126 | if llm_config['llm_type'] == 'ollama': 127 | import requests 128 | try: 129 | response = requests.get(llm_config['base_url'], timeout=5) 130 | if response.status_code != 200: 131 | raise ConnectionError("Cannot connect to Ollama server") 132 | except requests.exceptions.RequestException: 133 | raise ConnectionError( 134 | "\nCannot connect to Ollama server!" 135 | "\nPlease ensure:" 136 | "\n1. Ollama is installed" 137 | "\n2. Ollama server is running (try 'ollama serve')" 138 | "\n3. The model specified in llm_config.py is pulled" 139 | ) 140 | elif llm_config['llm_type'] == 'llama_cpp': 141 | model_path = llm_config.get('model_path') 142 | if not model_path or not os.path.exists(model_path): 143 | raise FileNotFoundError( 144 | f"\nLLama.cpp model not found at: {model_path}" 145 | "\nPlease ensure model path in llm_config.py is correct" 146 | ) 147 | 148 | with OutputRedirector() as output: 149 | llm_wrapper = LLMWrapper() 150 | try: 151 | test_response = llm_wrapper.generate("Test", max_tokens=10) 152 | if not test_response: 153 | raise ConnectionError("LLM failed to generate response") 154 | except Exception as e: 155 | raise ConnectionError(f"LLM test failed: {str(e)}") 156 | 157 | parser = UltimateLLMResponseParser() 158 | search_engine = EnhancedSelfImprovingSearch(llm_wrapper, parser) 159 | research_manager = ResearchManager(llm_wrapper, parser, search_engine) 160 | 161 | print(Fore.GREEN + "System initialized successfully." + Style.RESET_ALL) 162 | return llm_wrapper, parser, search_engine, research_manager 163 | except Exception as e: 164 | logger.error(f"Error initializing system: {str(e)}", exc_info=True) 165 | print(Fore.RED + f"System initialization failed: {str(e)}" + Style.RESET_ALL) 166 | return None, None, None, None 167 | def handle_search_mode(search_engine, query): 168 | """Handles web search operations""" 169 | print(f"{Fore.CYAN}Initiating web search...{Style.RESET_ALL}") 170 | try: 171 | # Change search() to search_and_improve() which is the correct method name 172 | results = search_engine.search_and_improve(query) 173 | print(f"\n{Fore.GREEN}Search Results:{Style.RESET_ALL}") 174 | print(results) 175 | except Exception as e: 176 | logger.error(f"Search error: {str(e)}") 177 | print(f"{Fore.RED}Search failed: {str(e)}{Style.RESET_ALL}") 178 | def handle_research_mode(research_manager, query): 179 | """Handles research mode operations""" 180 | print(f"{Fore.CYAN}Initiating research mode...{Style.RESET_ALL}") 181 | 182 | try: 183 | # Start the research 184 | research_manager.start_research(query) 185 | research_active = True # Flag to track research state 186 | 187 | submit_key = "CTRL+Z" if os.name == 'nt' else "CTRL+D" 188 | print(f"\n{Fore.YELLOW}Research Running. 
Available Commands:{Style.RESET_ALL}") 189 | print(f"Type command and press {submit_key}:") 190 | print("'s' = Show status") 191 | print("'f' = Show focus") 192 | print("'p' = Pause and assess the research progress") 193 | print("'q' = Quit research") 194 | 195 | # While the research is active, keep checking for commands 196 | while research_active and research_manager.is_active(): 197 | try: 198 | print(f"\n{Fore.GREEN}Enter command (s/f/p/q) and press {submit_key} to submit:{Style.RESET_ALL}") 199 | command = get_multiline_input().strip().lower() 200 | 201 | # Handle empty input 202 | if not command: 203 | continue 204 | 205 | if command == 's': # Show status command 206 | status = research_manager.get_progress() 207 | print("\n" + status) 208 | # Don't break or stop research after showing status 209 | continue 210 | 211 | elif command == 'f': # Show current focus command 212 | if research_manager.current_focus: 213 | print(f"\n{Fore.CYAN}Current Focus:{Style.RESET_ALL}") 214 | print(f"Area: {research_manager.current_focus.area}") 215 | print(f"Priority: {research_manager.current_focus.priority}") 216 | print(f"Reasoning: {research_manager.current_focus.reasoning}") 217 | else: 218 | print(f"\n{Fore.YELLOW}No current focus area{Style.RESET_ALL}") 219 | continue 220 | 221 | elif command == 'p': # Pause research progress command 222 | research_manager.pause_and_assess() 223 | continue 224 | 225 | elif command == 'q': # Quit research 226 | print(f"\n{Fore.YELLOW}Research terminated by user.{Style.RESET_ALL}") 227 | research_active = False 228 | break 229 | 230 | else: 231 | print(f"{Fore.RED}Unknown command. Please enter a valid command (s/f/p/q).{Style.RESET_ALL}") 232 | continue 233 | 234 | except KeyboardInterrupt: 235 | print(f"\n{Fore.YELLOW}Research interrupted by user.{Style.RESET_ALL}") 236 | research_active = False 237 | break 238 | 239 | except Exception as e: 240 | logger.error(f"Error processing command: {str(e)}") 241 | print(f"{Fore.RED}An error occurred: {str(e)}{Style.RESET_ALL}") 242 | continue 243 | 244 | # Only terminate if research is no longer active 245 | if not research_active: 246 | print("\nInitiating research termination...") 247 | summary = research_manager.terminate_research() 248 | 249 | try: 250 | research_manager._cleanup_research_ui() 251 | except Exception as e: 252 | logger.error(f"Error during UI cleanup: {str(e)}") 253 | 254 | print(f"\n{Fore.GREEN}Research Summary:{Style.RESET_ALL}") 255 | print(summary) 256 | 257 | if research_manager.research_complete and research_manager.research_summary: 258 | time.sleep(0.5) 259 | research_manager.start_conversation_mode() 260 | 261 | except KeyboardInterrupt: 262 | print(f"\n{Fore.YELLOW}Research interrupted.{Style.RESET_ALL}") 263 | research_manager.terminate_research() 264 | except Exception as e: 265 | logger.error(f"Research error: {str(e)}") 266 | print(f"\n{Fore.RED}Research error: {str(e)}{Style.RESET_ALL}") 267 | research_manager.terminate_research() 268 | def main(): 269 | print_header() 270 | try: 271 | llm, parser, search_engine, research_manager = initialize_system() 272 | if not all([llm, parser, search_engine, research_manager]): 273 | return 274 | 275 | while True: 276 | try: 277 | # Get input with improved CTRL+Z handling 278 | user_input = get_multiline_input() 279 | 280 | # Handle immediate CTRL+Z (empty input) 281 | if user_input == "": 282 | user_input = "@quit" # Convert empty CTRL+Z to quit command 283 | 284 | user_input = user_input.strip() 285 | 286 | # Check for special quit markers 287 | 
if user_input in ["@quit", "quit", "q"]: 288 | print(Fore.YELLOW + "\nGoodbye!" + Style.RESET_ALL) 289 | break 290 | 291 | if not user_input: 292 | continue 293 | 294 | if user_input.lower() == 'help': 295 | print_header() 296 | continue 297 | 298 | if user_input.startswith('/'): 299 | search_query = user_input[1:].strip() 300 | handle_search_mode(search_engine, search_query) 301 | 302 | elif user_input.startswith('@'): 303 | research_query = user_input[1:].strip() 304 | handle_research_mode(research_manager, research_query) 305 | 306 | else: 307 | print(f"{Fore.RED}Please start with '/' for search or '@' for research.{Style.RESET_ALL}") 308 | 309 | except KeyboardInterrupt: 310 | print(f"\n{Fore.YELLOW}Exiting program...{Style.RESET_ALL}") 311 | break 312 | 313 | except Exception as e: 314 | logger.error(f"Error in main loop: {str(e)}") 315 | print(f"{Fore.RED}An error occurred: {str(e)}{Style.RESET_ALL}") 316 | continue 317 | 318 | except KeyboardInterrupt: 319 | print(f"\n{Fore.YELLOW}Program terminated by user.{Style.RESET_ALL}") 320 | 321 | except Exception as e: 322 | logger.critical(f"Critical error: {str(e)}") 323 | print(f"{Fore.RED}Critical error: {str(e)}{Style.RESET_ALL}") 324 | 325 | finally: 326 | # Ensure proper cleanup on exit 327 | try: 328 | if 'research_manager' in locals() and research_manager: 329 | if hasattr(research_manager, 'ui'): 330 | research_manager.ui.cleanup() 331 | except: 332 | pass 333 | os._exit(0) 334 | 335 | if __name__ == "__main__": 336 | main() 337 | -------------------------------------------------------------------------------- /Self_Improving_Search.py: -------------------------------------------------------------------------------- 1 | import time 2 | import re 3 | import os 4 | from typing import List, Dict, Tuple, Union 5 | from colorama import Fore, Style 6 | import logging 7 | import sys 8 | from io import StringIO 9 | from web_scraper import get_web_content, can_fetch 10 | from llm_config import get_llm_config 11 | from llm_response_parser import UltimateLLMResponseParser 12 | from llm_wrapper import LLMWrapper 13 | from urllib.parse import urlparse 14 | 15 | # Set up logging 16 | log_directory = 'logs' 17 | if not os.path.exists(log_directory): 18 | os.makedirs(log_directory) 19 | 20 | # Configure logger 21 | logger = logging.getLogger(__name__) 22 | logger.setLevel(logging.INFO) 23 | log_file = os.path.join(log_directory, 'llama_output.log') 24 | file_handler = logging.FileHandler(log_file) 25 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 26 | file_handler.setFormatter(formatter) 27 | logger.handlers = [] 28 | logger.addHandler(file_handler) 29 | logger.propagate = False 30 | 31 | # Suppress other loggers 32 | for name in ['root', 'duckduckgo_search', 'requests', 'urllib3']: 33 | logging.getLogger(name).setLevel(logging.WARNING) 34 | logging.getLogger(name).handlers = [] 35 | logging.getLogger(name).propagate = False 36 | 37 | class OutputRedirector: 38 | def __init__(self, stream=None): 39 | self.stream = stream or StringIO() 40 | self.original_stdout = sys.stdout 41 | self.original_stderr = sys.stderr 42 | 43 | def __enter__(self): 44 | sys.stdout = self.stream 45 | sys.stderr = self.stream 46 | return self.stream 47 | 48 | def __exit__(self, exc_type, exc_val, exc_tb): 49 | sys.stdout = self.original_stdout 50 | sys.stderr = self.original_stderr 51 | 52 | class EnhancedSelfImprovingSearch: 53 | def __init__(self, llm: LLMWrapper, parser: UltimateLLMResponseParser, max_attempts: int = 5): 54 | self.llm = 
llm 55 | self.parser = parser 56 | self.max_attempts = max_attempts 57 | self.llm_config = get_llm_config() 58 | 59 | @staticmethod 60 | def initialize_llm(): 61 | llm_wrapper = LLMWrapper() 62 | return llm_wrapper 63 | 64 | def print_thinking(self): 65 | print(Fore.MAGENTA + "🧠 Thinking..." + Style.RESET_ALL) 66 | 67 | def print_searching(self): 68 | print(Fore.MAGENTA + "📝 Searching..." + Style.RESET_ALL) 69 | 70 | def search_and_improve(self, user_query: str) -> str: 71 | attempt = 0 72 | while attempt < self.max_attempts: 73 | print(f"\n{Fore.CYAN}Search attempt {attempt + 1}:{Style.RESET_ALL}") 74 | self.print_searching() 75 | 76 | try: 77 | formulated_query, time_range = self.formulate_query(user_query, attempt) 78 | 79 | print(f"{Fore.YELLOW}Original query: {user_query}{Style.RESET_ALL}") 80 | print(f"{Fore.YELLOW}Formulated query: {formulated_query}{Style.RESET_ALL}") 81 | print(f"{Fore.YELLOW}Time range: {time_range}{Style.RESET_ALL}") 82 | 83 | if not formulated_query: 84 | print(f"{Fore.RED}Error: Empty search query. Retrying...{Style.RESET_ALL}") 85 | attempt += 1 86 | continue 87 | 88 | search_results = self.perform_search(formulated_query, time_range) 89 | 90 | if not search_results: 91 | print(f"{Fore.RED}No results found. Retrying with a different query...{Style.RESET_ALL}") 92 | attempt += 1 93 | continue 94 | 95 | self.display_search_results(search_results) 96 | 97 | selected_urls = self.select_relevant_pages(search_results, user_query) 98 | 99 | if not selected_urls: 100 | print(f"{Fore.RED}No relevant URLs found. Retrying...{Style.RESET_ALL}") 101 | attempt += 1 102 | continue 103 | 104 | print(Fore.MAGENTA + "⚙️ Scraping selected pages..." + Style.RESET_ALL) 105 | # Scraping is done without OutputRedirector to ensure messages are visible 106 | scraped_content = self.scrape_content(selected_urls) 107 | 108 | if not scraped_content: 109 | print(f"{Fore.RED}Failed to scrape content. Retrying...{Style.RESET_ALL}") 110 | attempt += 1 111 | continue 112 | 113 | self.display_scraped_content(scraped_content) 114 | 115 | self.print_thinking() 116 | 117 | with OutputRedirector() as output: 118 | evaluation, decision = self.evaluate_scraped_content(user_query, scraped_content) 119 | llm_output = output.getvalue() 120 | logger.info(f"LLM Output in evaluate_scraped_content:\n{llm_output}") 121 | 122 | print(f"{Fore.MAGENTA}Evaluation: {evaluation}{Style.RESET_ALL}") 123 | print(f"{Fore.MAGENTA}Decision: {decision}{Style.RESET_ALL}") 124 | 125 | if decision == "answer": 126 | return self.generate_final_answer(user_query, scraped_content) 127 | elif decision == "refine": 128 | print(f"{Fore.YELLOW}Refining search...{Style.RESET_ALL}") 129 | attempt += 1 130 | else: 131 | print(f"{Fore.RED}Unexpected decision. Proceeding to answer.{Style.RESET_ALL}") 132 | return self.generate_final_answer(user_query, scraped_content) 133 | 134 | except Exception as e: 135 | print(f"{Fore.RED}An error occurred during search attempt. 
Check the log file for details.{Style.RESET_ALL}") 136 | logger.error(f"An error occurred during search: {str(e)}", exc_info=True) 137 | attempt += 1 138 | 139 | return self.synthesize_final_answer(user_query) 140 | 141 | def evaluate_scraped_content(self, user_query: str, scraped_content: Dict[str, str]) -> Tuple[str, str]: 142 | user_query_short = user_query[:200] 143 | prompt = f""" 144 | Evaluate if the following scraped content contains sufficient information to answer the user's question comprehensively: 145 | 146 | User's question: "{user_query_short}" 147 | 148 | Scraped Content: 149 | {self.format_scraped_content(scraped_content)} 150 | 151 | Your task: 152 | 1. Determine if the scraped content provides enough relevant and detailed information to answer the user's question thoroughly. 153 | 2. If the information is sufficient, decide to 'answer'. If more information or clarification is needed, decide to 'refine' the search. 154 | 155 | Respond using EXACTLY this format: 156 | Evaluation: [Your evaluation of the scraped content] 157 | Decision: [ONLY 'answer' if content is sufficient, or 'refine' if more information is needed] 158 | """ 159 | max_retries = 3 160 | for attempt in range(max_retries): 161 | try: 162 | response_text = self.llm.generate(prompt, max_tokens=200, stop=None) 163 | evaluation, decision = self.parse_evaluation_response(response_text) 164 | if decision in ['answer', 'refine']: 165 | return evaluation, decision 166 | except Exception as e: 167 | logger.warning(f"Error in evaluate_scraped_content (attempt {attempt + 1}): {str(e)}") 168 | 169 | logger.warning("Failed to get a valid decision in evaluate_scraped_content. Defaulting to 'refine'.") 170 | return "Failed to evaluate content.", "refine" 171 | 172 | def parse_evaluation_response(self, response: str) -> Tuple[str, str]: 173 | evaluation = "" 174 | decision = "" 175 | for line in response.strip().split('\n'): 176 | if line.startswith('Evaluation:'): 177 | evaluation = line.split(':', 1)[1].strip() 178 | elif line.startswith('Decision:'): 179 | decision = line.split(':', 1)[1].strip().lower() 180 | return evaluation, decision 181 | 182 | def formulate_query(self, user_query: str, attempt: int) -> Tuple[str, str]: 183 | user_query_short = user_query[:200] 184 | prompt = f""" 185 | Based on the following user question, formulate a concise and effective search query: 186 | "{user_query_short}" 187 | Your task: 188 | 1. Create a search query of 2-5 words that will yield relevant results. 189 | 2. Determine if a specific time range is needed for the search. 190 | Time range options: 191 | - 'd': Limit results to the past day. Use for very recent events or rapidly changing information. 192 | - 'w': Limit results to the past week. Use for recent events or topics with frequent updates. 193 | - 'm': Limit results to the past month. Use for relatively recent information or ongoing events. 194 | - 'y': Limit results to the past year. Use for annual events or information that changes yearly. 195 | - 'none': No time limit. Use for historical information or topics not tied to a specific time frame. 196 | Respond in the following format: 197 | Search query: [Your 2-5 word query] 198 | Time range: [d/w/m/y/none] 199 | Do not provide any additional information or explanation. 
200 | """ 201 | max_retries = 3 202 | for retry in range(max_retries): 203 | with OutputRedirector() as output: 204 | response_text = self.llm.generate(prompt, max_tokens=50, stop=None) 205 | llm_output = output.getvalue() 206 | logger.info(f"LLM Output in formulate_query:\n{llm_output}") 207 | query, time_range = self.parse_query_response(response_text) 208 | if query and time_range: 209 | return query, time_range 210 | return self.fallback_query(user_query), "none" 211 | 212 | def parse_query_response(self, response: str) -> Tuple[str, str]: 213 | query = "" 214 | time_range = "none" 215 | for line in response.strip().split('\n'): 216 | if ":" in line: 217 | key, value = line.split(":", 1) 218 | key = key.strip().lower() 219 | value = value.strip() 220 | if "query" in key: 221 | query = self.clean_query(value) 222 | elif "time" in key or "range" in key: 223 | time_range = self.validate_time_range(value) 224 | return query, time_range 225 | 226 | def clean_query(self, query: str) -> str: 227 | query = re.sub(r'["\'\[\]]', '', query) 228 | query = re.sub(r'\s+', ' ', query) 229 | return query.strip()[:100] 230 | 231 | def validate_time_range(self, time_range: str) -> str: 232 | valid_ranges = ['d', 'w', 'm', 'y', 'none'] 233 | time_range = time_range.lower() 234 | return time_range if time_range in valid_ranges else 'none' 235 | 236 | def fallback_query(self, user_query: str) -> str: 237 | words = user_query.split() 238 | return " ".join(words[:5]) 239 | 240 | def perform_search(self, query: str, time_range: str) -> List[Dict]: 241 | if not query: 242 | return [] 243 | 244 | from duckduckgo_search import DDGS 245 | 246 | with DDGS() as ddgs: 247 | try: 248 | with OutputRedirector() as output: 249 | if time_range and time_range != 'none': 250 | results = list(ddgs.text(query, timelimit=time_range, max_results=10)) 251 | else: 252 | results = list(ddgs.text(query, max_results=10)) 253 | ddg_output = output.getvalue() 254 | logger.info(f"DDG Output in perform_search:\n{ddg_output}") 255 | return [{'number': i+1, **result} for i, result in enumerate(results)] 256 | except Exception as e: 257 | print(f"{Fore.RED}Search error: {str(e)}{Style.RESET_ALL}") 258 | return [] 259 | 260 | def display_search_results(self, results: List[Dict]) -> None: 261 | """Display search results with minimal output""" 262 | try: 263 | if not results: 264 | return 265 | 266 | # Only show search success status 267 | print(f"\nSearch query sent to DuckDuckGo: {self.last_query}") 268 | print(f"Time range sent to DuckDuckGo: {self.last_time_range}") 269 | print(f"Number of results: {len(results)}") 270 | 271 | except Exception as e: 272 | logger.error(f"Error displaying search results: {str(e)}") 273 | 274 | def select_relevant_pages(self, search_results: List[Dict], user_query: str) -> List[str]: 275 | prompt = f""" 276 | Given the following search results for the user's question: "{user_query}" 277 | Select the 2 most relevant results to scrape and analyze. Explain your reasoning for each selection. 278 | 279 | Search Results: 280 | {self.format_results(search_results)} 281 | 282 | Instructions: 283 | 1. You MUST select exactly 2 result numbers from the search results. 284 | 2. Choose the results that are most likely to contain comprehensive and relevant information to answer the user's question. 285 | 3. Provide a brief reason for each selection. 
286 | 287 | You MUST respond using EXACTLY this format and nothing else: 288 | 289 | Selected Results: [Two numbers corresponding to the selected results] 290 | Reasoning: [Your reasoning for the selections] 291 | """ 292 | 293 | max_retries = 3 294 | for retry in range(max_retries): 295 | with OutputRedirector() as output: 296 | response_text = self.llm.generate(prompt, max_tokens=200, stop=None) 297 | llm_output = output.getvalue() 298 | logger.info(f"LLM Output in select_relevant_pages:\n{llm_output}") 299 | 300 | parsed_response = self.parse_page_selection_response(response_text) 301 | if parsed_response and self.validate_page_selection_response(parsed_response, len(search_results)): 302 | selected_urls = [result['href'] for result in search_results if result['number'] in parsed_response['selected_results']] 303 | 304 | allowed_urls = [url for url in selected_urls if can_fetch(url)] 305 | if allowed_urls: 306 | return allowed_urls 307 | else: 308 | print(f"{Fore.YELLOW}Warning: All selected URLs are disallowed by robots.txt. Retrying selection.{Style.RESET_ALL}") 309 | else: 310 | print(f"{Fore.YELLOW}Warning: Invalid page selection. Retrying.{Style.RESET_ALL}") 311 | 312 | print(f"{Fore.YELLOW}Warning: All attempts to select relevant pages failed. Falling back to top allowed results.{Style.RESET_ALL}") 313 | allowed_urls = [result['href'] for result in search_results if can_fetch(result['href'])][:2] 314 | return allowed_urls 315 | 316 | def parse_page_selection_response(self, response: str) -> Dict[str, Union[List[int], str]]: 317 | lines = response.strip().split('\n') 318 | parsed = {} 319 | for line in lines: 320 | if line.startswith('Selected Results:'): 321 | parsed['selected_results'] = [int(num.strip()) for num in re.findall(r'\d+', line)] 322 | elif line.startswith('Reasoning:'): 323 | parsed['reasoning'] = line.split(':', 1)[1].strip() 324 | return parsed if 'selected_results' in parsed and 'reasoning' in parsed else None 325 | 326 | def validate_page_selection_response(self, parsed_response: Dict[str, Union[List[int], str]], num_results: int) -> bool: 327 | if len(parsed_response['selected_results']) != 2: 328 | return False 329 | if any(num < 1 or num > num_results for num in parsed_response['selected_results']): 330 | return False 331 | return True 332 | 333 | def format_results(self, results: List[Dict]) -> str: 334 | formatted_results = [] 335 | for result in results: 336 | formatted_result = f"{result['number']}. 
Title: {result.get('title', 'N/A')}\n"
337 | formatted_result += f" Snippet: {result.get('body', 'N/A')[:200]}...\n"
338 | formatted_result += f" URL: {result.get('href', 'N/A')}\n"
339 | formatted_results.append(formatted_result)
340 | return "\n".join(formatted_results)
341 | 
342 | def scrape_content(self, urls: List[str]) -> Dict[str, str]:
343 | scraped_content = {}
344 | blocked_urls = []
345 | for url in urls:
346 | robots_allowed = can_fetch(url)
347 | if robots_allowed:
348 | content = get_web_content([url])
349 | if content:
350 | scraped_content.update(content)
351 | print(Fore.YELLOW + f"Successfully scraped: {url}" + Style.RESET_ALL)
352 | logger.info(f"Successfully scraped: {url}")
353 | else:
354 | print(Fore.RED + f"Failed to scrape content from {url}" + Style.RESET_ALL)
355 | logger.warning(f"Failed to scrape content from {url}")
356 | else:
357 | blocked_urls.append(url)
358 | print(Fore.RED + f"Warning: Robots.txt disallows scraping of {url}" + Style.RESET_ALL)
359 | logger.warning(f"Robots.txt disallows scraping of {url}")
360 | 
361 | print(Fore.CYAN + f"Scraped content received for {len(scraped_content)} URLs" + Style.RESET_ALL)
362 | logger.info(f"Scraped content received for {len(scraped_content)} URLs")
363 | 
364 | if blocked_urls:
365 | print(Fore.RED + f"Warning: {len(blocked_urls)} URL(s) were not scraped due to robots.txt restrictions." + Style.RESET_ALL)
366 | logger.warning(f"{len(blocked_urls)} URL(s) were not scraped due to robots.txt restrictions: {', '.join(blocked_urls)}")
367 | 
368 | return scraped_content
369 | 
370 | def display_scraped_content(self, scraped_content: Dict[str, str]):
371 | print(f"\n{Fore.CYAN}Scraped Content:{Style.RESET_ALL}")
372 | for url, content in scraped_content.items():
373 | print(f"{Fore.GREEN}URL: {url}{Style.RESET_ALL}")
374 | print(f"Content: {content[:4000]}...\n")
375 | 
376 | def generate_final_answer(self, user_query: str, scraped_content: Dict[str, str]) -> str:
377 | user_query_short = user_query[:200]
378 | prompt = f"""
379 | You are an AI assistant. Provide a comprehensive and detailed answer to the following question using ONLY the information provided in the scraped content. Do not include any references or mention any sources. Answer directly and thoroughly.
380 | 
381 | Question: "{user_query_short}"
382 | 
383 | Scraped Content:
384 | {self.format_scraped_content(scraped_content)}
385 | 
386 | Important Instructions:
387 | 1. Do not use phrases like "Based on the absence of selected results" or similar.
388 | 2. If the scraped content does not contain enough information to answer the question, say so explicitly and explain what information is missing.
389 | 3. Provide as much relevant detail as possible from the scraped content.
390 | 
391 | Answer:
392 | """
393 | max_retries = 3
394 | for attempt in range(max_retries):
395 | with OutputRedirector() as output:
396 | response_text = self.llm.generate(prompt, max_tokens=1024, stop=None)
397 | llm_output = output.getvalue()
398 | logger.info(f"LLM Output in generate_final_answer:\n{llm_output}")
399 | if response_text:
400 | logger.info(f"LLM Response:\n{response_text}")
401 | return response_text
402 | 
403 | error_message = "I apologize, but I couldn't generate a satisfactory answer based on the available information."
404 | logger.warning(f"Failed to generate a response after {max_retries} attempts.
Returning error message.") 405 | return error_message 406 | 407 | def format_scraped_content(self, scraped_content: Dict[str, str]) -> str: 408 | formatted_content = [] 409 | for url, content in scraped_content.items(): 410 | content = re.sub(r'\s+', ' ', content) 411 | formatted_content.append(f"Content from {url}:\n{content}\n") 412 | return "\n".join(formatted_content) 413 | 414 | def synthesize_final_answer(self, user_query: str) -> str: 415 | prompt = f""" 416 | After multiple search attempts, we couldn't find a fully satisfactory answer to the user's question: "{user_query}" 417 | 418 | Please provide the best possible answer you can, acknowledging any limitations or uncertainties. 419 | If appropriate, suggest ways the user might refine their question or where they might find more information. 420 | 421 | Respond in a clear, concise, and informative manner. 422 | """ 423 | try: 424 | with OutputRedirector() as output: 425 | response_text = self.llm.generate(prompt, max_tokens=self.llm_config.get('max_tokens', 1024), stop=self.llm_config.get('stop', None)) 426 | llm_output = output.getvalue() 427 | logger.info(f"LLM Output in synthesize_final_answer:\n{llm_output}") 428 | if response_text: 429 | return response_text.strip() 430 | except Exception as e: 431 | logger.error(f"Error in synthesize_final_answer: {str(e)}", exc_info=True) 432 | return "I apologize, but after multiple attempts, I wasn't able to find a satisfactory answer to your question. Please try rephrasing your question or breaking it down into smaller, more specific queries." 433 | 434 | # End of EnhancedSelfImprovingSearch class 435 | -------------------------------------------------------------------------------- /research_manager.py: -------------------------------------------------------------------------------- 1 | import msvcrt 2 | import os 3 | import sys 4 | import threading 5 | import time 6 | import re 7 | import json 8 | import logging 9 | import curses 10 | import signal 11 | from typing import List, Dict, Set, Optional, Tuple, Union 12 | from dataclasses import dataclass 13 | from queue import Queue 14 | from datetime import datetime 15 | from io import StringIO 16 | from colorama import init, Fore, Style 17 | import select 18 | from threading import Event 19 | from urllib.parse import urlparse 20 | from pathlib import Path 21 | 22 | # Initialize colorama for cross-platform color support 23 | if os.name == 'nt': # Windows-specific initialization 24 | init(convert=True, strip=False, wrap=True) 25 | else: 26 | init() 27 | 28 | # Set up logging 29 | log_directory = 'logs' 30 | if not os.path.exists(log_directory): 31 | os.makedirs(log_directory) 32 | 33 | logger = logging.getLogger(__name__) 34 | logger.setLevel(logging.INFO) 35 | log_file = os.path.join(log_directory, 'research_llm.log') 36 | file_handler = logging.FileHandler(log_file) 37 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 38 | file_handler.setFormatter(formatter) 39 | logger.handlers = [] 40 | logger.addHandler(file_handler) 41 | logger.propagate = False 42 | 43 | # Suppress other loggers 44 | for name in logging.root.manager.loggerDict: 45 | if name != __name__: 46 | logging.getLogger(name).disabled = True 47 | 48 | @dataclass 49 | class ResearchFocus: 50 | """Represents a specific area of research focus""" 51 | area: str 52 | priority: int 53 | source_query: str = "" 54 | timestamp: str = "" 55 | search_queries: List[str] = None 56 | reasoning: Optional[str] = None 57 | 58 | def __post_init__(self): 59 | if not 
self.timestamp: 60 | self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 61 | if self.search_queries is None: 62 | self.search_queries = [] 63 | 64 | @dataclass 65 | class AnalysisResult: 66 | """Contains the complete analysis result""" 67 | original_question: str 68 | focus_areas: List[ResearchFocus] 69 | raw_response: str 70 | timestamp: str = "" 71 | 72 | def __post_init__(self): 73 | if not self.timestamp: 74 | self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 75 | 76 | class StrategicAnalysisParser: 77 | def __init__(self, llm=None): 78 | self.llm = llm 79 | self.logger = logging.getLogger(__name__) 80 | # Simplify patterns to match exactly what we expect 81 | self.patterns = { 82 | 'priority': [ 83 | r"Priority:\s*(\d+)", # Match exactly what's in our prompt 84 | ] 85 | } 86 | 87 | def strategic_analysis(self, original_query: str) -> Optional[AnalysisResult]: 88 | """Generate and process research areas with retries until success""" 89 | max_retries = 3 90 | try: 91 | self.logger.info("Starting strategic analysis...") 92 | prompt = f""" 93 | You must select exactly 5 areas to investigate in order to explore and gather information to answer the research question: 94 | "{original_query}" 95 | 96 | You MUST provide exactly 5 areas numbered 1-5. Each must have a priority, YOU MUST ensure that you only assign one priority per area. 97 | Assign priority based on the likelihood of a focus area being investigated to provide information that directly will allow you to respond to "{original_query}" with 5 being most likely and 1 being least. 98 | Follow this EXACT format without any deviations or additional text: 99 | 100 | 1. [First research topic] 101 | Priority: [number 1-5] 102 | 103 | 2. [Second research topic] 104 | Priority: [number 1-5] 105 | 106 | 3. [Third research topic] 107 | Priority: [number 1-5] 108 | 109 | 4. [Fourth research topic] 110 | Priority: [number 1-5] 111 | 112 | 5. [Fifth research topic] 113 | Priority: [number 1-5] 114 | """ 115 | for attempt in range(max_retries): 116 | response = self.llm.generate(prompt, max_tokens=1000) 117 | focus_areas = self._extract_research_areas(response) 118 | 119 | if focus_areas: # If we got any valid areas 120 | # Sort by priority (highest first) 121 | focus_areas.sort(key=lambda x: x.priority, reverse=True) 122 | 123 | return AnalysisResult( 124 | original_question=original_query, 125 | focus_areas=focus_areas, 126 | raw_response=response, 127 | timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S") 128 | ) 129 | else: 130 | self.logger.warning(f"Attempt {attempt + 1}: No valid areas generated, retrying...") 131 | print(f"\nRetrying research area generation (Attempt {attempt + 1}/{max_retries})...") 132 | 133 | # If all retries failed, try one final time with a stronger prompt 134 | prompt += "\n\nIMPORTANT: You MUST provide exactly 5 research areas with priorities. This is crucial." 
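# Illustrative example (assumed, not from a real run) of a model response
# that _extract_research_areas below can parse, per the format demanded above:
#   1. Current state of solid-state battery manufacturing
#   Priority: 5
#   2. Key material-science challenges
#   Priority: 4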
135 | response = self.llm.generate(prompt, max_tokens=1000) 136 | focus_areas = self._extract_research_areas(response) 137 | 138 | if focus_areas: 139 | focus_areas.sort(key=lambda x: x.priority, reverse=True) 140 | return AnalysisResult( 141 | original_question=original_query, 142 | focus_areas=focus_areas, 143 | raw_response=response, 144 | timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S") 145 | ) 146 | 147 | self.logger.error("Failed to generate any valid research areas after all attempts") 148 | return None 149 | 150 | except Exception as e: 151 | self.logger.error(f"Error in strategic analysis: {str(e)}") 152 | return None 153 | 154 | def _extract_research_areas(self, text: str) -> List[ResearchFocus]: 155 | """Extract research areas with enhanced parsing to handle priorities in various formats.""" 156 | areas = [] 157 | lines = text.strip().split('\n') 158 | 159 | current_area = None 160 | current_priority = None 161 | 162 | for i in range(len(lines)): 163 | line = lines[i].strip() 164 | if not line: 165 | continue 166 | 167 | # Check for numbered items (e.g., '1. Area Name') 168 | number_match = re.match(r'^(\d+)\.\s*(.*)', line) 169 | if number_match: 170 | # If we have a previous area, add it to our list 171 | if current_area is not None: 172 | areas.append(ResearchFocus( 173 | area=current_area.strip(' -:'), 174 | priority=current_priority or 3, 175 | )) 176 | # Start a new area 177 | area_line = number_match.group(2) 178 | 179 | # Search for 'priority' followed by a number, anywhere in the area_line 180 | priority_inline_match = re.search( 181 | r'(?i)\bpriority\b\s*(?:[:=]?\s*)?(\d+)', area_line) 182 | if priority_inline_match: 183 | # Extract and set the priority 184 | try: 185 | current_priority = int(priority_inline_match.group(1)) 186 | current_priority = max(1, min(5, current_priority)) 187 | except ValueError: 188 | current_priority = 3 # Default priority if parsing fails 189 | # Remove the 'priority' portion from area_line 190 | area_line = area_line[:priority_inline_match.start()] + area_line[priority_inline_match.end():] 191 | area_line = area_line.strip(' -:') 192 | else: 193 | current_priority = None # Priority might be on the next line 194 | 195 | current_area = area_line.strip() 196 | 197 | elif re.match(r'(?i)^priority\s*(?:[:=]?\s*)?(\d+)', line): 198 | # Extract priority from the line following the area 199 | try: 200 | priority_match = re.match(r'(?i)^priority\s*(?:[:=]?\s*)?(\d+)', line) 201 | current_priority = int(priority_match.group(1)) 202 | current_priority = max(1, min(5, current_priority)) 203 | except (ValueError, IndexError): 204 | current_priority = 3 # Default priority if parsing fails 205 | 206 | # Check if this is the last line or the next line is a new area 207 | next_line_is_new_area = (i + 1 < len(lines)) and re.match(r'^\d+\.', lines[i + 1].strip()) 208 | if next_line_is_new_area or i + 1 == len(lines): 209 | if current_area is not None: 210 | # Append the current area and priority to the list 211 | areas.append(ResearchFocus( 212 | area=current_area.strip(' -:'), 213 | priority=current_priority or 3, 214 | )) 215 | current_area = None 216 | current_priority = None 217 | 218 | return areas 219 | 220 | def _clean_text(self, text: str) -> str: 221 | """Clean and normalize text""" 222 | text = re.sub(r'\s+', ' ', text) 223 | text = re.sub(r'(\d+\))', r'\1.', text) 224 | text = re.sub(r'(?i)priority:', 'P:', text) 225 | return text.strip() 226 | 227 | def _add_area(self, areas: List[ResearchFocus], area: str, priority: Optional[int]): 228 | 
"""Add area with basic validation""" 229 | if not area or len(area.split()) < 3: # Basic validation 230 | return 231 | 232 | areas.append(ResearchFocus( 233 | area=area, 234 | priority=priority or 3, 235 | timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"), 236 | search_queries=[] 237 | )) 238 | 239 | def _normalize_focus_areas(self, areas: List[ResearchFocus]) -> List[ResearchFocus]: 240 | """Normalize and prepare final list of areas""" 241 | if not areas: 242 | return [] 243 | 244 | # Sort by priority 245 | areas.sort(key=lambda x: x.priority, reverse=True) 246 | 247 | # Ensure priorities are properly spread 248 | for i, area in enumerate(areas): 249 | area.priority = max(1, min(5, area.priority)) 250 | 251 | return areas[:5] 252 | 253 | def format_analysis_result(self, result: AnalysisResult) -> str: 254 | """Format the results for display""" 255 | if not result: 256 | return "No valid analysis result generated." 257 | 258 | formatted = [ 259 | f"\nResearch Areas for: {result.original_question}\n" 260 | ] 261 | 262 | for i, focus in enumerate(result.focus_areas, 1): 263 | formatted.extend([ 264 | f"\n{i}. {focus.area}", 265 | f" Priority: {focus.priority}" 266 | ]) 267 | 268 | return "\n".join(formatted) 269 | 270 | class OutputRedirector: 271 | """Redirects stdout and stderr to a string buffer""" 272 | def __init__(self, stream=None): 273 | self.stream = stream or StringIO() 274 | self.original_stdout = sys.stdout 275 | self.original_stderr = sys.stderr 276 | 277 | def __enter__(self): 278 | sys.stdout = self.stream 279 | sys.stderr = self.stream 280 | return self.stream 281 | 282 | def __exit__(self, exc_type, exc_val, exc_tb): 283 | sys.stdout = self.original_stdout 284 | sys.stderr = self.original_stderr 285 | 286 | class TerminalUI: 287 | """Manages terminal display with fixed input area at bottom""" 288 | def __init__(self): 289 | self.stdscr = None 290 | self.input_win = None 291 | self.output_win = None 292 | self.status_win = None 293 | self.max_y = 0 294 | self.max_x = 0 295 | self.input_buffer = "" 296 | self.is_setup = False 297 | self.old_terminal_settings = None 298 | self.should_terminate = Event() 299 | self.shutdown_event = Event() 300 | self.research_thread = None 301 | self.last_display_height = 0 # Track display height for corruption fix 302 | 303 | def setup(self): 304 | """Initialize the terminal UI""" 305 | if self.is_setup: 306 | return 307 | 308 | # Save terminal settings 309 | if os.name != 'nt': # Unix-like systems 310 | self.old_terminal_settings = termios.tcgetattr(sys.stdin.fileno()) 311 | 312 | self.stdscr = curses.initscr() 313 | curses.start_color() 314 | curses.noecho() 315 | curses.cbreak() 316 | self.stdscr.keypad(True) 317 | 318 | # Get terminal dimensions 319 | self.max_y, self.max_x = self.stdscr.getmaxyx() 320 | 321 | # Create windows 322 | self.output_win = curses.newwin(self.max_y - 4, self.max_x, 0, 0) 323 | self.status_win = curses.newwin(1, self.max_x, self.max_y - 4, 0) 324 | self.input_win = curses.newwin(3, self.max_x, self.max_y - 3, 0) 325 | 326 | # Setup colors 327 | curses.init_pair(1, curses.COLOR_GREEN, curses.COLOR_BLACK) 328 | curses.init_pair(2, curses.COLOR_CYAN, curses.COLOR_BLACK) 329 | curses.init_pair(3, curses.COLOR_YELLOW, curses.COLOR_BLACK) 330 | 331 | # Enable scrolling 332 | self.output_win.scrollok(True) 333 | self.output_win.idlok(True) 334 | self.input_win.scrollok(True) 335 | 336 | self.is_setup = True 337 | self._refresh_input_prompt() 338 | 339 | def cleanup(self): 340 | """Public cleanup method with enhanced 
terminal restoration""" 341 | if not self.is_setup: 342 | return 343 | try: 344 | # Ensure all windows are properly closed 345 | for win in [self.input_win, self.output_win, self.status_win]: 346 | if win: 347 | win.clear() 348 | win.refresh() 349 | 350 | # Restore terminal state 351 | if self.stdscr: 352 | self.stdscr.keypad(False) 353 | curses.nocbreak() 354 | curses.echo() 355 | curses.endwin() 356 | 357 | # Restore original terminal settings 358 | if self.old_terminal_settings and os.name != 'nt': 359 | import termios 360 | termios.tcsetattr( 361 | sys.stdin.fileno(), 362 | termios.TCSADRAIN, 363 | self.old_terminal_settings 364 | ) 365 | except Exception as e: 366 | logger.error(f"Error during terminal cleanup: {str(e)}") 367 | finally: 368 | self.is_setup = False 369 | self.stdscr = None 370 | self.input_win = None 371 | self.output_win = None 372 | self.status_win = None 373 | 374 | def _cleanup(self): 375 | """Enhanced resource cleanup with better process handling""" 376 | self.should_terminate.set() 377 | 378 | # Handle research thread with improved termination 379 | if self.research_thread and self.research_thread.is_alive(): 380 | try: 381 | self.research_thread.join(timeout=1.0) 382 | if self.research_thread.is_alive(): 383 | import ctypes 384 | ctypes.pythonapi.PyThreadState_SetAsyncExc( 385 | ctypes.c_long(self.research_thread.ident), 386 | ctypes.py_object(SystemExit)) 387 | time.sleep(0.1) # Give thread time to exit 388 | if self.research_thread.is_alive(): # Double-check 389 | ctypes.pythonapi.PyThreadState_SetAsyncExc( 390 | ctypes.c_long(self.research_thread.ident), 391 | 0) # Reset exception 392 | except Exception as e: 393 | logger.error(f"Error terminating research thread: {str(e)}") 394 | 395 | # Clean up LLM with improved error handling 396 | if hasattr(self, 'llm') and hasattr(self.llm, '_cleanup'): 397 | try: 398 | self.llm.cleanup() 399 | except Exception as e: 400 | logger.error(f"Error cleaning up LLM: {str(e)}") 401 | 402 | # Ensure terminal is restored 403 | try: 404 | curses.endwin() 405 | except: 406 | pass 407 | 408 | # Final cleanup of UI 409 | self.cleanup() 410 | 411 | def _refresh_input_prompt(self, prompt="Enter command: "): 412 | """Refresh the fixed input prompt at bottom with display fix""" 413 | if not self.is_setup: 414 | return 415 | 416 | try: 417 | # Clear the entire input window first 418 | self.input_win.clear() 419 | 420 | # Calculate proper cursor position 421 | cursor_y = 0 422 | cursor_x = len(prompt) + len(self.input_buffer) 423 | 424 | # Add the prompt and buffer 425 | self.input_win.addstr(0, 0, f"{prompt}{self.input_buffer}", curses.color_pair(1)) 426 | 427 | # Position cursor correctly 428 | try: 429 | self.input_win.move(cursor_y, cursor_x) 430 | except curses.error: 431 | pass # Ignore if cursor would be off-screen 432 | 433 | self.input_win.refresh() 434 | except curses.error: 435 | pass 436 | 437 | def update_output(self, text: str): 438 | """Update output window with display corruption fix""" 439 | if not self.is_setup: 440 | return 441 | 442 | try: 443 | # Clean ANSI escape codes 444 | clean_text = re.sub(r'\x1b\[[0-9;]*[mK]', '', text) 445 | 446 | # Store current position 447 | current_y, _ = self.output_win.getyx() 448 | 449 | # Clear any potential corruption 450 | if current_y > self.last_display_height: 451 | self.output_win.clear() 452 | 453 | self.output_win.addstr(clean_text + "\n", curses.color_pair(2)) 454 | new_y, _ = self.output_win.getyx() 455 | self.last_display_height = new_y 456 | 457 | self.output_win.refresh() 
458 | self._refresh_input_prompt() 459 | except curses.error: 460 | pass 461 | 462 | def update_status(self, text: str): 463 | """Update the status line above input area""" 464 | if not self.is_setup: 465 | return 466 | 467 | try: 468 | self.status_win.clear() 469 | self.status_win.addstr(0, 0, text, curses.color_pair(3)) 470 | self.status_win.refresh() 471 | self._refresh_input_prompt() # Ensure prompt is refreshed after status update 472 | except curses.error: 473 | pass 474 | 475 | def get_input(self, prompt: Optional[str] = None) -> Optional[str]: 476 | """Windows-compatible input handling""" 477 | try: 478 | if prompt: 479 | self.update_status(prompt) 480 | if not self.is_setup: 481 | self.setup() 482 | self.input_buffer = "" 483 | self._refresh_input_prompt() 484 | 485 | while True: 486 | if self.should_terminate.is_set(): 487 | return None 488 | 489 | if msvcrt.kbhit(): 490 | ch = msvcrt.getch() 491 | 492 | if ch == b'\x04': # Ctrl+D 493 | result = self.input_buffer.strip() 494 | self.input_buffer = "" 495 | if not result: 496 | self.cleanup() 497 | return "@quit" 498 | return result 499 | 500 | elif ch == b'\x03': # Ctrl+C 501 | self.should_terminate.set() 502 | self.cleanup() 503 | return "@quit" 504 | 505 | elif ch == b'\r': # Enter 506 | result = self.input_buffer.strip() 507 | if result: 508 | self.input_buffer = "" 509 | return result 510 | continue 511 | 512 | elif ch == b'\x08': # Backspace 513 | if self.input_buffer: 514 | self.input_buffer = self.input_buffer[:-1] 515 | self._refresh_input_prompt() 516 | 517 | elif 32 <= ord(ch[0]) <= 126: # Printable characters 518 | self.input_buffer += ch.decode('utf-8') 519 | self._refresh_input_prompt() 520 | 521 | except Exception as e: 522 | logger.error(f"Error in get_input: {str(e)}") 523 | self.should_terminate.set() 524 | self.cleanup() 525 | return "@quit" 526 | 527 | def force_exit(self): 528 | """Force immediate exit with enhanced cleanup""" 529 | try: 530 | self.should_terminate.set() 531 | self.shutdown_event.set() 532 | self._cleanup() # Call private cleanup first 533 | self.cleanup() # Then public cleanup 534 | curses.endwin() # Final attempt to restore terminal 535 | except: 536 | pass 537 | finally: 538 | os._exit(0) # Ensure exit 539 | 540 | class NonBlockingInput: 541 | """Handles non-blocking keyboard input for Windows systems.""" 542 | def __init__(self): 543 | """Initialize NonBlockingInput (no special setup required on Windows).""" 544 | if os.name != 'nt': 545 | raise EnvironmentError("NonBlockingInput is designed for Windows only.") 546 | 547 | def __enter__(self): 548 | """Enter the context (no-op for Windows).""" 549 | return self 550 | 551 | def __exit__(self, type, value, traceback): 552 | """Exit the context (no-op for Windows).""" 553 | pass 554 | 555 | def check_input(self, timeout=0.1): 556 | """ 557 | Check for keyboard input without blocking. 558 | 559 | Args: 560 | timeout (float): Time in seconds to wait before returning if no input. 561 | 562 | Returns: 563 | str or None: The input character as a string, or None if no input is detected. 
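        Example (illustrative usage):
            with NonBlockingInput() as nbi:
                key = nbi.check_input(timeout=0.5)  # e.g. 'q', or None if nothing was typed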
564 | """ 565 | start_time = time.time() 566 | while True: 567 | if msvcrt.kbhit(): 568 | try: 569 | return msvcrt.getch().decode('utf-8') 570 | except UnicodeDecodeError: 571 | # Handle non-ASCII characters gracefully 572 | return None 573 | 574 | if time.time() - start_time > timeout: 575 | return None 576 | 577 | class ResearchManager: 578 | """Manages the research process including analysis, search, and documentation""" 579 | def __init__(self, llm_wrapper, parser, search_engine, max_searches_per_cycle: int = 5): 580 | self.llm = llm_wrapper 581 | self.parser = parser 582 | self.search_engine = search_engine 583 | self.max_searches = max_searches_per_cycle 584 | self.should_terminate = threading.Event() 585 | self.shutdown_event = Event() 586 | self.research_started = threading.Event() 587 | self.research_thread = None 588 | self.thinking = False 589 | self.stop_words = { 590 | 'the', 'be', 'to', 'of', 'and', 'a', 'in', 'that', 'have', 'i', 591 | 'it', 'for', 'not', 'on', 'with', 'he', 'as', 'you', 'do', 'at' 592 | } 593 | 594 | # State tracking 595 | self.searched_urls: Set[str] = set() 596 | self.current_focus: Optional[ResearchFocus] = None 597 | self.original_query: str = "" 598 | self.focus_areas: List[ResearchFocus] = [] 599 | self.is_running = False 600 | 601 | # New conversation mode attributes 602 | self.research_complete = False 603 | self.research_summary = "" 604 | self.conversation_active = False 605 | self.research_content = "" 606 | 607 | # Initialize document paths 608 | self.document_path = None 609 | self.session_files = [] 610 | 611 | # Initialize UI and parser 612 | self.ui = TerminalUI() 613 | self.strategic_parser = StrategicAnalysisParser(llm=self.llm) 614 | 615 | # Initialize new flags for pausing and assessment 616 | self.research_paused = False 617 | self.awaiting_user_decision = False 618 | 619 | # Setup signal handlers 620 | signal.signal(signal.SIGINT, self._signal_handler) 621 | signal.signal(signal.SIGTERM, self._signal_handler) 622 | 623 | def _signal_handler(self, signum, frame): 624 | """Handle interrupt signals""" 625 | self.shutdown_event.set() 626 | self.should_terminate.set() 627 | self._cleanup() 628 | 629 | def print_thinking(self): 630 | """Display thinking indicator to user""" 631 | self.ui.update_output("🧠 Thinking...") 632 | 633 | @staticmethod 634 | def get_initial_input() -> str: 635 | """Get the initial research query from user""" 636 | print(f"{Fore.GREEN}📝 Enter your message (Press CTRL+D to submit):{Style.RESET_ALL}") 637 | lines = [] 638 | try: 639 | while True: 640 | line = input() 641 | if line: # Only add non-empty lines 642 | lines.append(line) 643 | if not line: # Empty line (just Enter pressed) 644 | break 645 | except EOFError: # Ctrl+D pressed 646 | pass 647 | except KeyboardInterrupt: # Ctrl+C pressed 648 | print("\nOperation cancelled") 649 | sys.exit(0) 650 | 651 | return " ".join(lines).strip() 652 | def get_multiline_input(self) -> str: 653 | """Get multiline input with proper command handling""" 654 | buffer = [] 655 | current_line = [] 656 | 657 | try: 658 | while True: 659 | if msvcrt.kbhit(): 660 | char = msvcrt.getch() 661 | 662 | # Handle CTRL+Z detection 663 | if char in [b'\x1a']: # CTRL+Z (Windows) 664 | sys.stdout.write('\n') 665 | if current_line: 666 | buffer.append(''.join(current_line)) 667 | return ' '.join(buffer).strip() 668 | 669 | # Handle single-character commands immediately 670 | if not buffer and not current_line and char in [b's', b'f', b'p', b'q']: 671 | command = char.decode('utf-8').lower() 672 | 
sys.stdout.write(command + '\n') 673 | return command 674 | 675 | # Handle special characters 676 | elif char == b'\r': # Enter 677 | sys.stdout.write('\n') 678 | if current_line: 679 | buffer.append(''.join(current_line)) 680 | current_line = [] 681 | 682 | elif char == b'\x08': # Backspace 683 | if current_line: 684 | current_line.pop() 685 | sys.stdout.write('\b \b') 686 | 687 | elif char == b'\x03': # CTRL+C 688 | sys.stdout.write('\n') 689 | return 'q' 690 | 691 | # Normal character input 692 | elif 32 <= ord(char) <= 126: # Printable characters 693 | current_line.append(char.decode('utf-8')) 694 | sys.stdout.write(char.decode('utf-8')) 695 | 696 | sys.stdout.flush() 697 | 698 | except Exception as e: 699 | logger.error(f"Error in multiline input: {str(e)}") 700 | return '' 701 | 702 | def formulate_search_queries(self, focus_area: ResearchFocus) -> List[str]: 703 | """Generate search queries for a focus area""" 704 | try: 705 | self.print_thinking() 706 | 707 | prompt = f""" 708 | In order to research this query/topic: 709 | 710 | Context: {self.original_query} 711 | 712 | Base a search query to investigate the following research focus, which is related to the original query/topic: 713 | 714 | Area: {focus_area.area} 715 | 716 | Create a search query that will yield specific, search results thare are directly relevant to your focus area. 717 | Format your response EXACTLY like this: 718 | 719 | Search query: [Your 2-5 word query] 720 | Time range: [d/w/m/y/none] 721 | 722 | Do not provide any additional information or explanation, note that the time range allows you to see results within a time range (d is within the last day, w is within the last week, m is within the last month, y is within the last year, and none is results from anytime, only select one, using only the corresponding letter for whichever of these options you select as indicated in the response format) use your judgement as many searches will not require a time range and some may depending on what the research focus is. 723 | """ 724 | response_text = self.llm.generate(prompt, max_tokens=50, stop=None) 725 | query, time_range = self.parse_query_response(response_text) 726 | 727 | if not query: 728 | self.ui.update_output(f"{Fore.RED}Error: Empty search query. 
Using focus area as query...{Style.RESET_ALL}") 729 | return [focus_area.area] 730 | 731 | self.ui.update_output(f"{Fore.YELLOW}Original focus: {focus_area.area}{Style.RESET_ALL}") 732 | self.ui.update_output(f"{Fore.YELLOW}Formulated query: {query}{Style.RESET_ALL}") 733 | self.ui.update_output(f"{Fore.YELLOW}Time range: {time_range}{Style.RESET_ALL}") 734 | 735 | return [query] 736 | 737 | except Exception as e: 738 | logger.error(f"Error formulating query: {str(e)}") 739 | return [focus_area.area] 740 | 741 | def parse_search_query(self, query_response: str) -> Dict[str, str]: 742 | """Parse search query formulation response with improved time range detection""" 743 | try: 744 | lines = query_response.strip().split('\n') 745 | result = { 746 | 'query': '', 747 | 'time_range': 'none' 748 | } 749 | 750 | # First try to find standard format 751 | for line in lines: 752 | if ':' in line: 753 | key, value = line.split(':', 1) 754 | key = key.strip().lower() 755 | value = value.strip() 756 | 757 | if 'query' in key: 758 | result['query'] = self._clean_query(value) 759 | elif ('time' in key or 'range' in key) and value.strip().lower() in ['d', 'w', 'm', 'y', 'none']: 760 | result['time_range'] = value.strip().lower() 761 | 762 | # If no time range found, look for individual characters 763 | if result['time_range'] == 'none': 764 | # Get all text except the query itself 765 | full_text = query_response.lower() 766 | if result['query']: 767 | full_text = full_text.replace(result['query'].lower(), '') 768 | 769 | # Look for isolated d, w, m, or y characters 770 | time_chars = set() 771 | for char in ['d', 'w', 'm', 'y']: 772 | # Check if char exists by itself (not part of another word) 773 | matches = re.finditer(r'\b' + char + r'\b', full_text) 774 | for match in matches: 775 | # Verify it's not part of a word 776 | start, end = match.span() 777 | if (start == 0 or not full_text[start-1].isalpha()) and \ 778 | (end == len(full_text) or not full_text[end].isalpha()): 779 | time_chars.add(char) 780 | 781 | # If exactly one time char found, use it 782 | if len(time_chars) == 1: 783 | result['time_range'] = time_chars.pop() 784 | 785 | return result 786 | except Exception as e: 787 | logger.error(f"Error parsing search query: {str(e)}") 788 | return {'query': '', 'time_range': 'none'} 789 | 790 | def _cleanup(self): 791 | """Enhanced cleanup to handle conversation mode""" 792 | self.conversation_active = False 793 | self.should_terminate.set() 794 | 795 | if self.research_thread and self.research_thread.is_alive(): 796 | try: 797 | self.research_thread.join(timeout=1.0) 798 | if self.research_thread.is_alive(): 799 | import ctypes 800 | ctypes.pythonapi.PyThreadState_SetAsyncExc( 801 | ctypes.c_long(self.research_thread.ident), 802 | ctypes.py_object(SystemExit) 803 | ) 804 | except Exception as e: 805 | logger.error(f"Error terminating research thread: {str(e)}") 806 | 807 | if hasattr(self.llm, 'cleanup'): 808 | try: 809 | self.llm.cleanup() 810 | except Exception as e: 811 | logger.error(f"Error cleaning up LLM: {str(e)}") 812 | 813 | if hasattr(self.ui, 'cleanup'): 814 | self.ui.cleanup() 815 | 816 | def _initialize_document(self): 817 | """Initialize research session document""" 818 | try: 819 | # Get all existing research session files 820 | self.session_files = [] 821 | for file in os.listdir(): 822 | if file.startswith("research_session_") and file.endswith(".txt"): 823 | try: 824 | num = int(file.split("_")[2].split(".")[0]) 825 | self.session_files.append(num) 826 | except ValueError: 827 
| continue
828 | 
829 | # Determine next session number
830 | next_session = 1 if not self.session_files else max(self.session_files) + 1
831 | self.document_path = f"research_session_{next_session}.txt"
832 | 
833 | # Initialize the new document
834 | with open(self.document_path, 'w', encoding='utf-8') as f:
835 | f.write(f"Research Session {next_session}\n")
836 | f.write(f"Topic: {self.original_query}\n")
837 | f.write(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
838 | f.write("="*80 + "\n\n")
839 | f.flush()
840 | 
841 | except Exception as e:
842 | logger.error(f"Error initializing document: {str(e)}")
843 | self.document_path = "research_findings.txt"
844 | with open(self.document_path, 'w', encoding='utf-8') as f:
845 | f.write("Research Findings:\n\n")
846 | f.flush()
847 | 
848 | def add_to_document(self, content: str, source_url: str, focus_area: str):
849 | """Add research findings to current session document"""
850 | try:
851 | with open(self.document_path, 'a', encoding='utf-8') as f:
852 | if source_url not in self.searched_urls:
853 | f.write(f"\n{'='*80}\n")
854 | f.write(f"Research Focus: {focus_area}\n")
855 | f.write(f"Source: {source_url}\n")
856 | f.write(f"Content:\n{content}\n")
857 | f.write(f"{'='*80}\n")
858 | f.flush()
859 | self.searched_urls.add(source_url)
860 | self.ui.update_output(f"Added content from: {source_url}")
861 | except Exception as e:
862 | logger.error(f"Error adding to document: {str(e)}")
863 | self.ui.update_output(f"Error saving content: {str(e)}")
864 | 
865 | def get_multiline_conversation_input(self) -> str:
866 | """Windows-compatible multiline input"""
867 | buffer = []
868 | current_line = []
869 | 
870 | try:
871 | while True:
872 | if msvcrt.kbhit():
873 | char = msvcrt.getch()
874 | 
875 | # CTRL+D or CTRL+Z detection
876 | if char in [b'\x04', b'\x1a']: # CTRL+D (Unix) and CTRL+Z (Windows)
877 | sys.stdout.write('\n')
878 | if current_line:
879 | buffer.append(''.join(current_line))
880 | return ' '.join(buffer).strip()
881 | 
882 | # Handle special characters
883 | elif char == b'\r': # Enter
884 | sys.stdout.write('\n')
885 | buffer.append(''.join(current_line))
886 | current_line = []
887 | 
888 | elif char == b'\x08': # Backspace
889 | if current_line:
890 | current_line.pop()
891 | sys.stdout.write('\b \b')
892 | 
893 | elif char == b'\x03': # CTRL+C
894 | sys.stdout.write('\n')
895 | return 'quit'
896 | 
897 | # 's', 'f', 'p' and 'q' are ordinary letters in conversation mode and must
898 | elif char == b's': # stay in the current line; pushing them straight into
899 | current_line.append('s') # 'buffer' would split any word containing these
900 | sys.stdout.write('s') # letters (a lone 'q' still exits via the caller)
901 | elif char == b'f':
902 | current_line.append('f')
903 | sys.stdout.write('f')
904 | elif char == b'p':
905 | current_line.append('p')
906 | sys.stdout.write('p')
907 | elif char == b'q':
908 | current_line.append('q')
909 | sys.stdout.write('q')
910 | 
911 | # Normal character input
912 | elif 32 <= ord(char) <= 126: # Printable characters
913 | current_line.append(char.decode('utf-8'))
914 | sys.stdout.write(char.decode('utf-8'))
915 | 
916 | sys.stdout.flush()
917 | 
918 | except Exception as e:
919 | logger.error(f"Error in multiline input: {str(e)}")
920 | return 'quit'
921 | def _process_search_results(self, results: Dict[str, str], focus_area: str):
922 | """Process and store search results"""
923 | if not results:
924 | return
925 | 
926 | for url, content in results.items():
927 | if 
url not in self.searched_urls: 928 | self.add_to_document(content, url, focus_area) 929 | 930 | def _research_loop(self): 931 | """Main research loop with comprehensive functionality""" 932 | self.is_running = True 933 | try: 934 | logging.debug("Research loop started.") 935 | self.research_started.set() 936 | 937 | while not self.should_terminate.is_set() and not self.shutdown_event.is_set(): 938 | # Check if research is paused 939 | if self.research_paused: 940 | logging.debug("Research is paused.") 941 | time.sleep(1) 942 | continue 943 | 944 | self.ui.update_output("\nAnalyzing research progress...") 945 | logging.debug("Analyzing research progress.") 946 | 947 | # Generate focus areas 948 | self.ui.update_output("\nGenerating research focus areas...") 949 | logging.debug("Generating research focus areas.") 950 | analysis_result = self.strategic_parser.strategic_analysis(self.original_query) 951 | 952 | if not analysis_result: 953 | self.ui.update_output("\nFailed to generate analysis result. Retrying...") 954 | logging.warning("Failed to generate analysis result. Retrying...") 955 | continue 956 | 957 | focus_areas = analysis_result.focus_areas 958 | if not focus_areas: 959 | self.ui.update_output("\nNo valid focus areas generated. Retrying...") 960 | logging.warning("No valid focus areas generated. Retrying...") 961 | continue 962 | 963 | self.ui.update_output(f"\nGenerated {len(focus_areas)} research areas:") 964 | logging.debug(f"Generated {len(focus_areas)} research areas.") 965 | for i, focus in enumerate(focus_areas, 1): 966 | self.ui.update_output(f"\nArea {i}: {focus.area}") 967 | self.ui.update_output(f"Priority: {focus.priority}") 968 | logging.debug(f"Area {i}: {focus.area}, Priority: {focus.priority}") 969 | 970 | # Process each focus area in priority order 971 | for focus_area in focus_areas: 972 | if self.should_terminate.is_set(): 973 | logging.debug("Termination signal received. Exiting focus area processing.") 974 | break 975 | 976 | # Check if research is paused 977 | while self.research_paused and not self.should_terminate.is_set(): 978 | logging.debug("Research is paused during focus area processing.") 979 | time.sleep(1) 980 | 981 | if self.should_terminate.is_set(): 982 | logging.debug("Termination signal received. Exiting focus area processing.") 983 | break 984 | 985 | self.current_focus = focus_area 986 | self.ui.update_output(f"\nInvestigating: {focus_area.area}") 987 | logging.debug(f"Investigating focus area: {focus_area.area}") 988 | 989 | queries = self.formulate_search_queries(focus_area) 990 | if not queries: 991 | logging.warning("No queries formulated for focus area.") 992 | continue 993 | 994 | for query in queries: 995 | if self.should_terminate.is_set(): 996 | logging.debug("Termination signal received. Exiting query processing.") 997 | break 998 | 999 | # Check if research is paused 1000 | while self.research_paused and not self.should_terminate.is_set(): 1001 | logging.debug("Research is paused during query processing.") 1002 | time.sleep(1) 1003 | 1004 | if self.should_terminate.is_set(): 1005 | logging.debug("Termination signal received. 
Exiting query processing.") 1006 | break 1007 | 1008 | try: 1009 | self.ui.update_output(f"\nSearching: {query}") 1010 | logging.debug(f"Performing search for query: {query}") 1011 | results = self.search_engine.perform_search(query, time_range='none') 1012 | 1013 | if results: 1014 | selected_urls = self.search_engine.select_relevant_pages(results, query) 1015 | 1016 | if selected_urls: 1017 | self.ui.update_output("\n⚙️ Scraping selected pages...") 1018 | logging.debug("Scraping selected pages.") 1019 | scraped_content = self.search_engine.scrape_content(selected_urls) 1020 | if scraped_content: 1021 | for url, content in scraped_content.items(): 1022 | if url not in self.searched_urls: 1023 | self.add_to_document(content, url, focus_area.area) 1024 | 1025 | except Exception as e: 1026 | logger.error(f"Error in search: {str(e)}") 1027 | self.ui.update_output(f"Error during search: {str(e)}") 1028 | 1029 | if self.check_document_size(): 1030 | self.ui.update_output("\nDocument size limit reached. Finalizing research.") 1031 | logging.info("Document size limit reached. Finalizing research.") 1032 | return 1033 | 1034 | # After processing all areas, cycle back to generate new ones 1035 | self.ui.update_output("\nAll current focus areas investigated. Generating new areas...") 1036 | logging.debug("All current focus areas investigated. Generating new areas.") 1037 | 1038 | except Exception as e: 1039 | logger.error(f"Error in research loop: {str(e)}") 1040 | self.ui.update_output(f"Error in research process: {str(e)}") 1041 | finally: 1042 | self.is_running = False 1043 | logging.debug("Research loop ended.") 1044 | 1045 | def start_research(self, topic: str): 1046 | """Start research with new session document""" 1047 | try: 1048 | submit_key = "CTRL+Z" if os.name == 'nt' else "CTRL+D" 1049 | 1050 | logging.debug("Setting up UI and initializing document.") 1051 | self.ui.setup() 1052 | self.original_query = topic 1053 | self._initialize_document() 1054 | 1055 | self.ui.update_output(f"\nStarting research on: {topic}") 1056 | self.ui.update_output(f"Session document: {self.document_path}") 1057 | 1058 | # Clear previous state 1059 | self.should_terminate.clear() 1060 | self.research_started.clear() 1061 | self.research_paused = False 1062 | self.awaiting_user_decision = False 1063 | self.is_running = True # Set running state explicitly 1064 | 1065 | # Start research thread 1066 | logging.debug("Starting research thread.") 1067 | self.research_thread = threading.Thread(target=self._research_loop, daemon=True) 1068 | self.research_thread.start() 1069 | 1070 | # Wait for research to actually start 1071 | if not self.research_started.wait(timeout=10): 1072 | self.ui.update_output("Error: Research failed to start within timeout period") 1073 | logging.error("Research failed to start within timeout period.") 1074 | self.should_terminate.set() 1075 | return 1076 | 1077 | # Enter command loop 1078 | while self.is_active(): # Use is_active() instead of should_terminate 1079 | try: 1080 | print(f"\n{Fore.GREEN}Enter command (s/f/p/q) and press {submit_key} to submit:{Style.RESET_ALL}") 1081 | command = self.get_multiline_input().strip().lower() # Use self.get_multiline_input() 1082 | 1083 | if command: 1084 | self._handle_command(command) 1085 | 1086 | if self.should_terminate.is_set(): 1087 | break 1088 | 1089 | except KeyboardInterrupt: 1090 | self.ui.update_output("\nOperation interrupted. 
Submit 'q' to quit.") 1091 | continue 1092 | 1093 | except Exception as e: 1094 | logging.error(f"Error in research process: {str(e)}") 1095 | self.ui.update_output(f"Error in research process: {str(e)}") 1096 | finally: 1097 | logging.debug("Cleaning up resources.") 1098 | self._cleanup() 1099 | 1100 | def check_document_size(self) -> bool: 1101 | """Check if document size is approaching context limit""" 1102 | try: 1103 | with open(self.document_path, 'r', encoding='utf-8') as f: 1104 | content = f.read() 1105 | estimated_tokens = len(content.split()) * 1.3 1106 | max_tokens = self.llm.llm_config.get('n_ctx', 2048) 1107 | current_ratio = estimated_tokens / max_tokens 1108 | 1109 | if current_ratio > 0.8: 1110 | logger.warning(f"Document size at {current_ratio*100:.1f}% of context limit") 1111 | self.ui.update_output(f"Warning: Document size at {current_ratio*100:.1f}% of context limit") 1112 | 1113 | return current_ratio > 0.9 1114 | except Exception as e: 1115 | logger.error(f"Error checking document size: {str(e)}") 1116 | return True 1117 | 1118 | def _handle_command(self, cmd: str): 1119 | """Handle user commands during research""" 1120 | try: 1121 | if cmd.lower() == 's': 1122 | progress = self.get_progress() 1123 | self.ui.update_output("\n" + progress) 1124 | return # Don't terminate after showing status 1125 | 1126 | elif cmd.lower() == 'f': 1127 | if self.current_focus: 1128 | self.ui.update_output("\nCurrent Focus:") 1129 | self.ui.update_output(f"Area: {self.current_focus.area}") 1130 | self.ui.update_output(f"Priority: {self.current_focus.priority}") 1131 | else: 1132 | self.ui.update_output("\nNo current focus area") 1133 | return # Don't terminate after showing focus 1134 | 1135 | elif cmd.lower() == 'p': 1136 | self.pause_and_assess() 1137 | return # Don't terminate after pausing 1138 | 1139 | elif cmd.lower() == 'q': 1140 | self.ui.update_output("\nInitiating research termination...") 1141 | self.should_terminate.set() 1142 | self.ui.update_output("\nGenerating research summary... 
please wait...") 1143 | summary = self.terminate_research() 1144 | self.ui.update_output("\nFinal Research Summary:") 1145 | self.ui.update_output(summary) 1146 | 1147 | except Exception as e: 1148 | logger.error(f"Error handling command: {str(e)}") 1149 | self.ui.update_output(f"Error processing command: {str(e)}") 1150 | 1151 | def show_progress_indicator(self, message="Generating summary, please wait..."): 1152 | """Show a rotating progress indicator until the summary is ready.""" 1153 | symbols = ['|', '/', '-', '\\'] 1154 | idx = 0 1155 | self.summary_ready = False # Track whether the summary is complete 1156 | 1157 | try: 1158 | while not self.summary_ready: 1159 | sys.stdout.write(f"\r{message} {symbols[idx]}") 1160 | sys.stdout.flush() 1161 | idx = (idx + 1) % len(symbols) 1162 | time.sleep(0.2) # Adjust the speed of the rotation if needed 1163 | except KeyboardInterrupt: 1164 | sys.stdout.write("\rOperation interrupted.\n") 1165 | self.summary_ready = True 1166 | finally: 1167 | sys.stdout.write("\r" + " " * (len(message) + 2) + "\r") # Clear the line when done 1168 | sys.stdout.flush() 1169 | def _cleanup_research_ui(self): 1170 | """Clean up just the research UI components""" 1171 | if hasattr(self, 'ui') and self.ui: 1172 | self.ui.cleanup() 1173 | 1174 | def show_thinking_indicator(self, message: str, stop_flag_name: str): 1175 | """Show a rotating thinking indicator with custom message""" 1176 | symbols = ['|', '/', '-', '\\'] 1177 | idx = 0 1178 | while getattr(self, stop_flag_name): # Use dynamic attribute lookup 1179 | sys.stdout.write(f"\r{message} {symbols[idx]}") 1180 | sys.stdout.flush() 1181 | idx = (idx + 1) % len(symbols) 1182 | time.sleep(0.2) 1183 | sys.stdout.write("\r" + " " * (len(message) + 2) + "\r") # Clear the line when done 1184 | 1185 | def start_conversation_mode(self): 1186 | """Start interactive conversation mode with CTRL+D input handling and thinking indicator""" 1187 | self.conversation_active = True 1188 | self.thinking = False 1189 | 1190 | # Print header with clear instructions 1191 | print("\n" + "="*80) 1192 | print(Fore.CYAN + "Research Conversation Mode" + Style.RESET_ALL) 1193 | print("="*80) 1194 | print(Fore.YELLOW + "\nInstructions:") 1195 | print("- Type your question and press CTRL+D to submit") 1196 | print("- Type 'quit' and press CTRL+D to exit") 1197 | print("- Your messages appear in green") 1198 | print("- AI responses appear in cyan" + Style.RESET_ALL + "\n") 1199 | 1200 | while self.conversation_active: 1201 | try: 1202 | # Show prompt with user input in green 1203 | print(Fore.GREEN + "Your question (Press CTRL+D to submit):" + Style.RESET_ALL) 1204 | user_input = self.get_multiline_conversation_input() 1205 | 1206 | # Handle exit commands 1207 | if not user_input or user_input.lower() in ['quit', 'exit', 'q']: 1208 | print(Fore.YELLOW + "\nExiting conversation mode..." 
+ Style.RESET_ALL) 1209 | self.conversation_active = False 1210 | break 1211 | 1212 | # Skip empty input 1213 | if not user_input.strip(): 1214 | continue 1215 | 1216 | # Echo the submitted question for clarity 1217 | print(Fore.GREEN + "Submitted question:" + Style.RESET_ALL) 1218 | print(Fore.GREEN + user_input + Style.RESET_ALL + "\n") 1219 | 1220 | # Start thinking indicator in a separate thread 1221 | self.thinking = True # Set flag before starting thread 1222 | thinking_thread = threading.Thread( 1223 | target=self.show_thinking_indicator, 1224 | args=("Thinking...", "thinking") 1225 | ) 1226 | thinking_thread.daemon = True 1227 | thinking_thread.start() 1228 | 1229 | try: 1230 | # Generate response 1231 | response = self._generate_conversation_response(user_input) 1232 | 1233 | # Stop thinking indicator 1234 | self.thinking = False 1235 | thinking_thread.join() 1236 | 1237 | # Display response in cyan 1238 | print(Fore.CYAN + "AI Response:" + Style.RESET_ALL) 1239 | print(f"{Fore.CYAN}{response}{Style.RESET_ALL}\n") 1240 | print("-" * 80 + "\n") # Separator between QA pairs 1241 | 1242 | except Exception as e: 1243 | self.thinking = False # Ensure thinking indicator stops 1244 | thinking_thread.join() 1245 | raise e 1246 | 1247 | except KeyboardInterrupt: 1248 | self.thinking = False # Ensure thinking indicator stops 1249 | print(Fore.YELLOW + "\nOperation cancelled. Submit 'quit' to exit." + Style.RESET_ALL) 1250 | except Exception as e: 1251 | logger.error(f"Error in conversation mode: {str(e)}") 1252 | print(Fore.RED + f"Error processing question: {str(e)}" + Style.RESET_ALL) 1253 | def _generate_conversation_response(self, user_query: str) -> str: 1254 | """Generate contextual responses with improved context handling""" 1255 | try: 1256 | # Add debug logging to verify content 1257 | logger.info(f"Research summary length: {len(self.research_summary) if self.research_summary else 0}") 1258 | logger.info(f"Research content length: {len(self.research_content) if self.research_content else 0}") 1259 | 1260 | # First verify we have content 1261 | if not self.research_content and not self.research_summary: 1262 | # Try to reload from file if available 1263 | try: 1264 | if os.path.exists(self.document_path): 1265 | with open(self.document_path, 'r', encoding='utf-8') as f: 1266 | self.research_content = f.read().strip() 1267 | except Exception as e: 1268 | logger.error(f"Failed to reload research content: {str(e)}") 1269 | 1270 | # Prepare context, ensuring we have content 1271 | context = f""" 1272 | Research Content: 1273 | {self.research_content} 1274 | 1275 | Research Summary: 1276 | {self.research_summary if self.research_summary else 'No summary available'} 1277 | """ 1278 | 1279 | prompt = f""" 1280 | Based on the following research content and summary, please answer this question: 1281 | 1282 | {context} 1283 | 1284 | Question: {user_query} 1285 | 1286 | you have 2 sets of instructions the applied set and the unapplied set, the applied set should be followed if the question is directly relating to the research content whereas anything else other then direct questions about the content of the research will result in you instead following the unapplied ruleset 1287 | 1288 | Applied: 1289 | 1290 | Instructions: 1291 | 1. Answer based ONLY on the research content provided above if asked a question about your research or that content. 1292 | 2. If the information requested isn't in the research, clearly state that it isn't in the content you gathered. 1293 | 3. 
    def _generate_conversation_response(self, user_query: str) -> str:
        """Generate contextual responses with improved context handling"""
        try:
            # Add debug logging to verify content
            logger.info(f"Research summary length: {len(self.research_summary) if self.research_summary else 0}")
            logger.info(f"Research content length: {len(self.research_content) if self.research_content else 0}")

            # First verify we have content
            if not self.research_content and not self.research_summary:
                # Try to reload from file if available
                try:
                    if os.path.exists(self.document_path):
                        with open(self.document_path, 'r', encoding='utf-8') as f:
                            self.research_content = f.read().strip()
                except Exception as e:
                    logger.error(f"Failed to reload research content: {str(e)}")

            # Prepare context, ensuring we have content
            context = f"""
Research Content:
{self.research_content}

Research Summary:
{self.research_summary if self.research_summary else 'No summary available'}
"""

            prompt = f"""
Based on the following research content and summary, please answer this question:

{context}

Question: {user_query}

You have two sets of instructions, the Applied set and the Unapplied set. Follow the Applied set when the question relates directly to the research content; for anything other than direct questions about that content, follow the Unapplied set instead.

Applied:

Instructions:
1. Answer based ONLY on the research content provided above when asked about your research or that content.
2. If the information requested isn't in the research, clearly state that it isn't in the content you gathered.
3. Be direct and specific in your response. DO NOT cite the research directly unless specifically asked to; give concise, direct answers based on the research, unless instructed otherwise.

Unapplied:

Instructions:
1. Do not make up anything that isn't actually true.
2. Respond directly to the user's question in an honest and thoughtful manner.
3. Disregard the Applied rules for queries not DIRECTLY related to the research; queries about the research process, or about what you remember of the research, also fall under this Unapplied ruleset.

Answer:
"""

            response = self.llm.generate(
                prompt,
                max_tokens=1000,  # Increased for more detailed responses
                temperature=0.7
            )

            if not response or not response.strip():
                return "I apologize, but I cannot find relevant information in the research content to answer your question."

            return response.strip()

        except Exception as e:
            logger.error(f"Error generating response: {str(e)}")
            return f"I apologize, but I encountered an error processing your question: {str(e)}"
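    # NOTE (illustrative): the prompt above routes between two rulesets - the
    # 'Applied' rules bind the model to the gathered research for on-topic
    # questions, while the 'Unapplied' rules let it answer anything else
    # honestly. The context block could equally be built by a small helper
    # (hypothetical name '_build_context', same logic as the inline f-string):
    #
    #     def _build_context(self) -> str:
    #         summary = self.research_summary or 'No summary available'
    #         return (f"Research Content:\n{self.research_content}\n\n"
    #                 f"Research Summary:\n{summary}")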
    def pause_and_assess(self):
        """Pause the research and assess if the collected content is sufficient."""
        try:
            # Pause the research thread
            self.ui.update_output("\nPausing research for assessment...")
            self.research_paused = True

            # Start progress indicator in a separate thread
            self.summary_ready = False
            indicator_thread = threading.Thread(
                target=self.show_progress_indicator,
                args=("Assessing the researched information...",)
            )
            indicator_thread.daemon = True
            indicator_thread.start()

            # Read the current research content
            if not os.path.exists(self.document_path):
                self.summary_ready = True
                indicator_thread.join()
                self.ui.update_output("No research data found to assess.")
                self.research_paused = False
                return

            with open(self.document_path, 'r', encoding='utf-8') as f:
                content = f.read().strip()

            if not content:
                self.summary_ready = True
                indicator_thread.join()
                self.ui.update_output("No research data was collected to assess.")
                self.research_paused = False
                return

            # Prepare the prompt for the AI assessment
            assessment_prompt = f"""
Based on the following research content, please assess whether the original query "{self.original_query}" can be answered sufficiently with the collected information.

Research Content:
{content}

Instructions:
1. If the research content provides enough information to answer the original query in detail, respond with: "The research is sufficient to answer the query."
2. If not, respond with: "The research is insufficient and it would be advisable to continue gathering information."
3. Do not provide any additional information or details.

Assessment:
"""

            # Generate the assessment
            assessment = self.llm.generate(assessment_prompt, max_tokens=200)

            # Stop the progress indicator
            self.summary_ready = True
            indicator_thread.join()

            # Display the assessment
            self.ui.update_output("\nAssessment Result:")
            self.ui.update_output(assessment.strip())

            # Provide user with options to continue or quit
            self.ui.update_output("\nEnter 'c' to continue the research or 'q' to terminate and generate the summary.")
            self.awaiting_user_decision = True  # Flag to indicate we are waiting for the user's decision

            while self.awaiting_user_decision:
                cmd = self.ui.get_input("Enter command ('c' to continue, 'q' to quit): ")
                if cmd is None:
                    continue  # No input received; prompt again
                cmd = cmd.strip().lower()
                if cmd == 'c':
                    self.ui.update_output("\nResuming research...")
                    self.research_paused = False
                    self.awaiting_user_decision = False
                elif cmd == 'q':
                    self.ui.update_output("\nTerminating research and generating summary...")
                    self.awaiting_user_decision = False
                    self.should_terminate.set()
                    summary = self.terminate_research()
                    self.ui.update_output("\nFinal Research Summary:")
                    self.ui.update_output(summary)
                    break
                else:
                    self.ui.update_output("Invalid command. Please enter 'c' to continue or 'q' to quit.")

        except Exception as e:
            logger.error(f"Error during pause and assess: {str(e)}")
            self.ui.update_output(f"Error during assessment: {str(e)}")
            self.research_paused = False
        finally:
            self.summary_ready = True  # Ensure the indicator thread can exit

    def get_progress(self) -> str:
        """Get current research progress"""
        status = 'Active' if self.is_active() else 'Stopped'
        if self.research_paused:
            status = 'Paused'

        return f"""
Research Progress:
- Original Query: {self.original_query}
- Sources analyzed: {len(self.searched_urls)}
- Status: {status}
- Current focus: {self.current_focus.area if self.current_focus else 'Initializing'}
"""

    def is_active(self) -> bool:
        """Check if research is currently active"""
        return (self.is_running and
                self.research_thread is not None and
                self.research_thread.is_alive() and
                not self.should_terminate.is_set())
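    # NOTE (illustrative): is_active() requires every condition to agree before
    # reporting the research thread as running, so callers can poll it safely.
    # A hypothetical monitoring loop, not part of this class:
    #
    #     while manager.is_active():
    #         print(manager.get_progress())
    #         time.sleep(5)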
    def terminate_research(self) -> str:
        """Terminate research and return to main terminal"""
        try:
            print("Initiating research termination...")
            sys.stdout.flush()

            # Start progress indicator in a separate thread immediately
            indicator_thread = threading.Thread(target=self.show_progress_indicator)
            indicator_thread.daemon = True
            indicator_thread.start()

            if not os.path.exists(self.document_path):
                self.summary_ready = True
                indicator_thread.join(timeout=1.0)
                self._cleanup()
                return "No research data found to summarize."

            with open(self.document_path, 'r', encoding='utf-8') as f:
                content = f.read().strip()
                self.research_content = content  # Store for conversation mode

            if not content or content == "Research Findings:\n\n":
                self.summary_ready = True
                indicator_thread.join(timeout=1.0)
                self._cleanup()
                return "No research data was collected to summarize."

            try:
                # Generate summary using LLM
                summary_prompt = f"""
Analyze the following content and provide a comprehensive research summary that responds to the user's original query "{self.original_query}", ensuring that you conclusively answer the query in detail:

Research Content:
{content}

Important Instructions:
> Summarize the research findings that are relevant to the original topic/question: "{self.original_query}"
> Ensure that your summary directly and conclusively answers the original question/topic, in detail and to the best of your ability.
> Read the original topic/question again: "{self.original_query}". If it contains additional instructions, follow them exactly in your summary; otherwise provide the summary normally.

Summary:
"""

                summary = self.llm.generate(summary_prompt, max_tokens=4000)

                # Signal that summary is complete to stop the progress indicator
                self.summary_ready = True
                indicator_thread.join(timeout=1.0)

                # Store summary and mark research as complete
                self.research_summary = summary
                self.research_complete = True

                # Format summary
                formatted_summary = f"""
{'='*80}
RESEARCH SUMMARY
{'='*80}

Original Query: {self.original_query}
Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

{summary}

{'='*80}
End of Summary
{'='*80}
"""

                # Write to document
                with open(self.document_path, 'a', encoding='utf-8') as f:
                    f.write("\n\n" + formatted_summary)

                # Clean up research UI
                if hasattr(self, 'ui') and self.ui:
                    self.ui.cleanup()

                return formatted_summary

            except Exception:
                self.summary_ready = True
                indicator_thread.join(timeout=1.0)
                raise

        except Exception as e:
            error_msg = f"Error generating summary: {str(e)}"
            logger.error(error_msg)
            return error_msg

        finally:
            # Clean up research UI
            self._cleanup_research_ui()
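# NOTE (illustrative): terminate_research appends the formatted summary to the
# findings document, so the file on disk becomes a full audit trail - raw
# findings first, summary last. A rough sketch for pulling the summary section
# back out of that file later (assumes the 'RESEARCH SUMMARY' banner written
# above is present):
#
#     with open(manager.document_path, encoding='utf-8') as f:
#         text = f.read()
#     summary_part = text.split("RESEARCH SUMMARY", 1)[-1]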

if __name__ == "__main__":
    from llm_wrapper import LLMWrapper
    from llm_response_parser import UltimateLLMResponseParser
    from Self_Improving_Search import EnhancedSelfImprovingSearch

    try:
        print(f"{Fore.CYAN}Initializing Research System...{Style.RESET_ALL}")
        llm = LLMWrapper()
        parser = UltimateLLMResponseParser()
        search_engine = EnhancedSelfImprovingSearch(llm, parser)
        manager = ResearchManager(llm, parser, search_engine)

        print(f"{Fore.GREEN}System initialized. Enter your research topic or 'quit' to exit.{Style.RESET_ALL}")
        while True:
            try:
                topic = ResearchManager.get_initial_input()
                if not topic:
                    continue

                if topic.lower() == 'quit':
                    break

                if not topic.startswith('@'):
                    print(f"{Fore.YELLOW}Please start your research query with '@'{Style.RESET_ALL}")
                    continue

                topic = topic[1:]  # Remove the '@' prefix
                manager.start_research(topic)
                summary = manager.terminate_research()
                print(f"\n{Fore.GREEN}Research Summary:{Style.RESET_ALL}")
                print(summary)
                print(f"\n{Fore.GREEN}Research completed. Ready for next topic.{Style.RESET_ALL}\n")

            except KeyboardInterrupt:
                print(f"\n{Fore.YELLOW}Operation cancelled. Ready for next topic.{Style.RESET_ALL}")
                if 'manager' in locals():
                    manager.terminate_research()
                continue

    except KeyboardInterrupt:
        print(f"\n{Fore.YELLOW}Research system shutting down.{Style.RESET_ALL}")
        if 'manager' in locals():
            manager.terminate_research()
    except Exception as e:
        print(f"{Fore.RED}Critical error: {str(e)}{Style.RESET_ALL}")
        logger.error("Critical error in main loop", exc_info=True)

    if os.name == 'nt':
        print(f"{Fore.YELLOW}Running on Windows - some features may be limited{Style.RESET_ALL}")
--------------------------------------------------------------------------------