├── requirements.txt ├── LICENSE ├── llm_config.py ├── llm_wrapper.py ├── web_scraper.py ├── strategic_analysis_parser.py ├── README.md ├── llm_response_parser.py ├── Web-LLM.py ├── Self_Improving_Search.py └── research_manager.py /requirements.txt: -------------------------------------------------------------------------------- 1 | requests>=2.31.0 2 | beautifulsoup4>=4.12.2 3 | colorama>=0.4.6 4 | python-dotenv>=1.0.0 5 | tenacity>=8.2.3 6 | tiktoken>=0.5.1 7 | urllib3>=2.1.0 8 | duckduckgo-search>=3.9.3 9 | selenium>=4.15.2 10 | webdriver-manager>=4.0.1 11 | fake-useragent>=1.4.0 12 | html2text>=2020.1.16 13 | markdownify>=0.11.6 14 | readability-lxml>=0.8.1 15 | pyyaml>=6.0.1 16 | rich>=13.7.0 17 | httpx>=0.25.1 18 | pytest>=7.4.3 19 | black>=23.11.0 20 | isort>=5.12.0 21 | flake8>=6.1.0 22 | mypy>=1.7.0 23 | llama-cpp-python 24 | windows-curses>=2.3.1 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 James Warburton 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /llm_config.py: -------------------------------------------------------------------------------- 1 | # llm_config.py 2 | 3 | LLM_TYPE = "ollama" # Options: 'llama_cpp', 'ollama' 4 | 5 | # LLM settings for llama_cpp 6 | MODEL_PATH = "/home/james/llama.cpp/models/gemma-2-9b-it-Q6_K.gguf" # Replace with your llama.cpp models filepath 7 | 8 | LLM_CONFIG_LLAMA_CPP = { 9 | "llm_type": "llama_cpp", 10 | "model_path": MODEL_PATH, 11 | "n_ctx": 20000, # context size 12 | "n_gpu_layers": 0, # number of layers to offload to GPU (-1 for all, 0 for none) 13 | "n_threads": 8, # number of threads to use 14 | "temperature": 0.7, # temperature for sampling 15 | "top_p": 0.9, # top p for sampling 16 | "top_k": 40, # top k for sampling 17 | "repeat_penalty": 1.1, # repeat penalty 18 | "max_tokens": 1024, # max tokens to generate 19 | "stop": ["User:", "\n\n"] # stop sequences 20 | } 21 | 22 | # LLM settings for Ollama 23 | LLM_CONFIG_OLLAMA = { 24 | "llm_type": "ollama", 25 | "base_url": "http://localhost:11434", # default Ollama server URL 26 | "model_name": "custom-phi3-32k-Q4_K_M", # Replace with your Ollama model name 27 | "temperature": 0.7, 28 | "top_p": 0.9, 29 | "n_ctx": 55000, 30 | "context_length": 55000, 31 | "stop": ["User:", "\n\n"] 32 | } 33 | 34 | def get_llm_config(): 35 | if LLM_TYPE == "llama_cpp": 36 | return LLM_CONFIG_LLAMA_CPP 37 | elif LLM_TYPE == "ollama": 38 | return LLM_CONFIG_OLLAMA 39 | else: 40 | raise ValueError(f"Invalid LLM_TYPE: {LLM_TYPE}") 41 | -------------------------------------------------------------------------------- /llm_wrapper.py: -------------------------------------------------------------------------------- 1 | from llama_cpp import Llama 2 | import requests 3 | import json 4 | from llm_config import get_llm_config 5 | 6 | class LLMWrapper: 7 | def __init__(self): 8 | self.llm_config = get_llm_config() 9 | self.llm_type = self.llm_config.get('llm_type', 'llama_cpp') 10 | if self.llm_type == 'llama_cpp': 11 | self.llm = self._initialize_llama_cpp() 12 | elif self.llm_type == 'ollama': 13 | self.base_url = self.llm_config.get('base_url', 'http://localhost:11434') 14 | self.model_name = self.llm_config.get('model_name', 'your_model_name') 15 | else: 16 | raise ValueError(f"Unsupported LLM type: {self.llm_type}") 17 | 18 | def _initialize_llama_cpp(self): 19 | return Llama( 20 | model_path=self.llm_config.get('model_path'), 21 | n_ctx=self.llm_config.get('n_ctx', 55000), 22 | n_gpu_layers=self.llm_config.get('n_gpu_layers', 0), 23 | n_threads=self.llm_config.get('n_threads', 8), 24 | verbose=False 25 | ) 26 | 27 | def generate(self, prompt, **kwargs): 28 | if self.llm_type == 'llama_cpp': 29 | llama_kwargs = self._prepare_llama_kwargs(kwargs) 30 | response = self.llm(prompt, **llama_kwargs) 31 | return response['choices'][0]['text'].strip() 32 | elif self.llm_type == 'ollama': 33 | return self._ollama_generate(prompt, **kwargs) 34 | else: 35 | raise ValueError(f"Unsupported LLM type: {self.llm_type}") 36 | 37 | def _ollama_generate(self, prompt, **kwargs): 38 | url = f"{self.base_url}/api/generate" 39 | data = { 40 | 'model': self.model_name, 41 | 'prompt': prompt, 42 | 'options': { 43 | 'temperature': kwargs.get('temperature', self.llm_config.get('temperature', 0.7)), 44 | 'top_p': kwargs.get('top_p', self.llm_config.get('top_p', 0.9)), 45 | 'stop': kwargs.get('stop', self.llm_config.get('stop', [])), 46 | 'num_predict': kwargs.get('max_tokens', self.llm_config.get('max_tokens', 
55000)), 47 | 'context_length': self.llm_config.get('n_ctx', 55000) 48 | } 49 | } 50 | response = requests.post(url, json=data, stream=True) 51 | if response.status_code != 200: 52 | raise Exception(f"Ollama API request failed with status {response.status_code}: {response.text}") 53 | text = ''.join(json.loads(line)['response'] for line in response.iter_lines() if line) 54 | return text.strip() 55 | 56 | def _cleanup(self): 57 | """Force terminate any running LLM processes""" 58 | if self.llm_type == 'ollama': 59 | try: 60 | # Force terminate Ollama process 61 | requests.post(f"{self.base_url}/api/terminate") 62 | except: 63 | pass 64 | 65 | try: 66 | # Also try to terminate via subprocess if needed 67 | import subprocess 68 | subprocess.run(['pkill', '-f', 'ollama'], capture_output=True) 69 | except: 70 | pass 71 | 72 | def _prepare_llama_kwargs(self, kwargs): 73 | llama_kwargs = { 74 | 'max_tokens': kwargs.get('max_tokens', self.llm_config.get('max_tokens', 55000)), 75 | 'temperature': kwargs.get('temperature', self.llm_config.get('temperature', 0.7)), 76 | 'top_p': kwargs.get('top_p', self.llm_config.get('top_p', 0.9)), 77 | 'stop': kwargs.get('stop', self.llm_config.get('stop', [])), 78 | 'echo': False, 79 | } 80 | return llama_kwargs 81 | -------------------------------------------------------------------------------- /web_scraper.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | from urllib.robotparser import RobotFileParser 4 | from urllib.parse import urlparse, urljoin 5 | import time 6 | import logging 7 | from concurrent.futures import ThreadPoolExecutor, as_completed 8 | import re 9 | 10 | # Set up logging 11 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 12 | logger = logging.getLogger(__name__) 13 | 14 | class WebScraper: 15 | def __init__(self, user_agent="WebLLMAssistant/1.0 (+https://github.com/YourUsername/Web-LLM-Assistant-Llama-cpp)", 16 | rate_limit=1, timeout=10, max_retries=3): 17 | self.session = requests.Session() 18 | self.session.headers.update({"User-Agent": user_agent}) 19 | self.robot_parser = RobotFileParser() 20 | self.rate_limit = rate_limit 21 | self.timeout = timeout 22 | self.max_retries = max_retries 23 | self.last_request_time = {} 24 | 25 | def can_fetch(self, url): 26 | parsed_url = urlparse(url) 27 | robots_url = f"{parsed_url.scheme}://{parsed_url.netloc}/robots.txt" 28 | self.robot_parser.set_url(robots_url) 29 | try: 30 | self.robot_parser.read() 31 | return self.robot_parser.can_fetch(self.session.headers["User-Agent"], url) 32 | except Exception as e: 33 | logger.warning(f"Error reading robots.txt for {url}: {e}") 34 | return True # Assume allowed if robots.txt can't be read 35 | 36 | def respect_rate_limit(self, url): 37 | domain = urlparse(url).netloc 38 | current_time = time.time() 39 | if domain in self.last_request_time: 40 | time_since_last_request = current_time - self.last_request_time[domain] 41 | if time_since_last_request < self.rate_limit: 42 | time.sleep(self.rate_limit - time_since_last_request) 43 | self.last_request_time[domain] = time.time() 44 | 45 | def scrape_page(self, url): 46 | if not self.can_fetch(url): 47 | logger.info(f"Robots.txt disallows scraping: {url}") 48 | return None 49 | 50 | for attempt in range(self.max_retries): 51 | try: 52 | self.respect_rate_limit(url) 53 | response = self.session.get(url, timeout=self.timeout) 54 | response.raise_for_status() 55 | return 
self.extract_content(response.text, url) 56 | except requests.RequestException as e: 57 | logger.warning(f"Error scraping {url} (attempt {attempt + 1}/{self.max_retries}): {e}") 58 | if attempt == self.max_retries - 1: 59 | logger.error(f"Failed to scrape {url} after {self.max_retries} attempts") 60 | return None 61 | time.sleep(2 ** attempt) # Exponential backoff 62 | 63 | def extract_content(self, html, url): 64 | soup = BeautifulSoup(html, 'html.parser') 65 | 66 | # Remove unwanted elements 67 | for element in soup(["script", "style", "nav", "footer", "header"]): 68 | element.decompose() 69 | 70 | # Extract title 71 | title = soup.title.string if soup.title else "" 72 | 73 | # Try to find main content 74 | main_content = soup.find('main') or soup.find('article') or soup.find('div', class_='content') 75 | 76 | if main_content: 77 | paragraphs = main_content.find_all('p') 78 | else: 79 | paragraphs = soup.find_all('p') 80 | 81 | # Extract text from paragraphs 82 | text = ' '.join([p.get_text().strip() for p in paragraphs]) 83 | 84 | # If no paragraphs found, get all text 85 | if not text: 86 | text = soup.get_text() 87 | 88 | # Clean up whitespace 89 | text = re.sub(r'\s+', ' ', text).strip() 90 | 91 | # Extract and resolve links 92 | links = [urljoin(url, a['href']) for a in soup.find_all('a', href=True)] 93 | 94 | return { 95 | "url": url, 96 | "title": title, 97 | "content": text[:2400], # Limit to first 2400 characters 98 | "links": links[:10] # Limit to first 10 links 99 | } 100 | 101 | def scrape_multiple_pages(urls, max_workers=5): 102 | scraper = WebScraper() 103 | results = {} 104 | 105 | with ThreadPoolExecutor(max_workers=max_workers) as executor: 106 | future_to_url = {executor.submit(scraper.scrape_page, url): url for url in urls} 107 | for future in as_completed(future_to_url): 108 | url = future_to_url[future] 109 | try: 110 | data = future.result() 111 | if data: 112 | results[url] = data 113 | logger.info(f"Successfully scraped: {url}") 114 | else: 115 | logger.warning(f"Failed to scrape: {url}") 116 | except Exception as exc: 117 | logger.error(f"{url} generated an exception: {exc}") 118 | 119 | return results 120 | 121 | # Function to integrate with your main system 122 | def get_web_content(urls): 123 | scraped_data = scrape_multiple_pages(urls) 124 | return {url: data['content'] for url, data in scraped_data.items() if data} 125 | 126 | # Standalone can_fetch function 127 | def can_fetch(url): 128 | parsed_url = urlparse(url) 129 | robots_url = f"{parsed_url.scheme}://{parsed_url.netloc}/robots.txt" 130 | rp = RobotFileParser() 131 | rp.set_url(robots_url) 132 | try: 133 | rp.read() 134 | return rp.can_fetch("*", url) 135 | except Exception as e: 136 | logger.warning(f"Error reading robots.txt for {url}: {e}") 137 | return True # Assume allowed if robots.txt can't be read 138 | 139 | if __name__ == "__main__": 140 | test_urls = [ 141 | "https://en.wikipedia.org/wiki/Web_scraping", 142 | "https://example.com", 143 | "https://www.python.org" 144 | ] 145 | scraped_content = get_web_content(test_urls) 146 | for url, content in scraped_content.items(): 147 | print(f"Content from {url}:") 148 | print(content[:500]) # Print first 500 characters 149 | print("\n---\n") 150 | -------------------------------------------------------------------------------- /strategic_analysis_parser.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict, Optional, Union 2 | import re 3 | import logging 4 | from dataclasses import 
dataclass 5 | from datetime import datetime 6 | 7 | @dataclass 8 | class ResearchFocus: 9 | """Represents a specific area of research focus""" 10 | area: str 11 | priority: int 12 | source_query: str = "" 13 | timestamp: str = "" 14 | search_queries: List[str] = None 15 | 16 | def __post_init__(self): 17 | if not self.timestamp: 18 | self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 19 | if self.search_queries is None: 20 | self.search_queries = [] 21 | 22 | @dataclass 23 | class AnalysisResult: 24 | """Contains the complete analysis result""" 25 | original_question: str 26 | focus_areas: List[ResearchFocus] 27 | raw_response: str 28 | timestamp: str = "" 29 | confidence_score: float = 0.0 30 | 31 | def __post_init__(self): 32 | if not self.timestamp: 33 | self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 34 | 35 | # Set up logging 36 | logger = logging.getLogger(__name__) 37 | 38 | class StrategicAnalysisParser: 39 | """Enhanced parser with improved pattern matching and validation""" 40 | def __init__(self): 41 | self.patterns = { 42 | 'original_question': [ 43 | r"(?i)original question analysis:\s*(.*?)(?=research gap|$)", 44 | r"(?i)original query:\s*(.*?)(?=research gap|$)", 45 | r"(?i)research question:\s*(.*?)(?=research gap|$)", 46 | r"(?i)topic analysis:\s*(.*?)(?=research gap|$)" 47 | ], 48 | 'research_gaps': [ 49 | r"(?i)research gaps?:\s*", 50 | r"(?i)gaps identified:\s*", 51 | r"(?i)areas for research:\s*", 52 | r"(?i)investigation areas:\s*" 53 | ], 54 | 'priority': [ 55 | r"(?i)priority:\s*(\d+)", 56 | r"(?i)priority level:\s*(\d+)", 57 | r"(?i)\(priority:\s*(\d+)\)", 58 | r"(?i)importance:\s*(\d+)" 59 | ] 60 | } 61 | self.logger = logging.getLogger(__name__) 62 | 63 | def parse_analysis(self, llm_response: str) -> Optional[AnalysisResult]: 64 | """Main parsing method with improved validation""" 65 | try: 66 | # Clean and normalize the response 67 | cleaned_response = self._clean_text(llm_response) 68 | 69 | # Extract original question with validation 70 | original_question = self._extract_original_question(cleaned_response) 71 | if not original_question: 72 | self.logger.warning("Failed to extract original question") 73 | original_question = "Original question extraction failed" 74 | 75 | # Extract and validate research areas 76 | focus_areas = self._extract_research_areas(cleaned_response) 77 | focus_areas = self._normalize_focus_areas(focus_areas) 78 | 79 | # Calculate confidence score 80 | confidence_score = self._calculate_confidence_score(original_question, focus_areas) 81 | 82 | return AnalysisResult( 83 | original_question=original_question, 84 | focus_areas=focus_areas, 85 | raw_response=llm_response, 86 | confidence_score=confidence_score 87 | ) 88 | 89 | except Exception as e: 90 | self.logger.error(f"Error in parse_analysis: {str(e)}") 91 | return None 92 | 93 | def _clean_text(self, text: str) -> str: 94 | """Clean and normalize text for parsing""" 95 | text = re.sub(r'\n{3,}', '\n\n', text) 96 | text = re.sub(r'\s{2,}', ' ', text) 97 | text = re.sub(r'(\d+\))', r'\1.', text) 98 | return text.strip() 99 | 100 | def _extract_original_question(self, text: str) -> str: 101 | """Extract original question with improved matching""" 102 | for pattern in self.patterns['original_question']: 103 | match = re.search(pattern, text, re.DOTALL) 104 | if match: 105 | return self._clean_text(match.group(1)) 106 | return "" 107 | 108 | def _extract_research_areas(self, text: str) -> List[ResearchFocus]: 109 | """Extract research areas with enhanced 
validation""" 110 | areas = [] 111 | for pattern in self.patterns['research_gaps']: 112 | gap_match = re.search(pattern, text) 113 | if gap_match: 114 | sections = re.split(r'\n\s*\d+[\.)]\s+', text[gap_match.end():]) 115 | sections = [s for s in sections if s.strip()] 116 | 117 | for section in sections: 118 | focus = self._parse_research_focus(section) 119 | if focus and self._is_valid_focus(focus): 120 | areas.append(focus) 121 | break 122 | return areas 123 | 124 | def _parse_research_focus(self, text: str) -> Optional[ResearchFocus]: 125 | """Parse research focus with improved validation without reasoning.""" 126 | try: 127 | # Extract area 128 | area = text.split('\n')[0].strip() 129 | 130 | # Extract and validate priority 131 | priority = self._extract_priority(text) 132 | 133 | # Return ResearchFocus without reasoning 134 | return ResearchFocus( 135 | area=area, 136 | priority=priority 137 | ) 138 | 139 | except Exception as e: 140 | self.logger.error(f"Error parsing research focus: {str(e)}") 141 | return None 142 | 143 | def _extract_priority(self, text: str) -> int: 144 | """Extract priority with validation""" 145 | for pattern in self.patterns['priority']: 146 | priority_match = re.search(pattern, text) 147 | if priority_match: 148 | try: 149 | priority = int(priority_match.group(1)) 150 | return max(1, min(5, priority)) 151 | except ValueError: 152 | continue 153 | return 3 # Default priority 154 | 155 | def _is_valid_focus(self, focus: ResearchFocus) -> bool: 156 | """Validate research focus completeness and quality""" 157 | if not focus.area: # Only check if area exists and isn't empty 158 | return False 159 | if focus.priority < 1 or focus.priority > 5: 160 | return False 161 | return True 162 | 163 | def _normalize_focus_areas(self, areas: List[ResearchFocus]) -> List[ResearchFocus]: 164 | """Normalize and validate focus areas""" 165 | normalized = [] 166 | for area in areas: 167 | if not area.area.strip(): 168 | continue 169 | 170 | area.priority = max(1, min(5, area.priority)) 171 | 172 | if self._is_valid_focus(area): 173 | normalized.append(area) 174 | 175 | # Sort by priority (highest first) but don't add any filler areas 176 | normalized.sort(key=lambda x: x.priority, reverse=True) 177 | 178 | return normalized 179 | 180 | def _calculate_confidence_score(self, question: str, areas: List[ResearchFocus]) -> float: 181 | """Calculate confidence score for analysis quality""" 182 | score = 0.0 183 | 184 | # Question quality (0.3) 185 | if question and len(question.split()) >= 3: 186 | score += 0.3 187 | 188 | # Areas quality (0.7) 189 | if areas: 190 | # Valid areas ratio (0.35) - now based on proportion that are valid vs total 191 | num_areas = len(areas) 192 | if num_areas > 0: # Avoid division by zero 193 | valid_areas = sum(1 for a in areas if self._is_valid_focus(a)) 194 | score += 0.35 * (valid_areas / num_areas) 195 | 196 | # Priority distribution (0.35) - now based on having different priorities 197 | if num_areas > 0: # Avoid division by zero 198 | unique_priorities = len(set(a.priority for a in areas)) 199 | score += 0.35 * (unique_priorities / num_areas) 200 | 201 | return round(score, 2) 202 | 203 | def format_analysis_result(self, result: AnalysisResult) -> str: 204 | """Format analysis result for display without reasoning.""" 205 | formatted = [ 206 | "Strategic Analysis Result", 207 | "=" * 80, 208 | f"\nOriginal Question Analysis:\n{result.original_question}\n", 209 | f"Analysis Confidence Score: {result.confidence_score}", 210 | "\nResearch Focus Areas:" 
211 | ] 212 | 213 | for i, focus in enumerate(result.focus_areas, 1): 214 | formatted.extend([ 215 | f"\n{i}. {focus.area}", 216 | f" Priority: {focus.priority}" 217 | ]) 218 | 219 | return "\n".join(formatted) 220 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Automated-AI-Web-Researcher-Ollama (Windows) 2 | A Windows-optimized version of Automated-AI-Web-Researcher-Ollama, adapted from the original Unix-based implementation to provide full Windows compatibility. 3 | 4 | 5 | ## Description 6 | Automated-AI-Web-Researcher is an innovative research assistant that leverages locally run large language models through Ollama to conduct thorough, automated online research on any topic or question. Unlike a traditional LLM chat, this tool performs structured research: it breaks your query into focused research areas, systematically investigates each area by searching the web and scraping relevant websites, and automatically saves its findings to a text document along with a source link for every piece of content. Whenever you want the research to stop, you can enter a command that terminates the session; the LLM then reviews all of the content it found and provides a comprehensive final summary of your original topic or question, after which you can ask the LLM follow-up questions about its findings. 7 | 8 | ## Project Demonstration 9 | 10 | [![My Project Demo](https://img.youtube.com/vi/hS7Q1B8N1mQ/0.jpg)](https://youtu.be/hS7Q1B8N1mQ "My Project Demo") 11 | 12 | Click the image above to watch the demonstration of My Project. 13 | 14 | ## Here's how it works: 15 | 16 | 1. You provide a research query (e.g., "What year will global population begin to decrease rather than increase according to research?") 17 | 2. The LLM analyzes your query and generates 5 specific research focus areas, each assigned a priority based on its relevance to the topic or question. 18 | 3. Starting with the highest-priority area, the LLM: 19 | - Formulates targeted search queries 20 | - Performs web searches 21 | - Analyzes the search results and selects the most relevant web pages 22 | - Scrapes and extracts relevant information from the selected web pages 23 | - Documents all content found during the research session in a research text file, including links to the websites the content was retrieved from 24 | 4. After investigating all focus areas, the LLM generates new focus areas based on the information gathered so far and repeats its research cycle. Earlier findings often suggest new, relevant focus areas, which can lead to interesting and novel lines of research. 25 | 5. You can let it research for as long as you like; at any time you can enter the quit command, which stops the research and causes the LLM to review all of the content collected so far in full and generate a comprehensive summary responding to your original query or topic. 26 | 6. The LLM then enters a conversation mode in which you can ask specific questions about the research findings. (A sketch of the Ollama call that powers each of these steps follows below.)
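Under the hood, every step above is an HTTP call to your local Ollama server. The following minimal sketch mirrors what `llm_wrapper.py` in this repo does (the model name shown is an example; use whatever you configure in `llm_config.py`):

```python
# Minimal sketch of the Ollama call used throughout the research loop.
# Mirrors llm_wrapper.py: POST to /api/generate and stitch the streamed JSON lines together.
import json
import requests

def ollama_generate(prompt: str,
                    model: str = "research-phi3",          # example name; substitute your own model
                    base_url: str = "http://localhost:11434") -> str:
    response = requests.post(
        f"{base_url}/api/generate",
        json={"model": model, "prompt": prompt, "options": {"temperature": 0.7}},
        stream=True,
    )
    response.raise_for_status()
    # Ollama streams one JSON object per line; concatenate their 'response' fields.
    return "".join(json.loads(line)["response"]
                   for line in response.iter_lines() if line).strip()
```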
27 | 28 | The key distinction is that this isn't just a chatbot - it's an automated research assistant that methodically investigates topics and maintains a documented research trail, all from a single question or topic of your choosing. Depending on your system and model, it can perform over a hundred searches and content retrievals in a relatively short amount of time. You can leave it running and come back to a full text document containing over a hundred pieces of content from relevant websites, then have it summarise the findings and even ask it questions about what it found. 29 | 30 | ## Features 31 | - Automated research planning with prioritized focus areas 32 | - Systematic web searching and content analysis 33 | - All research content and source URLs saved into a detailed text document 34 | - Research summary generation 35 | - Post-research Q&A capability about findings 36 | - Self-improving search mechanism 37 | - Rich console output with status indicators 38 | - Comprehensive answer synthesis using web-sourced information 39 | - Research conversation mode for exploring findings 40 | 41 | ## Installation on Windows 42 | 43 | Python 3.11 with a virtual environment is recommended. 44 | 45 | 1. Clone the repository: 46 | 47 | ```sh 48 | git clone https://github.com/hafeezhmha/Automated-AI-Web-Researcher-Ollama.git 49 | cd Automated-AI-Web-Researcher-Ollama 50 | ``` 51 | 52 | 2. Create and activate a virtual environment: 53 | 54 | ```sh 55 | python -m venv venv 56 | venv\Scripts\activate 57 | ``` 58 | 59 | 3. Install dependencies: 60 | 61 | ```sh 62 | pip install -r requirements.txt 63 | ``` 64 | 65 | 4. Install and Configure Ollama: 66 | - Install Ollama following instructions at https://ollama.ai 67 | - Using your selected model, create a custom model variant with the required context length 68 | (phi3:3.8b-mini-128k-instruct or phi3:14b-medium-128k-instruct are recommended) 69 | 70 | Create a file named `modelfile` with these exact contents: 71 | 72 | ``` 73 | FROM your-model-name 74 | 75 | PARAMETER num_ctx 38000 76 | ``` 77 | 78 | Replace "your-model-name" with your chosen model (e.g., phi3:medium-128k or phi3:medium). 79 | 80 | Then create the model: 81 | 82 | ```sh 83 | ollama create research-phi3 -f modelfile 84 | ``` 85 | 86 | Note: This specific configuration is necessary because recent Ollama versions have reduced the context windows of models like phi3:3.8b-mini-128k-instruct, despite names suggesting a high context length. Since the research process works with a large amount of information, the modelfile step above is required to restore a sufficiently large context window. 87 | 88 | ## Usage 89 | 90 | 1. Start Ollama: 91 | 92 | ```sh 93 | ollama serve 94 | ``` 95 | 96 | 2. Run the researcher: 97 | 98 | ```sh 99 | python Web-LLM.py 100 | ``` 101 | 102 | 3. Start a research session: 103 | - Type `@` followed by your research query 104 | - Press CTRL+Z to submit 105 | - Example: `@What year is global population projected to start declining?` 106 | 107 | 4. During research, you can use the following commands by typing the corresponding letter and submitting it with CTRL+Z: 108 | - Use `s` to show status. 109 | - Use `f` to show current focus.
110 | - Use `p` to pause and assess research progress. The LLM reviews the entire research content collected so far and tells you whether it can already answer your query with it. It then waits for one of two commands: `c` to continue the research, or `q` to terminate it, which produces a summary just as if you had quit without using the pause feature. 111 | - Use `q` to quit research. 112 | 113 | 5. After research completes: 114 | - Wait for the summary to be generated, and review the LLM's findings. 115 | - Enter conversation mode to ask specific questions about the findings. 116 | - Access the detailed research content, available in a research-session text file that appears in the program's directory and includes: 117 | * All retrieved content 118 | * Source URLs for all information 119 | * Focus areas investigated 120 | * Generated summary 121 | 122 | ## Configuration 123 | 124 | The LLM settings can be modified in `llm_config.py`. You must specify your model name in the configuration for the researcher to function. The default configuration is optimized for research tasks with the specified Phi-3 model. (An example configuration is shown at the end of this README.) 125 | 126 | ## Current Status 127 | This is a prototype that demonstrates functional automated research capabilities. While still in development, it successfully performs structured research tasks. It is currently tested and works well with the phi3:3.8b-mini-128k-instruct model when the context is set as advised above. 128 | 129 | ## Dependencies 130 | - Ollama 131 | - Python packages listed in requirements.txt 132 | - Recommended model: phi3:3.8b-mini-128k-instruct or phi3:14b-medium-128k-instruct (with custom context length as specified) 133 | 134 | ## Contributing 135 | Contributions are welcome! This is a prototype with room for improvements and new features. 136 | 137 | ## License 138 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 139 | 140 | ## Acknowledgments 141 | - Ollama team for their local LLM runtime 142 | - DuckDuckGo for their search API 143 | 144 | ## Personal Note 145 | This tool represents an attempt to bridge the gap between simple LLM interactions and genuine research capabilities. By structuring the research process and maintaining documentation, it aims to provide more thorough and verifiable results than traditional LLM conversations. It also represents an attempt to improve on my previous project, 'Web-LLM-Assistant-Llamacpp-Ollama', which simply gave LLMs the ability to search and scrape websites to answer questions. Unlike its predecessor, this program takes that capability and uses it in a novel and genuinely useful way; it is the most advanced and useful way I could conceive of building on my previous program. As a very new programmer, this being only my second program, I feel very good about the result, and I hope that it hits the mark! 146 | I have now been using it a great deal myself, and unlike the previous program, which felt more like a novelty than an actual tool, this one is actually quite useful and unique, but I am quite biased! 147 | 148 | Please enjoy, and feel free to submit any suggestions for improvements so that we can make this automated AI researcher even more capable. 149 | 150 | ## Disclaimer 151 | This project is for educational purposes only. Ensure you comply with the terms of service of all APIs and services used.
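## Appendix: Example Configuration

As a quick reference for the Configuration section above, a minimal Ollama setup in `llm_config.py` might look like the following sketch (the model name is an example; substitute your own):

```python
# llm_config.py - example values for an Ollama setup (a sketch, not the only valid configuration)
LLM_TYPE = "ollama"  # Options: 'llama_cpp', 'ollama'

LLM_CONFIG_OLLAMA = {
    "llm_type": "ollama",
    "base_url": "http://localhost:11434",  # default Ollama server URL
    "model_name": "research-phi3",         # the custom model created with the modelfile above
    "temperature": 0.7,
    "top_p": 0.9,
    "n_ctx": 38000,                        # match the num_ctx value in your modelfile
    "context_length": 38000,
    "stop": ["User:", "\n\n"],
}
```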
152 | -------------------------------------------------------------------------------- /llm_response_parser.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import Dict, List, Union, Optional 3 | import logging 4 | import json 5 | from strategic_analysis_parser import StrategicAnalysisParser, AnalysisResult, ResearchFocus 6 | 7 | # Set up logging 8 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 9 | logger = logging.getLogger(__name__) 10 | 11 | class UltimateLLMResponseParser: 12 | def __init__(self): 13 | self.decision_keywords = { 14 | 'refine': ['refine', 'need more info', 'insufficient', 'unclear', 'more research', 'additional search'], 15 | 'answer': ['answer', 'sufficient', 'enough info', 'can respond', 'adequate', 'comprehensive'] 16 | } 17 | self.section_identifiers = [ 18 | ('decision', r'(?i)decision\s*:'), 19 | ('reasoning', r'(?i)reasoning\s*:'), 20 | ('selected_results', r'(?i)selected results\s*:'), 21 | ('response', r'(?i)response\s*:') 22 | ] 23 | # Initialize strategic analysis parser 24 | self.strategic_parser = StrategicAnalysisParser() 25 | 26 | def parse_llm_response(self, response: str, mode: str = 'search') -> Dict[str, Union[str, List[int], AnalysisResult]]: 27 | """ 28 | Parse LLM response based on mode 29 | 30 | Args: 31 | response (str): The LLM's response text 32 | mode (str): 'search' for web search, 'research' for strategic analysis 33 | 34 | Returns: 35 | Dict containing parsed response 36 | """ 37 | logger.info(f"Starting to parse LLM response in {mode} mode") 38 | 39 | if mode == 'research': 40 | return self._parse_research_response(response) 41 | 42 | # Original search mode parsing 43 | result = { 44 | 'decision': None, 45 | 'reasoning': None, 46 | 'selected_results': [], 47 | 'response': None 48 | } 49 | 50 | parsing_strategies = [ 51 | self._parse_structured_response, 52 | self._parse_json_response, 53 | self._parse_unstructured_response, 54 | self._parse_implicit_response 55 | ] 56 | 57 | for strategy in parsing_strategies: 58 | try: 59 | parsed_result = strategy(response) 60 | if self._is_valid_result(parsed_result): 61 | result.update(parsed_result) 62 | logger.info(f"Successfully parsed using strategy: {strategy.__name__}") 63 | break 64 | except Exception as e: 65 | logger.warning(f"Error in parsing strategy {strategy.__name__}: {str(e)}") 66 | 67 | if not self._is_valid_result(result): 68 | logger.warning("All parsing strategies failed. 
Using fallback parsing.") 69 | result = self._fallback_parsing(response) 70 | 71 | result = self._post_process_result(result) 72 | 73 | logger.info("Finished parsing LLM response") 74 | return result 75 | 76 | def _parse_research_response(self, response: str) -> Dict[str, Union[str, AnalysisResult]]: 77 | """Handle research mode specific parsing""" 78 | try: 79 | analysis_result = self.strategic_parser.parse_analysis(response) 80 | if analysis_result: 81 | return { 82 | 'mode': 'research', 83 | 'analysis_result': analysis_result, 84 | 'error': None 85 | } 86 | else: 87 | logger.error("Failed to parse strategic analysis") 88 | return { 89 | 'mode': 'research', 90 | 'analysis_result': None, 91 | 'error': 'Failed to parse strategic analysis' 92 | } 93 | except Exception as e: 94 | logger.error(f"Error in research response parsing: {str(e)}") 95 | return { 96 | 'mode': 'research', 97 | 'analysis_result': None, 98 | 'error': str(e) 99 | } 100 | 101 | def parse_search_query(self, query_response: str) -> Dict[str, str]: 102 | """Parse search query formulation response""" 103 | try: 104 | lines = query_response.strip().split('\n') 105 | result = { 106 | 'query': '', 107 | 'time_range': 'none' 108 | } 109 | 110 | for line in lines: 111 | if ':' in line: 112 | key, value = line.split(':', 1) 113 | key = key.strip().lower() 114 | value = value.strip() 115 | 116 | if 'query' in key: 117 | result['query'] = self._clean_query(value) 118 | elif 'time' in key or 'range' in key: 119 | result['time_range'] = self._validate_time_range(value) 120 | 121 | return result 122 | except Exception as e: 123 | logger.error(f"Error parsing search query: {str(e)}") 124 | return {'query': '', 'time_range': 'none'} 125 | 126 | def _parse_structured_response(self, response: str) -> Dict[str, Union[str, List[int]]]: 127 | result = {} 128 | for key, pattern in self.section_identifiers: 129 | match = re.search(f'{pattern}(.*?)(?={"|".join([p for k, p in self.section_identifiers if k != key])}|$)', 130 | response, re.IGNORECASE | re.DOTALL) 131 | if match: 132 | result[key] = match.group(1).strip() 133 | 134 | if 'selected_results' in result: 135 | result['selected_results'] = self._extract_numbers(result['selected_results']) 136 | 137 | return result 138 | 139 | def _parse_json_response(self, response: str) -> Dict[str, Union[str, List[int]]]: 140 | try: 141 | json_match = re.search(r'\{.*\}', response, re.DOTALL) 142 | if json_match: 143 | json_str = json_match.group(0) 144 | parsed_json = json.loads(json_str) 145 | return {k: v for k, v in parsed_json.items() 146 | if k in ['decision', 'reasoning', 'selected_results', 'response']} 147 | except json.JSONDecodeError: 148 | pass 149 | return {} 150 | 151 | def _parse_unstructured_response(self, response: str) -> Dict[str, Union[str, List[int]]]: 152 | result = {} 153 | lines = response.split('\n') 154 | current_section = None 155 | 156 | for line in lines: 157 | section_match = re.match(r'(.+?)[:.-](.+)', line) 158 | if section_match: 159 | key = self._match_section_to_key(section_match.group(1)) 160 | if key: 161 | current_section = key 162 | result[key] = section_match.group(2).strip() 163 | elif current_section: 164 | result[current_section] += ' ' + line.strip() 165 | 166 | if 'selected_results' in result: 167 | result['selected_results'] = self._extract_numbers(result['selected_results']) 168 | 169 | return result 170 | 171 | def _parse_implicit_response(self, response: str) -> Dict[str, Union[str, List[int]]]: 172 | result = {} 173 | 174 | decision = 
self._infer_decision(response) 175 | if decision: 176 | result['decision'] = decision 177 | 178 | numbers = self._extract_numbers(response) 179 | if numbers: 180 | result['selected_results'] = numbers 181 | 182 | if not result: 183 | result['response'] = response.strip() 184 | 185 | return result 186 | 187 | def _fallback_parsing(self, response: str) -> Dict[str, Union[str, List[int]]]: 188 | return { 189 | 'decision': self._infer_decision(response), 190 | 'reasoning': None, 191 | 'selected_results': self._extract_numbers(response), 192 | 'response': response.strip() 193 | } 194 | 195 | def _post_process_result(self, result: Dict[str, Union[str, List[int]]]) -> Dict[str, Union[str, List[int]]]: 196 | if result['decision'] not in ['refine', 'answer']: 197 | result['decision'] = self._infer_decision(str(result)) 198 | 199 | if not isinstance(result['selected_results'], list): 200 | result['selected_results'] = self._extract_numbers(str(result['selected_results'])) 201 | 202 | result['selected_results'] = result['selected_results'][:2] 203 | 204 | if not result['reasoning']: 205 | result['reasoning'] = f"Based on the {'presence' if result['selected_results'] else 'absence'} of selected results and the overall content." 206 | 207 | if not result['response']: 208 | result['response'] = result.get('reasoning', 'No clear response found.') 209 | 210 | return result 211 | 212 | def _match_section_to_key(self, section: str) -> Optional[str]: 213 | for key, pattern in self.section_identifiers: 214 | if re.search(pattern, section, re.IGNORECASE): 215 | return key 216 | return None 217 | 218 | def _extract_numbers(self, text: str) -> List[int]: 219 | return [int(num) for num in re.findall(r'\b(?:10|[1-9])\b', text)] 220 | 221 | def _infer_decision(self, text: str) -> str: 222 | text = text.lower() 223 | refine_score = sum(text.count(keyword) for keyword in self.decision_keywords['refine']) 224 | answer_score = sum(text.count(keyword) for keyword in self.decision_keywords['answer']) 225 | return 'refine' if refine_score > answer_score else 'answer' 226 | 227 | def _is_valid_result(self, result: Dict[str, Union[str, List[int]]]) -> bool: 228 | return bool(result.get('decision') or result.get('response') or result.get('selected_results')) 229 | 230 | def _clean_query(self, query: str) -> str: 231 | """Clean and validate search query""" 232 | query = re.sub(r'["\'\[\]]', '', query) 233 | query = re.sub(r'\s+', ' ', query) 234 | return query.strip()[:100] 235 | 236 | def _validate_time_range(self, time_range: str) -> str: 237 | """Validate time range value""" 238 | valid_ranges = ['d', 'w', 'm', 'y', 'none'] 239 | time_range = time_range.lower() 240 | return time_range if time_range in valid_ranges else 'none' 241 | -------------------------------------------------------------------------------- /Web-LLM.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from colorama import init, Fore, Style 4 | import logging 5 | import time 6 | from io import StringIO 7 | from Self_Improving_Search import EnhancedSelfImprovingSearch 8 | from llm_config import get_llm_config 9 | from llm_response_parser import UltimateLLMResponseParser 10 | from llm_wrapper import LLMWrapper 11 | from strategic_analysis_parser import StrategicAnalysisParser 12 | from research_manager import ResearchManager 13 | 14 | # Initialize colorama 15 | if os.name == 'nt': # Windows-specific initialization 16 | init(convert=True, strip=False, wrap=True) 17 | else: 18 | init() 19 | 20 
| # Set up logging 21 | log_directory = 'logs' 22 | if not os.path.exists(log_directory): 23 | os.makedirs(log_directory) 24 | 25 | logger = logging.getLogger(__name__) 26 | logger.setLevel(logging.INFO) 27 | log_file = os.path.join(log_directory, 'web_llm.log') 28 | file_handler = logging.FileHandler(log_file) 29 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 30 | file_handler.setFormatter(formatter) 31 | logger.handlers = [] 32 | logger.addHandler(file_handler) 33 | logger.propagate = False 34 | 35 | # Disable other loggers 36 | for name in logging.root.manager.loggerDict: 37 | if name != __name__: 38 | logging.getLogger(name).disabled = True 39 | 40 | class OutputRedirector: 41 | def __init__(self, stream=None): 42 | self.stream = stream or StringIO() 43 | self.original_stdout = sys.stdout 44 | self.original_stderr = sys.stderr 45 | 46 | def __enter__(self): 47 | sys.stdout = self.stream 48 | sys.stderr = self.stream 49 | return self.stream 50 | 51 | def __exit__(self, exc_type, exc_val, exc_tb): 52 | sys.stdout = self.original_stdout 53 | sys.stderr = self.original_stderr 54 | 55 | def print_header(): 56 | print(Fore.CYAN + Style.BRIGHT + """ 57 | ╔══════════════════════════════════════════════════════════╗ 58 | ║ 🌐 Advanced Research Assistant 🤖 ║ 59 | ╚══════════════════════════════════════════════════════════╝ 60 | """ + Style.RESET_ALL) 61 | print(Fore.YELLOW + """ 62 | Welcome to the Advanced Research Assistant! 63 | 64 | Usage: 65 | - Start your research query with '@' 66 | Example: "@analyze the impact of AI on healthcare" 67 | 68 | Press CTRL+Z (Windows) to submit input. 69 | """ + Style.RESET_ALL) 70 | 71 | def get_multiline_input() -> str: 72 | """Get multiline input using msvcrt for Windows""" 73 | print(f"{Fore.GREEN}📝 Enter your message (Press CTRL+Z to submit):{Style.RESET_ALL}") 74 | lines = [] 75 | current_line = [] 76 | 77 | import msvcrt 78 | 79 | try: 80 | while True: 81 | if msvcrt.kbhit(): 82 | char = msvcrt.getch() 83 | 84 | # CTRL+Z detection (Windows EOF) 85 | if char == b'\x1a': # ASCII code for CTRL+Z 86 | sys.stdout.write('\n') # New line for clean display 87 | if current_line: 88 | lines.append(''.join(current_line)) 89 | result = ''.join(lines).strip() if lines else ''.join(current_line).strip() 90 | return result if result else '' # Return empty string instead of None 91 | 92 | # Handle special characters 93 | elif char in [b'\r', b'\n']: # Enter 94 | sys.stdout.write('\n') 95 | if current_line: # Only append if there's content 96 | lines.append(''.join(current_line)) 97 | current_line = [] 98 | 99 | elif char == b'\x08': # Backspace 100 | if current_line: 101 | current_line.pop() 102 | sys.stdout.write('\b \b') # Erase character 103 | 104 | elif char == b'\x03': # CTRL+C 105 | sys.stdout.write('\n') 106 | return 'q' 107 | 108 | # Normal character 109 | elif 32 <= ord(char) <= 126: # Printable characters 110 | current_line.append(char.decode('utf-8')) 111 | sys.stdout.write(char.decode('utf-8')) 112 | 113 | # Flush output 114 | sys.stdout.flush() 115 | 116 | except Exception as e: 117 | logger.error(f"Error in multiline input: {str(e)}") 118 | return 'q' 119 | 120 | def initialize_system(): 121 | """Initialize system with proper error checking""" 122 | try: 123 | print(Fore.YELLOW + "Initializing system..." 
+ Style.RESET_ALL) 124 | 125 | llm_config = get_llm_config() 126 | if llm_config['llm_type'] == 'ollama': 127 | import requests 128 | try: 129 | response = requests.get(llm_config['base_url'], timeout=5) 130 | if response.status_code != 200: 131 | raise ConnectionError("Cannot connect to Ollama server") 132 | except requests.exceptions.RequestException: 133 | raise ConnectionError( 134 | "\nCannot connect to Ollama server!" 135 | "\nPlease ensure:" 136 | "\n1. Ollama is installed" 137 | "\n2. Ollama server is running (try 'ollama serve')" 138 | "\n3. The model specified in llm_config.py is pulled" 139 | ) 140 | elif llm_config['llm_type'] == 'llama_cpp': 141 | model_path = llm_config.get('model_path') 142 | if not model_path or not os.path.exists(model_path): 143 | raise FileNotFoundError( 144 | f"\nLLama.cpp model not found at: {model_path}" 145 | "\nPlease ensure model path in llm_config.py is correct" 146 | ) 147 | 148 | with OutputRedirector() as output: 149 | llm_wrapper = LLMWrapper() 150 | try: 151 | test_response = llm_wrapper.generate("Test", max_tokens=10) 152 | if not test_response: 153 | raise ConnectionError("LLM failed to generate response") 154 | except Exception as e: 155 | raise ConnectionError(f"LLM test failed: {str(e)}") 156 | 157 | parser = UltimateLLMResponseParser() 158 | search_engine = EnhancedSelfImprovingSearch(llm_wrapper, parser) 159 | research_manager = ResearchManager(llm_wrapper, parser, search_engine) 160 | 161 | print(Fore.GREEN + "System initialized successfully." + Style.RESET_ALL) 162 | return llm_wrapper, parser, search_engine, research_manager 163 | except Exception as e: 164 | logger.error(f"Error initializing system: {str(e)}", exc_info=True) 165 | print(Fore.RED + f"System initialization failed: {str(e)}" + Style.RESET_ALL) 166 | return None, None, None, None 167 | def handle_search_mode(search_engine, query): 168 | """Handles web search operations""" 169 | print(f"{Fore.CYAN}Initiating web search...{Style.RESET_ALL}") 170 | try: 171 | # Change search() to search_and_improve() which is the correct method name 172 | results = search_engine.search_and_improve(query) 173 | print(f"\n{Fore.GREEN}Search Results:{Style.RESET_ALL}") 174 | print(results) 175 | except Exception as e: 176 | logger.error(f"Search error: {str(e)}") 177 | print(f"{Fore.RED}Search failed: {str(e)}{Style.RESET_ALL}") 178 | def handle_research_mode(research_manager, query): 179 | """Handles research mode operations""" 180 | print(f"{Fore.CYAN}Initiating research mode...{Style.RESET_ALL}") 181 | 182 | try: 183 | # Start the research 184 | research_manager.start_research(query) 185 | research_active = True # Flag to track research state 186 | 187 | submit_key = "CTRL+Z" if os.name == 'nt' else "CTRL+D" 188 | print(f"\n{Fore.YELLOW}Research Running. 
Available Commands:{Style.RESET_ALL}") 189 | print(f"Type command and press {submit_key}:") 190 | print("'s' = Show status") 191 | print("'f' = Show focus") 192 | print("'p' = Pause and assess the research progress") 193 | print("'q' = Quit research") 194 | 195 | # While the research is active, keep checking for commands 196 | while research_active and research_manager.is_active(): 197 | try: 198 | print(f"\n{Fore.GREEN}Enter command (s/f/p/q) and press {submit_key} to submit:{Style.RESET_ALL}") 199 | command = get_multiline_input().strip().lower() 200 | 201 | # Handle empty input 202 | if not command: 203 | continue 204 | 205 | if command == 's': # Show status command 206 | status = research_manager.get_progress() 207 | print("\n" + status) 208 | # Don't break or stop research after showing status 209 | continue 210 | 211 | elif command == 'f': # Show current focus command 212 | if research_manager.current_focus: 213 | print(f"\n{Fore.CYAN}Current Focus:{Style.RESET_ALL}") 214 | print(f"Area: {research_manager.current_focus.area}") 215 | print(f"Priority: {research_manager.current_focus.priority}") 216 | print(f"Reasoning: {research_manager.current_focus.reasoning}") 217 | else: 218 | print(f"\n{Fore.YELLOW}No current focus area{Style.RESET_ALL}") 219 | continue 220 | 221 | elif command == 'p': # Pause research progress command 222 | research_manager.pause_and_assess() 223 | continue 224 | 225 | elif command == 'q': # Quit research 226 | print(f"\n{Fore.YELLOW}Research terminated by user.{Style.RESET_ALL}") 227 | research_active = False 228 | break 229 | 230 | else: 231 | print(f"{Fore.RED}Unknown command. Please enter a valid command (s/f/p/q).{Style.RESET_ALL}") 232 | continue 233 | 234 | except KeyboardInterrupt: 235 | print(f"\n{Fore.YELLOW}Research interrupted by user.{Style.RESET_ALL}") 236 | research_active = False 237 | break 238 | 239 | except Exception as e: 240 | logger.error(f"Error processing command: {str(e)}") 241 | print(f"{Fore.RED}An error occurred: {str(e)}{Style.RESET_ALL}") 242 | continue 243 | 244 | # Only terminate if research is no longer active 245 | if not research_active: 246 | print("\nInitiating research termination...") 247 | summary = research_manager.terminate_research() 248 | 249 | try: 250 | research_manager._cleanup_research_ui() 251 | except Exception as e: 252 | logger.error(f"Error during UI cleanup: {str(e)}") 253 | 254 | print(f"\n{Fore.GREEN}Research Summary:{Style.RESET_ALL}") 255 | print(summary) 256 | 257 | if research_manager.research_complete and research_manager.research_summary: 258 | time.sleep(0.5) 259 | research_manager.start_conversation_mode() 260 | 261 | except KeyboardInterrupt: 262 | print(f"\n{Fore.YELLOW}Research interrupted.{Style.RESET_ALL}") 263 | research_manager.terminate_research() 264 | except Exception as e: 265 | logger.error(f"Research error: {str(e)}") 266 | print(f"\n{Fore.RED}Research error: {str(e)}{Style.RESET_ALL}") 267 | research_manager.terminate_research() 268 | def main(): 269 | print_header() 270 | try: 271 | llm, parser, search_engine, research_manager = initialize_system() 272 | if not all([llm, parser, search_engine, research_manager]): 273 | return 274 | 275 | while True: 276 | try: 277 | # Get input with improved CTRL+Z handling 278 | user_input = get_multiline_input() 279 | 280 | # Handle immediate CTRL+Z (empty input) 281 | if user_input == "": 282 | user_input = "@quit" # Convert empty CTRL+Z to quit command 283 | 284 | user_input = user_input.strip() 285 | 286 | # Check for special quit markers 287 | 
if user_input in ["@quit", "quit", "q"]: 288 | print(Fore.YELLOW + "\nGoodbye!" + Style.RESET_ALL) 289 | break 290 | 291 | if not user_input: 292 | continue 293 | 294 | if user_input.lower() == 'help': 295 | print_header() 296 | continue 297 | 298 | if user_input.startswith('/'): 299 | search_query = user_input[1:].strip() 300 | handle_search_mode(search_engine, search_query) 301 | 302 | elif user_input.startswith('@'): 303 | research_query = user_input[1:].strip() 304 | handle_research_mode(research_manager, research_query) 305 | 306 | else: 307 | print(f"{Fore.RED}Please start with '/' for search or '@' for research.{Style.RESET_ALL}") 308 | 309 | except KeyboardInterrupt: 310 | print(f"\n{Fore.YELLOW}Exiting program...{Style.RESET_ALL}") 311 | break 312 | 313 | except Exception as e: 314 | logger.error(f"Error in main loop: {str(e)}") 315 | print(f"{Fore.RED}An error occurred: {str(e)}{Style.RESET_ALL}") 316 | continue 317 | 318 | except KeyboardInterrupt: 319 | print(f"\n{Fore.YELLOW}Program terminated by user.{Style.RESET_ALL}") 320 | 321 | except Exception as e: 322 | logger.critical(f"Critical error: {str(e)}") 323 | print(f"{Fore.RED}Critical error: {str(e)}{Style.RESET_ALL}") 324 | 325 | finally: 326 | # Ensure proper cleanup on exit 327 | try: 328 | if 'research_manager' in locals() and research_manager: 329 | if hasattr(research_manager, 'ui'): 330 | research_manager.ui.cleanup() 331 | except: 332 | pass 333 | os._exit(0) 334 | 335 | if __name__ == "__main__": 336 | main() 337 | -------------------------------------------------------------------------------- /Self_Improving_Search.py: -------------------------------------------------------------------------------- 1 | import time 2 | import re 3 | import os 4 | from typing import List, Dict, Tuple, Union 5 | from colorama import Fore, Style 6 | import logging 7 | import sys 8 | from io import StringIO 9 | from web_scraper import get_web_content, can_fetch 10 | from llm_config import get_llm_config 11 | from llm_response_parser import UltimateLLMResponseParser 12 | from llm_wrapper import LLMWrapper 13 | from urllib.parse import urlparse 14 | 15 | # Set up logging 16 | log_directory = 'logs' 17 | if not os.path.exists(log_directory): 18 | os.makedirs(log_directory) 19 | 20 | # Configure logger 21 | logger = logging.getLogger(__name__) 22 | logger.setLevel(logging.INFO) 23 | log_file = os.path.join(log_directory, 'llama_output.log') 24 | file_handler = logging.FileHandler(log_file) 25 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 26 | file_handler.setFormatter(formatter) 27 | logger.handlers = [] 28 | logger.addHandler(file_handler) 29 | logger.propagate = False 30 | 31 | # Suppress other loggers 32 | for name in ['root', 'duckduckgo_search', 'requests', 'urllib3']: 33 | logging.getLogger(name).setLevel(logging.WARNING) 34 | logging.getLogger(name).handlers = [] 35 | logging.getLogger(name).propagate = False 36 | 37 | class OutputRedirector: 38 | def __init__(self, stream=None): 39 | self.stream = stream or StringIO() 40 | self.original_stdout = sys.stdout 41 | self.original_stderr = sys.stderr 42 | 43 | def __enter__(self): 44 | sys.stdout = self.stream 45 | sys.stderr = self.stream 46 | return self.stream 47 | 48 | def __exit__(self, exc_type, exc_val, exc_tb): 49 | sys.stdout = self.original_stdout 50 | sys.stderr = self.original_stderr 51 | 52 | class EnhancedSelfImprovingSearch: 53 | def __init__(self, llm: LLMWrapper, parser: UltimateLLMResponseParser, max_attempts: int = 5): 54 | self.llm = 
llm 55 | self.parser = parser 56 | self.max_attempts = max_attempts 57 | self.llm_config = get_llm_config() 58 | 59 | @staticmethod 60 | def initialize_llm(): 61 | llm_wrapper = LLMWrapper() 62 | return llm_wrapper 63 | 64 | def print_thinking(self): 65 | print(Fore.MAGENTA + "🧠 Thinking..." + Style.RESET_ALL) 66 | 67 | def print_searching(self): 68 | print(Fore.MAGENTA + "📝 Searching..." + Style.RESET_ALL) 69 | 70 | def search_and_improve(self, user_query: str) -> str: 71 | attempt = 0 72 | while attempt < self.max_attempts: 73 | print(f"\n{Fore.CYAN}Search attempt {attempt + 1}:{Style.RESET_ALL}") 74 | self.print_searching() 75 | 76 | try: 77 | formulated_query, time_range = self.formulate_query(user_query, attempt) 78 | 79 | print(f"{Fore.YELLOW}Original query: {user_query}{Style.RESET_ALL}") 80 | print(f"{Fore.YELLOW}Formulated query: {formulated_query}{Style.RESET_ALL}") 81 | print(f"{Fore.YELLOW}Time range: {time_range}{Style.RESET_ALL}") 82 | 83 | if not formulated_query: 84 | print(f"{Fore.RED}Error: Empty search query. Retrying...{Style.RESET_ALL}") 85 | attempt += 1 86 | continue 87 | 88 | search_results = self.perform_search(formulated_query, time_range) 89 | 90 | if not search_results: 91 | print(f"{Fore.RED}No results found. Retrying with a different query...{Style.RESET_ALL}") 92 | attempt += 1 93 | continue 94 | 95 | self.display_search_results(search_results) 96 | 97 | selected_urls = self.select_relevant_pages(search_results, user_query) 98 | 99 | if not selected_urls: 100 | print(f"{Fore.RED}No relevant URLs found. Retrying...{Style.RESET_ALL}") 101 | attempt += 1 102 | continue 103 | 104 | print(Fore.MAGENTA + "⚙️ Scraping selected pages..." + Style.RESET_ALL) 105 | # Scraping is done without OutputRedirector to ensure messages are visible 106 | scraped_content = self.scrape_content(selected_urls) 107 | 108 | if not scraped_content: 109 | print(f"{Fore.RED}Failed to scrape content. Retrying...{Style.RESET_ALL}") 110 | attempt += 1 111 | continue 112 | 113 | self.display_scraped_content(scraped_content) 114 | 115 | self.print_thinking() 116 | 117 | with OutputRedirector() as output: 118 | evaluation, decision = self.evaluate_scraped_content(user_query, scraped_content) 119 | llm_output = output.getvalue() 120 | logger.info(f"LLM Output in evaluate_scraped_content:\n{llm_output}") 121 | 122 | print(f"{Fore.MAGENTA}Evaluation: {evaluation}{Style.RESET_ALL}") 123 | print(f"{Fore.MAGENTA}Decision: {decision}{Style.RESET_ALL}") 124 | 125 | if decision == "answer": 126 | return self.generate_final_answer(user_query, scraped_content) 127 | elif decision == "refine": 128 | print(f"{Fore.YELLOW}Refining search...{Style.RESET_ALL}") 129 | attempt += 1 130 | else: 131 | print(f"{Fore.RED}Unexpected decision. Proceeding to answer.{Style.RESET_ALL}") 132 | return self.generate_final_answer(user_query, scraped_content) 133 | 134 | except Exception as e: 135 | print(f"{Fore.RED}An error occurred during search attempt. 
Check the log file for details.{Style.RESET_ALL}") 136 | logger.error(f"An error occurred during search: {str(e)}", exc_info=True) 137 | attempt += 1 138 | 139 | return self.synthesize_final_answer(user_query) 140 | 141 | def evaluate_scraped_content(self, user_query: str, scraped_content: Dict[str, str]) -> Tuple[str, str]: 142 | user_query_short = user_query[:200] 143 | prompt = f""" 144 | Evaluate if the following scraped content contains sufficient information to answer the user's question comprehensively: 145 | 146 | User's question: "{user_query_short}" 147 | 148 | Scraped Content: 149 | {self.format_scraped_content(scraped_content)} 150 | 151 | Your task: 152 | 1. Determine if the scraped content provides enough relevant and detailed information to answer the user's question thoroughly. 153 | 2. If the information is sufficient, decide to 'answer'. If more information or clarification is needed, decide to 'refine' the search. 154 | 155 | Respond using EXACTLY this format: 156 | Evaluation: [Your evaluation of the scraped content] 157 | Decision: [ONLY 'answer' if content is sufficient, or 'refine' if more information is needed] 158 | """ 159 | max_retries = 3 160 | for attempt in range(max_retries): 161 | try: 162 | response_text = self.llm.generate(prompt, max_tokens=200, stop=None) 163 | evaluation, decision = self.parse_evaluation_response(response_text) 164 | if decision in ['answer', 'refine']: 165 | return evaluation, decision 166 | except Exception as e: 167 | logger.warning(f"Error in evaluate_scraped_content (attempt {attempt + 1}): {str(e)}") 168 | 169 | logger.warning("Failed to get a valid decision in evaluate_scraped_content. Defaulting to 'refine'.") 170 | return "Failed to evaluate content.", "refine" 171 | 172 | def parse_evaluation_response(self, response: str) -> Tuple[str, str]: 173 | evaluation = "" 174 | decision = "" 175 | for line in response.strip().split('\n'): 176 | if line.startswith('Evaluation:'): 177 | evaluation = line.split(':', 1)[1].strip() 178 | elif line.startswith('Decision:'): 179 | decision = line.split(':', 1)[1].strip().lower() 180 | return evaluation, decision 181 | 182 | def formulate_query(self, user_query: str, attempt: int) -> Tuple[str, str]: 183 | user_query_short = user_query[:200] 184 | prompt = f""" 185 | Based on the following user question, formulate a concise and effective search query: 186 | "{user_query_short}" 187 | Your task: 188 | 1. Create a search query of 2-5 words that will yield relevant results. 189 | 2. Determine if a specific time range is needed for the search. 190 | Time range options: 191 | - 'd': Limit results to the past day. Use for very recent events or rapidly changing information. 192 | - 'w': Limit results to the past week. Use for recent events or topics with frequent updates. 193 | - 'm': Limit results to the past month. Use for relatively recent information or ongoing events. 194 | - 'y': Limit results to the past year. Use for annual events or information that changes yearly. 195 | - 'none': No time limit. Use for historical information or topics not tied to a specific time frame. 196 | Respond in the following format: 197 | Search query: [Your 2-5 word query] 198 | Time range: [d/w/m/y/none] 199 | Do not provide any additional information or explanation. 
200 | """ 201 | max_retries = 3 202 | for retry in range(max_retries): 203 | with OutputRedirector() as output: 204 | response_text = self.llm.generate(prompt, max_tokens=50, stop=None) 205 | llm_output = output.getvalue() 206 | logger.info(f"LLM Output in formulate_query:\n{llm_output}") 207 | query, time_range = self.parse_query_response(response_text) 208 | if query and time_range: 209 | return query, time_range 210 | return self.fallback_query(user_query), "none" 211 | 212 | def parse_query_response(self, response: str) -> Tuple[str, str]: 213 | query = "" 214 | time_range = "none" 215 | for line in response.strip().split('\n'): 216 | if ":" in line: 217 | key, value = line.split(":", 1) 218 | key = key.strip().lower() 219 | value = value.strip() 220 | if "query" in key: 221 | query = self.clean_query(value) 222 | elif "time" in key or "range" in key: 223 | time_range = self.validate_time_range(value) 224 | return query, time_range 225 | 226 | def clean_query(self, query: str) -> str: 227 | query = re.sub(r'["\'\[\]]', '', query) 228 | query = re.sub(r'\s+', ' ', query) 229 | return query.strip()[:100] 230 | 231 | def validate_time_range(self, time_range: str) -> str: 232 | valid_ranges = ['d', 'w', 'm', 'y', 'none'] 233 | time_range = time_range.lower() 234 | return time_range if time_range in valid_ranges else 'none' 235 | 236 | def fallback_query(self, user_query: str) -> str: 237 | words = user_query.split() 238 | return " ".join(words[:5]) 239 | 240 | def perform_search(self, query: str, time_range: str) -> List[Dict]: 241 | if not query: 242 | return [] 243 | 244 | from duckduckgo_search import DDGS 245 | 246 | with DDGS() as ddgs: 247 | try: 248 | with OutputRedirector() as output: 249 | if time_range and time_range != 'none': 250 | results = list(ddgs.text(query, timelimit=time_range, max_results=10)) 251 | else: 252 | results = list(ddgs.text(query, max_results=10)) 253 | ddg_output = output.getvalue() 254 | logger.info(f"DDG Output in perform_search:\n{ddg_output}") 255 | return [{'number': i+1, **result} for i, result in enumerate(results)] 256 | except Exception as e: 257 | print(f"{Fore.RED}Search error: {str(e)}{Style.RESET_ALL}") 258 | return [] 259 | 260 | def display_search_results(self, results: List[Dict]) -> None: 261 | """Display search results with minimal output""" 262 | try: 263 | if not results: 264 | return 265 | 266 | # Only show search success status 267 | print(f"\nSearch query sent to DuckDuckGo: {self.last_query}") 268 | print(f"Time range sent to DuckDuckGo: {self.last_time_range}") 269 | print(f"Number of results: {len(results)}") 270 | 271 | except Exception as e: 272 | logger.error(f"Error displaying search results: {str(e)}") 273 | 274 | def select_relevant_pages(self, search_results: List[Dict], user_query: str) -> List[str]: 275 | prompt = f""" 276 | Given the following search results for the user's question: "{user_query}" 277 | Select the 2 most relevant results to scrape and analyze. Explain your reasoning for each selection. 278 | 279 | Search Results: 280 | {self.format_results(search_results)} 281 | 282 | Instructions: 283 | 1. You MUST select exactly 2 result numbers from the search results. 284 | 2. Choose the results that are most likely to contain comprehensive and relevant information to answer the user's question. 285 | 3. Provide a brief reason for each selection. 
286 | 287 | You MUST respond using EXACTLY this format and nothing else: 288 | 289 | Selected Results: [Two numbers corresponding to the selected results] 290 | Reasoning: [Your reasoning for the selections] 291 | """ 292 | 293 | max_retries = 3 294 | for retry in range(max_retries): 295 | with OutputRedirector() as output: 296 | response_text = self.llm.generate(prompt, max_tokens=200, stop=None) 297 | llm_output = output.getvalue() 298 | logger.info(f"LLM Output in select_relevant_pages:\n{llm_output}") 299 | 300 | parsed_response = self.parse_page_selection_response(response_text) 301 | if parsed_response and self.validate_page_selection_response(parsed_response, len(search_results)): 302 | selected_urls = [result['href'] for result in search_results if result['number'] in parsed_response['selected_results']] 303 | 304 | allowed_urls = [url for url in selected_urls if can_fetch(url)] 305 | if allowed_urls: 306 | return allowed_urls 307 | else: 308 | print(f"{Fore.YELLOW}Warning: All selected URLs are disallowed by robots.txt. Retrying selection.{Style.RESET_ALL}") 309 | else: 310 | print(f"{Fore.YELLOW}Warning: Invalid page selection. Retrying.{Style.RESET_ALL}") 311 | 312 | print(f"{Fore.YELLOW}Warning: All attempts to select relevant pages failed. Falling back to top allowed results.{Style.RESET_ALL}") 313 | allowed_urls = [result['href'] for result in search_results if can_fetch(result['href'])][:2] 314 | return allowed_urls 315 | 316 | def parse_page_selection_response(self, response: str) -> Dict[str, Union[List[int], str]]: 317 | lines = response.strip().split('\n') 318 | parsed = {} 319 | for line in lines: 320 | if line.startswith('Selected Results:'): 321 | parsed['selected_results'] = [int(num.strip()) for num in re.findall(r'\d+', line)] 322 | elif line.startswith('Reasoning:'): 323 | parsed['reasoning'] = line.split(':', 1)[1].strip() 324 | return parsed if 'selected_results' in parsed and 'reasoning' in parsed else None 325 | 326 | def validate_page_selection_response(self, parsed_response: Dict[str, Union[List[int], str]], num_results: int) -> bool: 327 | if len(parsed_response['selected_results']) != 2: 328 | return False 329 | if any(num < 1 or num > num_results for num in parsed_response['selected_results']): 330 | return False 331 | return True 332 | 333 | def format_results(self, results: List[Dict]) -> str: 334 | formatted_results = [] 335 | for result in results: 336 | formatted_result = f"{result['number']}. 
Title: {result.get('title', 'N/A')}\n"
337 | formatted_result += f" Snippet: {result.get('body', 'N/A')[:200]}...\n"
338 | formatted_result += f" URL: {result.get('href', 'N/A')}\n"
339 | formatted_results.append(formatted_result)
340 | return "\n".join(formatted_results)
341 | 
342 | def scrape_content(self, urls: List[str]) -> Dict[str, str]:
343 | scraped_content = {}
344 | blocked_urls = []
345 | for url in urls:
346 | robots_allowed = can_fetch(url)
347 | if robots_allowed:
348 | content = get_web_content([url])
349 | if content:
350 | scraped_content.update(content)
351 | print(Fore.YELLOW + f"Successfully scraped: {url}" + Style.RESET_ALL)
352 | logger.info(f"Successfully scraped: {url}")
353 | else:
354 | print(Fore.RED + f"Failed to scrape content from {url}" + Style.RESET_ALL)
355 | logger.warning(f"Failed to scrape content from {url}")
356 | else:
357 | blocked_urls.append(url)
358 | print(Fore.RED + f"Warning: Robots.txt disallows scraping of {url}" + Style.RESET_ALL)
359 | logger.warning(f"Robots.txt disallows scraping of {url}")
360 | 
361 | print(Fore.CYAN + f"Scraped content received for {len(scraped_content)} URLs" + Style.RESET_ALL)
362 | logger.info(f"Scraped content received for {len(scraped_content)} URLs")
363 | 
364 | if blocked_urls:
365 | print(Fore.RED + f"Warning: {len(blocked_urls)} URL(s) were not scraped due to robots.txt restrictions." + Style.RESET_ALL)
366 | logger.warning(f"{len(blocked_urls)} URL(s) were not scraped due to robots.txt restrictions: {', '.join(blocked_urls)}")
367 | 
368 | return scraped_content
369 | 
370 | def display_scraped_content(self, scraped_content: Dict[str, str]):
371 | print(f"\n{Fore.CYAN}Scraped Content:{Style.RESET_ALL}")
372 | for url, content in scraped_content.items():
373 | print(f"{Fore.GREEN}URL: {url}{Style.RESET_ALL}")
374 | print(f"Content: {content[:4000]}...\n")
375 | 
376 | def generate_final_answer(self, user_query: str, scraped_content: Dict[str, str]) -> str:
377 | user_query_short = user_query[:200]
378 | prompt = f"""
379 | You are an AI assistant. Provide a comprehensive and detailed answer to the following question using ONLY the information provided in the scraped content. Do not include any references or mention any sources. Answer directly and thoroughly.
380 | 
381 | Question: "{user_query_short}"
382 | 
383 | Scraped Content:
384 | {self.format_scraped_content(scraped_content)}
385 | 
386 | Important Instructions:
387 | 1. Do not use phrases like "Based on the absence of selected results" or similar.
388 | 2. If the scraped content does not contain enough information to answer the question, say so explicitly and explain what information is missing.
389 | 3. Provide as much relevant detail as possible from the scraped content.
390 | 
391 | Answer:
392 | """
393 | max_retries = 3
394 | for attempt in range(max_retries):
395 | with OutputRedirector() as output:
396 | response_text = self.llm.generate(prompt, max_tokens=1024, stop=None)
397 | llm_output = output.getvalue()
398 | logger.info(f"LLM Output in generate_final_answer:\n{llm_output}")
399 | if response_text:
400 | logger.info(f"LLM Response:\n{response_text}")
401 | return response_text
402 | 
403 | error_message = "I apologize, but I couldn't generate a satisfactory answer based on the available information."
404 | logger.warning(f"Failed to generate a response after {max_retries} attempts.
Returning error message.") 405 | return error_message 406 | 407 | def format_scraped_content(self, scraped_content: Dict[str, str]) -> str: 408 | formatted_content = [] 409 | for url, content in scraped_content.items(): 410 | content = re.sub(r'\s+', ' ', content) 411 | formatted_content.append(f"Content from {url}:\n{content}\n") 412 | return "\n".join(formatted_content) 413 | 414 | def synthesize_final_answer(self, user_query: str) -> str: 415 | prompt = f""" 416 | After multiple search attempts, we couldn't find a fully satisfactory answer to the user's question: "{user_query}" 417 | 418 | Please provide the best possible answer you can, acknowledging any limitations or uncertainties. 419 | If appropriate, suggest ways the user might refine their question or where they might find more information. 420 | 421 | Respond in a clear, concise, and informative manner. 422 | """ 423 | try: 424 | with OutputRedirector() as output: 425 | response_text = self.llm.generate(prompt, max_tokens=self.llm_config.get('max_tokens', 1024), stop=self.llm_config.get('stop', None)) 426 | llm_output = output.getvalue() 427 | logger.info(f"LLM Output in synthesize_final_answer:\n{llm_output}") 428 | if response_text: 429 | return response_text.strip() 430 | except Exception as e: 431 | logger.error(f"Error in synthesize_final_answer: {str(e)}", exc_info=True) 432 | return "I apologize, but after multiple attempts, I wasn't able to find a satisfactory answer to your question. Please try rephrasing your question or breaking it down into smaller, more specific queries." 433 | 434 | # End of EnhancedSelfImprovingSearch class 435 | -------------------------------------------------------------------------------- /research_manager.py: -------------------------------------------------------------------------------- 1 | import msvcrt 2 | import os 3 | import sys 4 | import threading 5 | import time 6 | import re 7 | import json 8 | import logging 9 | import curses 10 | import signal 11 | from typing import List, Dict, Set, Optional, Tuple, Union 12 | from dataclasses import dataclass 13 | from queue import Queue 14 | from datetime import datetime 15 | from io import StringIO 16 | from colorama import init, Fore, Style 17 | import select 18 | from threading import Event 19 | from urllib.parse import urlparse 20 | from pathlib import Path 21 | 22 | # Initialize colorama for cross-platform color support 23 | if os.name == 'nt': # Windows-specific initialization 24 | init(convert=True, strip=False, wrap=True) 25 | else: 26 | init() 27 | 28 | # Set up logging 29 | log_directory = 'logs' 30 | if not os.path.exists(log_directory): 31 | os.makedirs(log_directory) 32 | 33 | logger = logging.getLogger(__name__) 34 | logger.setLevel(logging.INFO) 35 | log_file = os.path.join(log_directory, 'research_llm.log') 36 | file_handler = logging.FileHandler(log_file) 37 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 38 | file_handler.setFormatter(formatter) 39 | logger.handlers = [] 40 | logger.addHandler(file_handler) 41 | logger.propagate = False 42 | 43 | # Suppress other loggers 44 | for name in logging.root.manager.loggerDict: 45 | if name != __name__: 46 | logging.getLogger(name).disabled = True 47 | 48 | @dataclass 49 | class ResearchFocus: 50 | """Represents a specific area of research focus""" 51 | area: str 52 | priority: int 53 | source_query: str = "" 54 | timestamp: str = "" 55 | search_queries: List[str] = None 56 | reasoning: Optional[str] = None 57 | 58 | def __post_init__(self): 59 | if not 
self.timestamp: 60 | self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 61 | if self.search_queries is None: 62 | self.search_queries = [] 63 | 64 | @dataclass 65 | class AnalysisResult: 66 | """Contains the complete analysis result""" 67 | original_question: str 68 | focus_areas: List[ResearchFocus] 69 | raw_response: str 70 | timestamp: str = "" 71 | 72 | def __post_init__(self): 73 | if not self.timestamp: 74 | self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 75 | 76 | class StrategicAnalysisParser: 77 | def __init__(self, llm=None): 78 | self.llm = llm 79 | self.logger = logging.getLogger(__name__) 80 | # Simplify patterns to match exactly what we expect 81 | self.patterns = { 82 | 'priority': [ 83 | r"Priority:\s*(\d+)", # Match exactly what's in our prompt 84 | ] 85 | } 86 | 87 | def strategic_analysis(self, original_query: str) -> Optional[AnalysisResult]: 88 | """Generate and process research areas with retries until success""" 89 | max_retries = 3 90 | try: 91 | self.logger.info("Starting strategic analysis...") 92 | prompt = f""" 93 | You must select exactly 5 areas to investigate in order to explore and gather information to answer the research question: 94 | "{original_query}" 95 | 96 | You MUST provide exactly 5 areas numbered 1-5. Each must have a priority, YOU MUST ensure that you only assign one priority per area. 97 | Assign priority based on the likelihood of a focus area being investigated to provide information that directly will allow you to respond to "{original_query}" with 5 being most likely and 1 being least. 98 | Follow this EXACT format without any deviations or additional text: 99 | 100 | 1. [First research topic] 101 | Priority: [number 1-5] 102 | 103 | 2. [Second research topic] 104 | Priority: [number 1-5] 105 | 106 | 3. [Third research topic] 107 | Priority: [number 1-5] 108 | 109 | 4. [Fourth research topic] 110 | Priority: [number 1-5] 111 | 112 | 5. [Fifth research topic] 113 | Priority: [number 1-5] 114 | """ 115 | for attempt in range(max_retries): 116 | response = self.llm.generate(prompt, max_tokens=1000) 117 | focus_areas = self._extract_research_areas(response) 118 | 119 | if focus_areas: # If we got any valid areas 120 | # Sort by priority (highest first) 121 | focus_areas.sort(key=lambda x: x.priority, reverse=True) 122 | 123 | return AnalysisResult( 124 | original_question=original_query, 125 | focus_areas=focus_areas, 126 | raw_response=response, 127 | timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S") 128 | ) 129 | else: 130 | self.logger.warning(f"Attempt {attempt + 1}: No valid areas generated, retrying...") 131 | print(f"\nRetrying research area generation (Attempt {attempt + 1}/{max_retries})...") 132 | 133 | # If all retries failed, try one final time with a stronger prompt 134 | prompt += "\n\nIMPORTANT: You MUST provide exactly 5 research areas with priorities. This is crucial." 
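# Illustrative example (assumed, not from a real run) of a model response
# that _extract_research_areas below can parse, per the format demanded above:
#   1. Current state of solid-state battery manufacturing
#   Priority: 5
#   2. Key material-science challenges
#   Priority: 4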
135 | response = self.llm.generate(prompt, max_tokens=1000) 136 | focus_areas = self._extract_research_areas(response) 137 | 138 | if focus_areas: 139 | focus_areas.sort(key=lambda x: x.priority, reverse=True) 140 | return AnalysisResult( 141 | original_question=original_query, 142 | focus_areas=focus_areas, 143 | raw_response=response, 144 | timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S") 145 | ) 146 | 147 | self.logger.error("Failed to generate any valid research areas after all attempts") 148 | return None 149 | 150 | except Exception as e: 151 | self.logger.error(f"Error in strategic analysis: {str(e)}") 152 | return None 153 | 154 | def _extract_research_areas(self, text: str) -> List[ResearchFocus]: 155 | """Extract research areas with enhanced parsing to handle priorities in various formats.""" 156 | areas = [] 157 | lines = text.strip().split('\n') 158 | 159 | current_area = None 160 | current_priority = None 161 | 162 | for i in range(len(lines)): 163 | line = lines[i].strip() 164 | if not line: 165 | continue 166 | 167 | # Check for numbered items (e.g., '1. Area Name') 168 | number_match = re.match(r'^(\d+)\.\s*(.*)', line) 169 | if number_match: 170 | # If we have a previous area, add it to our list 171 | if current_area is not None: 172 | areas.append(ResearchFocus( 173 | area=current_area.strip(' -:'), 174 | priority=current_priority or 3, 175 | )) 176 | # Start a new area 177 | area_line = number_match.group(2) 178 | 179 | # Search for 'priority' followed by a number, anywhere in the area_line 180 | priority_inline_match = re.search( 181 | r'(?i)\bpriority\b\s*(?:[:=]?\s*)?(\d+)', area_line) 182 | if priority_inline_match: 183 | # Extract and set the priority 184 | try: 185 | current_priority = int(priority_inline_match.group(1)) 186 | current_priority = max(1, min(5, current_priority)) 187 | except ValueError: 188 | current_priority = 3 # Default priority if parsing fails 189 | # Remove the 'priority' portion from area_line 190 | area_line = area_line[:priority_inline_match.start()] + area_line[priority_inline_match.end():] 191 | area_line = area_line.strip(' -:') 192 | else: 193 | current_priority = None # Priority might be on the next line 194 | 195 | current_area = area_line.strip() 196 | 197 | elif re.match(r'(?i)^priority\s*(?:[:=]?\s*)?(\d+)', line): 198 | # Extract priority from the line following the area 199 | try: 200 | priority_match = re.match(r'(?i)^priority\s*(?:[:=]?\s*)?(\d+)', line) 201 | current_priority = int(priority_match.group(1)) 202 | current_priority = max(1, min(5, current_priority)) 203 | except (ValueError, IndexError): 204 | current_priority = 3 # Default priority if parsing fails 205 | 206 | # Check if this is the last line or the next line is a new area 207 | next_line_is_new_area = (i + 1 < len(lines)) and re.match(r'^\d+\.', lines[i + 1].strip()) 208 | if next_line_is_new_area or i + 1 == len(lines): 209 | if current_area is not None: 210 | # Append the current area and priority to the list 211 | areas.append(ResearchFocus( 212 | area=current_area.strip(' -:'), 213 | priority=current_priority or 3, 214 | )) 215 | current_area = None 216 | current_priority = None 217 | 218 | return areas 219 | 220 | def _clean_text(self, text: str) -> str: 221 | """Clean and normalize text""" 222 | text = re.sub(r'\s+', ' ', text) 223 | text = re.sub(r'(\d+\))', r'\1.', text) 224 | text = re.sub(r'(?i)priority:', 'P:', text) 225 | return text.strip() 226 | 227 | def _add_area(self, areas: List[ResearchFocus], area: str, priority: Optional[int]): 228 | 
"""Add area with basic validation""" 229 | if not area or len(area.split()) < 3: # Basic validation 230 | return 231 | 232 | areas.append(ResearchFocus( 233 | area=area, 234 | priority=priority or 3, 235 | timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"), 236 | search_queries=[] 237 | )) 238 | 239 | def _normalize_focus_areas(self, areas: List[ResearchFocus]) -> List[ResearchFocus]: 240 | """Normalize and prepare final list of areas""" 241 | if not areas: 242 | return [] 243 | 244 | # Sort by priority 245 | areas.sort(key=lambda x: x.priority, reverse=True) 246 | 247 | # Ensure priorities are properly spread 248 | for i, area in enumerate(areas): 249 | area.priority = max(1, min(5, area.priority)) 250 | 251 | return areas[:5] 252 | 253 | def format_analysis_result(self, result: AnalysisResult) -> str: 254 | """Format the results for display""" 255 | if not result: 256 | return "No valid analysis result generated." 257 | 258 | formatted = [ 259 | f"\nResearch Areas for: {result.original_question}\n" 260 | ] 261 | 262 | for i, focus in enumerate(result.focus_areas, 1): 263 | formatted.extend([ 264 | f"\n{i}. {focus.area}", 265 | f" Priority: {focus.priority}" 266 | ]) 267 | 268 | return "\n".join(formatted) 269 | 270 | class OutputRedirector: 271 | """Redirects stdout and stderr to a string buffer""" 272 | def __init__(self, stream=None): 273 | self.stream = stream or StringIO() 274 | self.original_stdout = sys.stdout 275 | self.original_stderr = sys.stderr 276 | 277 | def __enter__(self): 278 | sys.stdout = self.stream 279 | sys.stderr = self.stream 280 | return self.stream 281 | 282 | def __exit__(self, exc_type, exc_val, exc_tb): 283 | sys.stdout = self.original_stdout 284 | sys.stderr = self.original_stderr 285 | 286 | class TerminalUI: 287 | """Manages terminal display with fixed input area at bottom""" 288 | def __init__(self): 289 | self.stdscr = None 290 | self.input_win = None 291 | self.output_win = None 292 | self.status_win = None 293 | self.max_y = 0 294 | self.max_x = 0 295 | self.input_buffer = "" 296 | self.is_setup = False 297 | self.old_terminal_settings = None 298 | self.should_terminate = Event() 299 | self.shutdown_event = Event() 300 | self.research_thread = None 301 | self.last_display_height = 0 # Track display height for corruption fix 302 | 303 | def setup(self): 304 | """Initialize the terminal UI""" 305 | if self.is_setup: 306 | return 307 | 308 | # Save terminal settings 309 | if os.name != 'nt': # Unix-like systems 310 | self.old_terminal_settings = termios.tcgetattr(sys.stdin.fileno()) 311 | 312 | self.stdscr = curses.initscr() 313 | curses.start_color() 314 | curses.noecho() 315 | curses.cbreak() 316 | self.stdscr.keypad(True) 317 | 318 | # Get terminal dimensions 319 | self.max_y, self.max_x = self.stdscr.getmaxyx() 320 | 321 | # Create windows 322 | self.output_win = curses.newwin(self.max_y - 4, self.max_x, 0, 0) 323 | self.status_win = curses.newwin(1, self.max_x, self.max_y - 4, 0) 324 | self.input_win = curses.newwin(3, self.max_x, self.max_y - 3, 0) 325 | 326 | # Setup colors 327 | curses.init_pair(1, curses.COLOR_GREEN, curses.COLOR_BLACK) 328 | curses.init_pair(2, curses.COLOR_CYAN, curses.COLOR_BLACK) 329 | curses.init_pair(3, curses.COLOR_YELLOW, curses.COLOR_BLACK) 330 | 331 | # Enable scrolling 332 | self.output_win.scrollok(True) 333 | self.output_win.idlok(True) 334 | self.input_win.scrollok(True) 335 | 336 | self.is_setup = True 337 | self._refresh_input_prompt() 338 | 339 | def cleanup(self): 340 | """Public cleanup method with enhanced 
terminal restoration""" 341 | if not self.is_setup: 342 | return 343 | try: 344 | # Ensure all windows are properly closed 345 | for win in [self.input_win, self.output_win, self.status_win]: 346 | if win: 347 | win.clear() 348 | win.refresh() 349 | 350 | # Restore terminal state 351 | if self.stdscr: 352 | self.stdscr.keypad(False) 353 | curses.nocbreak() 354 | curses.echo() 355 | curses.endwin() 356 | 357 | # Restore original terminal settings 358 | if self.old_terminal_settings and os.name != 'nt': 359 | import termios 360 | termios.tcsetattr( 361 | sys.stdin.fileno(), 362 | termios.TCSADRAIN, 363 | self.old_terminal_settings 364 | ) 365 | except Exception as e: 366 | logger.error(f"Error during terminal cleanup: {str(e)}") 367 | finally: 368 | self.is_setup = False 369 | self.stdscr = None 370 | self.input_win = None 371 | self.output_win = None 372 | self.status_win = None 373 | 374 | def _cleanup(self): 375 | """Enhanced resource cleanup with better process handling""" 376 | self.should_terminate.set() 377 | 378 | # Handle research thread with improved termination 379 | if self.research_thread and self.research_thread.is_alive(): 380 | try: 381 | self.research_thread.join(timeout=1.0) 382 | if self.research_thread.is_alive(): 383 | import ctypes 384 | ctypes.pythonapi.PyThreadState_SetAsyncExc( 385 | ctypes.c_long(self.research_thread.ident), 386 | ctypes.py_object(SystemExit)) 387 | time.sleep(0.1) # Give thread time to exit 388 | if self.research_thread.is_alive(): # Double-check 389 | ctypes.pythonapi.PyThreadState_SetAsyncExc( 390 | ctypes.c_long(self.research_thread.ident), 391 | 0) # Reset exception 392 | except Exception as e: 393 | logger.error(f"Error terminating research thread: {str(e)}") 394 | 395 | # Clean up LLM with improved error handling 396 | if hasattr(self, 'llm') and hasattr(self.llm, '_cleanup'): 397 | try: 398 | self.llm.cleanup() 399 | except Exception as e: 400 | logger.error(f"Error cleaning up LLM: {str(e)}") 401 | 402 | # Ensure terminal is restored 403 | try: 404 | curses.endwin() 405 | except: 406 | pass 407 | 408 | # Final cleanup of UI 409 | self.cleanup() 410 | 411 | def _refresh_input_prompt(self, prompt="Enter command: "): 412 | """Refresh the fixed input prompt at bottom with display fix""" 413 | if not self.is_setup: 414 | return 415 | 416 | try: 417 | # Clear the entire input window first 418 | self.input_win.clear() 419 | 420 | # Calculate proper cursor position 421 | cursor_y = 0 422 | cursor_x = len(prompt) + len(self.input_buffer) 423 | 424 | # Add the prompt and buffer 425 | self.input_win.addstr(0, 0, f"{prompt}{self.input_buffer}", curses.color_pair(1)) 426 | 427 | # Position cursor correctly 428 | try: 429 | self.input_win.move(cursor_y, cursor_x) 430 | except curses.error: 431 | pass # Ignore if cursor would be off-screen 432 | 433 | self.input_win.refresh() 434 | except curses.error: 435 | pass 436 | 437 | def update_output(self, text: str): 438 | """Update output window with display corruption fix""" 439 | if not self.is_setup: 440 | return 441 | 442 | try: 443 | # Clean ANSI escape codes 444 | clean_text = re.sub(r'\x1b\[[0-9;]*[mK]', '', text) 445 | 446 | # Store current position 447 | current_y, _ = self.output_win.getyx() 448 | 449 | # Clear any potential corruption 450 | if current_y > self.last_display_height: 451 | self.output_win.clear() 452 | 453 | self.output_win.addstr(clean_text + "\n", curses.color_pair(2)) 454 | new_y, _ = self.output_win.getyx() 455 | self.last_display_height = new_y 456 | 457 | self.output_win.refresh() 
458 | self._refresh_input_prompt() 459 | except curses.error: 460 | pass 461 | 462 | def update_status(self, text: str): 463 | """Update the status line above input area""" 464 | if not self.is_setup: 465 | return 466 | 467 | try: 468 | self.status_win.clear() 469 | self.status_win.addstr(0, 0, text, curses.color_pair(3)) 470 | self.status_win.refresh() 471 | self._refresh_input_prompt() # Ensure prompt is refreshed after status update 472 | except curses.error: 473 | pass 474 | 475 | def get_input(self, prompt: Optional[str] = None) -> Optional[str]: 476 | """Windows-compatible input handling""" 477 | try: 478 | if prompt: 479 | self.update_status(prompt) 480 | if not self.is_setup: 481 | self.setup() 482 | self.input_buffer = "" 483 | self._refresh_input_prompt() 484 | 485 | while True: 486 | if self.should_terminate.is_set(): 487 | return None 488 | 489 | if msvcrt.kbhit(): 490 | ch = msvcrt.getch() 491 | 492 | if ch == b'\x04': # Ctrl+D 493 | result = self.input_buffer.strip() 494 | self.input_buffer = "" 495 | if not result: 496 | self.cleanup() 497 | return "@quit" 498 | return result 499 | 500 | elif ch == b'\x03': # Ctrl+C 501 | self.should_terminate.set() 502 | self.cleanup() 503 | return "@quit" 504 | 505 | elif ch == b'\r': # Enter 506 | result = self.input_buffer.strip() 507 | if result: 508 | self.input_buffer = "" 509 | return result 510 | continue 511 | 512 | elif ch == b'\x08': # Backspace 513 | if self.input_buffer: 514 | self.input_buffer = self.input_buffer[:-1] 515 | self._refresh_input_prompt() 516 | 517 | elif 32 <= ord(ch[0]) <= 126: # Printable characters 518 | self.input_buffer += ch.decode('utf-8') 519 | self._refresh_input_prompt() 520 | 521 | except Exception as e: 522 | logger.error(f"Error in get_input: {str(e)}") 523 | self.should_terminate.set() 524 | self.cleanup() 525 | return "@quit" 526 | 527 | def force_exit(self): 528 | """Force immediate exit with enhanced cleanup""" 529 | try: 530 | self.should_terminate.set() 531 | self.shutdown_event.set() 532 | self._cleanup() # Call private cleanup first 533 | self.cleanup() # Then public cleanup 534 | curses.endwin() # Final attempt to restore terminal 535 | except: 536 | pass 537 | finally: 538 | os._exit(0) # Ensure exit 539 | 540 | class NonBlockingInput: 541 | """Handles non-blocking keyboard input for Windows systems.""" 542 | def __init__(self): 543 | """Initialize NonBlockingInput (no special setup required on Windows).""" 544 | if os.name != 'nt': 545 | raise EnvironmentError("NonBlockingInput is designed for Windows only.") 546 | 547 | def __enter__(self): 548 | """Enter the context (no-op for Windows).""" 549 | return self 550 | 551 | def __exit__(self, type, value, traceback): 552 | """Exit the context (no-op for Windows).""" 553 | pass 554 | 555 | def check_input(self, timeout=0.1): 556 | """ 557 | Check for keyboard input without blocking. 558 | 559 | Args: 560 | timeout (float): Time in seconds to wait before returning if no input. 561 | 562 | Returns: 563 | str or None: The input character as a string, or None if no input is detected. 
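        Example (illustrative usage):
            with NonBlockingInput() as nbi:
                key = nbi.check_input(timeout=0.5)  # e.g. 'q', or None if nothing was typed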
564 | """ 565 | start_time = time.time() 566 | while True: 567 | if msvcrt.kbhit(): 568 | try: 569 | return msvcrt.getch().decode('utf-8') 570 | except UnicodeDecodeError: 571 | # Handle non-ASCII characters gracefully 572 | return None 573 | 574 | if time.time() - start_time > timeout: 575 | return None 576 | 577 | class ResearchManager: 578 | """Manages the research process including analysis, search, and documentation""" 579 | def __init__(self, llm_wrapper, parser, search_engine, max_searches_per_cycle: int = 5): 580 | self.llm = llm_wrapper 581 | self.parser = parser 582 | self.search_engine = search_engine 583 | self.max_searches = max_searches_per_cycle 584 | self.should_terminate = threading.Event() 585 | self.shutdown_event = Event() 586 | self.research_started = threading.Event() 587 | self.research_thread = None 588 | self.thinking = False 589 | self.stop_words = { 590 | 'the', 'be', 'to', 'of', 'and', 'a', 'in', 'that', 'have', 'i', 591 | 'it', 'for', 'not', 'on', 'with', 'he', 'as', 'you', 'do', 'at' 592 | } 593 | 594 | # State tracking 595 | self.searched_urls: Set[str] = set() 596 | self.current_focus: Optional[ResearchFocus] = None 597 | self.original_query: str = "" 598 | self.focus_areas: List[ResearchFocus] = [] 599 | self.is_running = False 600 | 601 | # New conversation mode attributes 602 | self.research_complete = False 603 | self.research_summary = "" 604 | self.conversation_active = False 605 | self.research_content = "" 606 | 607 | # Initialize document paths 608 | self.document_path = None 609 | self.session_files = [] 610 | 611 | # Initialize UI and parser 612 | self.ui = TerminalUI() 613 | self.strategic_parser = StrategicAnalysisParser(llm=self.llm) 614 | 615 | # Initialize new flags for pausing and assessment 616 | self.research_paused = False 617 | self.awaiting_user_decision = False 618 | 619 | # Setup signal handlers 620 | signal.signal(signal.SIGINT, self._signal_handler) 621 | signal.signal(signal.SIGTERM, self._signal_handler) 622 | 623 | def _signal_handler(self, signum, frame): 624 | """Handle interrupt signals""" 625 | self.shutdown_event.set() 626 | self.should_terminate.set() 627 | self._cleanup() 628 | 629 | def print_thinking(self): 630 | """Display thinking indicator to user""" 631 | self.ui.update_output("🧠 Thinking...") 632 | 633 | @staticmethod 634 | def get_initial_input() -> str: 635 | """Get the initial research query from user""" 636 | print(f"{Fore.GREEN}📝 Enter your message (Press CTRL+D to submit):{Style.RESET_ALL}") 637 | lines = [] 638 | try: 639 | while True: 640 | line = input() 641 | if line: # Only add non-empty lines 642 | lines.append(line) 643 | if not line: # Empty line (just Enter pressed) 644 | break 645 | except EOFError: # Ctrl+D pressed 646 | pass 647 | except KeyboardInterrupt: # Ctrl+C pressed 648 | print("\nOperation cancelled") 649 | sys.exit(0) 650 | 651 | return " ".join(lines).strip() 652 | def get_multiline_input(self) -> str: 653 | """Get multiline input with proper command handling""" 654 | buffer = [] 655 | current_line = [] 656 | 657 | try: 658 | while True: 659 | if msvcrt.kbhit(): 660 | char = msvcrt.getch() 661 | 662 | # Handle CTRL+Z detection 663 | if char in [b'\x1a']: # CTRL+Z (Windows) 664 | sys.stdout.write('\n') 665 | if current_line: 666 | buffer.append(''.join(current_line)) 667 | return ' '.join(buffer).strip() 668 | 669 | # Handle single-character commands immediately 670 | if not buffer and not current_line and char in [b's', b'f', b'p', b'q']: 671 | command = char.decode('utf-8').lower() 672 | 
sys.stdout.write(command + '\n') 673 | return command 674 | 675 | # Handle special characters 676 | elif char == b'\r': # Enter 677 | sys.stdout.write('\n') 678 | if current_line: 679 | buffer.append(''.join(current_line)) 680 | current_line = [] 681 | 682 | elif char == b'\x08': # Backspace 683 | if current_line: 684 | current_line.pop() 685 | sys.stdout.write('\b \b') 686 | 687 | elif char == b'\x03': # CTRL+C 688 | sys.stdout.write('\n') 689 | return 'q' 690 | 691 | # Normal character input 692 | elif 32 <= ord(char) <= 126: # Printable characters 693 | current_line.append(char.decode('utf-8')) 694 | sys.stdout.write(char.decode('utf-8')) 695 | 696 | sys.stdout.flush() 697 | 698 | except Exception as e: 699 | logger.error(f"Error in multiline input: {str(e)}") 700 | return '' 701 | 702 | def formulate_search_queries(self, focus_area: ResearchFocus) -> List[str]: 703 | """Generate search queries for a focus area""" 704 | try: 705 | self.print_thinking() 706 | 707 | prompt = f""" 708 | In order to research this query/topic: 709 | 710 | Context: {self.original_query} 711 | 712 | Base a search query to investigate the following research focus, which is related to the original query/topic: 713 | 714 | Area: {focus_area.area} 715 | 716 | Create a search query that will yield specific, search results thare are directly relevant to your focus area. 717 | Format your response EXACTLY like this: 718 | 719 | Search query: [Your 2-5 word query] 720 | Time range: [d/w/m/y/none] 721 | 722 | Do not provide any additional information or explanation, note that the time range allows you to see results within a time range (d is within the last day, w is within the last week, m is within the last month, y is within the last year, and none is results from anytime, only select one, using only the corresponding letter for whichever of these options you select as indicated in the response format) use your judgement as many searches will not require a time range and some may depending on what the research focus is. 723 | """ 724 | response_text = self.llm.generate(prompt, max_tokens=50, stop=None) 725 | query, time_range = self.parse_query_response(response_text) 726 | 727 | if not query: 728 | self.ui.update_output(f"{Fore.RED}Error: Empty search query. 
Using focus area as query...{Style.RESET_ALL}") 729 | return [focus_area.area] 730 | 731 | self.ui.update_output(f"{Fore.YELLOW}Original focus: {focus_area.area}{Style.RESET_ALL}") 732 | self.ui.update_output(f"{Fore.YELLOW}Formulated query: {query}{Style.RESET_ALL}") 733 | self.ui.update_output(f"{Fore.YELLOW}Time range: {time_range}{Style.RESET_ALL}") 734 | 735 | return [query] 736 | 737 | except Exception as e: 738 | logger.error(f"Error formulating query: {str(e)}") 739 | return [focus_area.area] 740 | 741 | def parse_search_query(self, query_response: str) -> Dict[str, str]: 742 | """Parse search query formulation response with improved time range detection""" 743 | try: 744 | lines = query_response.strip().split('\n') 745 | result = { 746 | 'query': '', 747 | 'time_range': 'none' 748 | } 749 | 750 | # First try to find standard format 751 | for line in lines: 752 | if ':' in line: 753 | key, value = line.split(':', 1) 754 | key = key.strip().lower() 755 | value = value.strip() 756 | 757 | if 'query' in key: 758 | result['query'] = self._clean_query(value) 759 | elif ('time' in key or 'range' in key) and value.strip().lower() in ['d', 'w', 'm', 'y', 'none']: 760 | result['time_range'] = value.strip().lower() 761 | 762 | # If no time range found, look for individual characters 763 | if result['time_range'] == 'none': 764 | # Get all text except the query itself 765 | full_text = query_response.lower() 766 | if result['query']: 767 | full_text = full_text.replace(result['query'].lower(), '') 768 | 769 | # Look for isolated d, w, m, or y characters 770 | time_chars = set() 771 | for char in ['d', 'w', 'm', 'y']: 772 | # Check if char exists by itself (not part of another word) 773 | matches = re.finditer(r'\b' + char + r'\b', full_text) 774 | for match in matches: 775 | # Verify it's not part of a word 776 | start, end = match.span() 777 | if (start == 0 or not full_text[start-1].isalpha()) and \ 778 | (end == len(full_text) or not full_text[end].isalpha()): 779 | time_chars.add(char) 780 | 781 | # If exactly one time char found, use it 782 | if len(time_chars) == 1: 783 | result['time_range'] = time_chars.pop() 784 | 785 | return result 786 | except Exception as e: 787 | logger.error(f"Error parsing search query: {str(e)}") 788 | return {'query': '', 'time_range': 'none'} 789 | 790 | def _cleanup(self): 791 | """Enhanced cleanup to handle conversation mode""" 792 | self.conversation_active = False 793 | self.should_terminate.set() 794 | 795 | if self.research_thread and self.research_thread.is_alive(): 796 | try: 797 | self.research_thread.join(timeout=1.0) 798 | if self.research_thread.is_alive(): 799 | import ctypes 800 | ctypes.pythonapi.PyThreadState_SetAsyncExc( 801 | ctypes.c_long(self.research_thread.ident), 802 | ctypes.py_object(SystemExit) 803 | ) 804 | except Exception as e: 805 | logger.error(f"Error terminating research thread: {str(e)}") 806 | 807 | if hasattr(self.llm, 'cleanup'): 808 | try: 809 | self.llm.cleanup() 810 | except Exception as e: 811 | logger.error(f"Error cleaning up LLM: {str(e)}") 812 | 813 | if hasattr(self.ui, 'cleanup'): 814 | self.ui.cleanup() 815 | 816 | def _initialize_document(self): 817 | """Initialize research session document""" 818 | try: 819 | # Get all existing research session files 820 | self.session_files = [] 821 | for file in os.listdir(): 822 | if file.startswith("research_session_") and file.endswith(".txt"): 823 | try: 824 | num = int(file.split("_")[2].split(".")[0]) 825 | self.session_files.append(num) 826 | except ValueError: 827 
| continue
828 | 
829 | # Determine next session number
830 | next_session = 1 if not self.session_files else max(self.session_files) + 1
831 | self.document_path = f"research_session_{next_session}.txt"
832 | 
833 | # Initialize the new document
834 | with open(self.document_path, 'w', encoding='utf-8') as f:
835 | f.write(f"Research Session {next_session}\n")
836 | f.write(f"Topic: {self.original_query}\n")
837 | f.write(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
838 | f.write("="*80 + "\n\n")
839 | f.flush()
840 | 
841 | except Exception as e:
842 | logger.error(f"Error initializing document: {str(e)}")
843 | self.document_path = "research_findings.txt"
844 | with open(self.document_path, 'w', encoding='utf-8') as f:
845 | f.write("Research Findings:\n\n")
846 | f.flush()
847 | 
848 | def add_to_document(self, content: str, source_url: str, focus_area: str):
849 | """Add research findings to current session document"""
850 | try:
851 | with open(self.document_path, 'a', encoding='utf-8') as f:
852 | if source_url not in self.searched_urls:
853 | f.write(f"\n{'='*80}\n")
854 | f.write(f"Research Focus: {focus_area}\n")
855 | f.write(f"Source: {source_url}\n")
856 | f.write(f"Content:\n{content}\n")
857 | f.write(f"{'='*80}\n")
858 | f.flush()
859 | self.searched_urls.add(source_url)
860 | self.ui.update_output(f"Added content from: {source_url}")
861 | except Exception as e:
862 | logger.error(f"Error adding to document: {str(e)}")
863 | self.ui.update_output(f"Error saving content: {str(e)}")
864 | 
865 | def get_multiline_conversation_input(self) -> str:
866 | """Windows-compatible multiline input"""
867 | buffer = []
868 | current_line = []
869 | 
870 | try:
871 | while True:
872 | if msvcrt.kbhit():
873 | char = msvcrt.getch()
874 | 
875 | # CTRL+D or CTRL+Z detection
876 | if char in [b'\x04', b'\x1a']: # CTRL+D (Unix) and CTRL+Z (Windows)
877 | sys.stdout.write('\n')
878 | if current_line:
879 | buffer.append(''.join(current_line))
880 | return ' '.join(buffer).strip()
881 | 
882 | # Handle special characters
883 | elif char == b'\r': # Enter
884 | sys.stdout.write('\n')
885 | buffer.append(''.join(current_line))
886 | current_line = []
887 | 
888 | elif char == b'\x08': # Backspace
889 | if current_line:
890 | current_line.pop()
891 | sys.stdout.write('\b \b')
892 | 
893 | elif char == b'\x03': # CTRL+C
894 | sys.stdout.write('\n')
895 | return 'quit'
896 | 
897 | # 's', 'f', 'p' and 'q' are ordinary letters in conversation mode and must
898 | elif char == b's': # stay in the current line; pushing them straight into
899 | current_line.append('s') # 'buffer' would split any word containing these
900 | sys.stdout.write('s') # letters (a lone 'q' still exits via the caller)
901 | elif char == b'f':
902 | current_line.append('f')
903 | sys.stdout.write('f')
904 | elif char == b'p':
905 | current_line.append('p')
906 | sys.stdout.write('p')
907 | elif char == b'q':
908 | current_line.append('q')
909 | sys.stdout.write('q')
910 | 
911 | # Normal character input
912 | elif 32 <= ord(char) <= 126: # Printable characters
913 | current_line.append(char.decode('utf-8'))
914 | sys.stdout.write(char.decode('utf-8'))
915 | 
916 | sys.stdout.flush()
917 | 
918 | except Exception as e:
919 | logger.error(f"Error in multiline input: {str(e)}")
920 | return 'quit'
921 | def _process_search_results(self, results: Dict[str, str], focus_area: str):
922 | """Process and store search results"""
923 | if not results:
924 | return
925 | 
926 | for url, content in results.items():
927 | if 
url not in self.searched_urls: 928 | self.add_to_document(content, url, focus_area) 929 | 930 | def _research_loop(self): 931 | """Main research loop with comprehensive functionality""" 932 | self.is_running = True 933 | try: 934 | logging.debug("Research loop started.") 935 | self.research_started.set() 936 | 937 | while not self.should_terminate.is_set() and not self.shutdown_event.is_set(): 938 | # Check if research is paused 939 | if self.research_paused: 940 | logging.debug("Research is paused.") 941 | time.sleep(1) 942 | continue 943 | 944 | self.ui.update_output("\nAnalyzing research progress...") 945 | logging.debug("Analyzing research progress.") 946 | 947 | # Generate focus areas 948 | self.ui.update_output("\nGenerating research focus areas...") 949 | logging.debug("Generating research focus areas.") 950 | analysis_result = self.strategic_parser.strategic_analysis(self.original_query) 951 | 952 | if not analysis_result: 953 | self.ui.update_output("\nFailed to generate analysis result. Retrying...") 954 | logging.warning("Failed to generate analysis result. Retrying...") 955 | continue 956 | 957 | focus_areas = analysis_result.focus_areas 958 | if not focus_areas: 959 | self.ui.update_output("\nNo valid focus areas generated. Retrying...") 960 | logging.warning("No valid focus areas generated. Retrying...") 961 | continue 962 | 963 | self.ui.update_output(f"\nGenerated {len(focus_areas)} research areas:") 964 | logging.debug(f"Generated {len(focus_areas)} research areas.") 965 | for i, focus in enumerate(focus_areas, 1): 966 | self.ui.update_output(f"\nArea {i}: {focus.area}") 967 | self.ui.update_output(f"Priority: {focus.priority}") 968 | logging.debug(f"Area {i}: {focus.area}, Priority: {focus.priority}") 969 | 970 | # Process each focus area in priority order 971 | for focus_area in focus_areas: 972 | if self.should_terminate.is_set(): 973 | logging.debug("Termination signal received. Exiting focus area processing.") 974 | break 975 | 976 | # Check if research is paused 977 | while self.research_paused and not self.should_terminate.is_set(): 978 | logging.debug("Research is paused during focus area processing.") 979 | time.sleep(1) 980 | 981 | if self.should_terminate.is_set(): 982 | logging.debug("Termination signal received. Exiting focus area processing.") 983 | break 984 | 985 | self.current_focus = focus_area 986 | self.ui.update_output(f"\nInvestigating: {focus_area.area}") 987 | logging.debug(f"Investigating focus area: {focus_area.area}") 988 | 989 | queries = self.formulate_search_queries(focus_area) 990 | if not queries: 991 | logging.warning("No queries formulated for focus area.") 992 | continue 993 | 994 | for query in queries: 995 | if self.should_terminate.is_set(): 996 | logging.debug("Termination signal received. Exiting query processing.") 997 | break 998 | 999 | # Check if research is paused 1000 | while self.research_paused and not self.should_terminate.is_set(): 1001 | logging.debug("Research is paused during query processing.") 1002 | time.sleep(1) 1003 | 1004 | if self.should_terminate.is_set(): 1005 | logging.debug("Termination signal received. 
Exiting query processing.") 1006 | break 1007 | 1008 | try: 1009 | self.ui.update_output(f"\nSearching: {query}") 1010 | logging.debug(f"Performing search for query: {query}") 1011 | results = self.search_engine.perform_search(query, time_range='none') 1012 | 1013 | if results: 1014 | selected_urls = self.search_engine.select_relevant_pages(results, query) 1015 | 1016 | if selected_urls: 1017 | self.ui.update_output("\n⚙️ Scraping selected pages...") 1018 | logging.debug("Scraping selected pages.") 1019 | scraped_content = self.search_engine.scrape_content(selected_urls) 1020 | if scraped_content: 1021 | for url, content in scraped_content.items(): 1022 | if url not in self.searched_urls: 1023 | self.add_to_document(content, url, focus_area.area) 1024 | 1025 | except Exception as e: 1026 | logger.error(f"Error in search: {str(e)}") 1027 | self.ui.update_output(f"Error during search: {str(e)}") 1028 | 1029 | if self.check_document_size(): 1030 | self.ui.update_output("\nDocument size limit reached. Finalizing research.") 1031 | logging.info("Document size limit reached. Finalizing research.") 1032 | return 1033 | 1034 | # After processing all areas, cycle back to generate new ones 1035 | self.ui.update_output("\nAll current focus areas investigated. Generating new areas...") 1036 | logging.debug("All current focus areas investigated. Generating new areas.") 1037 | 1038 | except Exception as e: 1039 | logger.error(f"Error in research loop: {str(e)}") 1040 | self.ui.update_output(f"Error in research process: {str(e)}") 1041 | finally: 1042 | self.is_running = False 1043 | logging.debug("Research loop ended.") 1044 | 1045 | def start_research(self, topic: str): 1046 | """Start research with new session document""" 1047 | try: 1048 | submit_key = "CTRL+Z" if os.name == 'nt' else "CTRL+D" 1049 | 1050 | logging.debug("Setting up UI and initializing document.") 1051 | self.ui.setup() 1052 | self.original_query = topic 1053 | self._initialize_document() 1054 | 1055 | self.ui.update_output(f"\nStarting research on: {topic}") 1056 | self.ui.update_output(f"Session document: {self.document_path}") 1057 | 1058 | # Clear previous state 1059 | self.should_terminate.clear() 1060 | self.research_started.clear() 1061 | self.research_paused = False 1062 | self.awaiting_user_decision = False 1063 | self.is_running = True # Set running state explicitly 1064 | 1065 | # Start research thread 1066 | logging.debug("Starting research thread.") 1067 | self.research_thread = threading.Thread(target=self._research_loop, daemon=True) 1068 | self.research_thread.start() 1069 | 1070 | # Wait for research to actually start 1071 | if not self.research_started.wait(timeout=10): 1072 | self.ui.update_output("Error: Research failed to start within timeout period") 1073 | logging.error("Research failed to start within timeout period.") 1074 | self.should_terminate.set() 1075 | return 1076 | 1077 | # Enter command loop 1078 | while self.is_active(): # Use is_active() instead of should_terminate 1079 | try: 1080 | print(f"\n{Fore.GREEN}Enter command (s/f/p/q) and press {submit_key} to submit:{Style.RESET_ALL}") 1081 | command = self.get_multiline_input().strip().lower() # Use self.get_multiline_input() 1082 | 1083 | if command: 1084 | self._handle_command(command) 1085 | 1086 | if self.should_terminate.is_set(): 1087 | break 1088 | 1089 | except KeyboardInterrupt: 1090 | self.ui.update_output("\nOperation interrupted. 
Submit 'q' to quit.") 1091 | continue 1092 | 1093 | except Exception as e: 1094 | logging.error(f"Error in research process: {str(e)}") 1095 | self.ui.update_output(f"Error in research process: {str(e)}") 1096 | finally: 1097 | logging.debug("Cleaning up resources.") 1098 | self._cleanup() 1099 | 1100 | def check_document_size(self) -> bool: 1101 | """Check if document size is approaching context limit""" 1102 | try: 1103 | with open(self.document_path, 'r', encoding='utf-8') as f: 1104 | content = f.read() 1105 | estimated_tokens = len(content.split()) * 1.3 1106 | max_tokens = self.llm.llm_config.get('n_ctx', 2048) 1107 | current_ratio = estimated_tokens / max_tokens 1108 | 1109 | if current_ratio > 0.8: 1110 | logger.warning(f"Document size at {current_ratio*100:.1f}% of context limit") 1111 | self.ui.update_output(f"Warning: Document size at {current_ratio*100:.1f}% of context limit") 1112 | 1113 | return current_ratio > 0.9 1114 | except Exception as e: 1115 | logger.error(f"Error checking document size: {str(e)}") 1116 | return True 1117 | 1118 | def _handle_command(self, cmd: str): 1119 | """Handle user commands during research""" 1120 | try: 1121 | if cmd.lower() == 's': 1122 | progress = self.get_progress() 1123 | self.ui.update_output("\n" + progress) 1124 | return # Don't terminate after showing status 1125 | 1126 | elif cmd.lower() == 'f': 1127 | if self.current_focus: 1128 | self.ui.update_output("\nCurrent Focus:") 1129 | self.ui.update_output(f"Area: {self.current_focus.area}") 1130 | self.ui.update_output(f"Priority: {self.current_focus.priority}") 1131 | else: 1132 | self.ui.update_output("\nNo current focus area") 1133 | return # Don't terminate after showing focus 1134 | 1135 | elif cmd.lower() == 'p': 1136 | self.pause_and_assess() 1137 | return # Don't terminate after pausing 1138 | 1139 | elif cmd.lower() == 'q': 1140 | self.ui.update_output("\nInitiating research termination...") 1141 | self.should_terminate.set() 1142 | self.ui.update_output("\nGenerating research summary... 
please wait...") 1143 | summary = self.terminate_research() 1144 | self.ui.update_output("\nFinal Research Summary:") 1145 | self.ui.update_output(summary) 1146 | 1147 | except Exception as e: 1148 | logger.error(f"Error handling command: {str(e)}") 1149 | self.ui.update_output(f"Error processing command: {str(e)}") 1150 | 1151 | def show_progress_indicator(self, message="Generating summary, please wait..."): 1152 | """Show a rotating progress indicator until the summary is ready.""" 1153 | symbols = ['|', '/', '-', '\\'] 1154 | idx = 0 1155 | self.summary_ready = False # Track whether the summary is complete 1156 | 1157 | try: 1158 | while not self.summary_ready: 1159 | sys.stdout.write(f"\r{message} {symbols[idx]}") 1160 | sys.stdout.flush() 1161 | idx = (idx + 1) % len(symbols) 1162 | time.sleep(0.2) # Adjust the speed of the rotation if needed 1163 | except KeyboardInterrupt: 1164 | sys.stdout.write("\rOperation interrupted.\n") 1165 | self.summary_ready = True 1166 | finally: 1167 | sys.stdout.write("\r" + " " * (len(message) + 2) + "\r") # Clear the line when done 1168 | sys.stdout.flush() 1169 | def _cleanup_research_ui(self): 1170 | """Clean up just the research UI components""" 1171 | if hasattr(self, 'ui') and self.ui: 1172 | self.ui.cleanup() 1173 | 1174 | def show_thinking_indicator(self, message: str, stop_flag_name: str): 1175 | """Show a rotating thinking indicator with custom message""" 1176 | symbols = ['|', '/', '-', '\\'] 1177 | idx = 0 1178 | while getattr(self, stop_flag_name): # Use dynamic attribute lookup 1179 | sys.stdout.write(f"\r{message} {symbols[idx]}") 1180 | sys.stdout.flush() 1181 | idx = (idx + 1) % len(symbols) 1182 | time.sleep(0.2) 1183 | sys.stdout.write("\r" + " " * (len(message) + 2) + "\r") # Clear the line when done 1184 | 1185 | def start_conversation_mode(self): 1186 | """Start interactive conversation mode with CTRL+D input handling and thinking indicator""" 1187 | self.conversation_active = True 1188 | self.thinking = False 1189 | 1190 | # Print header with clear instructions 1191 | print("\n" + "="*80) 1192 | print(Fore.CYAN + "Research Conversation Mode" + Style.RESET_ALL) 1193 | print("="*80) 1194 | print(Fore.YELLOW + "\nInstructions:") 1195 | print("- Type your question and press CTRL+D to submit") 1196 | print("- Type 'quit' and press CTRL+D to exit") 1197 | print("- Your messages appear in green") 1198 | print("- AI responses appear in cyan" + Style.RESET_ALL + "\n") 1199 | 1200 | while self.conversation_active: 1201 | try: 1202 | # Show prompt with user input in green 1203 | print(Fore.GREEN + "Your question (Press CTRL+D to submit):" + Style.RESET_ALL) 1204 | user_input = self.get_multiline_conversation_input() 1205 | 1206 | # Handle exit commands 1207 | if not user_input or user_input.lower() in ['quit', 'exit', 'q']: 1208 | print(Fore.YELLOW + "\nExiting conversation mode..." 
+ Style.RESET_ALL) 1209 | self.conversation_active = False 1210 | break 1211 | 1212 | # Skip empty input 1213 | if not user_input.strip(): 1214 | continue 1215 | 1216 | # Echo the submitted question for clarity 1217 | print(Fore.GREEN + "Submitted question:" + Style.RESET_ALL) 1218 | print(Fore.GREEN + user_input + Style.RESET_ALL + "\n") 1219 | 1220 | # Start thinking indicator in a separate thread 1221 | self.thinking = True # Set flag before starting thread 1222 | thinking_thread = threading.Thread( 1223 | target=self.show_thinking_indicator, 1224 | args=("Thinking...", "thinking") 1225 | ) 1226 | thinking_thread.daemon = True 1227 | thinking_thread.start() 1228 | 1229 | try: 1230 | # Generate response 1231 | response = self._generate_conversation_response(user_input) 1232 | 1233 | # Stop thinking indicator 1234 | self.thinking = False 1235 | thinking_thread.join() 1236 | 1237 | # Display response in cyan 1238 | print(Fore.CYAN + "AI Response:" + Style.RESET_ALL) 1239 | print(f"{Fore.CYAN}{response}{Style.RESET_ALL}\n") 1240 | print("-" * 80 + "\n") # Separator between QA pairs 1241 | 1242 | except Exception as e: 1243 | self.thinking = False # Ensure thinking indicator stops 1244 | thinking_thread.join() 1245 | raise e 1246 | 1247 | except KeyboardInterrupt: 1248 | self.thinking = False # Ensure thinking indicator stops 1249 | print(Fore.YELLOW + "\nOperation cancelled. Submit 'quit' to exit." + Style.RESET_ALL) 1250 | except Exception as e: 1251 | logger.error(f"Error in conversation mode: {str(e)}") 1252 | print(Fore.RED + f"Error processing question: {str(e)}" + Style.RESET_ALL) 1253 | def _generate_conversation_response(self, user_query: str) -> str: 1254 | """Generate contextual responses with improved context handling""" 1255 | try: 1256 | # Add debug logging to verify content 1257 | logger.info(f"Research summary length: {len(self.research_summary) if self.research_summary else 0}") 1258 | logger.info(f"Research content length: {len(self.research_content) if self.research_content else 0}") 1259 | 1260 | # First verify we have content 1261 | if not self.research_content and not self.research_summary: 1262 | # Try to reload from file if available 1263 | try: 1264 | if os.path.exists(self.document_path): 1265 | with open(self.document_path, 'r', encoding='utf-8') as f: 1266 | self.research_content = f.read().strip() 1267 | except Exception as e: 1268 | logger.error(f"Failed to reload research content: {str(e)}") 1269 | 1270 | # Prepare context, ensuring we have content 1271 | context = f""" 1272 | Research Content: 1273 | {self.research_content} 1274 | 1275 | Research Summary: 1276 | {self.research_summary if self.research_summary else 'No summary available'} 1277 | """ 1278 | 1279 | prompt = f""" 1280 | Based on the following research content and summary, please answer this question: 1281 | 1282 | {context} 1283 | 1284 | Question: {user_query} 1285 | 1286 | you have 2 sets of instructions the applied set and the unapplied set, the applied set should be followed if the question is directly relating to the research content whereas anything else other then direct questions about the content of the research will result in you instead following the unapplied ruleset 1287 | 1288 | Applied: 1289 | 1290 | Instructions: 1291 | 1. Answer based ONLY on the research content provided above if asked a question about your research or that content. 1292 | 2. If the information requested isn't in the research, clearly state that it isn't in the content you gathered. 1293 | 3. 
    def _generate_conversation_response(self, user_query: str) -> str:
        """Generate contextual responses with improved context handling"""
        try:
            # Add debug logging to verify content
            logger.info(f"Research summary length: {len(self.research_summary) if self.research_summary else 0}")
            logger.info(f"Research content length: {len(self.research_content) if self.research_content else 0}")

            # First verify we have content
            if not self.research_content and not self.research_summary:
                # Try to reload from file if available
                try:
                    if os.path.exists(self.document_path):
                        with open(self.document_path, 'r', encoding='utf-8') as f:
                            self.research_content = f.read().strip()
                except Exception as e:
                    logger.error(f"Failed to reload research content: {str(e)}")

            # Prepare context, ensuring we have content
            context = f"""
Research Content:
{self.research_content}

Research Summary:
{self.research_summary if self.research_summary else 'No summary available'}
"""

            prompt = f"""
Based on the following research content and summary, please answer this question:

{context}

Question: {user_query}

You have two sets of instructions, the Applied set and the Unapplied set. Follow the Applied set when the question relates directly to the research content; for anything other than direct questions about that content, follow the Unapplied set instead.

Applied:

Instructions:
1. Answer based ONLY on the research content provided above when asked about your research or that content.
2. If the information requested isn't in the research, clearly state that it isn't in the content you gathered.
3. Be direct and specific in your response. DO NOT cite the research directly unless specifically asked to; give concise, direct answers based on the research, unless instructed otherwise.

Unapplied:

Instructions:
1. Do not make up anything that isn't actually true.
2. Respond directly to the user's question in an honest and thoughtful manner.
3. Disregard the Applied rules for queries not DIRECTLY related to the research; queries about the research process, or about what you remember of the research, also fall under this Unapplied ruleset.

Answer:
"""

            response = self.llm.generate(
                prompt,
                max_tokens=1000,  # Increased for more detailed responses
                temperature=0.7
            )

            if not response or not response.strip():
                return "I apologize, but I cannot find relevant information in the research content to answer your question."

            return response.strip()

        except Exception as e:
            logger.error(f"Error generating response: {str(e)}")
            return f"I apologize, but I encountered an error processing your question: {str(e)}"
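    # NOTE (illustrative): the prompt above routes between two rulesets - the
    # 'Applied' rules bind the model to the gathered research for on-topic
    # questions, while the 'Unapplied' rules let it answer anything else
    # honestly. The context block could equally be built by a small helper
    # (hypothetical name '_build_context', same logic as the inline f-string):
    #
    #     def _build_context(self) -> str:
    #         summary = self.research_summary or 'No summary available'
    #         return (f"Research Content:\n{self.research_content}\n\n"
    #                 f"Research Summary:\n{summary}")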
    def pause_and_assess(self):
        """Pause the research and assess if the collected content is sufficient."""
        try:
            # Pause the research thread
            self.ui.update_output("\nPausing research for assessment...")
            self.research_paused = True

            # Start progress indicator in a separate thread
            self.summary_ready = False
            indicator_thread = threading.Thread(
                target=self.show_progress_indicator,
                args=("Assessing the researched information...",)
            )
            indicator_thread.daemon = True
            indicator_thread.start()

            # Read the current research content
            if not os.path.exists(self.document_path):
                self.summary_ready = True
                indicator_thread.join()
                self.ui.update_output("No research data found to assess.")
                self.research_paused = False
                return

            with open(self.document_path, 'r', encoding='utf-8') as f:
                content = f.read().strip()

            if not content:
                self.summary_ready = True
                indicator_thread.join()
                self.ui.update_output("No research data was collected to assess.")
                self.research_paused = False
                return

            # Prepare the prompt for the AI assessment
            assessment_prompt = f"""
Based on the following research content, please assess whether the original query "{self.original_query}" can be answered sufficiently with the collected information.

Research Content:
{content}

Instructions:
1. If the research content provides enough information to answer the original query in detail, respond with: "The research is sufficient to answer the query."
2. If not, respond with: "The research is insufficient and it would be advisable to continue gathering information."
3. Do not provide any additional information or details.

Assessment:
"""

            # Generate the assessment
            assessment = self.llm.generate(assessment_prompt, max_tokens=200)

            # Stop the progress indicator
            self.summary_ready = True
            indicator_thread.join()

            # Display the assessment
            self.ui.update_output("\nAssessment Result:")
            self.ui.update_output(assessment.strip())

            # Provide user with options to continue or quit
            self.ui.update_output("\nEnter 'c' to continue the research or 'q' to terminate and generate the summary.")
            self.awaiting_user_decision = True  # Flag to indicate we are waiting for the user's decision

            while self.awaiting_user_decision:
                cmd = self.ui.get_input("Enter command ('c' to continue, 'q' to quit): ")
                if cmd is None:
                    continue  # No input received; prompt again
                cmd = cmd.strip().lower()
                if cmd == 'c':
                    self.ui.update_output("\nResuming research...")
                    self.research_paused = False
                    self.awaiting_user_decision = False
                elif cmd == 'q':
                    self.ui.update_output("\nTerminating research and generating summary...")
                    self.awaiting_user_decision = False
                    self.should_terminate.set()
                    summary = self.terminate_research()
                    self.ui.update_output("\nFinal Research Summary:")
                    self.ui.update_output(summary)
                    break
                else:
                    self.ui.update_output("Invalid command. Please enter 'c' to continue or 'q' to quit.")

        except Exception as e:
            logger.error(f"Error during pause and assess: {str(e)}")
            self.ui.update_output(f"Error during assessment: {str(e)}")
            self.research_paused = False
        finally:
            self.summary_ready = True  # Ensure the indicator thread can exit

    def get_progress(self) -> str:
        """Get current research progress"""
        status = 'Active' if self.is_active() else 'Stopped'
        if self.research_paused:
            status = 'Paused'

        return f"""
Research Progress:
- Original Query: {self.original_query}
- Sources analyzed: {len(self.searched_urls)}
- Status: {status}
- Current focus: {self.current_focus.area if self.current_focus else 'Initializing'}
"""

    def is_active(self) -> bool:
        """Check if research is currently active"""
        return (self.is_running and
                self.research_thread is not None and
                self.research_thread.is_alive() and
                not self.should_terminate.is_set())
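    # NOTE (illustrative): is_active() requires every condition to agree before
    # reporting the research thread as running, so callers can poll it safely.
    # A hypothetical monitoring loop, not part of this class:
    #
    #     while manager.is_active():
    #         print(manager.get_progress())
    #         time.sleep(5)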
    def terminate_research(self) -> str:
        """Terminate research and return to main terminal"""
        try:
            print("Initiating research termination...")
            sys.stdout.flush()

            # Start progress indicator in a separate thread immediately
            indicator_thread = threading.Thread(target=self.show_progress_indicator)
            indicator_thread.daemon = True
            indicator_thread.start()

            if not os.path.exists(self.document_path):
                self.summary_ready = True
                indicator_thread.join(timeout=1.0)
                self._cleanup()
                return "No research data found to summarize."

            with open(self.document_path, 'r', encoding='utf-8') as f:
                content = f.read().strip()
                self.research_content = content  # Store for conversation mode

            if not content or content == "Research Findings:\n\n":
                self.summary_ready = True
                indicator_thread.join(timeout=1.0)
                self._cleanup()
                return "No research data was collected to summarize."

            try:
                # Generate summary using LLM
                summary_prompt = f"""
Analyze the following content and provide a comprehensive research summary that responds to the user's original query "{self.original_query}", ensuring that you conclusively answer the query in detail:

Research Content:
{content}

Important Instructions:
> Summarize the research findings that are relevant to the original topic/question: "{self.original_query}"
> Ensure that your summary directly and conclusively answers the original question/topic, in detail and to the best of your ability.
> Read the original topic/question again: "{self.original_query}". If it contains additional instructions, follow them exactly in your summary; otherwise provide the summary normally.

Summary:
"""

                summary = self.llm.generate(summary_prompt, max_tokens=4000)

                # Signal that summary is complete to stop the progress indicator
                self.summary_ready = True
                indicator_thread.join(timeout=1.0)

                # Store summary and mark research as complete
                self.research_summary = summary
                self.research_complete = True

                # Format summary
                formatted_summary = f"""
{'='*80}
RESEARCH SUMMARY
{'='*80}

Original Query: {self.original_query}
Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

{summary}

{'='*80}
End of Summary
{'='*80}
"""

                # Write to document
                with open(self.document_path, 'a', encoding='utf-8') as f:
                    f.write("\n\n" + formatted_summary)

                # Clean up research UI
                if hasattr(self, 'ui') and self.ui:
                    self.ui.cleanup()

                return formatted_summary

            except Exception:
                self.summary_ready = True
                indicator_thread.join(timeout=1.0)
                raise

        except Exception as e:
            error_msg = f"Error generating summary: {str(e)}"
            logger.error(error_msg)
            return error_msg

        finally:
            # Clean up research UI
            self._cleanup_research_ui()
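# NOTE (illustrative): terminate_research appends the formatted summary to the
# findings document, so the file on disk becomes a full audit trail - raw
# findings first, summary last. A rough sketch for pulling the summary section
# back out of that file later (assumes the 'RESEARCH SUMMARY' banner written
# above is present):
#
#     with open(manager.document_path, encoding='utf-8') as f:
#         text = f.read()
#     summary_part = text.split("RESEARCH SUMMARY", 1)[-1]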

if __name__ == "__main__":
    from llm_wrapper import LLMWrapper
    from llm_response_parser import UltimateLLMResponseParser
    from Self_Improving_Search import EnhancedSelfImprovingSearch

    try:
        print(f"{Fore.CYAN}Initializing Research System...{Style.RESET_ALL}")
        llm = LLMWrapper()
        parser = UltimateLLMResponseParser()
        search_engine = EnhancedSelfImprovingSearch(llm, parser)
        manager = ResearchManager(llm, parser, search_engine)

        print(f"{Fore.GREEN}System initialized. Enter your research topic or 'quit' to exit.{Style.RESET_ALL}")
        while True:
            try:
                topic = ResearchManager.get_initial_input()
                if not topic:
                    continue

                if topic.lower() == 'quit':
                    break

                if not topic.startswith('@'):
                    print(f"{Fore.YELLOW}Please start your research query with '@'{Style.RESET_ALL}")
                    continue

                topic = topic[1:]  # Remove the '@' prefix
                manager.start_research(topic)
                summary = manager.terminate_research()
                print(f"\n{Fore.GREEN}Research Summary:{Style.RESET_ALL}")
                print(summary)
                print(f"\n{Fore.GREEN}Research completed. Ready for next topic.{Style.RESET_ALL}\n")

            except KeyboardInterrupt:
                print(f"\n{Fore.YELLOW}Operation cancelled. Ready for next topic.{Style.RESET_ALL}")
                if 'manager' in locals():
                    manager.terminate_research()
                continue

    except KeyboardInterrupt:
        print(f"\n{Fore.YELLOW}Research system shutting down.{Style.RESET_ALL}")
        if 'manager' in locals():
            manager.terminate_research()
    except Exception as e:
        print(f"{Fore.RED}Critical error: {str(e)}{Style.RESET_ALL}")
        logger.error("Critical error in main loop", exc_info=True)

    if os.name == 'nt':
        print(f"{Fore.YELLOW}Running on Windows - some features may be limited{Style.RESET_ALL}")
--------------------------------------------------------------------------------