├── __pycache__ └── web_browser.cpython-312.pyc ├── requirements.txt ├── .gitignore ├── utilities └── getnow.py ├── example.config.py ├── tool_stuff ├── tools_for_chaos.py ├── groqtools.py └── web_browser.py ├── README.md └── main.py /__pycache__/web_browser.cpython-312.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Drlordbasil/groq-gmail-assistant/HEAD/__pycache__/web_browser.cpython-312.pyc -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | selenium 2 | bs4 3 | nltk 4 | numpy 5 | langchain-core 6 | langchain-groq 7 | ollama 8 | webdriver-manager 9 | groq 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | config.py 2 | __pycache__/config.cpython-312.pyc 3 | notes.txt 4 | __pycache__/email_manager.cpython-312.pyc 5 | __pycache__/getnow.cpython-312.pyc 6 | __pycache__/groqtools.cpython-312.pyc 7 | __pycache__/tools_for_chaos.cpython-312.pyc 8 | -------------------------------------------------------------------------------- /utilities/getnow.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | def get_current_time_formatted(): 4 | # Get the current date and time 5 | now = datetime.now() 6 | 7 | # Format the date and time as a string 8 | time_string = now.strftime("%Y-%m-%d %H:%M:%S") 9 | 10 | return f"{time_string} is the current date and time" 11 | 12 | # Example usage 13 | # current_time = get_current_time_formatted() 14 | # print(current_time) 15 | -------------------------------------------------------------------------------- /example.config.py: -------------------------------------------------------------------------------- 1 | ############################################### 2 | # User = Email address ie drlordbasil@gmail.com 3 | ######### 4 | 5 | USER='' 6 | #app password from google apps. 7 | APP_PASSWORD='' 8 | IMAP_URL='imap.gmail.com' 9 | SMTP_URL='smtp.gmail.com' 10 | SMTP_PORT=587 11 | ### This prompt can vary but generally try to keep him on track. 12 | SYSTEM_PROMPT=''' 13 | 14 | You ALWAYS respect the client. 15 | Your response format must only be what you intend to reply with to the email. 16 | ''' 17 | MODEL='llama3-70b-8192' 18 | GROQ_API_KEY='' 19 | EMB_MODEL='mxbai-embed-large' 20 | -------------------------------------------------------------------------------- /tool_stuff/tools_for_chaos.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import datetime 4 | 5 | from datetime import datetime 6 | from tool_stuff.web_browser import WebResearchTool 7 | 8 | web_research_tool = WebResearchTool() 9 | 10 | def get_current_time_formatted(): 11 | # Get the current date and time 12 | now = datetime.now() 13 | 14 | # Format the date and time as a string 15 | time_string = now.strftime("%Y-%m-%d %H:%M:%S") 16 | 17 | return f"{time_string} is the current date and time" 18 | 19 | 20 | def write_note(content): 21 | """Write a note to the notes.txt file.""" 22 | with open("notes.txt", "a") as file: 23 | file.write(content + "\n") 24 | return "Note written successfully." 25 | 26 | def read_notes(): 27 | """Read notes from the notes.txt file.""" 28 | try: 29 | with open("notes.txt", "r") as file: 30 | notes = file.read() 31 | return notes 32 | except FileNotFoundError: 33 | return "No notes found." 34 | 35 | def create_calendar_appointment(subject, start_time, end_time, location, body): 36 | """Create an .ics file for an appointment and open it with the default calendar application.""" 37 | start_time = datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S') 38 | end_time = datetime.strptime(end_time, '%Y-%m-%d %H:%M:%S') 39 | 40 | # Format start and end times for iCalendar 41 | start_time_formatted = start_time.strftime('%Y%m%dT%H%M%S') 42 | end_time_formatted = end_time.strftime('%Y%m%dT%H%M%S') 43 | 44 | # Create iCalendar content 45 | ics_content = ( 46 | 'BEGIN:VCALENDAR\n' 47 | 'VERSION:2.0\n' 48 | 'BEGIN:VEVENT\n' 49 | f'SUMMARY:{subject}\n' 50 | f'DTSTART;VALUE=DATE-TIME:{start_time_formatted}\n' 51 | f'DTEND;VALUE=DATE-TIME:{end_time_formatted}\n' 52 | f'LOCATION:{location}\n' 53 | f'DESCRIPTION:{body}\n' 54 | 'END:VEVENT\n' 55 | 'END:VCALENDAR' 56 | ) 57 | 58 | # Define file path for .ics file 59 | ics_file_path = os.path.join(os.getenv('TEMP', '.'), f"{subject.replace(' ', '_')}.ics") 60 | 61 | # Write the iCalendar file 62 | with open(ics_file_path, 'w') as file: 63 | file.write(ics_content) 64 | 65 | # Open the file with the default application 66 | os.startfile(ics_file_path) 67 | 68 | return f"Appointment created: {subject} from {start_time} to {end_time} at {location}. " \ 69 | f"The .ics file has been opened with the default calendar application." 70 | 71 | def web_browser(query): 72 | """Perform a web search using Selenium and return the relevant information/context from Google.""" 73 | return web_research_tool.web_research(query) 74 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![GitHub stars](https://img.shields.io/github/stars/Drlordbasil/groq-gmail-assistant?style=social&label=Star) 2 | UPDATES COMING JAN 2025 OR SOONER! 3 | # Groq Gmail Assistant: AI-Powered Email Management 4 | 5 | Manage your emails efficiently and intelligently with Groq Gmail Assistant, an AI-driven tool designed to automate and enhance your email interactions using the latest advancements in AI technology. 6 | 7 | ## Key Features 8 | 9 | - **Automated Email Responses:** Automatically reads and generates responses to emails based on context and sentiment. 10 | - **Advanced Sentiment Analysis:** Utilizes natural language processing to understand the tone and intent behind emails. 11 | - **Conversation Memory:** Remembers past interactions to provide context to ongoing conversations. 12 | - **Customizable AI Models:** Tailors AI behavior to suit individual or organizational needs. 13 | - **Secure and Private:** Ensures all interactions are encrypted and private, using secure Google App Passwords for authentication. 14 | 15 | ## Getting Started 16 | 17 | Follow these steps to set up the Groq Gmail Assistant: 18 | 19 | ### Prerequisites 20 | 21 | - Python 3.11 or higher 22 | - Gmail account with IMAP access enabled 23 | - Google App Password for secure authentication 24 | 25 | ### Installation 26 | 27 | 1. **Clone the Repository** 28 | ```bash 29 | git clone https://github.com/Drlordbasil/groq-gmail-assistant.git 30 | cd groq-gmail-assistant 31 | ``` 32 | 33 | 2. **Install Dependencies** 34 | ```bash 35 | pip install -r requirements.txt 36 | ``` 37 | 38 | 3. **Configure Your Settings** 39 | - Rename `example.config.py` to `config.py`. 40 | - Update `config.py` with your Gmail settings and Google App Password. 41 | 42 | ### Running the Application 43 | 44 | To start the Groq Gmail Assistant, run: 45 | ```python 46 | python main.py 47 | ``` 48 | 49 | ## How It Works 50 | 51 | The Groq Gmail Assistant integrates several components to handle emails: 52 | - **Email Fetching:** Connects to your Gmail account to fetch new emails. 53 | - **Email Parsing and Analysis:** Parses emails and analyzes their content for sentiment and relevance. 54 | - **Response Generation:** Uses AI models to generate appropriate responses based on the analysis. 55 | - **Email Response Handling:** Sends the generated responses back through your Gmail account. 56 | 57 | ## Contribute 58 | 59 | Contributions are welcome! Please fork the repository and submit pull requests with your proposed changes. 60 | 61 | ## License 62 | 63 | Distributed under the MIT License. See `LICENSE` for more information. 64 | 65 | ## Contact 66 | 67 | For support or business inquiries, email drlordbasil@gmail.com. 68 | 69 | --- 70 | 71 | *Enhance your email management experience with Groq Gmail Assistant, leveraging the power of AI to handle your communications efficiently.* 72 | 73 | ## Planned Features 74 | 75 | - **Expansion to Other Large Language Models (LLMs):** While currently utilizing the Groq API, future versions will support a variety of LLMs, enhancing the versatility and adaptability of the assistant across different platforms and use cases. 76 | 77 | - **All-Around AI Autobot Capabilities:** Beyond email management, the assistant will evolve into a comprehensive AI autobot capable of handling a wide range of tasks. This includes but is not limited to scheduling, task management, content creation, and more, providing a fully integrated AI assistant experience. 78 | 79 | Stay tuned for these exciting updates as we continue to enhance the functionality and scope of the Groq Gmail Assistant! 80 | -------------------------------------------------------------------------------- /tool_stuff/groqtools.py: -------------------------------------------------------------------------------- 1 | from groq import Groq 2 | import os 3 | import json 4 | 5 | from selenium.webdriver.support import expected_conditions as EC 6 | 7 | from tool_stuff.tools_for_chaos import write_note, read_notes, create_calendar_appointment, web_browser, get_current_time_formatted 8 | 9 | client = Groq(api_key=os.getenv('GROQ_API_KEY')) 10 | MODEL = 'llama3-70b-8192' 11 | 12 | def run_conversation(user_prompt): 13 | # Step 1: send the conversation and available functions to the model 14 | messages = [ 15 | { 16 | "role": "system", 17 | "content": f" \n\n\nYou are a function calling LLM(multimodel with optional tools) AI assistant named Chaos. You will adapt based on your current situations. " 18 | }, 19 | { 20 | "role": "user", 21 | "content": user_prompt, 22 | } 23 | ] 24 | tools = [ 25 | { 26 | "type": "function", 27 | "function": { 28 | "name": "create_calendar_appointment", 29 | "description": "create a calendar appointment in the local Windows 11 calendar for User. This must ONLY be used if requested.", 30 | "parameters": { 31 | "type": "object", 32 | "properties": { 33 | "subject": { 34 | "type": "string", 35 | "description": "title of the appointment/subject section of the calendar event with names of attendants.", 36 | }, 37 | "start_time": { 38 | "type": "string", 39 | "description": "start time of the appointment in the format 'YYYY-MM-DD HH:MM:SS'", 40 | }, 41 | "end_time": { 42 | "type": "string", 43 | "description": "end time of the appointment in the format 'YYYY-MM-DD HH:MM:SS'", 44 | }, 45 | "location": { 46 | "type": "string", 47 | "description": "location of the appointment", 48 | }, 49 | "body": { 50 | "type": "string", 51 | "description": "description of the appointment with the names of both parties included in the body", 52 | }, 53 | }, 54 | "required": ["subject", "start_time", "end_time", "location", "body"], 55 | }, 56 | }, 57 | }, 58 | { 59 | "type": "function", 60 | "function": { 61 | "name": "web_browser", 62 | "description": "Perform a web search using Selenium and return the relevant information/context from google,bing, and brave. This will return updated info on anything you search in real time!", 63 | "parameters": { 64 | "type": "object", 65 | "properties": { 66 | "query": { 67 | "type": "string", 68 | "description": "The search query to perform to help yourself be more honest by providing updated info.", 69 | }, 70 | }, 71 | "required": ["query"], 72 | }, 73 | }, 74 | }, 75 | { 76 | "type": "function", 77 | "function": { 78 | "name": "write_note", 79 | "description": "Write a note to the notes.txt file. Whenenever important data is passed, you need to note it.", 80 | "parameters": { 81 | "type": "object", 82 | "properties": { 83 | "content": { 84 | "type": "string", 85 | "description": "The content of the note to write.", 86 | }, 87 | }, 88 | "required": ["content"], 89 | }, 90 | }, 91 | }, 92 | { 93 | "type": "function", 94 | "function": { 95 | "name": "read_notes", 96 | "description": "Read notes from the notes.txt file.", 97 | "parameters": { 98 | "type": "object", 99 | "properties": {}, 100 | "required": [], 101 | }, 102 | }, 103 | } 104 | ] 105 | 106 | response = client.chat.completions.create( 107 | model=MODEL, 108 | messages=messages, 109 | tools=tools, 110 | tool_choice="auto", 111 | max_tokens=4096 112 | ) 113 | 114 | response_message = response.choices[0].message 115 | print(response_message.content) 116 | 117 | tool_calls = response_message.tool_calls 118 | print(tool_calls) 119 | # Step 2: check if the model wanted to call a function 120 | if tool_calls: 121 | # Step 3: call the function 122 | available_functions = { 123 | "create_calendar_appointment": create_calendar_appointment, 124 | "web_browser": web_browser, 125 | "write_note": write_note, 126 | "read_notes": read_notes, 127 | } 128 | messages.append(response_message) # extend conversation with assistant's reply 129 | # Step 4: send the info for each function call and function response to the model 130 | for tool_call in tool_calls: 131 | function_name = tool_call.function.name 132 | function_to_call = available_functions[function_name] 133 | function_args = json.loads(tool_call.function.arguments) 134 | if function_name == "create_calendar_appointment": 135 | function_response = function_to_call( 136 | subject=function_args.get("subject"), 137 | start_time=function_args.get("start_time"), 138 | end_time=function_args.get("end_time"), 139 | location=function_args.get("location"), 140 | body=function_args.get("body") 141 | ) 142 | elif function_name == "web_browser": 143 | function_response = function_to_call( 144 | query=function_args.get("query") 145 | ) 146 | elif function_name == "write_note": 147 | function_response = function_to_call( 148 | content=function_args.get("content") 149 | ) 150 | elif function_name == "read_notes": 151 | function_response = function_to_call() 152 | messages.append( 153 | { 154 | "tool_call_id": tool_call.id, 155 | "role": "tool", 156 | "name": function_name, 157 | "content": function_response, 158 | } 159 | ) # extend conversation with function response 160 | second_response = client.chat.completions.create( 161 | model=MODEL, 162 | messages=messages 163 | ) # get a new response from the model where it can see the function response 164 | print(second_response.choices[0].message.content) 165 | return second_response.choices[0].message.content 166 | -------------------------------------------------------------------------------- /tool_stuff/web_browser.py: -------------------------------------------------------------------------------- 1 | import time 2 | import random 3 | import numpy as np 4 | from bs4 import BeautifulSoup 5 | from selenium import webdriver 6 | from selenium.webdriver.common.by import By 7 | from selenium.webdriver.common.keys import Keys 8 | from selenium.webdriver.support.ui import WebDriverWait 9 | from selenium.webdriver.support import expected_conditions as EC 10 | from selenium.webdriver.chrome.service import Service as ChromeService 11 | from webdriver_manager.chrome import ChromeDriverManager 12 | import trafilatura 13 | from selenium.common.exceptions import WebDriverException, NoSuchElementException, TimeoutException 14 | import requests 15 | import json 16 | from sklearn.feature_extraction.text import TfidfVectorizer 17 | from sklearn.metrics.pairwise import cosine_similarity 18 | import networkx as nx 19 | 20 | max_content_length = 1000 21 | 22 | class SelectorRL: 23 | def __init__(self): 24 | self.selectors = { 25 | "google": { 26 | "search_box": ["input[name='q']", "textarea[name='q']", "#search-input"], 27 | "result": ["div.g", "div.tF2Cxc", "div.yuRUbf"] 28 | }, 29 | "bing": { 30 | "search_box": ["input[name='q']", "#sb_form_q"], 31 | "result": ["li.b_algo", "div.b_title", "h2"] 32 | }, 33 | "brave": { 34 | "search_box": ["input[name='q']", "#searchbox"], 35 | "result": ["div.snippet", "div.fdb", "div.result"] 36 | } 37 | } 38 | self.q_values = {engine: {selector: 0 for selector_type in selectors.values() for selector in selector_type} for engine, selectors in self.selectors.items()} 39 | self.learning_rate = 0.1 40 | self.discount_factor = 0.9 41 | self.epsilon = 0.1 42 | 43 | def get_selector(self, engine, selector_type): 44 | if random.random() < self.epsilon: 45 | return random.choice(self.selectors[engine][selector_type]) 46 | else: 47 | return max(self.selectors[engine][selector_type], key=lambda s: self.q_values[engine][s]) 48 | 49 | def update_q_value(self, engine, selector, reward): 50 | self.q_values[engine][selector] += self.learning_rate * (reward - self.q_values[engine][selector]) 51 | 52 | def add_new_selector(self, engine, selector_type, new_selector): 53 | if new_selector not in self.selectors[engine][selector_type]: 54 | self.selectors[engine][selector_type].append(new_selector) 55 | self.q_values[engine][new_selector] = 0 56 | 57 | def save_state(self, filename='selector_rl_state.json'): 58 | state = { 59 | 'q_values': self.q_values, 60 | 'selectors': self.selectors 61 | } 62 | with open(filename, 'w') as f: 63 | json.dump(state, f) 64 | 65 | def load_state(self, filename='selector_rl_state.json'): 66 | try: 67 | with open(filename, 'r') as f: 68 | state = json.load(f) 69 | self.q_values = state['q_values'] 70 | self.selectors = state['selectors'] 71 | except FileNotFoundError: 72 | print("No saved state found. Starting with default values.") 73 | 74 | class WebResearchTool: 75 | def __init__(self): 76 | self.max_content_length = 2000 77 | self.selector_rl = SelectorRL() 78 | self.selector_rl.load_state() 79 | self.vectorizer = TfidfVectorizer() 80 | 81 | def _initialize_webdriver(self): 82 | options = webdriver.ChromeOptions() 83 | service = ChromeService(ChromeDriverManager().install()) 84 | return webdriver.Chrome(service=service, options=options) 85 | 86 | def find_new_selector(self, driver, element_type): 87 | if element_type == "search_box": 88 | potential_selectors = driver.find_elements(By.XPATH, "//input[@type='text'] | //input[@type='search'] | //textarea") 89 | else: # result 90 | potential_selectors = driver.find_elements(By.XPATH, "//div[.//a] | //li[.//a] | //h2[.//a]") 91 | 92 | for element in potential_selectors: 93 | try: 94 | selector = self.get_css_selector(driver, element) 95 | return selector 96 | except: 97 | continue 98 | return None 99 | 100 | def get_css_selector(self, driver, element): 101 | return driver.execute_script(""" 102 | var path = []; 103 | var element = arguments[0]; 104 | while (element.nodeType === Node.ELEMENT_NODE) { 105 | var selector = element.nodeName.toLowerCase(); 106 | if (element.id) { 107 | selector += '#' + element.id; 108 | path.unshift(selector); 109 | break; 110 | } else { 111 | var sibling = element; 112 | var nth = 1; 113 | while (sibling.previousElementSibling) { 114 | sibling = sibling.previousElementSibling; 115 | if (sibling.nodeName.toLowerCase() == selector) 116 | nth++; 117 | } 118 | if (nth != 1) 119 | selector += ":nth-of-type("+nth+")"; 120 | } 121 | path.unshift(selector); 122 | element = element.parentNode; 123 | } 124 | return path.join(' > '); 125 | """, element) 126 | 127 | def extract_text_from_url(self, url): 128 | try: 129 | response = requests.get(url, timeout=10) 130 | response.raise_for_status() 131 | text = trafilatura.extract(response.text, include_comments=False, include_tables=False) 132 | if text and len(text) >= 50: 133 | return text 134 | 135 | driver = self._initialize_webdriver() 136 | driver.get(url) 137 | WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "body"))) 138 | soup = BeautifulSoup(driver.page_source, 'html.parser') 139 | for element in soup(['script', 'style', 'nav', 'footer', 'aside']): 140 | element.decompose() 141 | text = ' '.join(p.get_text().strip() for p in soup.find_all('p') if len(p.get_text().strip()) > 20) 142 | return text if len(text) >= 50 else None 143 | except Exception as e: 144 | print(f"Error extracting text from URL {url}: {e}") 145 | return None 146 | finally: 147 | if 'driver' in locals(): 148 | driver.quit() 149 | 150 | def crawl_website(self, url, max_pages=5, progress_callback: callable = None): 151 | visited = set() 152 | to_visit = [url] 153 | graph = nx.DiGraph() 154 | content = {} 155 | 156 | while to_visit and len(visited) < max_pages: 157 | current_url = to_visit.pop(0) 158 | if current_url in visited: 159 | continue 160 | 161 | visited.add(current_url) 162 | if progress_callback: 163 | progress_callback(f"Crawling: {current_url}") 164 | 165 | try: 166 | response = requests.get(current_url, timeout=10) 167 | response.raise_for_status() 168 | soup = BeautifulSoup(response.text, 'html.parser') 169 | 170 | # Extract content 171 | text = self.extract_text_from_url(current_url) 172 | if text: 173 | content[current_url] = text 174 | 175 | # Find links 176 | for link in soup.find_all('a', href=True): 177 | href = link['href'] 178 | full_url = requests.compat.urljoin(current_url, href) 179 | if full_url.startswith(url): # Stay on the same domain 180 | graph.add_edge(current_url, full_url) 181 | if full_url not in visited: 182 | to_visit.append(full_url) 183 | 184 | except Exception as e: 185 | if progress_callback: 186 | progress_callback(f"Error crawling {current_url}: {e}") 187 | 188 | return graph, content 189 | 190 | def calculate_similarity(self, query, text): 191 | tfidf_matrix = self.vectorizer.fit_transform([query, text]) 192 | return cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0] 193 | 194 | def web_research(self, query, progress_callback: callable = None): 195 | if progress_callback: 196 | progress_callback("Starting web research...") 197 | 198 | combined_query = query 199 | search_engines = [ 200 | ("https://www.google.com/search", "google"), 201 | ("https://www.bing.com/search", "bing"), 202 | ("https://search.brave.com/search", "brave") 203 | ] 204 | search_results = [] 205 | 206 | for engine_url, engine_name in search_engines: 207 | if progress_callback: 208 | progress_callback(f"Using search engine: {engine_name}") 209 | 210 | try: 211 | driver = self._initialize_webdriver() 212 | driver.get(engine_url) 213 | 214 | search_box_selector = self.selector_rl.get_selector(engine_name, "search_box") 215 | result_selector = self.selector_rl.get_selector(engine_name, "result") 216 | 217 | try: 218 | search_box = WebDriverWait(driver, 10).until( 219 | EC.presence_of_element_located((By.CSS_SELECTOR, search_box_selector)) 220 | ) 221 | search_box.send_keys(combined_query) 222 | search_box.send_keys(Keys.RETURN) 223 | 224 | WebDriverWait(driver, 10).until( 225 | EC.presence_of_element_located((By.CSS_SELECTOR, result_selector)) 226 | ) 227 | 228 | soup = BeautifulSoup(driver.page_source, 'html.parser') 229 | results = soup.select(result_selector)[:5] # Top 5 results 230 | 231 | if results: 232 | self.selector_rl.update_q_value(engine_name, search_box_selector, 1) 233 | self.selector_rl.update_q_value(engine_name, result_selector, 1) 234 | else: 235 | if progress_callback: 236 | progress_callback(f"No results found with current selectors for {engine_name}. Attempting to find new selectors.") 237 | new_search_box_selector = self.find_new_selector(driver, "search_box") 238 | new_result_selector = self.find_new_selector(driver, "result") 239 | 240 | if new_search_box_selector and new_result_selector: 241 | self.selector_rl.add_new_selector(engine_name, "search_box", new_search_box_selector) 242 | self.selector_rl.add_new_selector(engine_name, "result", new_result_selector) 243 | if progress_callback: 244 | progress_callback(f"New selectors found for {engine_name}: Search box: {new_search_box_selector}, Result: {new_result_selector}") 245 | 246 | # Retry with new selectors 247 | driver.get(engine_url) 248 | search_box = WebDriverWait(driver, 10).until( 249 | EC.presence_of_element_located((By.CSS_SELECTOR, new_search_box_selector)) 250 | ) 251 | search_box.send_keys(combined_query) 252 | search_box.send_keys(Keys.RETURN) 253 | 254 | WebDriverWait(driver, 10).until( 255 | EC.presence_of_element_located((By.CSS_SELECTOR, new_result_selector)) 256 | ) 257 | 258 | soup = BeautifulSoup(driver.page_source, 'html.parser') 259 | results = soup.select(new_result_selector)[:5] 260 | 261 | for result in results: 262 | link = result.select_one('a') 263 | if link and link.get('href'): 264 | url = link['href'] 265 | if url.startswith('http'): 266 | graph, crawled_content = self.crawl_website(url, progress_callback=progress_callback) 267 | for page_url, content in crawled_content.items(): 268 | similarity = self.calculate_similarity(combined_query, content) 269 | if similarity > 0.1: # Adjust threshold as needed 270 | search_results.append({ 271 | "title": link.get_text(), 272 | "link": page_url, 273 | "content": content, 274 | "similarity": similarity 275 | }) 276 | except (NoSuchElementException, TimeoutException) as e: 277 | if progress_callback: 278 | progress_callback(f"Error interacting with search engine {engine_url}: {e}") 279 | self.selector_rl.update_q_value(engine_name, search_box_selector, -1) 280 | self.selector_rl.update_q_value(engine_name, result_selector, -1) 281 | except WebDriverException as e: 282 | if progress_callback: 283 | progress_callback(f"WebDriver error with search engine {engine_url}: {e}") 284 | finally: 285 | driver.quit() 286 | 287 | self.selector_rl.save_state() 288 | 289 | if not search_results: 290 | return f"No results found for the query: {combined_query}" 291 | 292 | # Sort results by similarity 293 | search_results.sort(key=lambda x: x['similarity'], reverse=True) 294 | 295 | aggregated_content = "" 296 | for result in search_results: 297 | if len(aggregated_content) + len(result['content']) <= self.max_content_length: 298 | aggregated_content += f"[Source: {result['link']}]\n{result['content']}\n\n" 299 | else: 300 | remaining_chars = self.max_content_length - len(aggregated_content) 301 | aggregated_content += f"[Source: {result['link']}]\n{result['content'][:remaining_chars]}" 302 | break 303 | 304 | if progress_callback: 305 | progress_callback("Web research completed.") 306 | 307 | return aggregated_content.strip() if aggregated_content else f"Unable to retrieve relevant content for the query: {combined_query}" 308 | 309 | # Example usage 310 | if __name__ == "__main__": 311 | research_tool = WebResearchTool() 312 | user_prompt = "What are the latest advancements in AI?" 313 | assistant_query = "Focus on breakthroughs in natural language processing and computer vision" 314 | 315 | def progress_update(message): 316 | print(f"Progress: {message}") 317 | 318 | results = research_tool.web_research(query=f"{user_prompt} {assistant_query}", progress_callback=progress_update) 319 | print(f"Research results:\n\n{results}") 320 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import imaplib 3 | import smtplib 4 | from email.mime.text import MIMEText 5 | from email import policy 6 | from email.parser import BytesParser 7 | import json 8 | import os 9 | import time 10 | import logging 11 | from bs4 import BeautifulSoup 12 | import re 13 | import nltk 14 | from nltk.corpus import stopwords 15 | from nltk.tokenize import word_tokenize, sent_tokenize 16 | from nltk.sentiment import SentimentIntensityAnalyzer 17 | import numpy as np 18 | import email 19 | 20 | from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate 21 | from langchain_groq import ChatGroq 22 | import ollama 23 | from config import USER, APP_PASSWORD, IMAP_URL, SMTP_URL, SMTP_PORT, SYSTEM_PROMPT 24 | from tool_stuff.groqtools import run_conversation 25 | from utilities.getnow import get_current_time_formatted 26 | from nltk.stem import WordNetLemmatizer 27 | 28 | # Download necessary NLTK data 29 | nltk.download('wordnet') 30 | nltk.download('punkt') 31 | nltk.download('stopwords') 32 | nltk.download('vader_lexicon') 33 | 34 | # Get current time formatted 35 | now = get_current_time_formatted() 36 | 37 | # Set Groq API key from environment variable 38 | GROQ_API_KEY = os.getenv("GROQ_API_KEY") 39 | if GROQ_API_KEY is None: 40 | raise ValueError("GROQ_API_KEY environment variable is not set.") 41 | 42 | # Set up logging 43 | logging.basicConfig(filename='gmail_assistant.log', level=logging.INFO, 44 | format='%(asctime)s - %(levelname)s - %(message)s') 45 | 46 | 47 | class EmbeddingModel: 48 | """ 49 | EmbeddingModel handles loading conversation documents and retrieving their embeddings. 50 | """ 51 | def __init__(self, json_file_path: str = 'conversation_memory.json', model_name: str = 'snowflake-arctic-embed'): 52 | self.json_file_path = json_file_path 53 | self.model_name = model_name 54 | 55 | def _load_documents(self) -> list: 56 | """Load documents from a JSON file.""" 57 | try: 58 | with open(self.json_file_path, 'r') as file: 59 | return json.load(file) 60 | except FileNotFoundError: 61 | return [] 62 | 63 | def get_embeddings(self) -> list: 64 | """Retrieve embeddings for the loaded documents.""" 65 | documents = self._load_documents() 66 | return [ollama.embeddings(model=self.model_name, prompt=doc['content'])['embedding'] for doc in documents] 67 | 68 | 69 | class ConversationMemory: 70 | """ 71 | ConversationMemory manages saving and loading conversation history. 72 | """ 73 | def __init__(self, memory_file="conversation_memory.json"): 74 | self.memory_file = memory_file 75 | self.history = self._load_memory() 76 | 77 | def save_context(self, role: str, content: str): 78 | """Save context to memory.""" 79 | self.history.append({"role": role, "content": content}) 80 | self._save_memory() 81 | 82 | def get_history(self) -> list: 83 | """Get the conversation history.""" 84 | return self.history 85 | 86 | def _load_memory(self) -> list: 87 | """Load conversation memory from a file.""" 88 | try: 89 | with open(self.memory_file, "r") as file: 90 | return json.load(file) 91 | except FileNotFoundError: 92 | return [] 93 | 94 | def _save_memory(self): 95 | """Save conversation memory to a file.""" 96 | with open(self.memory_file, "w") as file: 97 | json.dump(self.history, file, indent=2) 98 | 99 | 100 | class ChatGroqFactory: 101 | """ 102 | ChatGroqFactory creates ChatGroq instances with specified parameters. 103 | """ 104 | @staticmethod 105 | def create_chat_groq(temperature=0.7, model_name="llama3-70b-8192") -> ChatGroq: 106 | """Create a ChatGroq instance.""" 107 | return ChatGroq(groq_api_key=GROQ_API_KEY, temperature=temperature, model_name=model_name) 108 | 109 | 110 | def preprocess_text(text: str) -> str: 111 | """ 112 | Preprocess text by tokenizing, removing stop words, and lemmatizing. 113 | 114 | Args: 115 | text (str): Input text to preprocess. 116 | 117 | Returns: 118 | str: Preprocessed text. 119 | """ 120 | lemmatizer = WordNetLemmatizer() 121 | tokens = word_tokenize(text) 122 | stop_words = set(stopwords.words('english')) 123 | lemmatized_tokens = [lemmatizer.lemmatize(token.lower()) for token in tokens if token.lower() not in stop_words] 124 | return ' '.join(lemmatized_tokens) 125 | 126 | 127 | def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float: 128 | """ 129 | Calculate the cosine similarity between two vectors. 130 | 131 | Args: 132 | a (np.ndarray): First vector. 133 | b (np.ndarray): Second vector. 134 | 135 | Returns: 136 | float: Cosine similarity between vectors a and b. 137 | """ 138 | return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)) 139 | 140 | 141 | class EmailHandler: 142 | """ 143 | EmailHandler processes emails, retrieves relevant messages, and interacts with Groq. 144 | """ 145 | def __init__(self, embedding_model: EmbeddingModel): 146 | self.embedding_model = embedding_model 147 | self.sentiment_analyzer = SentimentIntensityAnalyzer() 148 | self.imap_server = None 149 | self.response_to_client = "" 150 | self.email_from_client = "" 151 | self.drafted_email = "" 152 | 153 | def retrieve_relevant_messages(self, user_prompt: str, memory: ConversationMemory) -> list: 154 | """ 155 | Retrieve relevant messages based on user prompt and memory. 156 | 157 | Args: 158 | user_prompt (str): User's prompt. 159 | memory (ConversationMemory): Conversation memory. 160 | 161 | Returns: 162 | list: List of relevant messages. 163 | """ 164 | embeddings = self.embedding_model.get_embeddings() 165 | user_prompt_embedding = ollama.embeddings(model='snowflake-arctic-embed', 166 | prompt=preprocess_text(user_prompt))['embedding'] 167 | 168 | return [ 169 | msg for i, msg in enumerate(memory.get_history()) 170 | if cosine_similarity(np.array(embeddings[i]), np.array(user_prompt_embedding)) > 0.8 171 | ] 172 | 173 | def chat_with_groq(self, system_prompt: str, user_prompt: str, chat_instance: ChatGroq = None, memory: ConversationMemory = None) -> str: 174 | """ 175 | Interact with Groq based on system and user prompts. 176 | 177 | Args: 178 | system_prompt (str): System prompt. 179 | user_prompt (str): User prompt. 180 | chat_instance (ChatGroq, optional): ChatGroq instance. Defaults to None. 181 | memory (ConversationMemory, optional): Conversation memory. Defaults to None. 182 | 183 | Returns: 184 | str: Response from Groq. 185 | """ 186 | chat_instance = chat_instance or ChatGroqFactory.create_chat_groq() 187 | memory = memory or ConversationMemory() 188 | memory.save_context("user", user_prompt) 189 | 190 | relevant_messages = self.retrieve_relevant_messages(user_prompt, memory) 191 | history = relevant_messages + [{"role": "user", "content": user_prompt}] 192 | messages = [SystemMessagePromptTemplate.from_template(system_prompt)] + [ 193 | HumanMessagePromptTemplate.from_template(msg["content"]) if msg["role"] == "user" else 194 | SystemMessagePromptTemplate.from_template(msg["content"]) for msg in history 195 | ] 196 | 197 | prompt = ChatPromptTemplate.from_messages(messages) 198 | response = chat_instance.invoke(prompt.format_prompt()) 199 | memory.save_context("assistant", response.content) 200 | 201 | return response 202 | 203 | def get_email_body(self, msg) -> str: 204 | """ 205 | Extract the email body from an email message. 206 | 207 | Args: 208 | msg (email.message.Message): Email message. 209 | 210 | Returns: 211 | str: Email body. 212 | """ 213 | if msg.is_multipart(): 214 | for part in msg.walk(): 215 | content_type = part.get_content_type() 216 | if content_type == 'text/plain' and 'attachment' not in str(part.get('Content-Disposition')): 217 | return part.get_payload(decode=True).decode('utf-8', errors='ignore') 218 | elif content_type == 'text/html' and 'attachment' not in str(part.get('Content-Disposition')): 219 | return self._parse_html_content(part.get_payload(decode=True).decode('utf-8', errors='ignore')) 220 | else: 221 | content_type = msg.get_content_type() 222 | if content_type == 'text/plain': 223 | return msg.get_payload(decode=True).decode('utf-8', errors='ignore') 224 | elif content_type == 'text/html': 225 | return self._parse_html_content(msg.get_payload(decode=True).decode('utf-8', errors='ignore')) 226 | 227 | return "" 228 | 229 | def _parse_html_content(self, html_content: str) -> str: 230 | """ 231 | Parse and clean HTML content to extract text. 232 | 233 | Args: 234 | html_content (str): HTML content. 235 | 236 | Returns: 237 | str: Cleaned text from HTML. 238 | """ 239 | soup = BeautifulSoup(html_content, 'html.parser') 240 | for element in soup(["script", "style"]): 241 | element.decompose() 242 | for link in soup.find_all('a'): 243 | link.decompose() 244 | text = soup.get_text() 245 | return re.sub(r'\s+', ' ', text).strip() 246 | 247 | def extract_sentences(self, text: str) -> list: 248 | """ 249 | Extract sentences from text. 250 | 251 | Args: 252 | text (str): Input text. 253 | 254 | Returns: 255 | list: List of sentences. 256 | """ 257 | return sent_tokenize(text) 258 | 259 | def clean_email_body(self, email_body: str) -> str: 260 | """ 261 | Clean the email body by removing unnecessary headers. 262 | 263 | Args: 264 | email_body (str): Raw email body. 265 | 266 | Returns: 267 | str: Cleaned email body. 268 | """ 269 | lines = email_body.split("\n") 270 | cleaned_lines = [] 271 | 272 | for line in lines: 273 | if line.startswith("From:") or line.startswith("Sent:") or line.startswith("To:") or line.startswith( 274 | "Subject:"): 275 | break 276 | cleaned_lines.append(line) 277 | 278 | return "\n".join(cleaned_lines).strip() 279 | 280 | def get_client_name(self, email_from: str) -> str: 281 | """ 282 | Extract client name from email address. 283 | 284 | Args: 285 | email_from (str): Email address. 286 | 287 | Returns: 288 | str: Client name. 289 | """ 290 | if '<' in email_from: 291 | client_name = email_from.split('<')[0].strip() 292 | else: 293 | client_name = email_from 294 | 295 | return client_name 296 | 297 | def process_email(self, mail_id: bytes, parser: BytesParser): 298 | """ 299 | Process an email based on its mail ID. 300 | 301 | Args: 302 | mail_id (bytes): Mail ID. 303 | parser (BytesParser): BytesParser instance to parse the email. 304 | """ 305 | _, data = self.imap_server.fetch(mail_id, '(RFC822)') 306 | for response_part in data: 307 | if isinstance(response_part, tuple): 308 | msg = parser.parsebytes(response_part[1]) 309 | email_from = msg['from'] 310 | email_subject = msg['subject'] or "No Subject" 311 | email_body = self.get_email_body(msg) 312 | 313 | print(f"Received email from: {email_from}") 314 | print(f"Email subject: {email_subject}") 315 | print(f"Email body: {email_body}") 316 | 317 | skip_keywords = [ 318 | "no-reply", "do-not-reply", "noreply", "donotreply", "no_reply", "do_not_reply", 319 | "newsletter", "news letter", "notifications", "notification", "account", "accounts" 320 | ] 321 | if any(keyword in email_from.lower() or keyword in email_subject.lower() for keyword in skip_keywords): 322 | logging.info(f"Skipping email from {email_from} with subject '{email_subject}'.") 323 | print(f"Skipping email from {email_from} with subject '{email_subject}'.") 324 | return 325 | 326 | logging.info(f"Processing new email from {email_from} with subject '{email_subject}'.") 327 | print(f"Processing new email from {email_from} with subject '{email_subject}'.") 328 | 329 | cleaned_email_body = self.clean_email_body(email_body) 330 | print(f"Cleaned email body: {cleaned_email_body}") 331 | 332 | sentences = self.extract_sentences(cleaned_email_body) 333 | 334 | client_name = self.get_client_name(email_from) 335 | 336 | for sentence in sentences: 337 | user_prompt = f"{now}\n\nAs Chaos, analyze this email sentence and determine if it requires a response. You will always respond to emails asking for help from you. You have RAG using embedding with NLP to find relevant info that's also attached.\n\nSender: {email_from}\n\nClient Name: {client_name}\n\nEmail Content:\n{sentence}\n\nDoes this email require a response? Respond with only YES or NO." 338 | 339 | try: 340 | response = self.chat_with_groq(SYSTEM_PROMPT, user_prompt) 341 | print(f"Analysis result: {response.content}") 342 | 343 | if "YES" in response.content.strip().upper(): 344 | user_prompt = f"{now}\n\nAs Chaos, respond to this email sentence. Use any tools necessary to formulate your response. Provide only the response text, without any additional information.\n\nSender: {email_from}\n\nClient Name: {client_name}\n\nEmail Content:\n{sentence}" 345 | print(f"Using tools to formulate a response...") 346 | tool_response = run_conversation(user_prompt) 347 | print(f"Tool response: {tool_response}") 348 | 349 | # Combine tool_response in the final response API request 350 | memories = self.retrieve_relevant_messages(sentence, ConversationMemory()) 351 | response = self.chat_with_groq(SYSTEM_PROMPT, user_prompt + "\n" + tool_response + "\n" + memories[-1]["content"]) 352 | print(f"Final response: {response.content}") 353 | 354 | self.drafted_email = response.content.strip() 355 | self.response_to_client = response.content.strip() 356 | self.email_from_client = email_from 357 | else: 358 | print("Email does not require a response.") 359 | pass 360 | 361 | except Exception as e: 362 | logging.error(f"An error occurred while processing the email sentence: {str(e)}") 363 | print(f"An error occurred while processing the email sentence: {str(e)}") 364 | continue 365 | 366 | def send_response_email(self, email_to: str, email_subject: str, response_content: str): 367 | """ 368 | Send a response email. 369 | 370 | Args: 371 | email_to (str): Recipient email address. 372 | email_subject (str): Email subject. 373 | response_content (str): Email body content. 374 | """ 375 | try: 376 | smtp_server = smtplib.SMTP(SMTP_URL, SMTP_PORT) 377 | smtp_server.starttls() 378 | smtp_server.login(USER, APP_PASSWORD) 379 | 380 | message = MIMEText(response_content) 381 | message['From'] = USER 382 | message['To'] = email_to 383 | message['Subject'] = email_subject 384 | 385 | smtp_server.sendmail(USER, [email_to], message.as_string()) 386 | logging.info(f"Email sent to: {email_to}") 387 | print(f"Email sent to: {email_to}") 388 | except Exception as e: 389 | logging.error(f"An error occurred while sending the email: {str(e)}") 390 | print(f"An error occurred while sending the email: {str(e)}") 391 | finally: 392 | smtp_server.quit() 393 | 394 | def handle_emails(self): 395 | """ 396 | Handle incoming emails. 397 | """ 398 | parser = BytesParser(policy=policy.default) 399 | 400 | while True: 401 | try: 402 | self.imap_server = imaplib.IMAP4_SSL(IMAP_URL) 403 | self.imap_server.login(USER, APP_PASSWORD) 404 | 405 | # Process incoming emails 406 | self.imap_server.select('inbox') 407 | _, data = self.imap_server.search(None, 'UNSEEN') 408 | mail_ids = data[0].split() 409 | 410 | if mail_ids: 411 | for mail_id in mail_ids: 412 | self.process_email(mail_id, parser) 413 | if self.response_to_client and self.email_from_client: 414 | self.send_response_email(self.email_from_client, "Re: Your Inquiry", self.response_to_client) 415 | else: 416 | print("No new emails found.") 417 | 418 | except imaplib.IMAP4.error as e: 419 | logging.error(f"IMAP error occurred: {str(e)}") 420 | print(f"IMAP error occurred: {str(e)}") 421 | except Exception as e: 422 | logging.error(f"An error occurred while handling emails: {str(e)}") 423 | print(f"An error occurred while handling emails: {str(e)}") 424 | finally: 425 | try: 426 | if self.imap_server.state == 'SELECTED': 427 | self.imap_server.close() 428 | self.imap_server.logout() 429 | except imaplib.IMAP4.error as e: 430 | logging.error(f"IMAP error during logout: {str(e)}") 431 | print(f"IMAP error during logout: {str(e)}") 432 | except Exception as e: 433 | logging.error(f"An error occurred during IMAP logout: {str(e)}") 434 | print(f"An error occurred during IMAP logout: {str(e)}") 435 | 436 | print("Waiting for 30 seconds before checking for new emails...") 437 | time.sleep(30) # Wait for 30 seconds before checking for new emails again 438 | 439 | 440 | if __name__ == "__main__": 441 | embedding_model = EmbeddingModel() 442 | email_handler = EmailHandler(embedding_model) 443 | email_handler.handle_emails() 444 | --------------------------------------------------------------------------------