├── src
    ├── tools
    │   ├── contacts
    │   │   ├── __init__.py
    │   │   ├── add_contact_tool.py
    │   │   └── fetch_contact_tool.py
    │   ├── search
    │   │   ├── __init__.py
    │   │   ├── search_web_tool.py
    │   │   └── knowledge_base_tool.py
    │   ├── base_tool.py
    │   ├── emails
    │   │   └── emailing_tool.py
    │   └── calendar
    │   │   └── calendar_tool.py
    ├── utils.py
    ├── speech_processing
    │   ├── conversation_manager.py
    │   ├── text_to_speech.py
    │   └── speech_to_text.py
    ├── prompts
    │   └── prompts.py
    └── agents
    │   └── agent.py
├── requirements.txt
├── scripts
    ├── create_index.py
    └── fetch_index.py
├── main.py
└── README.md


/src/tools/contacts/__init__.py:
--------------------------------------------------------------------------------
1 | from .add_contact_tool import AddContactTool
2 | from .fetch_contact_tool import FetchContactTool
3 | 
4 | __all__ = ['AddContactTool', 'FetchContactTool']
5 | 


--------------------------------------------------------------------------------
/src/tools/search/__init__.py:
--------------------------------------------------------------------------------
1 | from .search_web_tool import SearchWebTool
2 | from .knowledge_base_tool import KnowledgeSearchTool
3 | 
4 | __all__ = ['SearchWebTool', 'KnowledgeSearchTool']
5 | 


--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
1 | SCOPES = [
2 |     "https://www.googleapis.com/auth/calendar.events",
3 |     'https://www.googleapis.com/auth/contacts',
4 |     "https://www.googleapis.com/auth/contacts.readonly"
5 | ]


--------------------------------------------------------------------------------
/src/tools/base_tool.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from instructor import OpenAISchema
 3 | 
 4 | 
 5 | # Define the BaseTool abstract class
 6 | class BaseTool(ABC, OpenAISchema):
 7 |     @abstractmethod
 8 |     def run(self):
 9 |         pass
10 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | litellm
 2 | deepgram-sdk
 3 | playsound
 4 | langchain-groq 
 5 | langchain_google_genai
 6 | langchain_chroma
 7 | chromadb
 8 | Pillow
 9 | pyperclip
10 | pyaudio
11 | python-dotenv
12 | instructor 
13 | pydantic
14 | colorama
15 | tavily-python
16 | google-auth
17 | google-auth-oauthlib
18 | google-auth-httplib2
19 | google-api-python-client
20 | langchain-community
21 | langchain-text-splitters


--------------------------------------------------------------------------------
/scripts/create_index.py:
--------------------------------------------------------------------------------
 1 | from langchain_community.document_loaders import DirectoryLoader
 2 | from langchain_text_splitters import RecursiveCharacterTextSplitter
 3 | from langchain_google_genai import GoogleGenerativeAIEmbeddings
 4 | from langchain_chroma import Chroma
 5 | from dotenv import load_dotenv
 6 | 
 7 | # Load environment variables from a .env file
 8 | load_dotenv()
 9 | 
10 | print("Loading Docs...")
11 | loader = DirectoryLoader("./files")
12 | docs = loader.load()
13 | 
14 | print("Splitting Docs...")
15 | doc_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=200)
16 | doc_chunks = doc_splitter.split_documents(docs)
17 | 
18 | print("Loading embedding model...")
19 | embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
20 | 
21 | print("Creating vector store...")
22 | vectorstore = Chroma.from_documents(doc_chunks, embeddings, persist_directory="db")


--------------------------------------------------------------------------------
/src/tools/search/search_web_tool.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from pydantic import Field
 3 | from ..base_tool import BaseTool
 4 | from tavily import TavilyClient
 5 | 
 6 | class SearchWebTool(BaseTool):
 7 |     """
 8 |     A tool that searches the internet and get up to date information for a given query
 9 |     """
10 |     query: str = Field(description='Search query string')
11 | 
12 |     def search_web(self, query: str):
13 |         """
14 |         @notice Searches the internet for the given query.
15 |         @param query The search query.
16 |         @return content The combined content from the search results.
17 |         """
18 |         # Initialize the Tavily client for searching internet
19 |         tavily = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])
20 | 
21 |         content = ""
22 |         response = tavily.search(query=query, max_results=4)
23 |         for r in response['results']:
24 |             content += r['content']
25 |         return content
26 |     
27 |     def run(self):
28 |         return self.search_web(self.query)
29 | 
30 | 


--------------------------------------------------------------------------------
/src/speech_processing/conversation_manager.py:
--------------------------------------------------------------------------------
 1 | from .speech_to_text import get_transcript
 2 | from .text_to_speech import TTS
 3 | 
 4 | 
 5 | class ConversationManager:
 6 |     def __init__(self, assistant):
 7 |         self.transcription_response = ""
 8 |         self.assistant = assistant
 9 | 
10 |     async def main(self):
11 |         def handle_full_sentence(full_sentence):
12 |             self.transcription_response = full_sentence
13 | 
14 |         # Loop indefinitely until "goodbye" is said
15 |         while True:
16 |             await get_transcript(handle_full_sentence)
17 |             
18 |             # Check for "goodbye" to exit the loop
19 |             if "goodbye" in self.transcription_response.lower():
20 |                 break
21 |             
22 |             llm_response = self.assistant.invoke(self.transcription_response)
23 |             print(f"AI: {llm_response}")
24 | 
25 |             tts = TTS()
26 |             tts.speak(llm_response)
27 | 
28 |             # Reset transcription_response for the next loop iteration
29 |             self.transcription_response = ""


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | from src.agents.agent import Agent
 3 | from src.tools.calendar.calendar_tool import CalendarTool
 4 | from src.tools.contacts import AddContactTool, FetchContactTool
 5 | from src.tools.emails.emailing_tool import EmailingTool
 6 | from src.tools.search import SearchWebTool, KnowledgeSearchTool
 7 | from src.speech_processing.conversation_manager import ConversationManager
 8 | from src.prompts.prompts import assistant_prompt
 9 | from dotenv import load_dotenv
10 | 
11 | load_dotenv()
12 | 
13 | # Model identifier handed to the Agent (choose any model supported by LiteLLM).
14 | model = "groq/llama3-70b-8192"
15 | # model = "groq/llama-3.1-70b-versatile"
16 | # model = "gemini/gemini-1.5-pro"
17 | 
18 | # Tool classes (not instances) made available to the agent.
19 | tools_list = [
20 |     CalendarTool,
21 |     AddContactTool,
22 |     FetchContactTool,
23 |     EmailingTool,
24 |     SearchWebTool,
25 |     # KnowledgeSearchTool
26 | ]
27 | 
28 | # Create the assistant agent. This runs at import time, not only under the main guard.
29 | agent = Agent("Assistant Agent", model, tools_list, system_prompt=assistant_prompt)
30 | 
31 | if __name__ == "__main__":
32 |     manager = ConversationManager(agent)
33 |     asyncio.run(manager.main())  # returns once the conversation loop exits


--------------------------------------------------------------------------------
/scripts/fetch_index.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from langchain_google_genai import GoogleGenerativeAIEmbeddings
 3 | from langchain_chroma import Chroma
 4 | from langchain_core.prompts import ChatPromptTemplate
 5 | from langchain_groq import ChatGroq
 6 | from langchain_core.runnables import RunnablePassthrough
 7 | from langchain_core.output_parsers import StrOutputParser
 8 | from src.prompts.prompts import RAG_SEARCH_PROMPT_TEMPLATE
 9 | from dotenv import load_dotenv
10 | 
11 | # Load environment variables from a .env file
12 | load_dotenv()
13 | # Embedding model; scripts/create_index.py uses the same model name to build the index.
14 | embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
15 | # Open the Chroma store persisted in ./db (the directory scripts/create_index.py writes to).
16 | vectorstore = Chroma(persist_directory="db", embedding_function=embeddings)
17 | 
18 | # Semantic vector search: return the 3 most similar chunks per query.
19 | vectorstore_retreiver = vectorstore.as_retriever(search_kwargs={"k": 3})
20 | # Prompt template with {question} and {context} placeholders.
21 | prompt = ChatPromptTemplate.from_template(RAG_SEARCH_PROMPT_TEMPLATE)
22 | # Chat model that writes the final answer; the key comes from GROQ_API_KEY.
23 | llm = ChatGroq(model="llama3-70b-8192", api_key=os.getenv("GROQ_API_KEY"))
24 | 
25 | # Build the retrieval chain using LCEL: the input question is passed through
26 | # unchanged and also sent to the retriever, whose results fill {context}.
27 | rag_chain = (
28 |     {"context": vectorstore_retreiver, "question": RunnablePassthrough()}
29 |     | prompt
30 |     | llm
31 |     | StrOutputParser()
32 | )
33 | 


--------------------------------------------------------------------------------
/src/speech_processing/text_to_speech.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from dotenv import load_dotenv
 3 | from deepgram import DeepgramClient, SpeakOptions
 4 | from playsound import playsound
 5 | 
 6 | load_dotenv()
 7 | 
 8 | class TTS:
 9 |     def __init__(self):
10 |         self.filename = "output.wav"
11 |     
12 |     def speak(self, text):
13 |         try:
14 |             # STEP 1: Create a Deepgram client using the API key from environment variables
15 |             deepgram = DeepgramClient(api_key=os.getenv("DEEPGRAM_API_KEY"))
16 | 
17 |             # STEP 2: Configure the options (such as model choice, audio configuration, etc.)
18 |             options = SpeakOptions(
19 |                 model="aura-asteria-en",
20 |                 encoding="linear16",
21 |                 container="wav"
22 |             )
23 | 
24 |             # STEP 3: Call the save method on the speak property
25 |             SPEAK_OPTIONS = {"text": text}
26 |             response = deepgram.speak.v("1").save(self.filename, SPEAK_OPTIONS, options)
27 | 
28 |             # STEP 4: Play the audio file
29 |             playsound(self.filename)
30 | 
31 |         except Exception as e:
32 |             print(f"Exception: {e}")
33 | 
34 | if __name__ == "__main__":
35 |     tts = TTS()  # manual check when run directly: speak one fixed sentence
36 |     tts.speak("Hello, how can I help you today?")
37 | 


--------------------------------------------------------------------------------
/src/tools/search/knowledge_base_tool.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from pydantic import Field
 3 | from langchain_google_genai import GoogleGenerativeAIEmbeddings
 4 | from langchain_chroma import Chroma
 5 | from langchain_core.prompts import ChatPromptTemplate
 6 | from langchain_groq import ChatGroq
 7 | from langchain_core.runnables import RunnablePassthrough
 8 | from langchain_core.output_parsers import StrOutputParser
 9 | from src.prompts.prompts import RAG_SEARCH_PROMPT_TEMPLATE
10 | from ..base_tool import BaseTool
11 | 
12 | class KnowledgeSearchTool(BaseTool):
13 |     """
14 |     A tool that searches a knowledge base and answers user queries based on the stored information.
15 |     """
16 | 
17 |     query: str = Field(description="User's query to search in the knowledge base")
18 | 
19 |     def __init__(self):
20 |         super().__init__()
21 |         self.retriever = self.load_retriever()
22 | 
23 |     def load_retriever(self):
24 |         embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
25 |         vectorstore = Chroma(persist_directory="db", embedding_function=embeddings)
26 |         vectorstore_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
27 |         prompt = ChatPromptTemplate.from_template(RAG_SEARCH_PROMPT_TEMPLATE)
28 | 
29 |         llm = ChatGroq(model="mixtral-8x7b-32768", api_key=os.getenv("GROQ_API_KEY"))
30 |         app = (
31 |             {"context": vectorstore_retriever, "question": RunnablePassthrough()}
32 |             | prompt
33 |             | llm
34 |             | StrOutputParser()
35 |         )
36 |         return app
37 | 
38 |     def search_knowledge_base(self, query: str) -> str:
39 |         response = self.retriever.invoke(query)
40 |         return str(response)
41 | 
42 |     def run(self):
43 |         return self.search_knowledge_base(self.query)


--------------------------------------------------------------------------------
/src/tools/contacts/add_contact_tool.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from google.oauth2.credentials import Credentials
 3 | from googleapiclient.discovery import build
 4 | from googleapiclient.errors import HttpError
 5 | from google.auth.transport.requests import Request
 6 | from google_auth_oauthlib.flow import InstalledAppFlow
 7 | from pydantic import Field
 8 | from ..base_tool import BaseTool
 9 | from src.utils import SCOPES
10 | 
11 | class AddContactTool(BaseTool):
12 |     """
13 |     A tool for adding a new contact to Google Contacts
14 |     """
15 |     name: str = Field(description='Full name of the contact')
16 |     phone: str = Field(description='Phone number of the contact')
17 |     email: str = Field(default=None, description='Email address of the contact (optional)')
18 | 
19 |     def get_credentials(self):
20 |         """
21 |         Get and refresh Google Contacts API credentials
22 |         """
23 |         creds = None
24 |         if os.path.exists('token.json'):
25 |             creds = Credentials.from_authorized_user_file('token.json', SCOPES)
26 |         if not creds or not creds.valid:
27 |             if creds and creds.expired and creds.refresh_token:
28 |                 creds.refresh(Request())
29 |             else:
30 |                 flow = InstalledAppFlow.from_client_secrets_file(
31 |                     'credentials.json', SCOPES)
32 |                 creds = flow.run_local_server(port=0)
33 |             with open('token.json', 'w') as token:
34 |                 token.write(creds.to_json())
35 |         return creds
36 | 
37 |     def add_contact(self):
38 |         """
39 |         Adds a new contact to Google Contacts
40 |         """
41 |         try:
42 |             creds = self.get_credentials()
43 |             service = build('people', 'v1', credentials=creds)
44 | 
45 |             contact_body = {
46 |                 "names": [{"givenName": self.name}],
47 |                 "phoneNumbers": [{"value": self.phone}]
48 |             }
49 | 
50 |             if self.email:
51 |                 contact_body["emailAddresses"] = [{"value": self.email}]
52 | 
53 |             contact = service.people().createContact(body=contact_body).execute()
54 | 
55 |             return f"Contact added successfully. Contact ID: {contact.get('resourceName')}"
56 | 
57 |         except HttpError as error:
58 |             return f"An error occurred: {error}"
59 | 
60 |     def run(self):
61 |         return self.add_contact()


--------------------------------------------------------------------------------
/src/tools/emails/emailing_tool.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import smtplib
 3 | from email.mime.text import MIMEText
 4 | from email.mime.multipart import MIMEMultipart
 5 | from pydantic import Field
 6 | from ..base_tool import BaseTool
 7 | from src.tools.contacts import FetchContactTool
 8 | 
 9 | class EmailingTool(BaseTool):
10 |     """
11 |     A tool for sending emails using Gmail
12 |     """
13 |     recipient_name: str = Field(description='Name of the email recipient')
14 |     subject: str = Field(description='Subject of the email')
15 |     body: str = Field(description='Body content of the email')
16 | 
17 |     def fetch_recipient_email(self):
18 |         """
19 |         Fetches the email address of the recipient using FetchContactTool
20 |         """
21 |         try:
22 |             fetch_contact_tool = FetchContactTool(contact_name=self.recipient_name)
23 |             result = fetch_contact_tool.run()
24 |             contact_info = eval(result)
25 |             email = contact_info[0].get('emails', [None])[0]
26 |             if not email:
27 |                 raise ValueError(f"No email found for contact: {self.recipient_name}")
28 |             return email
29 |         except Exception as e:
30 |             raise ValueError(f"Failed to fetch email for {self.recipient_name}: {e}")
31 | 
32 |     def send_email_with_gmail(self, recipient_email):
33 |         """
34 |         Sends an email using Gmail SMTP
35 |         """
36 |         try:
37 |             sender_email = os.getenv("GMAIL_MAIL")
38 |             app_password = os.getenv("GMAIL_APP_PASSWORD")
39 | 
40 |             msg = MIMEMultipart()
41 |             msg['From'] = sender_email
42 |             msg['To'] = recipient_email
43 |             msg['Subject'] = self.subject
44 |             msg.attach(MIMEText(self.body, 'plain'))
45 | 
46 |             server = smtplib.SMTP_SSL('smtp.gmail.com', 465)
47 |             server.login(sender_email, app_password)
48 |             text = msg.as_string()
49 |             server.sendmail(sender_email, recipient_email, text)
50 |             server.quit()
51 |             return "Email sent successfully."
52 |         except Exception as e:
53 |             return f"Email was not sent successfully, error: {e}"
54 | 
55 |     def run(self):
56 |         try:
57 |             recipient_email = self.fetch_recipient_email()
58 |             return self.send_email_with_gmail(recipient_email)
59 |         except Exception as e:
60 |             return f"Failed to send email: {e}"
61 | 


--------------------------------------------------------------------------------
/src/tools/calendar/calendar_tool.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import datetime
 3 | from google.auth.transport.requests import Request
 4 | from google.oauth2.credentials import Credentials
 5 | from google_auth_oauthlib.flow import InstalledAppFlow
 6 | from googleapiclient.discovery import build
 7 | from googleapiclient.errors import HttpError
 8 | from pydantic import Field
 9 | from ..base_tool import BaseTool
10 | from src.utils import SCOPES
11 | 
12 | class CalendarTool(BaseTool):
13 |     """
14 |     A tool for booking events on Google Calendar
15 |     """
16 |     event_name: str = Field(description='Name of the event to be created')
17 |     event_datetime: str = Field(
18 |         description='Date and time of the event. This must be converted into a Python datetime.datetime object before use.'
19 |     )
20 |     event_description: str = Field(default="", description='Optional description of the event')
21 | 
22 |     def get_credentials(self):
23 |         """
24 |         Get and refresh Google Calendar API credentials
25 |         """
26 |         creds = None
27 |         if os.path.exists("token.json"):
28 |             creds = Credentials.from_authorized_user_file("token.json", SCOPES)
29 |         if not creds or not creds.valid:
30 |             if creds and creds.expired and creds.refresh_token:
31 |                 creds.refresh(Request())
32 |             else:
33 |                 flow = InstalledAppFlow.from_client_secrets_file(
34 |                     "credentials.json", SCOPES
35 |                 )
36 |                 creds = flow.run_local_server(port=0)
37 |             with open("token.json", "w") as token:
38 |                 token.write(creds.to_json())
39 |         return creds
40 | 
41 |     def create_event(self):
42 |         """
43 |         Creates an event on Google Calendar
44 |         """
45 |         try:
46 |             creds = self.get_credentials()
47 |             service = build("calendar", "v3", credentials=creds)
48 |             
49 |             # Convert the string to a datetime object
50 |             event_datetime = datetime.datetime.fromisoformat(self.event_datetime)
51 | 
52 |             event = {
53 |                 'summary': self.event_name,
54 |                 'description': self.event_description,
55 |                 'start': {
56 |                     'dateTime': event_datetime.isoformat(),
57 |                     'timeZone': 'UTC',
58 |                 },
59 |                 'end': {
60 |                     'dateTime': (event_datetime + datetime.timedelta(hours=1)).isoformat(),
61 |                     'timeZone': 'UTC',
62 |                 },
63 |             }
64 | 
65 |             event = service.events().insert(calendarId='primary', body=event).execute()
66 |             return f"Event created successfully. Event ID: {event.get('id')}"
67 | 
68 |         except HttpError as error:
69 |             return f"An error occurred: {error}"
70 | 
71 |     def run(self):
72 |         return self.create_event()


--------------------------------------------------------------------------------
/src/prompts/prompts.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | 
 3 | date_time = datetime.now()
 4 | 
 5 | assistant_prompt = f"""
 6 | # Role
 7 | You are an AI assistant responsible for helping users with their queries and tasks, you role is
 8 | to engage in a conversation with a human and utilize a set of specialized tools to provide comprehensive assistance.
 9 | 
10 | # Tasks
11 | - Determine whether tools are necessary to fulfill user requests
12 | - Use appropriate tools when needed to complete tasks
13 | - Provide helpful and accurate information or assistance in normal conversation when tools are not required
14 | 
15 | # SOP
16 | 1. Carefully analyze the user's request
17 | 2. Determine if any tools are needed to fulfill the request
18 | 3. If no tools are needed, engage in normal conversation to assist the user
19 | 4. If tools are needed, select and use the appropriate tool(s)
20 | 5. Provide a clear and concise response based on the information gathered or task completed
21 | 
22 | # Tools
23 | 1. CalendarTool: Used for booking events on Google Calendar. Provide event name, date/time, and optional description.
24 |     - The date/time given by user must alawys be converted into a Python datetime.datetime format, keeping in mind the current date/time.
25 | 2. AddContactTool: Used for adding new contacts to Google Contacts. Provide name, phone number, and optional email address.
26 | 3. FetchContactTool: Used for retrieving contact information from Google Contacts. Provide the contact's name (first or last) to search.
27 | 4. EmailingTool: Used for sending emails via Gmail. Provide recipient name, subject, and body content.
28 | 5. SearchWebTool: Used for performing web searches to gather up-to-date information. Provide a search query string.
29 | 
30 | # Examples
31 | - To book a calendar event: Use CalendarTool with event name "Team Meeting" and event_datetime "2024-08-15T14:00:00"
32 | - To add a contact: Use AddContactTool with name "John Doe" and phone "123-456-7890"
33 | - To fetch contact info: Use FetchContactTool with contact_name "John"
34 | - To send an email: Use EmailingTool with recipient_email "John", subject "Meeting Reminder", and body "Hi John, Don't forget our meeting tomorrow at 2 PM."
35 | - To search the web: Use SearchWebTool with query "latest news on AI advancements"
36 | 
37 | # Important/Notes
38 | - The current datetime is: {date_time}
39 | - Use as many tools as necessary to fully address the user's request
40 | - If you don't know the answer or if a tool doesn't work, respond with "I don't know"
41 | - Always provide helpful and accurate information, whether using tools or engaging in normal conversation
42 | - Ensure responses are clear, concise, and directly address the user's query or task
43 | """
44 | 
45 | RAG_SEARCH_PROMPT_TEMPLATE = """
46 | Using the following pieces of retrieved context, answer the question comprehensively and concisely.
47 | Ensure your response fully addresses the question based on the given context.
48 | 
49 | **IMPORTANT:**
50 | Just provide the answer and never mention or refer to having access to the external context or information in your answer.
51 | If you are unable to determine the answer from the provided context, state 'I don't know.'
52 | 
53 | Question: {question}
54 | Context: {context}
55 | """


--------------------------------------------------------------------------------
/src/tools/contacts/fetch_contact_tool.py:
--------------------------------------------------------------------------------
 1 | import os, re
 2 | from google.oauth2.credentials import Credentials
 3 | from googleapiclient.discovery import build
 4 | from googleapiclient.errors import HttpError
 5 | from google.auth.transport.requests import Request
 6 | from google_auth_oauthlib.flow import InstalledAppFlow
 7 | from pydantic import Field
 8 | from ..base_tool import BaseTool
 9 | from src.utils import SCOPES
10 | 
11 | class FetchContactTool(BaseTool):
12 |     """
13 |     A tool for fetching contact information from Google Contacts
14 |     """
15 |     contact_name: str = Field(description='Name (first or last) of the contact to search for')
16 | 
17 |     def get_credentials(self):
18 |         """
19 |         Get and refresh Google Contacts API credentials
20 |         """
21 |         creds = None
22 |         if os.path.exists('token.json'):
23 |             creds = Credentials.from_authorized_user_file('token.json', SCOPES)
24 |         if not creds or not creds.valid:
25 |             if creds and creds.expired and creds.refresh_token:
26 |                 creds.refresh(Request())
27 |             else:
28 |                 flow = InstalledAppFlow.from_client_secrets_file(
29 |                     'credentials.json', SCOPES)
30 |                 creds = flow.run_local_server(port=0)
31 |             with open('token.json', 'w') as token:
32 |                 token.write(creds.to_json())
33 |         return creds
34 | 
35 |     def fetch_contact(self):
36 |         """
37 |         Fetches contact information from Google Contacts
38 |         """
39 |         try:
40 |             creds = self.get_credentials()
41 |             service = build('people', 'v1', credentials=creds)
42 | 
43 |             # Search for the contact
44 |             results = service.people().searchContacts(
45 |                 query=self.contact_name,
46 |                 readMask='names,phoneNumbers,emailAddresses'
47 |             ).execute()
48 | 
49 |             connections = results.get('results', [])
50 | 
51 |             if not connections:
52 |                 return f"No contact found with the name: {self.contact_name}"
53 | 
54 |             matching_contacts = []
55 | 
56 |             for connection in connections:
57 |                 contact = connection['person']
58 |                 names = contact.get('names', [])
59 |                 if names:
60 |                     unstructured_name = names[0].get('unstructuredName', '').lower()
61 |                     # Prepare regex to identify first and last names
62 |                     first_name_pattern = r'^(\w+)'  # Match first word
63 |                     last_name_pattern = r'(\w+)$'   # Match last word
64 |                     first_match = re.search(first_name_pattern, unstructured_name)
65 |                     last_match = re.search(last_name_pattern, unstructured_name)
66 |                     
67 |                     if (first_match and self.contact_name.lower() == first_match.group(1)) or \
68 |                        (last_match and self.contact_name.lower() == last_match.group(1)):
69 |                         full_name = names[0].get('displayName', 'N/A')
70 |                         phone_numbers = [phone.get('value', 'N/A') for phone in contact.get('phoneNumbers', [])]
71 |                         emails = [email.get('value', 'N/A') for email in contact.get('emailAddresses', [])]
72 | 
73 |                         matching_contacts.append({
74 |                             'name': full_name,
75 |                             'phone_numbers': phone_numbers,
76 |                             'emails': emails
77 |                         })
78 | 
79 |             if not matching_contacts:
80 |                 return f"No contact found with the matching criteria: {self.contact_name}"
81 | 
82 |             return str(matching_contacts)
83 | 
84 |         except HttpError as error:
85 |             return f"An error occurred: {error}"
86 | 
87 |     def run(self):
88 |         return self.fetch_contact()
89 | 


--------------------------------------------------------------------------------
/src/speech_processing/speech_to_text.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | from dotenv import load_dotenv
  3 | from deepgram import (
  4 |     DeepgramClient,
  5 |     DeepgramClientOptions,
  6 |     LiveTranscriptionEvents,
  7 |     LiveOptions,
  8 |     Microphone
  9 | )
 10 | 
 11 | # Load environment variables from a .env file
 12 | load_dotenv()
 13 | 
 14 | class TranscriptCollector:
 15 |     def __init__(self):
 16 |         self.reset()
 17 | 
 18 |     def reset(self):
 19 |         # Initialize or reset the transcript parts list
 20 |         self.transcript_parts = []
 21 | 
 22 |     def add_part(self, part):
 23 |         # Add a part of the transcript to the list
 24 |         self.transcript_parts.append(part)
 25 | 
 26 |     def get_full_transcript(self):
 27 |         # Join all parts of the transcript into a single string
 28 |         return ' '.join(self.transcript_parts)
 29 | 
 30 | # Create an instance of TranscriptCollector to manage transcript parts
 31 | transcript_collector = TranscriptCollector()
 32 | 
 33 | async def get_transcript(callback):
 34 |     # Event to signal transcription completion
 35 |     transcription_complete = asyncio.Event()
 36 | 
 37 |     try:
 38 |         # Example of setting up a Deepgram client config
 39 |         config = DeepgramClientOptions(options={"keepalive": "true"})
 40 |         deepgram: DeepgramClient = DeepgramClient("", config)
 41 | 
 42 |         # Initialize a connection to Deepgram's asynchronous websocket API
 43 |         dg_connection = deepgram.listen.asyncwebsocket.v("1")
 44 |         print("Listening...")
 45 | 
 46 |         async def on_message(self, result, **kwargs):
 47 |             # Extract the transcript from the result
 48 |             sentence = result.channel.alternatives[0].transcript
 49 |             
 50 |             if not result.speech_final:
 51 |                 # Add interim results to the transcript collector
 52 |                 transcript_collector.add_part(sentence)
 53 |             else:
 54 |                 # Add the final part of the current sentence to the transcript collector
 55 |                 transcript_collector.add_part(sentence)
 56 |                 # Get the full sentence from the transcript collector
 57 |                 full_sentence = transcript_collector.get_full_transcript()
 58 |                 # Check if the full sentence is not empty before printing
 59 |                 if len(full_sentence.strip()) > 0:
 60 |                     full_sentence = full_sentence.strip()
 61 |                     print(f"Human: {full_sentence}")
 62 |                     # Call the callback with the full sentence
 63 |                     callback(full_sentence)
 64 |                     # Reset the transcript collector for the next sentence
 65 |                     transcript_collector.reset()
 66 |                     # Signal to stop transcription and exit
 67 |                     transcription_complete.set()
 68 | 
 69 |         # Set up the event listener for transcription events
 70 |         dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
 71 | 
 72 |         # Define the options for live transcription
 73 |         options = LiveOptions(
 74 |             model="nova-2",
 75 |             punctuate=True,
 76 |             language="en-US",
 77 |             encoding="linear16",
 78 |             channels=1,
 79 |             sample_rate=16000,
 80 |             endpointing=300,
 81 |             smart_format=True,
 82 |         )
 83 | 
 84 |         # Start the connection with the specified options
 85 |         await dg_connection.start(options)
 86 | 
 87 |         # Open a microphone stream on the default input device
 88 |         microphone = Microphone(dg_connection.send)
 89 |         microphone.start()
 90 | 
 91 |         # Wait for the transcription to complete
 92 |         await transcription_complete.wait()
 93 | 
 94 |         # Wait for the microphone to close
 95 |         microphone.finish()
 96 | 
 97 |         # Indicate that we've finished
 98 |         await dg_connection.finish()
 99 | 
100 |     except Exception as e:
101 |         print(f"Could not open socket: {e}")
102 |         return
103 | 
# Latest finished sentence produced by the transcriber (empty until one arrives)
transcription_response = ""


def handle_full_sentence(full_sentence):
    """Store *full_sentence* in the module-level ``transcription_response``."""
    globals()["transcription_response"] = full_sentence
110 | 
111 | if __name__ == "__main__":
112 |     # Script entry point: transcribe one sentence and store it through handle_full_sentence
113 |     asyncio.run(get_transcript(handle_full_sentence))


--------------------------------------------------------------------------------
/src/agents/agent.py:
--------------------------------------------------------------------------------
  1 | from colorama import Fore, init
  2 | from litellm import completion
  3 | 
  4 | # Initialize colorama for colored terminal output
  5 | init(autoreset=True)
  6 | 
  7 | 
  8 | class Agent:
  9 |     def __init__(self, name, model, tools=None, system_prompt=""):
 10 |         self.name = name
 11 |         self.model = model
 12 |         self.messages = []
 13 |         self.tools = tools if tools is not None else []
 14 |         self.tools_schemas = self.get_openai_tools_schema() if self.tools else None
 15 |         self.system_prompt = system_prompt
 16 |         if self.system_prompt and not self.messages:
 17 |             self.handle_messages_history("system", self.system_prompt)
 18 | 
 19 |     def invoke(self, message):
 20 |         print(Fore.GREEN + f"\nCalling Agent: {self.name}")
 21 |         self.handle_messages_history("user", message)
 22 |         result = self.execute()
 23 |         return result
 24 | 
 25 |     def execute(self):
 26 |         response_message = self.call_llm()
 27 |         response_content = response_message.content
 28 |         tool_calls = response_message.tool_calls
 29 |         if tool_calls:
 30 |             try:
 31 |                 response_content = self.run_tools(tool_calls)
 32 |             except Exception as e:
 33 |                 print(Fore.RED + f"\nError: {e}\n")
 34 |         return response_content
 35 | 
 36 |     def run_tools(self, tool_calls):
 37 |         for tool_call in tool_calls:
 38 |             self.execute_tool(tool_call)
 39 |         response_content = self.execute()
 40 |         return response_content
 41 | 
 42 |     def execute_tool(self, tool_call):
 43 |         function_name = tool_call.function.name
 44 |         func = next(
 45 |             iter([func for func in self.tools if func.__name__ == function_name])
 46 |         )
 47 | 
 48 |         if not func:
 49 |             return f"Error: Function {function_name} not found. Available functions: {[func.__name__ for func in self.tools]}"
 50 | 
 51 |         try:
 52 |             print(Fore.GREEN + f"\nCalling Tool: {function_name}")
 53 |             print(Fore.GREEN + f"Arguments: {tool_call.function.arguments}\n")
 54 |             func = func(**eval(tool_call.function.arguments))
 55 |             output = func.run()
 56 | 
 57 |             tool_message = {"name": function_name, "tool_call_id": tool_call.id}
 58 |             self.handle_messages_history("tool", output, tool_output=tool_message)
 59 | 
 60 |             return output
 61 |         except Exception as e:
 62 |             print("Error: ", str(e))
 63 |             return "Error: " + str(e)
 64 | 
 65 |     def call_llm(self):
 66 |         response = completion(
 67 |             model=self.model,
 68 |             messages=self.messages,
 69 |             tools=self.tools_schemas,
 70 |             temperature=0.1,
 71 |         )
 72 |         message = response.choices[0].message
 73 |         if message.tool_calls is None:
 74 |             message.tool_calls = []
 75 |         if message.function_call is None:
 76 |             message.function_call = {}
 77 |         self.handle_messages_history(
 78 |             "assistant", message.content, tool_calls=message.tool_calls
 79 |         )
 80 |         return message
 81 | 
 82 |     def get_openai_tools_schema(self):
 83 |         return [
 84 |             {"type": "function", "function": tool.openai_schema} for tool in self.tools
 85 |         ]
 86 | 
 87 |     def reset(self):
 88 |         self.memory.clear_messages()
 89 |         self.messages = []
 90 |         if self.system_prompt:
 91 |             self.handle_messages_history("system", self.system_prompt)
 92 | 
 93 |     def handle_messages_history(self, role, content, tool_calls=None, tool_output=None):
 94 |         message = {"role": role, "content": content}
 95 |         if tool_calls:
 96 |             message["tool_calls"] = self.parse_tool_calls(tool_calls)
 97 |         if tool_output:
 98 |             message["name"] = tool_output["name"]
 99 |             message["tool_call_id"] = tool_output["tool_call_id"]
100 |         # save short-term memory
101 |         self.messages.append(message)
102 | 
103 |     def parse_tool_calls(self, calls):
104 |         parsed_calls = []
105 |         for call in calls:
106 |             parsed_call = {
107 |                 "function": {
108 |                     "name": call.function.name,
109 |                     "arguments": call.function.arguments,
110 |                 },
111 |                 "id": call.id,
112 |                 "type": call.type,
113 |             }
114 |             parsed_calls.append(parsed_call)
115 |         return parsed_calls
116 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # AI Voice Assistant
  2 | 
  3 | [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
  4 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
  5 | 
  6 | An advanced AI-powered voice assistant that combines speech-to-text and text-to-speech capabilities with intelligent tool integration for seamless digital interactions.
  7 | 
  8 | ## Overview
  9 | 
 10 | This AI Voice Assistant provides natural conversational experiences through voice interaction while offering powerful integrations with Google services, web search, and personal knowledge management. The assistant can understand spoken commands, process requests using various tools, and respond with synthesized speech.
 11 | 
 12 | ## Key Features
 13 | 
 14 | ### Core Capabilities
 15 | - **Speech Recognition**: Real-time speech-to-text conversion using Deepgram API
 16 | - **Voice Synthesis**: Natural text-to-speech responses
 17 | - **Conversational AI**: Powered by advanced language models (Llama3, Gemini)
 18 | - **Multi-tool Integration**: Seamless access to productivity and information tools
 19 | 
 20 | ### Integrated Tools
 21 | 
 22 | | Tool | Description | Functionality |
 23 | |------|-------------|---------------|
 24 | | **CalendarTool** | Google Calendar integration | Schedule events with date, time, and descriptions |
 25 | | **AddContactTool** | Contact management | Add new contacts to Google Contacts |
 26 | | **FetchContactTool** | Contact retrieval | Search and retrieve contact information |
 27 | | **EmailingTool** | Gmail integration | Compose and send emails |
 28 | | **SearchWebTool** | Web search | Access real-time web information via Tavily API |
 29 | | **KnowledgeSearchTool** | Personal knowledge base | Query documents from `/files` directory |
 30 | 
 31 | ## Installation
 32 | 
 33 | ### Prerequisites
 34 | 
 35 | - Python 3.9 or higher
 36 | - Google Cloud Platform account with API access
 37 | - Required API keys (see configuration section)
 38 | 
 39 | ### Quick Start
 40 | 
 41 | 1. **Clone the repository**
 42 | ```bash
 43 | git clone https://github.com/danieladdisonorg/AI-Voice-Assitant.git
 44 | ```
 45 | 
 46 | 2. **Navigate to project directory**
 47 | ```bash
 48 | cd AI-Voice-Assitant
 49 | ```
 50 | 
 51 | 3. **Create virtual environment**
 52 | ```bash
 53 | python -m venv venv
 54 | ```
 55 | 
 56 | 4. **Activate virtual environment**
 57 | ```bash
 58 | source venv/bin/activate  # On Windows: venv\Scripts\activate
 59 | ```
 60 | 
 61 | 5. **Install dependencies**
 62 | ```bash
 63 | pip install -r requirements.txt
 64 | ```
 65 | 
 66 | ## Configuration
 67 | 
 68 | ### Environment Variables
 69 | 
 70 | Create a `.env` file in the project root:
 71 | 
 72 | ```env
 73 | # Google Services
 74 | GOOGLE_API_KEY=your_google_api_key
 75 | GEMINI_API_KEY=your_gemini_api_key
 76 | 
 77 | # Speech Processing
 78 | DEEPGRAM_API_KEY=your_deepgram_api_key
 79 | 
 80 | # Web Search
 81 | TAVILY_API_KEY=your_tavily_api_key
 82 | 
 83 | # Language Model
 84 | GROQ_API_KEY=your_groq_api_key
 85 | ```
 86 | 
 87 | ### Google API Setup
 88 | 
 89 | 1. Visit the [Google Cloud Console](https://console.cloud.google.com/)
 90 | 2. Create a new project or select an existing one
 91 | 3. Enable the following APIs:
 92 |    - Google Calendar API
 93 |    - Google Contacts API
 94 |    - Gmail API
 95 | 4. Create service account credentials
 96 | 5. Download the credentials JSON file
 97 | 6. Update the credentials path in your configuration
 98 | 
 99 | ### API Key Acquisition
100 | 
101 | - **Deepgram**: [Sign up](https://deepgram.com/) for speech-to-text services
102 | - **Tavily**: [Register](https://tavily.com/) for web search API access
103 | - **Groq**: [Get API key](https://groq.com/) for Llama3 model access
104 | - **Google Gemini**: [Get API key](https://aistudio.google.com/) from Google AI Studio for Gemini model access
105 | 
106 | ## Usage
107 | 
108 | ### Starting the Assistant
109 | 
110 | ```bash
111 | python main.py
112 | ```
113 | 
114 | ### Voice Commands Examples
115 | 
116 | **Calendar Management**
117 | - "Schedule a team meeting for tomorrow at 3 PM"
118 | - "Book a doctor's appointment for Friday at 10 AM"
119 | 
120 | **Contact Management**
121 | - "Add Sarah Johnson to my contacts, phone number 555-0123"
122 | - "What's Mike's email address?"
123 | 
124 | **Email Communication**
125 | - "Send an email to Jennifer about the quarterly report"
126 | - "Compose a message to the development team"
127 | 
128 | **Information Retrieval**
129 | - "Search for the latest news on renewable energy"
130 | - "Find my notes about the marketing strategy"
131 | 
132 | **Session Management**
133 | - Say "goodbye" to end the conversation
134 | 
135 | ## Project Structure
136 | 
137 | ```
138 | AI-Voice-Assitant/
139 | ├── main.py                 # Application entry point
140 | ├── requirements.txt        # Python dependencies
141 | ├── .env                   # Environment variables (create this)
142 | ├── files/                 # Personal knowledge base documents
143 | ├── src/                   # Source code: agents, prompts, speech processing, tools
144 | └── README.md              # Project documentation
145 | ```
146 | 
147 | ## Contributing
148 | 
149 | We welcome contributions! Please follow these steps:
150 | 
151 | 1. Fork the repository
152 | 2. Create a feature branch (`git checkout -b feature/amazing-feature`)
153 | 3. Commit your changes (`git commit -m 'Add amazing feature'`)
154 | 4. Push to the branch (`git push origin feature/amazing-feature`)
155 | 5. Open a Pull Request
156 | 
157 | ### Development Guidelines
158 | 
159 | - Follow PEP 8 style guidelines
160 | - Add unit tests for new features
161 | - Update documentation for API changes
162 | - Ensure all tests pass before submitting
163 | 
164 | ## Troubleshooting
165 | 
166 | ### Common Issues
167 | 
168 | **Authentication Errors**
169 | - Verify all API keys are correctly set in `.env`
170 | - Check Google Cloud credentials and permissions
171 | 
172 | **Speech Recognition Issues**
173 | - Ensure microphone permissions are granted
174 | - Verify Deepgram API key is valid
175 | 
176 | **Tool Integration Problems**
177 | - Confirm Google APIs are enabled in Cloud Console
178 | - Check service account permissions
179 | 
180 | ## License
181 | 
182 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
183 | 
184 | ## Support
185 | 
186 | For questions, issues, or suggestions:
187 | 
188 | - **Email**: aymenMir1001@gmail.com
189 | - **Issues**: [GitHub Issues](https://github.com/danieladdisonorg/AI-Voice-Assitant/issues)
190 | - **Discussions**: [GitHub Discussions](https://github.com/danieladdisonorg/AI-Voice-Assitant/discussions)
191 | 
192 | ## Acknowledgments
193 | 
194 | - [Deepgram](https://deepgram.com/) for speech processing capabilities
195 | - [Google Cloud](https://cloud.google.com/) for productivity service integrations
196 | - [Tavily](https://tavily.com/) for web search functionality
197 | - [Groq](https://groq.com/) for language model inference
198 | 
199 | ---
200 | 
201 | **Built with ❤️ for seamless AI-human interaction**
202 | 


--------------------------------------------------------------------------------