├── .gitignore
├── ADA
    ├── __pycache__
    │   ├── ADA_Local.cpython-312.pyc
    │   ├── ADA_Online.cpython-312.pyc
    │   └── ADA_Online_NoElevenlabs.cpython-312.pyc
    ├── WIDGETS
    │   ├── __pycache__
    │   │   ├── timer.cpython-312.pyc
    │   │   ├── camera.cpython-312.pyc
    │   │   ├── project.cpython-312.pyc
    │   │   ├── system.cpython-312.pyc
    │   │   ├── to_do_list.cpython-312.pyc
    │   │   ├── open_camera.cpython-312.pyc
    │   │   ├── system_info.cpython-312.pyc
    │   │   └── create_project.cpython-312.pyc
    │   ├── camera.py
    │   ├── project.py
    │   ├── timer.py
    │   ├── to_do_list.py
    │   └── system.py
    ├── ADA_Local.py
    ├── ADA_Online_NoElevenlabs.py
    └── ADA_Online.py
├── main_local.py
├── test
    ├── basic_tts.py
    ├── tts_latency_test.py
    └── function_call_accuracy_test.py
├── main_online_noelevenlabs.py
├── main_online.py
├── multimodal_live_api.py
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | /realtimesst.log
2 | /.env


--------------------------------------------------------------------------------
/ADA/__pycache__/ADA_Local.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nlouis38/ada/HEAD/ADA/__pycache__/ADA_Local.cpython-312.pyc


--------------------------------------------------------------------------------
/ADA/__pycache__/ADA_Online.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nlouis38/ada/HEAD/ADA/__pycache__/ADA_Online.cpython-312.pyc


--------------------------------------------------------------------------------
/ADA/WIDGETS/__pycache__/timer.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nlouis38/ada/HEAD/ADA/WIDGETS/__pycache__/timer.cpython-312.pyc


--------------------------------------------------------------------------------
/ADA/WIDGETS/__pycache__/camera.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nlouis38/ada/HEAD/ADA/WIDGETS/__pycache__/camera.cpython-312.pyc


--------------------------------------------------------------------------------
/ADA/WIDGETS/__pycache__/project.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nlouis38/ada/HEAD/ADA/WIDGETS/__pycache__/project.cpython-312.pyc


--------------------------------------------------------------------------------
/ADA/WIDGETS/__pycache__/system.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nlouis38/ada/HEAD/ADA/WIDGETS/__pycache__/system.cpython-312.pyc


--------------------------------------------------------------------------------
/ADA/WIDGETS/__pycache__/to_do_list.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nlouis38/ada/HEAD/ADA/WIDGETS/__pycache__/to_do_list.cpython-312.pyc


--------------------------------------------------------------------------------
/ADA/WIDGETS/__pycache__/open_camera.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nlouis38/ada/HEAD/ADA/WIDGETS/__pycache__/open_camera.cpython-312.pyc


--------------------------------------------------------------------------------
/ADA/WIDGETS/__pycache__/system_info.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nlouis38/ada/HEAD/ADA/WIDGETS/__pycache__/system_info.cpython-312.pyc


--------------------------------------------------------------------------------
/ADA/WIDGETS/__pycache__/create_project.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nlouis38/ada/HEAD/ADA/WIDGETS/__pycache__/create_project.cpython-312.pyc


--------------------------------------------------------------------------------
/ADA/__pycache__/ADA_Online_NoElevenlabs.cpython-312.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nlouis38/ada/HEAD/ADA/__pycache__/ADA_Online_NoElevenlabs.cpython-312.pyc


--------------------------------------------------------------------------------
/ADA/WIDGETS/camera.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | 
 3 | def open():
 4 |     """Opens the default camera using OpenCV and displays the video feed. Press 'q' to exit."""
 5 | 
 6 |     return "Camera is open"
 7 | 
 8 |     global cap  # Access the global cap variable
 9 |     cap = cv2.VideoCapture(0)
10 | 
11 |     if not cap.isOpened():
12 |         print("Error: Could not open camera.")
13 |         return
14 | 
15 |     while True:
16 |         ret, frame = cap.read()
17 |         if not ret:
18 |             print("Error: Could not read frame.")
19 |             break
20 | 
21 |         cv2.imshow('Camera Feed', frame)
22 | 
23 |         if cv2.waitKey(1) & 0xFF == ord('q'):
24 |             break
25 | 


--------------------------------------------------------------------------------
/main_local.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Important: **Use headphones**. This script uses the system default audio
 3 | input and output, which often won't include echo cancellation. So to prevent
 4 | the model from interrupting itself it is important that you use headphones. 
 5 | '''
 6 | 
 7 | from ADA.ADA_Local import ADA
 8 | import asyncio
 9 | 
10 | async def main():
11 |     ada = ADA()
12 |     async with asyncio.TaskGroup() as tg:
13 |         tg.create_task(ada.stt())
14 |         input_message = tg.create_task(ada.input_message())
15 |         tg.create_task(ada.send_prompt())
16 |         tg.create_task(ada.tts())
17 | 
18 |         await input_message
19 | 
20 | if __name__ == "__main__":
21 |     asyncio.run(main())


--------------------------------------------------------------------------------
/test/basic_tts.py:
--------------------------------------------------------------------------------
 1 | if __name__ == "__main__":
 2 |     from RealtimeTTS import TextToAudioStream, CoquiEngine
 3 | 
 4 |     def dummy_generator():
 5 |         yield "Hey guys! These here are realtime spoken sentences based on local text synthesis. "
 6 |         yield "With a local, neuronal, cloned voice. So every spoken sentence sounds unique."
 7 | 
 8 |     # for normal use with minimal logging:
 9 |     engine = CoquiEngine()
10 | 
11 |     # test with extended logging:
12 |     # import logging
13 |     # logging.basicConfig(level=logging.INFO)
14 |     # engine = CoquiEngine(level=logging.INFO)
15 | 
16 |     stream = TextToAudioStream(engine)
17 | 
18 |     print("Starting to play stream")
19 |     stream.feed(dummy_generator()).play(log_synthesized_text=True, output_wavfile = "output.wav")
20 | 
21 |     print("Playout finished")
22 | 
23 |     engine.shutdown()


--------------------------------------------------------------------------------
/main_online_noelevenlabs.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Important: **Use headphones**. This script uses the system default audio
 3 | input and output, which often won't include echo cancellation. So to prevent
 4 | the model from interrupting itself it is important that you use headphones. 
 5 | 
 6 | Before running this script, ensure the `GOOGLE_API_KEY` environment
 7 | variable is set to the api-key you obtained from Google AI Studio.
 8 | '''
 9 | 
10 | from ADA.ADA_Online_NoElevenlabs import ADA
11 | import asyncio
12 | 
13 | async def main():
14 |     ada = ADA()
15 |     async with asyncio.TaskGroup() as tg:
16 |         tg.create_task(ada.stt())
17 |         input_message = tg.create_task(ada.input_message())
18 |         tg.create_task(ada.send_prompt())
19 |         tg.create_task(ada.tts())
20 |         
21 |         await input_message
22 | 
23 | if __name__ == "__main__":
24 |     asyncio.run(main())


--------------------------------------------------------------------------------
/main_online.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Important: **Use headphones**. This script uses the system default audio
 3 | input and output, which often won't include echo cancellation. So to prevent
 4 | the model from interrupting itself it is important that you use headphones. 
 5 | 
 6 | Before running this script, ensure the `GOOGLE_API_KEY` environment
 7 | variable is set to the api-key you obtained from Google AI Studio.
 8 | '''
 9 | 
10 | from ADA.ADA_Online import ADA
11 | import asyncio
12 | 
13 | async def main():
14 |     ada = ADA()
15 |     async with asyncio.TaskGroup() as tg:
16 |         tg.create_task(ada.stt())
17 |         input_message = tg.create_task(ada.input_message())
18 |         tg.create_task(ada.send_prompt())
19 |         tg.create_task(ada.tts())
20 |         tg.create_task(ada.play_audio())
21 | 
22 |         await input_message
23 | 
24 | if __name__ == "__main__":
25 |     asyncio.run(main())


--------------------------------------------------------------------------------
/ADA/WIDGETS/project.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | def create_folder(folder_name, chat_history_file):
 4 |     """
 5 |     Creates a project folder and a text file to store chat history.
 6 | 
 7 |     Args:
 8 |         folder_name (str): The name of the project folder to create.
 9 |     """
10 | 
11 |     try:
12 |         # Create the project folder
13 |         if not os.path.exists(folder_name):
14 |             os.makedirs(folder_name)
15 |             file_path = os.path.join(folder_name, chat_history_file)
16 |             with open(file_path, 'w') as f:
17 |                 f.write("Chat history will be stored here.\n")
18 |             return(f"Project folder '{folder_name}' created successfully.")
19 |         else:
20 |             return(f"Project folder '{folder_name}' already exists.")
21 | 
22 |         # Create the chat history file inside the project folder
23 |         
24 | 
25 |     except OSError as e:
26 |         return(f"Error creating project folder or file: {e}")
27 | 
28 | if __name__ == "__main__":
29 |     create_folder(folder_name="", chat_history_file="")
30 | 


--------------------------------------------------------------------------------
/ADA/WIDGETS/timer.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import sys
 3 | 
 4 | def set(time_str):
 5 |     """
 6 |     Counts down from a specified time in HH:MM:SS format.
 7 | 
 8 |     Args:
 9 |         time_str (str): The time to count down from in HH:MM:SS format.
10 |     """
11 |     
12 |     try:
13 |         hours, minutes, seconds = map(int, time_str.split(':'))
14 |         total_seconds = hours * 3600 + minutes * 60 + seconds
15 | 
16 |         if not (0 <= hours <= 99 and 0 <= minutes <= 59 and 0 <= seconds <= 59):
17 |             print("Invalid time format. Hours should be between 00 and 99, minutes and seconds between 00 and 59.")
18 |             return
19 | 
20 |         while total_seconds > 0:
21 |             timer = divmod(total_seconds, 60)
22 |             hours = timer[0] // 60
23 |             minutes = timer[0] % 60
24 |             seconds = timer[1]
25 |             sys.stdout.write(f"\r{hours:02d}:{minutes:02d}:{seconds:02d}")
26 |             sys.stdout.flush()
27 |             time.sleep(1)
28 |             total_seconds -= 1
29 | 
30 |         print("\rTime's up!          ")
31 | 
32 |     except ValueError:
33 |         print("Invalid time format. Please use HH:MM:SS.")
34 | 
35 | if __name__ == "__main__":
36 |     set(time_str="00:10:00")
37 | 


--------------------------------------------------------------------------------
/ADA/WIDGETS/to_do_list.py:
--------------------------------------------------------------------------------
 1 | def create_list():
 2 |     """
 3 |     Creates an empty to-do list.
 4 | 
 5 |     Returns:
 6 |         list: An empty to-do list.
 7 |     """
 8 |     return []
 9 | 
10 | def add_task(todo_list, task):
11 |     """
12 |     Adds a task to the to-do list.
13 | 
14 |     Args:
15 |         todo_list (list): The to-do list.
16 |         task (str): The task to add.
17 |     """
18 |     todo_list.append(task)
19 |     print(f"Task '{task}' added to the to-do list.")
20 | 
21 | def delete_task(todo_list, task):
22 |     """
23 |     Deletes a task from the to-do list.
24 | 
25 |     Args:
26 |         todo_list (list): The to-do list.
27 |         task (str): The task to delete.
28 |     """
29 |     if task in todo_list:
30 |         todo_list.remove(task)
31 |         print(f"Task '{task}' removed from the to-do list.")
32 |     else:
33 |         print(f"Task '{task}' not found in the to-do list.")
34 | 
35 | def display_todo_list(todo_list):
36 |     """
37 |     Displays the current to-do list.
38 | 
39 |     Args:
40 |         todo_list (list): The to-do list.
41 |     """
42 |     if not todo_list:
43 |         print("Your to-do list is empty.")
44 |     else:
45 |         print("Your to-do list:")
46 |         for i, task in enumerate(todo_list):
47 |             print(f"{i+1}. {task}")
48 | 
49 | if __name__ == "__main__":
50 |     my_todo_list = create_list()
51 |     add_task(my_todo_list, "Grocery Shopping")
52 |     add_task(my_todo_list, "Pay Bills")
53 |     add_task(my_todo_list, "Walk the Dog")
54 |     display_todo_list(my_todo_list)
55 |     delete_task(my_todo_list, "Pay Bills")
56 |     display_todo_list(my_todo_list)
57 | 


--------------------------------------------------------------------------------
/ADA/WIDGETS/system.py:
--------------------------------------------------------------------------------
 1 | import platform
 2 | import psutil
 3 | import GPUtil
 4 | 
 5 | def info():
 6 |     """
 7 |     Gathers and prints system information including CPU, RAM, and GPU details.
 8 |     """
 9 |     
10 |     print("="*40, "System Information", "="*40)
11 |     uname = platform.uname()
12 |     print(f"System: {uname.system}")
13 |     print(f"Node Name: {uname.node}")
14 |     print(f"Release: {uname.release}")
15 |     print(f"Version: {uname.version}")
16 |     print(f"Machine: {uname.machine}")
17 |     print(f"Processor: {uname.processor}")
18 | 
19 |     # CPU information
20 |     print("="*40, "CPU Info", "="*40)
21 |     print("Physical cores:", psutil.cpu_count(logical=False))
22 |     print("Total cores:", psutil.cpu_count(logical=True))
23 |     cpufreq = psutil.cpu_freq()
24 |     print(f"Max Frequency: {cpufreq.max:.2f}Mhz")
25 |     print(f"Min Frequency: {cpufreq.min:.2f}Mhz")
26 |     print(f"Current Frequency: {cpufreq.current:.2f}Mhz")
27 |     print("CPU Usage Per Core:")
28 |     for i, percentage in enumerate(psutil.cpu_percent(percpu=True, interval=1)):
29 |         print(f"Core {i}: {percentage}%")
30 |     print(f"Total CPU Usage: {psutil.cpu_percent()}%")
31 | 
32 |     # Memory Information
33 |     print("="*40, "Memory Information", "="*40)
34 |     svmem = psutil.virtual_memory()
35 |     print(f"Total: {svmem.total / (1024.0 ** 3):.2f} GB")
36 |     print(f"Available: {svmem.available / (1024.0 ** 3):.2f} GB")
37 |     print(f"Used: {svmem.used / (1024.0 ** 3):.2f} GB")
38 |     print(f"Percentage: {svmem.percent}%")
39 | 
40 |     # GPU information
41 |     print("="*40, "GPU Info", "="*40)
42 |     try:
43 |         gpus = GPUtil.getGPUs()
44 |         for gpu in gpus:
45 |             print(f"GPU ID: {gpu.id}")
46 |             print(f"  GPU Name: {gpu.name}")
47 |             print(f"  GPU Load: {gpu.load*100:.2f}%")
48 |             print(f"  GPU Memory Total: {gpu.memoryTotal:.2f}MB")
49 |             print(f"  GPU Memory Used: {gpu.memoryUsed:.2f}MB")
50 |             print(f"  GPU Memory Free: {gpu.memoryFree:.2f}MB")
51 |             print(f"  GPU Temperature: {gpu.temperature:.2f} °C")
52 |     except Exception as e:
53 |         return(f"Error getting GPU information: {e}")
54 |     
55 |     return("system information has been returned")
56 | 
57 | 
58 | if __name__ == "__main__":
59 |     info()
60 | 


--------------------------------------------------------------------------------
/test/tts_latency_test.py:
--------------------------------------------------------------------------------
  1 | from RealtimeTTS import (
  2 |     TextToAudioStream,
  3 |     SystemEngine,
  4 |     AzureEngine,
  5 |     ElevenlabsEngine,
  6 |     CoquiEngine,
  7 |     OpenAIEngine,
  8 | )
  9 | from PyQt6.QtWidgets import (
 10 |     QApplication,
 11 |     QMainWindow,
 12 |     QVBoxLayout,
 13 |     QWidget,
 14 |     QComboBox,
 15 |     QTextEdit,
 16 |     QLabel,
 17 | )
 18 | from PyQt6.QtCore import pyqtSlot
 19 | import time
 20 | import sys
 21 | import os
 22 | from dotenv import load_dotenv # Added for API key loading
 23 | 
 24 | # --- Load Environment Variables ---
 25 | load_dotenv()
 26 | 
 27 | ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
 28 | 
 29 | class TTSApp(QMainWindow):
 30 |     def __init__(self):
 31 |         super().__init__()
 32 | 
 33 |         # Initialize TTS engines
 34 |         print("Initializing TTS Engines...")
 35 |         self.engine_system = SystemEngine()
 36 |         #self.engine_azure = AzureEngine(os.environ.get("AZURE_SPEECH_KEY"), os.environ.get("AZURE_SPEECH_REGION"))
 37 |         self.engine_elevenlabs = ElevenlabsEngine(ELEVENLABS_API_KEY)
 38 |         self.engine_coqui = CoquiEngine()
 39 |         #self.engine_openai = OpenAIEngine()
 40 |         print("TTS Engines initialized.")
 41 | 
 42 |         # Add a dictionary to map engine names to engine instances
 43 |         self.engines = {
 44 |             "System Engine": self.engine_system,
 45 |             #"OpenAI Engine": self.engine_openai,
 46 |             "Elevenlabs Engine": self.engine_elevenlabs,
 47 |             "Coqui Engine": self.engine_coqui,
 48 |             #"Azure Engine": self.engine_azure,
 49 |         }
 50 | 
 51 |         # Initialize TTS Stream
 52 |         self.stream = TextToAudioStream(
 53 |             self.engine_system, on_audio_stream_start=self.on_audio_stream_start
 54 |         )
 55 | 
 56 |         # Main widget and layout
 57 |         self.main_widget = QWidget(self)
 58 |         self.setCentralWidget(self.main_widget)
 59 |         self.layout = QVBoxLayout(self.main_widget)
 60 | 
 61 |         # Dropdown for TTS Engine Selection
 62 |         self.tts_engine_dropdown = QComboBox(self)
 63 |         for engine_name in self.engines.keys():
 64 |             self.tts_engine_dropdown.addItem(engine_name)
 65 |         self.tts_engine_dropdown.currentTextChanged.connect(self.tts_engine_changed)
 66 |         self.layout.addWidget(self.tts_engine_dropdown)
 67 | 
 68 |         # Big Input Text Control
 69 |         self.text_input = QTextEdit(self)
 70 |         self.text_input.textChanged.connect(self.text_pasted)
 71 |         self.layout.addWidget(self.text_input)
 72 | 
 73 |         # Label for Latency Display
 74 |         self.latency_label = QLabel("Latency: N/A", self)
 75 |         self.layout.addWidget(self.latency_label)
 76 | 
 77 |         self.setWindowTitle("TTS Synthesis Speed Test")
 78 | 
 79 |     @pyqtSlot()
 80 |     def tts_engine_changed(self):
 81 |         selected_engine_name = self.tts_engine_dropdown.currentText()
 82 |         selected_engine = self.engines[selected_engine_name]
 83 |         self.stream.load_engine(selected_engine)
 84 |         print(f"TTS Engine selected: {selected_engine_name}")
 85 | 
 86 |     @pyqtSlot()
 87 |     def text_pasted(self):
 88 |         pasted_text = self.text_input.toPlainText()
 89 |         print(f"Text pasted: {pasted_text}")
 90 | 
 91 |         self.time_pasted = time.time()
 92 |         self.stream.feed(pasted_text)
 93 |         self.stream.play_async()
 94 | 
 95 |     def on_audio_stream_start(self):
 96 |         self.time_started = time.time()
 97 |         latency = self.time_started - self.time_pasted
 98 |         self.latency_label.setText("Latency: {:.2f} seconds".format(latency))
 99 | 
100 | 
101 | if __name__ == "__main__":
102 |     app = QApplication(sys.argv)
103 |     mainWin = TTSApp()
104 |     mainWin.show()
105 |     sys.exit(app.exec())


--------------------------------------------------------------------------------
/test/function_call_accuracy_test.py:
--------------------------------------------------------------------------------
  1 | import ollama
  2 | import re 
  3 | import time
  4 | import json
  5 | 
  6 | response_array = []
  7 | success_count = 0
  8 | failure_count = 0
  9 | 
 10 | # extract the tool call from the response
 11 | def extract_tool_call(text, function_name):
 12 |     import io
 13 |     from contextlib import redirect_stdout
 14 | 
 15 |     pattern = r"```tool_code\s*(.*?)\s*```"
 16 |     match = re.search(pattern, text, re.DOTALL)
 17 |     if match:
 18 |         code = match.group(1).strip()
 19 |         if function_name is None:
 20 |             return False # No function name expected, but code was found
 21 |         if function_name in code:
 22 |             return True
 23 |         else:
 24 |             return False
 25 |     else:
 26 |         return None
 27 | 
 28 | system_instructions = """
 29 |     Your name is ADA (Advanced Design Assistant) you are a helpful AI assistant.  You are an expert in All STEM Fields providing concise and accurate information. When asked to perform a task, respond with the code to perform that task wrapped in ```tool_code```.  If the task does not require a function call, provide a direct answer without using ```tool_code```.  Always respond in a helpful and informative manner."
 30 | 
 31 |     You speak with a british accent and address people as Sir.
 32 | """
 33 | 
 34 | instruction_prompt_with_function_calling = '''At each turn, if you decide to invoke any of the function(s), it should be wrapped with ```tool_code```. If you decide to call a function the response should only have the function wrapped in tool code nothing more. The python methods described below are imported and available, you can only use defined methods also only call methods when you are sure they need to be called. The generated code should be readable and efficient. The response to a method will be wrapped in ```tool_output``` use it generate a helpful, friendly response. For example if the tool output says ```tool_output camera on```. You should say something like "The Camera is on".
 35 | 
 36 | For regular prompts do not call any functions or wrap the response in ```tool_code```.
 37 | 
 38 | The following Python methods are available:
 39 | 
 40 | ```python
 41 | def camera.open() -> None:
 42 |     """Open the camera"""
 43 | 
 44 | def system.info() -> None:
 45 |     """ Gathers and prints system information including CPU, RAM, and GPU details. """
 46 | 
 47 | def timer.set(time_str):
 48 |     """
 49 |     Counts down from a specified time in HH:MM:SS format.
 50 | 
 51 |     Args:
 52 |         time_str (str): The time to count down from in HH:MM:SS format.
 53 |     """
 54 | def project.create_folder(folder_name):
 55 |     """
 56 |     Creates a project folder and a text file to store chat history.
 57 | 
 58 |     Args:
 59 |         folder_name (str): The name of the project folder to create.
 60 |     """
 61 | 
 62 | ```
 63 | 
 64 | User: {user_message}
 65 | '''
 66 | 
 67 | def test(prompt, should_call_function, function_name):
 68 |     global success_count
 69 |     global failure_count
 70 |     messages = [{"role": "system", "content": system_instructions}, {"role": "user", "content": instruction_prompt_with_function_calling.format(user_message=prompt)}]
 71 |     response = ollama.chat(model="gemma3:4b-it-q4_K_M", messages=messages)
 72 |     #print(response['message']['content'])
 73 |     returned_value = extract_tool_call(response['message']['content'], function_name)
 74 | 
 75 |     if should_call_function == False:
 76 |         if returned_value == None:
 77 |             result = "Passed"
 78 |         else:
 79 |             result = "Failed"
 80 |     else:
 81 |         if returned_value == True:
 82 |             result = "Passed"
 83 |         else:
 84 |             result = "Failed"
 85 |     print(result)
 86 |     if result == "Passed":
 87 |         success_count += 1
 88 |     else:
 89 |         failure_count += 1
 90 |     
 91 |     data = (prompt, result, response['message']['content'])
 92 |     response_array.append(data)   
 93 | 
 94 | prompts_and_expectations = [
 95 |     ("Hello, how are you?", False, None),  # Should NOT call a function
 96 |     ("set 10 second timer", True, "timer.set"),  # Should call timer.set()
 97 |     ("Difference between DC and AC", False, None),  # Should NOT call a function
 98 |     ("Show me System Info", True, "system.info"),  # Should call system.info()
 99 |     ("Briefly explain gravity", False, None),  # Should NOT call a function
100 |     ("can you open the camera", True, "camera.open"),  # Should call camera.open()
101 |     ("Give me a short explanation of the internet", False, None),  # Should NOT call a function
102 |     ("set me a timer for 1 minute", True, "timer.set"),  # Should call timer.set()
103 |     ("What is the chemical symbol for water?", False, None),  # Should NOT call a function
104 |     ("open the camera", True, "camera.open"),  # Should call camera.open()
105 |     ("What is a synonym for happy?", False, None),  # Should NOT call a function
106 |     ("set me 33 second timer", True, "timer.set"),  # Should call timer.set()
107 |     ("What is the largest planet in our solar system?", False, None),  # Should NOT call a function
108 |     ("open camera", True, "camera.open"),  # Should call camera.open()
109 |     ("How many continents are there?", False, None),  # Should NOT call a function
110 |     ("Start a 10 hour timer", True, "timer.set"),  # Should call timer.set()
111 |     ("What is the opposite of up?", False, None),  # Should NOT call a function
112 |     ("Turn on the Camera", True, "camera.open"),  # Should call camera.open()
113 |     ("What is the speed of light in a vacuum?", False, None),  # Should NOT call a function
114 |     ("Timer for 10 minutes and 10 seconds", True, "timer.set"),  # Should call timer.set()
115 |     ("Who painted the Mona Lisa?", False, None),  # Should NOT call a function
116 |     ("Start the Camera", True, "camera.open"),  # Should call camera.open()
117 |     ("Thank you very much.", False, None),  # Should NOT call a function
118 |     ("Create new web shooter project", True, "project.create_folder"),  # Should call project.create_folder()
119 |     ("Please and thank you.", False, None),  # Should NOT call a function
120 |     ("Give me system info", True, "system.info"),  # Should call system.info()
121 |     ("No, thank you.", False, None),  # Should NOT call a function
122 |     ("Create new project called Iron Man", True, "project.create_folder"),  # Should call project.create_folder()
123 |     ("Where do Lions live", False, None),  # Should NOT call a function
124 |     ("Show me GPU information", True, "system.info"),  # Should call system.info()
125 |     ("What ocean is larger the atlantic or pacific", False, None), #Should NOT call a function
126 |     ("Make a new project folder name robot arm", True, "project.create_folder"), #Should call project.create_folder()
127 |     ("What is the largest country in the world", False, None), #Should NOT call a function
128 |     ("How much RAM am I using", True, "system.info"), #Should call system.info()
129 |     ("Briefly explain AI", False, None), #Should NOT call a function
130 |     ("Start a new project called robot car", True, "project.create_folder"), #Should call project.create_folder()
131 |     ("Give me CPU Info", True, "system.info"), #Should call system.info()
132 |     ("What is a brushless motor?", False, None), #Should NOT call a function
133 |     ("Make me a new project folder called AI assistant", True, "project.create_folder"), #Should call project.create_folder()
134 |     ("Goodnight!", False, None), #Should NOT call a function
135 | ]
136 | start_time = time.time()
137 | 
138 | for prompt, should_call_function, function_name in prompts_and_expectations:
139 |     test(prompt, should_call_function, function_name)
140 | 
141 | end_time = time.time()
142 | execution_time = end_time - start_time
143 | 
144 | print(f"Execution time: {execution_time} seconds")
145 | print(f"Success rate: {success_count / (success_count + failure_count) * 100}%")
146 | 
147 | log_filename = "response_log.json"
148 | with open(log_filename, "w") as f:
149 |     json.dump(
150 |         [{"prompt": item[0], "result": item[1], "model_response": item[2]} for item in response_array], f, indent=4
151 |     )
152 | 
153 | print(f"\nLog file '{log_filename}' created successfully.")


--------------------------------------------------------------------------------
/multimodal_live_api.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ## Setup
  3 | 
  4 | To install the dependencies for this script, run:
  5 | 
  6 | ``` 
  7 | pip install google-genai opencv-python pyaudio pillow mss
  8 | ```
  9 | 
 10 | Before running this script, ensure the `GOOGLE_API_KEY` environment
 11 | variable is set to the api-key you obtained from Google AI Studio.
 12 | 
 13 | Important: **Use headphones**. This script uses the system default audio
 14 | input and output, which often won't include echo cancellation. So to prevent
 15 | the model from interrupting itself it is important that you use headphones. 
 16 | 
 17 | ## Run
 18 | 
 19 | To run the script:
 20 | 
 21 | ```
 22 | python Get_started_LiveAPI.py
 23 | ```
 24 | 
 25 | The script takes a video-mode flag `--mode`, this can be "camera", "screen", or "none".
 26 | The default is "camera". To share your screen run:
 27 | 
 28 | ```
 29 | python Get_started_LiveAPI.py --mode screen
 30 | ```
 31 | """
 32 | 
 33 | import asyncio
 34 | import base64
 35 | import io
 36 | import os
 37 | import sys
 38 | import traceback
 39 | 
 40 | import cv2
 41 | import pyaudio
 42 | import PIL.Image
 43 | import mss
 44 | 
 45 | import argparse
 46 | 
 47 | from google import genai
 48 | from dotenv import load_dotenv # Added for API key loading
 49 | 
 50 | # --- Load Environment Variables ---
 51 | load_dotenv()
 52 | 
 53 | if sys.version_info < (3, 11, 0):
 54 |     import taskgroup, exceptiongroup
 55 | 
 56 |     asyncio.TaskGroup = taskgroup.TaskGroup
 57 |     asyncio.ExceptionGroup = exceptiongroup.ExceptionGroup
 58 | 
 59 | FORMAT = pyaudio.paInt16
 60 | CHANNELS = 1
 61 | SEND_SAMPLE_RATE = 16000
 62 | RECEIVE_SAMPLE_RATE = 24000
 63 | CHUNK_SIZE = 1024
 64 | 
 65 | MODEL = "models/gemini-2.0-flash-live-001"
 66 | 
 67 | DEFAULT_MODE = "camera"
 68 | 
 69 | GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
 70 | 
 71 | client = genai.Client(api_key=GOOGLE_API_KEY,http_options={"api_version": "v1beta"})
 72 | 
 73 | CONFIG = {"response_modalities": ["AUDIO"]}
 74 | 
 75 | pya = pyaudio.PyAudio()
 76 | 
 77 | 
 78 | class AudioLoop:
 79 |     def __init__(self, video_mode=DEFAULT_MODE):
 80 |         self.video_mode = video_mode
 81 | 
 82 |         self.audio_in_queue = None
 83 |         self.out_queue = None
 84 | 
 85 |         self.session = None
 86 | 
 87 |         self.send_text_task = None
 88 |         self.receive_audio_task = None
 89 |         self.play_audio_task = None
 90 | 
 91 |     async def send_text(self):
 92 |         while True:
 93 |             text = await asyncio.to_thread(
 94 |                 input,
 95 |                 "message > ",
 96 |             )
 97 |             if text.lower() == "q":
 98 |                 break
 99 |             await self.session.send(input=text or ".", end_of_turn=True)
100 | 
101 |     def _get_frame(self, cap):
102 |         # Read the frameq
103 |         ret, frame = cap.read()
104 |         # Check if the frame was read successfully
105 |         if not ret:
106 |             return None
107 |         # Fix: Convert BGR to RGB color space
108 |         # OpenCV captures in BGR but PIL expects RGB format
109 |         # This prevents the blue tint in the video feed
110 |         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
111 |         img = PIL.Image.fromarray(frame_rgb)  # Now using RGB frame
112 |         img.thumbnail([1024, 1024])
113 | 
114 |         image_io = io.BytesIO()
115 |         img.save(image_io, format="jpeg")
116 |         image_io.seek(0)
117 | 
118 |         mime_type = "image/jpeg"
119 |         image_bytes = image_io.read()
120 |         return {"mime_type": mime_type, "data": base64.b64encode(image_bytes).decode()}
121 | 
122 |     async def get_frames(self):
123 |         # This takes about a second, and will block the whole program
124 |         # causing the audio pipeline to overflow if you don't to_thread it.
125 |         cap = await asyncio.to_thread(
126 |             cv2.VideoCapture, 0
127 |         )  # 0 represents the default camera
128 | 
129 |         while True:
130 |             frame = await asyncio.to_thread(self._get_frame, cap)
131 |             if frame is None:
132 |                 break
133 | 
134 |             await asyncio.sleep(1.0)
135 | 
136 |             await self.out_queue.put(frame)
137 | 
138 |         # Release the VideoCapture object
139 |         cap.release()
140 | 
141 |     def _get_screen(self):
142 |         sct = mss.mss()
143 |         monitor = sct.monitors[0]
144 | 
145 |         i = sct.grab(monitor)
146 | 
147 |         mime_type = "image/jpeg"
148 |         image_bytes = mss.tools.to_png(i.rgb, i.size)
149 |         img = PIL.Image.open(io.BytesIO(image_bytes))
150 | 
151 |         image_io = io.BytesIO()
152 |         img.save(image_io, format="jpeg")
153 |         image_io.seek(0)
154 | 
155 |         image_bytes = image_io.read()
156 |         return {"mime_type": mime_type, "data": base64.b64encode(image_bytes).decode()}
157 | 
158 |     async def get_screen(self):
159 | 
160 |         while True:
161 |             frame = await asyncio.to_thread(self._get_screen)
162 |             if frame is None:
163 |                 break
164 | 
165 |             await asyncio.sleep(1.0)
166 | 
167 |             await self.out_queue.put(frame)
168 | 
169 |     async def send_realtime(self):
170 |         while True:
171 |             msg = await self.out_queue.get()
172 |             await self.session.send(input=msg)
173 | 
174 |     async def listen_audio(self):
175 |         mic_info = pya.get_default_input_device_info()
176 |         self.audio_stream = await asyncio.to_thread(
177 |             pya.open,
178 |             format=FORMAT,
179 |             channels=CHANNELS,
180 |             rate=SEND_SAMPLE_RATE,
181 |             input=True,
182 |             input_device_index=mic_info["index"],
183 |             frames_per_buffer=CHUNK_SIZE,
184 |         )
185 |         if __debug__:
186 |             kwargs = {"exception_on_overflow": False}
187 |         else:
188 |             kwargs = {}
189 |         while True:
190 |             data = await asyncio.to_thread(self.audio_stream.read, CHUNK_SIZE, **kwargs)
191 |             await self.out_queue.put({"data": data, "mime_type": "audio/pcm"})
192 | 
193 |     async def receive_audio(self):
194 |         "Background task to reads from the websocket and write pcm chunks to the output queue"
195 |         while True:
196 |             turn = self.session.receive()
197 |             async for response in turn:
198 |                 if data := response.data:
199 |                     self.audio_in_queue.put_nowait(data)
200 |                     continue
201 |                 if text := response.text:
202 |                     print(text, end="")
203 | 
204 |             # If you interrupt the model, it sends a turn_complete.
205 |             # For interruptions to work, we need to stop playback.
206 |             # So empty out the audio queue because it may have loaded
207 |             # much more audio than has played yet.
208 |             while not self.audio_in_queue.empty():
209 |                 self.audio_in_queue.get_nowait()
210 | 
211 |     async def play_audio(self):
212 |         stream = await asyncio.to_thread(
213 |             pya.open,
214 |             format=FORMAT,
215 |             channels=CHANNELS,
216 |             rate=RECEIVE_SAMPLE_RATE,
217 |             output=True,
218 |         )
219 |         while True:
220 |             bytestream = await self.audio_in_queue.get()
221 |             await asyncio.to_thread(stream.write, bytestream)
222 | 
223 |     async def run(self):
224 |         try:
225 |             async with (
226 |                 client.aio.live.connect(model=MODEL, config=CONFIG) as session,
227 |                 asyncio.TaskGroup() as tg,
228 |             ):
229 |                 self.session = session
230 | 
231 |                 self.audio_in_queue = asyncio.Queue()
232 |                 self.out_queue = asyncio.Queue(maxsize=5)
233 | 
234 |                 send_text_task = tg.create_task(self.send_text())
235 |                 tg.create_task(self.send_realtime())
236 |                 tg.create_task(self.listen_audio())
237 |                 if self.video_mode == "camera":
238 |                     tg.create_task(self.get_frames())
239 |                 elif self.video_mode == "screen":
240 |                     tg.create_task(self.get_screen())
241 | 
242 |                 tg.create_task(self.receive_audio())
243 |                 tg.create_task(self.play_audio())
244 | 
245 |                 await send_text_task
246 |                 raise asyncio.CancelledError("User requested exit")
247 | 
248 |         except asyncio.CancelledError:
249 |             pass
250 |         except ExceptionGroup as EG:
251 |             self.audio_stream.close()
252 |             traceback.print_exception(EG)
253 | 
254 | 
255 | if __name__ == "__main__":
256 |     parser = argparse.ArgumentParser()
257 |     parser.add_argument(
258 |         "--mode",
259 |         type=str,
260 |         default=DEFAULT_MODE,
261 |         help="pixels to stream from",
262 |         choices=["camera", "screen", "none"],
263 |     )
264 |     args = parser.parse_args()
265 |     main = AudioLoop(video_mode=args.mode)
266 |     asyncio.run(main.run())


--------------------------------------------------------------------------------
/ADA/ADA_Local.py:
--------------------------------------------------------------------------------
  1 | import ollama
  2 | import asyncio
  3 | import pyaudio
  4 | from RealtimeSTT import AudioToTextRecorder
  5 | from RealtimeTTS import TextToAudioStream, SystemEngine, CoquiEngine
  6 | import torch  # Import the torch library
  7 | import re
  8 | import time
  9 | import os
 10 | from .WIDGETS import system, timer, project, camera
 11 | 
 12 | ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
 13 | VOICE_ID = 'pFZP5JQG7iQjIQuC4Bku'
 14 | 
 15 | FORMAT = pyaudio.paInt16
 16 | CHANNELS = 1
 17 | SEND_SAMPLE_RATE = 16000
 18 | RECEIVE_SAMPLE_RATE = 24000
 19 | CHUNK_SIZE = 1024
 20 | 
 21 | class ADA:
 22 |     def __init__(self):
 23 |         print("initializing...")
 24 | 
 25 |         # Check for CUDA availability
 26 |         if torch.cuda.is_available():
 27 |             self.device = "cuda"
 28 |             print("CUDA is available. Using GPU.")
 29 |         else:
 30 |             self.device = "cpu"
 31 |             print("CUDA is not available. Using CPU.")
 32 | 
 33 |         self.model = "gemma3:4b-it-q4_K_M" #This is the smallest version of gemma3 for consistent function calling use gemma3:4b-it-q4_K_M  or higher if your computer is not strong enough use ada_online
 34 |         self.system_behavior = """
 35 |             Your name is ADA (Advanced Design Assistant) you are a helpful AI assistant.  You are an expert in All STEM Fields providing concise and accurate information. When asked to perform a task, respond with the code to perform that task wrapped in ```tool_code```.  If the task does not require a function call, provide a direct answer without using ```tool_code```.  Always respond in a helpful and informative manner."
 36 | 
 37 |             You speak with a british accent and address people as Sir.
 38 |         """
 39 | 
 40 |         self.instruction_prompt_with_function_calling = '''
 41 |             At each turn, if you decide to invoke any of the function(s), it should be wrapped with ```tool_code```. If you decide to call a function the response should only have the function wrapped in tool code nothing more. The python methods described below are imported and available, you can only use defined methods also only call methods when you are sure they need to be called. The generated code should be readable and efficient. 
 42 |             
 43 |             The response to a method will be wrapped in ```tool_output``` use the response to give the user an answer based on the information provided that is wrapped in ```tool_ouput```.
 44 | 
 45 |             For regular prompts do not call any functions or wrap the response in ```tool_code```.
 46 | 
 47 |             The following Python methods are available:
 48 | 
 49 |             ```python
 50 |             def camera.open() -> None:
 51 |                 """Open the camera"""
 52 | 
 53 |             def system.info() -> None:
 54 |                 """ Gathers and prints system information including CPU, RAM, and GPU details. Only call when user ask about computer information. """
 55 | 
 56 |             def timer.set(time_str):
 57 |                 """
 58 |                 Counts down from a specified time in HH:MM:SS format.
 59 | 
 60 |                 Args:
 61 |                     time_str (str): The time to count down from in HH:MM:SS format.
 62 |                 """
 63 |             def project.create_folder(folder_name):
 64 |                 """
 65 |                 Creates a project folder and a text file to store chat history.
 66 | 
 67 |                 Args:
 68 |                     folder_name (str): The name of the project folder to create.
 69 |                 """
 70 |         ```
 71 | 
 72 |         User: {user_message}
 73 |         '''
 74 | 
 75 |         self.model_params = {
 76 |             'temperature': 0.1,
 77 |             'top_p': 0.9,
 78 |         }
 79 |         self.conversation_history = []
 80 | 
 81 |         self.input_queue = asyncio.Queue()
 82 |         self.response_queue = asyncio.Queue()
 83 |         self.audio_queue = asyncio.Queue()
 84 |         self.recorder_config = {
 85 |             'model': 'large-v3',
 86 |             'spinner': False,
 87 |             'language': 'en',
 88 |             'silero_sensitivity': 0.01,
 89 |             'webrtc_sensitivity': 3,
 90 |             'post_speech_silence_duration': 0.1,
 91 |             'min_length_of_recording': 0.2,
 92 |             'min_gap_between_recordings': 0,
 93 | 
 94 |             #'realtime_model_type': 'tiny.en',
 95 |             #'enable_realtime_transcription': True,
 96 |             #'on_realtime_transcription_update': self.clear_queues,
 97 |         }
 98 | 
 99 |         try:
100 |             self.recorder = AudioToTextRecorder(**self.recorder_config)
101 |         except Exception as e:
102 |             print(f"Error initializing AudioToTextRecorder: {e}")
103 |             self.recorder = None  # Or handle this appropriately
104 | 
105 |         try:
106 |             self.pya = pyaudio.PyAudio()
107 |         except Exception as e:
108 |             print(f"Error initializing PyAudio: {e}")
109 |             self.pya = None
110 |         
111 |         self.response_start_time = None
112 |         self.audio_start_time = None
113 |         #self.engine = CoquiEngine()
114 |         self.engine = SystemEngine()
115 |         self.stream = TextToAudioStream(self.engine)
116 |         self.first_audio_byte_time = None
117 |         self.speech_to_text_time = None
118 | 
119 |     async def clear_queues(self, text=""):
120 |         """Clears all data from the input, response, and audio queues."""
121 |         queues = [self.input_queue, self.response_queue, self.audio_queue]
122 |         for q in queues:
123 |             while not q.empty():
124 |                 try:
125 |                     q.get_nowait()
126 |                 except asyncio.QueueEmpty:
127 |                     break  # Queue is empty
128 | 
129 |     async def input_message(self):
130 |         while True:
131 |             try:
132 |                 prompt = await asyncio.to_thread(input, "Enter your message: ")
133 |                 if prompt.lower() == "exit":
134 |                     await self.input_queue.put(None)  # Signal to exit
135 |                     break
136 |                 await self.clear_queues()
137 |                 self.prompt_start_time = time.time()
138 |                 await self.input_queue.put(prompt)
139 |             except Exception as e:
140 |                 print(f"Error in input_message: {e}")
141 |                 continue  # Continue the loop even if there's an error
142 | 
143 |     async def send_prompt(self):
144 |         while True:
145 |             try:
146 |                 prompt = await self.input_queue.get()
147 |                 if prompt is None:
148 |                     break  # Exit loop if None is received
149 |                 
150 |                 self.response_start_time = time.time() #start timer when prompt is sent
151 |                 
152 |                 messages = [{"role": "system", "content": self.system_behavior}] + self.conversation_history + [{"role": "user", "content": self.instruction_prompt_with_function_calling.format(user_message=prompt)}]
153 |                 try:
154 |                     response = ollama.chat(model=self.model, messages=messages, stream=True)
155 |                     full_response = ""
156 |                     in_function_call = False
157 |                     function_call = ""
158 | 
159 |                     for chunk in response:
160 |                         chunk_content = chunk['message']['content']
161 |                         if chunk_content == "```":
162 |                             if in_function_call == True:
163 |                                 in_function_call = False
164 |                                 function_call += "```"
165 |                                 tool_output = self.extract_tool_call(function_call)
166 | 
167 |                                 messages = [{"role": "system", "content": self.system_behavior}] + self.conversation_history + [{"role": "user", "content": self.instruction_prompt_with_function_calling.format(user_message=tool_output)}]
168 |                                 
169 |                                 response = ollama.chat(model=self.model, messages=messages, stream=True)
170 |                                 for chunk in response:
171 |                                     chunk_content = chunk['message']['content']
172 |                                     print(chunk_content, end="", flush=True)
173 |                                     await self.response_queue.put(chunk_content)
174 |                                 print()
175 |                                 continue
176 |                             else:
177 |                                in_function_call = True
178 | 
179 |                         if in_function_call == False:
180 |                             await self.response_queue.put(chunk_content)
181 |                             await asyncio.sleep(0)
182 |                         else:
183 |                             function_call += chunk_content                        
184 |                         if chunk_content:
185 |                             print(chunk_content, end="", flush=True) #print chunks on same line
186 |                             full_response += chunk_content
187 |                     print() # new line
188 |                     self.conversation_history.append({"role": "user", "content": prompt})
189 |                     self.conversation_history.append({"role": "assistant", "content": full_response})
190 | 
191 |                 except Exception as e:
192 |                     print(f"An error occurred in send_prompt: {e}")
193 |             except asyncio.CancelledError:
194 |                 break
195 |             except Exception as e:
196 |                 print(f"Unexpected error in send_prompt: {e}")
197 | 
198 |             finally:  # Ensure the sentinel value is added even if an error occurs
199 |                 await self.response_queue.put(None)
200 | 
201 |     def extract_tool_call(self, text):
202 |         import io
203 |         from contextlib import redirect_stdout
204 | 
205 |         pattern = r"```tool_code\s*(.*?)\s*```"
206 |         match = re.search(pattern, text, re.DOTALL)
207 |         if match:
208 |             code = match.group(1).strip()
209 |             # Capture stdout in a string buffer
210 |             f = io.StringIO()
211 |             with redirect_stdout(f):
212 |                 result = eval(code)
213 |             output = f.getvalue()
214 |             r = result if output == '' else output
215 |             return f'```tool_output\n{str(r).strip()}\n```'''
216 |         return None
217 | 
218 |     async def tts(self):
219 |         while True:
220 |             chunk = await self.response_queue.get()
221 |             if chunk == None:
222 |                 continue
223 |             if self.first_audio_byte_time is None:
224 |                 self.first_audio_byte_time = time.time()
225 |                 time_to_first_audio = self.first_audio_byte_time - self.prompt_start_time
226 |                 print(f"Time from prompt to first audio byte: {time_to_first_audio:.4f} seconds")
227 |             self.stream.feed(chunk)
228 |             self.stream.play_async()
229 | 
230 |     async def stt(self):
231 |         if self.recorder is None:
232 |             print("Audio recorder is not initialized.")
233 |             return
234 | 
235 |         while True:
236 |             try:
237 |                 text = await asyncio.to_thread(self.recorder.text)
238 |                 await self.clear_queues()
239 |                 await self.input_queue.put(text)
240 |                 print(text)
241 |             except Exception as e:
242 |                 print(f"Error in listen: {e}")
243 |                 continue  # Continue the loop even if there's an error
244 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # ADA (Advanced Design Assistant)
  2 | 
  3 | ADA is a helpful AI assistant specializing in STEM fields, designed to provide concise and accurate information and assist with various tasks through voice or text interaction. ADA comes in two versions: a local version (`ada_local`) that runs primarily on your machine and an online version (`ada_online`) that utilizes cloud-based services. A separate multimodal live demo (`multimodal_live_api.py`) is also included, showcasing real-time audio and video interaction.
  4 | 
  5 | **Recommendation:** While both versions are available, the **`ada_online` version is heavily recommended**. It leverages powerful cloud-based models (Google Gemini) and services (ElevenLabs TTS) that generally offer faster, higher-quality, and more reliable responses compared to the local version, which is dependent on your hardware capabilities. The online models have also been developed and refined for a longer period.
  6 | 
  7 | ## Features
  8 | 
  9 | - **Dual Versions:** Choose between running ADA locally (`ada_local`) or using online services (`ada_online`).
 10 | - **Real-time Interaction:** Communicate with ADA using voice (Speech-to-Text) and receive spoken responses (Text-to-Speech).
 11 | - **Function Calling & Grounding:** ADA can perform specific tasks by calling available functions (widgets) and use tools like Google Search to access current information.
 12 |   - Accessing system information (`system.info`)
 13 |   - Setting timers (`timer.set`)
 14 |   - Creating project folders (`project.create_folder`)
 15 |   - Opening the camera (`camera.open`)
 16 |   - Managing a To-Do list (`to_do_list.py` - _Note: Not currently integrated as a callable tool in provided main scripts_)
 17 |   - Getting weather (`get_weather`)
 18 |   - Calculating travel duration (`get_travel_duration`)
 19 | - **STEM Expertise:** Designed to assist with engineering, math, and science queries.
 20 | - **Conversational:** Engages in natural language conversation.
 21 | - **Multimodal Demo:** Includes a script (`multimodal_live_api.py`) for live interaction combining audio and video (camera/screen).
 22 | 
 23 | ## Setup
 24 | 
 25 | ### Prerequisites
 26 | 
 27 | - **Python:** Ensure you have Python installed (code uses features compatible with Python 3.11+).
 28 | - **Ollama (for `ada_local`)**: You need Ollama installed and running to serve the local LLM. Make sure you have downloaded the model specified in `ADA/ADA_Local.py` (e.g., `gemma3:4b-it-q4_K_M`). Performance heavily depends on your hardware.
 29 | - **CUDA (Optional, for `ada_local` & potentially local STT/TTS models)**: For better performance with local models, a CUDA-compatible GPU and the necessary drivers are recommended. ADA's local components attempt to automatically detect and use the GPU if available via PyTorch.
 30 | - **Microphone and Speakers:** Required for voice interaction (STT/TTS). **Headphones are strongly recommended** to prevent echo and self-interruption.
 31 | - **API Keys (for `ada_online` & `multimodal_live_api.py`)**: See the API Key Setup section below.
 32 | - **FFmpeg (Optional, Recommended)**: The `RealtimeSTT` or `RealtimeTTS` libraries (or their dependencies) might rely on FFmpeg for audio processing. If you encounter audio errors (like `torchaudio` warnings in logs), installing FFmpeg and ensuring it's in your system's PATH is recommended.
 33 | - **System Dependencies (e.g., `portaudio`)**: Libraries like `PyAudio` might require system-level libraries (like `portaudio` on Linux/macOS or specific drivers on Windows). Consult the documentation for `PyAudio` and `RealtimeTTS` (especially if using `CoquiEngine`) for specific OS requirements.
 34 | 
 35 | ### Installation
 36 | 
 37 | 1.  **Clone the Repository:**
 38 |     ```bash
 39 |     git clone https://github.com/Nlouis38/ada.git
 40 |     cd ada_v1
 41 |     ```
 42 | 2.  **Install Dependencies:**
 43 |     Create a virtual environment (recommended):
 44 |     ```bash
 45 |     python -m venv venv
 46 |     source venv/bin/activate # On Windows use `venv\Scripts\activate`
 47 |     ```
 48 |     Install the required Python libraries:
 49 |     ```bash
 50 |     pip install ollama websockets pyaudio RealtimeSTT RealtimeTTS torch google-generativeai opencv-python pillow mss psutil GPUtil elevenlabs python-dotenv python-weather googlemaps # Add any other specific libraries used
 51 |     ```
 52 | 
 53 | ## API Key Setup (Environment Variables Recommended)
 54 | 
 55 | Both `ada_online` and `multimodal_live_api.py` require API keys for cloud services. It is **highly recommended** to use environment variables for security instead of hardcoding keys into the scripts.
 56 | 
 57 | 1.  **Create a `.env` file:** In the root `ada_v1` directory, create a file named `.env`.
 58 | 2.  **Add Keys to `.env`:** Open the `.env` file and add your keys in the following format:
 59 | 
 60 |     ```dotenv
 61 |     # .env file
 62 |     GOOGLE_API_KEY=YOUR_GOOGLE_AI_STUDIO_KEY_HERE
 63 |     ELEVENLABS_API_KEY=YOUR_ELEVENLABS_KEY_HERE
 64 |     MAPS_API_KEY=YOUR_Maps_API_KEY_HERE
 65 |     ```
 66 | 
 67 | 3.  **Get the Keys:**
 68 | 
 69 |     - **Google Generative AI (Gemini API):**
 70 |       - **Purpose:** Core LLM for `ada_online` and `multimodal_live_api.py`.
 71 |       - **Get:** Visit [Google AI Studio](https://aistudio.google.com/), sign in, and create an API key.
 72 |     - **ElevenLabs:**
 73 |       - **Purpose:** High-quality Text-to-Speech (TTS) for `ada_online`.
 74 |       - **Get:** Go to [ElevenLabs](https://elevenlabs.io/), log in, and find your API key in your profile/settings.
 75 |     - **Google Maps:**
 76 |       - **Purpose:** Used by the `get_travel_duration` function tool in `ada_online`.
 77 |       - **Get:** Go to the [Google Cloud Console](https://console.cloud.google.com/), create a project (or use an existing one), enable the "Directions API", and create an API key under "Credentials".
 78 | 
 79 | 4.  **Code Usage:** The Python scripts (`ADA_Online.py`, `multimodal_live_api.py`, `tts_latency_test.py`) use `python-dotenv` to automatically load these variables from the `.env` file when the script starts.
 80 | 
 81 |     ```python
 82 |     # Example from ADA_Online.py
 83 |     from dotenv import load_dotenv
 84 |     load_dotenv() # Loads variables from .env
 85 | 
 86 |     ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
 87 |     GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
 88 |     MAPS_API_KEY = os.getenv("MAPS_API_KEY")
 89 | 
 90 |     # ... later use these variables ...
 91 |     self.client = genai.Client(api_key=GOOGLE_API_KEY, ...)
 92 |     # or when initializing ElevenLabsEngine/Websocket connection
 93 |     ```
 94 | 
 95 | ## Speech-to-Text (STT) and Text-to-Speech (TTS)
 96 | 
 97 | ADA uses real-time libraries for voice interaction:
 98 | 
 99 | - **STT (Speech-to-Text):**
100 |   - **Library:** `RealtimeSTT` is used in both `ada_local` and `ada_online`.
101 |   - **Functionality:** Captures audio from the default microphone, detects speech, and transcribes it to text using a backend model (e.g., Whisper `large-v3` specified in the configs).
102 | - **TTS (Text-to-Speech):**
103 |   - **Library:** `RealtimeTTS` provides the framework. Different _engines_ handle the actual synthesis:
104 |     - **`ada_local`:** Uses `RealtimeTTS` likely with `SystemEngine` (OS default TTS) or potentially `CoquiEngine` (local neural voice, requires setup). Quality and latency depend heavily on the chosen engine and system hardware.
105 |     - **`ada_online` (Recommended):** Uses `ElevenlabsEngine` via WebSockets. This typically provides very low latency and high-quality, natural-sounding voices, but requires an ElevenLabs API key and internet connection.
106 |     - **`ada_online_noelevenlabs`:** Uses `RealtimeTTS` with `SystemEngine`, offering an online LLM experience without needing an ElevenLabs key, but using the basic OS TTS voice.
107 | 
108 | ## Running ADA
109 | 
110 | ### `ada_local`
111 | 
112 | Uses Ollama for the LLM and local engines for STT/TTS. Performance depends significantly on your CPU/GPU and RAM.
113 | 
114 | - **LLM:** Served locally via Ollama (e.g., `gemma3:4b-it-q4_K_M`).
115 | - **STT:** `RealtimeSTT`.
116 | - **TTS:** `RealtimeTTS` with `SystemEngine` or `CoquiEngine`.
117 | - **To run:**
118 |   ```bash
119 |   # Ensure Ollama is running with the required model pulled
120 |   python main_local.py
121 |   ```
122 | 
123 | ### `ada_online` (Recommended)
124 | 
125 | Uses Google Gemini (cloud) for LLM and ElevenLabs (cloud) for TTS. Requires API keys and internet. Generally faster and higher quality.
126 | 
127 | - **LLM:** Google Gemini (`gemini-2.0-flash-live-001` or similar).
128 | - **STT:** `RealtimeSTT`.
129 | - **TTS:** `RealtimeTTS` with `ElevenlabsEngine` via WebSockets.
130 | - **To run:**
131 |   ```bash
132 |   # Make sure .env file is set up with API keys
133 |   python main_online.py
134 |   ```
135 | 
136 | ### `ada_online_noelevenlabs`
137 | 
138 | Uses Google Gemini (cloud) for LLM and local OS TTS. A middle ground if you want the better online LLM but don't have/want an ElevenLabs key.
139 | 
140 | - **LLM:** Google Gemini (`gemini-2.0-flash-live-001` or similar).
141 | - **STT:** `RealtimeSTT`.
142 | - **TTS:** `RealtimeTTS` with `SystemEngine`.
143 | - **To run:**
144 |   ```bash
145 |   # Make sure .env file is set up with GOOGLE_API_KEY and MAPS_API_KEY
146 |   python main_online_noelevenlabs.py
147 |   ```
148 | 
149 | ## Multimodal Live API Demo (`multimodal_live_api.py`)
150 | 
151 | This script demonstrates real-time, multimodal interaction using the Gemini Live API. It streams audio from your microphone and video frames (from your camera or screen) to the Gemini model and plays back the audio response.
152 | 
153 | ### Setup (Multimodal Demo)
154 | 
155 | - Ensure dependencies are installed (see main Installation section).
156 | - Ensure your `GOOGLE_API_KEY` is set in your `.env` file.
157 | - **Use headphones!**
158 | 
159 | ### Running (Multimodal Demo)
160 | 
161 | - **With Camera:**
162 |   ```bash
163 |   python multimodal_live_api.py --mode camera # or just python multimodal_live_api.py
164 |   ```
165 | - **With Screen Sharing:**
166 |   ```bash
167 |   python multimodal_live_api.py --mode screen
168 |   ```
169 | - **Audio Only:**
170 |   ```bash
171 |   python multimodal_live_api.py --mode none
172 |   ```
173 | - You can type text messages in the console while the audio/video stream is running. Type 'q' and Enter to quit.
174 | 
175 | ## Usage (Main ADA Scripts)
176 | 
177 | Once `main_local.py`, `main_online.py`, or `main_online_noelevenlabs.py` is running:
178 | 
179 | - **Voice Input:** Speak clearly into your microphone. The STT engine will detect speech and transcribe it.
180 | - **Text Input:** If you prefer typing, type your prompt into the console when it says "Enter your message:" and press Enter.
181 | - **Exit:** Type `exit` and press Enter.
182 | 
183 | ## Widgets / Tools
184 | 
185 | ADA (`ada_local` and `ada_online`) can utilize several built-in functions/tools:
186 | 
187 | - **Local Widgets (`WIDGETS/` directory):** Primarily used by `ada_local`.
188 |   - `camera.py`: Opens the default camera feed. (_Note: Implementation returns string, doesn't keep feed open_)
189 |   - `project.py`: Creates project folders.
190 |   - `system.py`: Provides system hardware information.
191 |   - `timer.py`: Sets countdown timers.
192 |   - `to_do_list.py`: Manages a simple to-do list. (_Not integrated_)
193 | - **Online Tools (Gemini API):** Used by `ada_online` versions.
194 |   - `GoogleSearch`: Accesses Google Search for current information.
195 |   - `get_weather`: Fetches weather using `python-weather`.
196 |   - `get_travel_duration`: Calculates travel time using `googlemaps`.
197 |   - `CodeExecution`: Allows Gemini to generate and potentially execute code (primarily for analysis/computation, not file system interaction).
198 | 
199 | ADA decides when to call these based on your request and the model's understanding.
200 | 
201 | ## Troubleshooting
202 | 
203 | - **Audio Issues (No Input/Output):**
204 |   - Ensure microphone/speakers are system defaults and not muted.
205 |   - Check `PyAudio` dependencies (`portaudio`).
206 |   - Ensure necessary permissions are granted for microphone access.
207 |   - Try different audio devices if available.
208 |   - Check for `FFmpeg` if errors mention audio encoding/decoding.
209 | - **API Key Errors (`ada_online`, `multimodal_live_api.py`):**
210 |   - Verify keys are correct in the `.env` file.
211 |   - Ensure the relevant APIs (Gemini, Maps, ElevenLabs) are enabled in their respective cloud consoles.
212 |   - Check API key quotas and billing status.
213 | - **Library Errors:**
214 |   - Ensure all dependencies from `Installation` are correctly installed in your active virtual environment.
215 |   - Some libraries (e.g., `torch`, `tensorflow` used by STT/TTS backends) might have specific CPU/GPU version requirements.
216 | - **Ollama Issues (`ada_local`):**
217 |   - Confirm Ollama service is running.
218 |   - Verify the specified model (e.g., `gemma3:4b-it-q4_K_M`) is downloaded (`ollama pull model_name`) and accessible.
219 |   - Check Ollama logs for errors.
220 | - **TTS Issues:**
221 |   - If using `ElevenlabsEngine`, check API key and internet connection.
222 |   - If using `CoquiEngine`, ensure it's installed correctly and models are downloaded.
223 |   - If using `SystemEngine`, ensure your OS's built-in TTS is functional. Latency might be higher.
224 | - **STT Issues:**
225 |   - Check microphone levels.
226 |   - Ensure `RealtimeSTT` model is appropriate for your hardware (larger models need more resources).
227 |   - Background noise can interfere. Use headphones.
228 | 


--------------------------------------------------------------------------------
/ADA/ADA_Online_NoElevenlabs.py:
--------------------------------------------------------------------------------
  1 | # --- Keep necessary imports ---
  2 | import asyncio
  3 | import pyaudio # Still needed for RealtimeSTT
  4 | from RealtimeSTT import AudioToTextRecorder
  5 | import torch
  6 | import re
  7 | from google.genai import types
  8 | from google import genai
  9 | import os
 10 | from google.genai.types import Tool, GoogleSearch, Part, Blob, Content
 11 | import python_weather
 12 | import googlemaps
 13 | from datetime import datetime
 14 | from dotenv import load_dotenv
 15 | 
 16 | # --- Add RealtimeTTS imports ---
 17 | from RealtimeTTS import TextToAudioStream, SystemEngine # Using SystemEngine as per reference example
 18 | # from RealtimeTTS import CoquiEngine # Uncomment if you want to use CoquiTTS (requires installation)
 19 | 
 20 | # --- Load Environment Variables (Remove ElevenLabs key) ---
 21 | load_dotenv()
 22 | # ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY") # Removed
 23 | GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
 24 | MAPS_API_KEY = os.getenv("MAPS_API_KEY")
 25 | 
 26 | # --- Validate API Keys ---
 27 | # if not ELEVENLABS_API_KEY: print("Error: ELEVENLABS_API_KEY not found in environment variables.") # Removed
 28 | if not GOOGLE_API_KEY: print("Error: GOOGLE_API_KEY not found in environment variables.")
 29 | if not MAPS_API_KEY: print("Error: MAPS_API_KEY not found in environment variables.")
 30 | # --- End API Key Validation ---
 31 | 
 32 | # VOICE_ID = 'pFZP5JQG7iQjIQuC4Bku' # Removed (Specific to ElevenLabs)
 33 | 
 34 | FORMAT = pyaudio.paInt16
 35 | CHANNELS = 1
 36 | # SEND_SAMPLE_RATE = 16000 # Keep if used by RealtimeSTT
 37 | # RECEIVE_SAMPLE_RATE = 24000 # RealtimeTTS handles its own output rate
 38 | # CHUNK_SIZE = 1024 # Less relevant for RealtimeTTS feed/play approach
 39 | 
 40 | class ADA:
 41 |     def __init__(self):
 42 |         print("initializing...")
 43 | 
 44 |         # Check for CUDA availability
 45 |         if torch.cuda.is_available():
 46 |             self.device = "cuda"
 47 |             print("CUDA is available. Using GPU.")
 48 |         else:
 49 |             self.device = "cpu"
 50 |             print("CUDA is not available. Using CPU.")
 51 | 
 52 |         # --- Initialize Google GenAI Client (Keep) ---
 53 |         self.client = genai.Client(api_key=GOOGLE_API_KEY, http_options={'api_version': 'v1beta'})
 54 |         self.model = "gemini-2.0-flash-live-001"
 55 | 
 56 |         # --- System Behavior Prompt (Keep) ---
 57 |         self.system_behavior = """
 58 |             Your name is Ada, which stands for Advanced Design Assistant.
 59 |             You have a joking personality. You are an AI designed to assist with engineering projects, and you are an expert in all engineering, math, and science disciplines.
 60 |             You address people as "Sir" and you also speak with a british accent.
 61 |             When answering, you respond using complete sentences and in a conversational tone. Make sure to keep tempo of answers quick so don't use too much commas, periods or overall punctuation.
 62 |             Any prompts that need current or recent data always use the search tool.
 63 |             """
 64 | 
 65 |         # --- Function Declarations (Keep) ---
 66 |         self.get_weather_func = types.FunctionDeclaration(
 67 |             name="get_weather",
 68 |             description="Get the current weather conditions (temperature, precipitation, description) for a specified city and state/country (e.g., 'Vinings, GA', 'London, UK').",
 69 |             parameters=types.Schema(
 70 |                 type=types.Type.OBJECT, properties={"location": types.Schema(type=types.Type.STRING, description="The city and state, e.g., San Francisco, CA or Vinings, GA")}, required=["location"]
 71 |             )
 72 |         )
 73 |         self.get_travel_duration_func = types.FunctionDeclaration(
 74 |             name="get_travel_duration",
 75 |             description="Calculates the estimated travel duration between a specified origin and destination using Google Maps. Considers current traffic for driving mode.",
 76 |             parameters=types.Schema(
 77 |                 type=types.Type.OBJECT, properties={
 78 |                     "origin": types.Schema(type=types.Type.STRING, description="The starting address or place name."),
 79 |                     "destination": types.Schema(type=types.Type.STRING, description="The destination address or place name."),
 80 |                     "mode": types.Schema(type=types.Type.STRING, description="Optional: Mode of transport ('driving', 'walking', etc.). Defaults to 'driving'.")
 81 |                 }, required=["origin", "destination"]
 82 |             )
 83 |         )
 84 |         # --- End Function Declarations ---
 85 | 
 86 |         # --- Map function names to actual methods (Keep) ---
 87 |         self.available_functions = {
 88 |             "get_weather": self.get_weather,
 89 |             "get_travel_duration": self.get_travel_duration
 90 |         }
 91 | 
 92 |         # --- Google Search Tool (Grounding) ---
 93 |         self.google_search_tool = Tool(
 94 |             google_search = GoogleSearch()
 95 |         )
 96 | 
 97 |         # --- Configuration (Updated tools list) ---
 98 |         self.config = types.LiveConnectConfig(
 99 |             system_instruction=types.Content(
100 |                 parts=[types.Part(text=self.system_behavior)]
101 |             ),
102 |             response_modalities=["TEXT"],
103 |             # ---> Updated tools list <---
104 |             tools=[self.google_search_tool, types.Tool(code_execution=types.ToolCodeExecution,function_declarations=[
105 |                 self.get_weather_func,
106 |                 self.get_travel_duration_func # Add the new function here
107 |                 ])]
108 |         )
109 |         # --- End Configuration ---
110 | 
111 |         # --- Queues (Remove audio_queue) ---
112 |         self.input_queue = asyncio.Queue()
113 |         self.response_queue = asyncio.Queue()
114 |         # self.audio_queue = asyncio.Queue() # Removed - RealtimeTTS handles playback
115 | 
116 |         # --- Recorder Config (Keep) ---
117 |         self.recorder_config = {
118 |             'model': 'large-v3',
119 |             'spinner': False,
120 |             'language': 'en',
121 |             'silero_sensitivity': 0.01,
122 |             'webrtc_sensitivity': 3,
123 |             'post_speech_silence_duration': 0.1,
124 |             'min_length_of_recording': 0.2,
125 |             'min_gap_between_recordings': 0,
126 |         }
127 | 
128 |         # --- Initialize Recorder and PyAudio (Keep) ---
129 |         try:
130 |             self.recorder = AudioToTextRecorder(**self.recorder_config)
131 |         except Exception as e:
132 |             print(f"Error initializing AudioToTextRecorder: {e}")
133 |             self.recorder = None
134 | 
135 |         try:
136 |             # PyAudio might still be needed by RealtimeSTT or underlying STT engine
137 |             self.pya = pyaudio.PyAudio()
138 |         except Exception as e:
139 |             print(f"Error initializing PyAudio: {e}")
140 |             self.pya = None
141 | 
142 |         # --- Initialize RealtimeTTS Engine and Stream ---
143 |         print("Initializing TTS Engine...")
144 |         try:
145 |             # Use SystemEngine for default OS TTS. Replace with CoquiEngine if preferred.
146 |             # self.engine = CoquiEngine(device=self.device) # Requires CoquiTTS installation
147 |             self.engine = SystemEngine()
148 |             self.stream = TextToAudioStream(self.engine)
149 |             print("TTS Engine Initialized.")
150 |         except Exception as e:
151 |             print(f"Error initializing RealtimeTTS: {e}")
152 |             self.engine = None
153 |             self.stream = None
154 |         # --- End TTS Initialization ---
155 | 
156 |         # --- End Initialization ---
157 | 
158 | 
159 |     # --- Function Implementations (Keep get_weather, get_travel_duration) ---
160 |     async def get_weather(self, location: str) -> dict | None:
161 |         """ Fetches current weather. """
162 |         async with python_weather.Client(unit=python_weather.IMPERIAL) as client:
163 |             try:
164 |                 weather = await client.get(location)
165 |                 weather_data = {
166 |                     'location': location,
167 |                     'current_temp_f': weather.temperature,
168 |                     'precipitation': weather.precipitation,
169 |                     'description': weather.description,
170 |                 }
171 |                 print(f"Weather data fetched: {weather_data}")
172 |                 return weather_data
173 |             except Exception as e:
174 |                 print(f"Error fetching weather for {location}: {e}")
175 |                 return {"error": f"Could not fetch weather for {location}."}
176 | 
177 |     def _sync_get_travel_duration(self, origin: str, destination: str, mode: str = "driving") -> str:
178 |         """ Synchronous helper for Google Maps API call """
179 |         if not MAPS_API_KEY or MAPS_API_KEY == "YOUR_PROVIDED_KEY":
180 |             print("Error: Google Maps API Key is missing or invalid.")
181 |             return "Error: Missing or invalid Google Maps API Key configuration."
182 |         try:
183 |             gmaps = googlemaps.Client(key=MAPS_API_KEY)
184 |             now = datetime.now()
185 |             print(f"Requesting directions: From='{origin}', To='{destination}', Mode='{mode}'")
186 |             directions_result = gmaps.directions(origin, destination, mode=mode, departure_time=now)
187 |             if directions_result:
188 |                 leg = directions_result[0]['legs'][0]
189 |                 result = f"Duration information not found in response for {mode}."
190 |                 if mode == "driving" and 'duration_in_traffic' in leg:
191 |                     duration_text = leg['duration_in_traffic']['text']
192 |                     result = f"Estimated travel duration ({mode}, with current traffic): {duration_text}"
193 |                 elif 'duration' in leg:
194 |                     duration_text = leg['duration']['text']
195 |                     result = f"Estimated travel duration ({mode}): {duration_text}"
196 |                 print(f"Directions Result: {result}")
197 |                 return result
198 |             else:
199 |                 print(f"No route found from {origin} to {destination} via {mode}.")
200 |                 return f"Could not find a route from {origin} to {destination} via {mode}."
201 |         except googlemaps.exceptions.ApiError as api_err:
202 |              print(f"Google Maps API Error: {api_err}")
203 |              return f"Error contacting Google Maps: {api_err}"
204 |         except Exception as e:
205 |             print(f"An unexpected error occurred during travel duration lookup: {e}")
206 |             return f"An unexpected error occurred: {e}"
207 | 
208 |     async def get_travel_duration(self, origin: str, destination: str, mode: str = "driving") -> dict:
209 |         """ Async wrapper to get travel duration. """
210 |         print(f"Received request for travel duration from: {origin} to: {destination}, Mode: {mode}")
211 |         if not mode: mode = "driving"
212 |         try:
213 |             result_string = await asyncio.to_thread(self._sync_get_travel_duration, origin, destination, mode)
214 |             return {"duration_result": result_string}
215 |         except Exception as e:
216 |             print(f"Error calling _sync_get_travel_duration via to_thread: {e}")
217 |             return {"duration_result": f"Failed to execute travel duration request: {e}"}
218 |     # --- End Function Implementations ---
219 | 
220 | 
221 |     async def clear_queues(self, text=""):
222 |         """Clears input and response queues."""
223 |         # Removed audio_queue
224 |         queues = [self.input_queue, self.response_queue]
225 |         for q in queues:
226 |             while not q.empty():
227 |                 try:
228 |                     q.get_nowait()
229 |                 except asyncio.QueueEmpty:
230 |                     break
231 | 
232 |     async def input_message(self):
233 |         """ Handles user text input (Keep) """
234 |         while True:
235 |             try:
236 |                 prompt = await asyncio.to_thread(input, "Enter your message: ")
237 |                 if prompt.lower() == "exit":
238 |                     await self.input_queue.put(None) # Use None as signal
239 |                     print("exit input")
240 |                     break
241 |                 await self.clear_queues()
242 |                 await self.input_queue.put(prompt)
243 |             except Exception as e:
244 |                 print(f"Error in input_message: {e}")
245 |                 continue
246 | 
247 |     # --- send_prompt: (Keep Function Calling/Grounding logic) ---
248 |     async def send_prompt(self):
249 |         """Manages the Gemini conversation session, handling text and tool calls."""
250 |         print("Starting Gemini session manager...")
251 |         try:
252 |             async with self.client.aio.live.connect(model=self.model, config=self.config) as session:
253 |                 print("Gemini session connected.")
254 |                 while True:
255 |                     message = await self.input_queue.get()
256 |                     if message is None: # Check for exit signal
257 |                         print("Exit signal received in send_prompt.")
258 |                         break
259 |                     if not session:
260 |                         print("Gemini session is not active.")
261 |                         self.input_queue.task_done(); continue
262 | 
263 |                     print(f"Sending FINAL text input to Gemini: {message}")
264 |                     await session.send(input=message, end_of_turn=True)
265 |                     print("Final text message sent to Gemini, waiting for response...")
266 | 
267 |                     # --- Process responses (Keep Function Calling Logic) ---
268 |                     async for response in session.receive():
269 |                         try:
270 |                             # --- Handle Tool Calls (Function Calling) ---
271 |                             if response.tool_call:
272 |                                 function_call_details = response.tool_call.function_calls[0]
273 |                                 tool_call_id = function_call_details.id
274 |                                 tool_call_name = function_call_details.name
275 |                                 tool_call_args = dict(function_call_details.args)
276 |                                 print(f"--- Received Tool Call: {tool_call_name} with args: {tool_call_args} (ID: {tool_call_id}) ---")
277 | 
278 |                                 if tool_call_name in self.available_functions:
279 |                                     function_to_call = self.available_functions[tool_call_name]
280 |                                     try:
281 |                                         function_result = await function_to_call(**tool_call_args)
282 |                                         func_resp = types.FunctionResponse(
283 |                                             id=tool_call_id, name=tool_call_name, response={"content": function_result}
284 |                                         )
285 |                                         print(f"--- Sending Tool Response for {tool_call_name} (ID: {tool_call_id}) ---")
286 |                                         await session.send(input=func_resp, end_of_turn=False)
287 |                                     except Exception as e: print(f"Error executing function {tool_call_name}: {e}")
288 |                                 else: print(f"Error: Unknown function called by Gemini: {tool_call_name}")
289 |                                 continue # Move to next response chunk
290 | 
291 |                             # --- Handle Text Responses ---
292 |                             elif response.text:
293 |                                 text_chunk = response.text
294 |                                 print(text_chunk, end="", flush=True)
295 |                                 await self.response_queue.put(text_chunk) # Put chunk onto queue for TTS
296 | 
297 |                             # --- (Optional) Handle Executable Code Tool ---
298 |                             elif (response.server_content and response.server_content.model_turn and
299 |                                   response.server_content.model_turn.parts and response.server_content.model_turn.parts[0].executable_code):
300 |                                 try:
301 |                                     executable_code = response.server_content.model_turn.parts[0].executable_code
302 |                                     print(f"\n--- Received Executable Code ({str(executable_code.language)}) ---")
303 |                                     print(executable_code.code)
304 |                                     print("------------------------------------------")
305 |                                 except Exception: pass # Ignore errors silently
306 | 
307 |                         except Exception as e: print(f"\nError processing Gemini response chunk: {e}")
308 |                     # --- End Processing Responses ---
309 | 
310 |                     print("\nEnd of Gemini response stream for this turn.")
311 |                     await self.response_queue.put(None) # Signal end of response for TTS
312 |                     self.input_queue.task_done()
313 | 
314 |         except asyncio.CancelledError: print("Gemini session task cancelled.")
315 |         except Exception as e: print(f"Error in Gemini session manager: {e}")
316 |         finally:
317 |             print("Gemini session manager finished.")
318 |             await self.response_queue.put(None) # Ensure sentinel is sent on exit/error
319 | 
320 | 
321 |     # --- tts: Replaced with RealtimeTTS logic ---
322 |     async def tts(self):
323 |         """ Feeds text chunks to RealtimeTTS stream for synthesis and playback. """
324 |         if not self.stream:
325 |             print("RealtimeTTS stream not initialized. Cannot perform TTS.")
326 |             return
327 | 
328 |         print("TTS task started, waiting for text chunks...")
329 |         while True:
330 |             try:
331 |                 chunk = await self.response_queue.get()
332 |                 if chunk is None:
333 |                     # End of response turn signal
334 |                     print("TTS received end-of-response signal.")
335 |                     # Optional: Add a small delay or check stream state before continuing
336 |                     # self.stream.stop() # Might stop prematurely if playback is async
337 |                     self.response_queue.task_done()
338 |                     continue # Wait for the next turn
339 | 
340 |                 if chunk: # Ensure chunk is not empty
341 |                     # Feed the text chunk to the TTS stream
342 |                     self.stream.feed(chunk)
343 |                     # Start/continue asynchronous playback of buffered audio
344 |                     self.stream.play_async()
345 | 
346 |                 self.response_queue.task_done()
347 | 
348 |             except asyncio.CancelledError:
349 |                 print("TTS task cancelled.")
350 |                 if self.stream: self.stream.stop() # Stop playback on cancellation
351 |                 break
352 |             except Exception as e:
353 |                 print(f"Error in TTS loop: {e}")
354 |                 if self.stream: self.stream.stop() # Stop playback on error
355 |                 # Add a small delay or attempt recovery if desired
356 |                 await asyncio.sleep(1)
357 | 
358 | 
359 |     # --- play_audio: Removed, handled by RealtimeTTS ---
360 |     # async def play_audio(self):
361 |     #     """ Removed - Playback is now handled by self.stream.play_async() in tts method """
362 |     #     pass
363 | 
364 |     async def stt(self):
365 |         """ Listens via microphone and puts transcribed text onto input_queue. (Keep) """
366 |         if self.recorder is None:
367 |             print("Audio recorder (RealtimeSTT) is not initialized.")
368 |             return
369 | 
370 |         print("Starting Speech-to-Text engine...")
371 |         while True:
372 |             try:
373 |                 text = await asyncio.to_thread(self.recorder.text)
374 |                 if text:
375 |                     print(f"STT Detected: {text}")
376 |                     await self.clear_queues()
377 |                     await self.input_queue.put(text)
378 |             except asyncio.CancelledError:
379 |                  print("STT task cancelled.")
380 |                  break
381 |             except Exception as e:
382 |                 print(f"Error in STT loop: {e}")
383 |                 await asyncio.sleep(0.5)
384 | # --- End of ADA Class ---
385 | 
386 | # --- Main Execution Block (Updated for RealtimeTTS) ---
387 | async def main():
388 |     print("Starting Ada Assistant...")
389 |     ada = ADA()
390 | 
391 |     if ada.pya is None or ada.recorder is None or ada.stream is None:
392 |          print("Failed to initialize audio/TTS components. Exiting.")
393 |          return
394 | 
395 |     # Create tasks for each concurrent operation (Removed play_audio)
396 |     tasks = [
397 |         asyncio.create_task(ada.stt()),          # Speech to Text -> input_queue
398 |         asyncio.create_task(ada.send_prompt()),  # input_queue -> Gemini (handles tools) -> response_queue
399 |         asyncio.create_task(ada.tts()),          # response_queue -> RealtimeTTS (feed + play_async)
400 |         # asyncio.create_task(ada.input_message()) # Optional: Uncomment for text input instead of STT
401 |     ]
402 | 
403 |     # Run tasks concurrently
404 |     try:
405 |         await asyncio.gather(*tasks)
406 |     except asyncio.CancelledError:
407 |         print("Main tasks cancelled.")
408 |     finally:
409 |          print("Cleaning up...")
410 |          if ada.stream:
411 |              print("Stopping TTS Stream...")
412 |              ada.stream.stop() # Ensure TTS stream is stopped
413 |          for task in tasks:
414 |               if not task.done(): task.cancel()
415 |          await asyncio.gather(*tasks, return_exceptions=True)
416 |          if ada.pya:
417 |               print("Terminating PyAudio.")
418 |               # Use run_in_executor for thread safety if needed, or simple to_thread
419 |               await asyncio.to_thread(ada.pya.terminate)
420 | 
421 | if __name__ == "__main__":
422 |     try:
423 |         asyncio.run(main())
424 |     except KeyboardInterrupt:
425 |         print("\nExiting Ada Assistant...")
426 |     except Exception as e:
427 |          print(f"\nAn unexpected error occurred in main: {e}")


--------------------------------------------------------------------------------
/ADA/ADA_Online.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import websockets
  3 | import json
  4 | import base64
  5 | import pyaudio
  6 | from RealtimeSTT import AudioToTextRecorder
  7 | import torch  # Import the torch library
  8 | import re
  9 | from google.genai import types
 10 | import asyncio
 11 | from google import genai
 12 | import os
 13 | from google.genai.types import Tool, GoogleSearch, Part, Blob, Content
 14 | import python_weather
 15 | import googlemaps # Added for travel duration
 16 | from datetime import datetime # Added for travel duration
 17 | from dotenv import load_dotenv # Added for API key loading
 18 | 
 19 | # --- Load Environment Variables ---
 20 | load_dotenv()
 21 | ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
 22 | GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
 23 | MAPS_API_KEY = os.getenv("MAPS_API_KEY") # Added Maps API Key
 24 | 
 25 | # --- Validate API Keys ---
 26 | if not ELEVENLABS_API_KEY: print("Error: ELEVENLABS_API_KEY not found in environment variables.")
 27 | if not GOOGLE_API_KEY: print("Error: GOOGLE_API_KEY not found in environment variables.")
 28 | if not MAPS_API_KEY: print("Error: MAPS_API_KEY not found in environment variables.")
 29 | # --- End API Key Validation ---
 30 | 
 31 | VOICE_ID = 'pFZP5JQG7iQjIQuC4Bku'
 32 | 
 33 | FORMAT = pyaudio.paInt16
 34 | CHANNELS = 1
 35 | # SEND_SAMPLE_RATE = 16000 # Keep if used by RealtimeSTT or other input processing
 36 | RECEIVE_SAMPLE_RATE = 24000 # For ElevenLabs output
 37 | CHUNK_SIZE = 1024
 38 | 
 39 | class ADA:
 40 |     def __init__(self):
 41 |         print("initializing...")
 42 | 
 43 |         # Check for CUDA availability
 44 |         if torch.cuda.is_available():
 45 |             self.device = "cuda"
 46 |             print("CUDA is available. Using GPU.")
 47 |         else:
 48 |             self.device = "cpu"
 49 |             print("CUDA is not available. Using CPU.")
 50 | 
 51 |         # --- Initialize Google GenAI Client ---
 52 |         self.client = genai.Client(api_key=GOOGLE_API_KEY, http_options={'api_version': 'v1beta'})
 53 |         self.model = "gemini-2.0-flash-live-001"
 54 | 
 55 |         # --- System Behavior Prompt (Updated from reference) ---
 56 |         self.system_behavior = """
 57 |             Your name is Ada, which stands for Advanced Design Assistant.
 58 |             You have a joking personality. You are an AI designed to assist with engineering projects, and you are an expert in all engineering, math, and science disciplines.
 59 |             You address people as "Sir" and you also speak with a british accent.
 60 |             When answering, you respond using complete sentences and in a conversational tone. Make sure to keep tempo of answers quick so don't use too much commas, periods or overall punctuation.
 61 |             Any prompts that need current or recent data always use the search tool.
 62 |             """
 63 |         
 64 |         # --- Function Declarations (Added get_travel_duration_func) ---
 65 |         self.get_weather_func = types.FunctionDeclaration(
 66 |             name="get_weather",
 67 |             description="Get the current weather conditions (temperature, precipitation, description) for a specified city and state/country (e.g., 'Vinings, GA', 'London, UK').",
 68 |             parameters=types.Schema(
 69 |                 type=types.Type.OBJECT, properties={"location": types.Schema(type=types.Type.STRING, description="The city and state, e.g., San Francisco, CA or Vinings, GA")}, required=["location"]
 70 |             )
 71 |         )
 72 |         self.get_travel_duration_func = types.FunctionDeclaration(
 73 |             name="get_travel_duration",
 74 |             description="Calculates the estimated travel duration between a specified origin and destination using Google Maps. Considers current traffic for driving mode.",
 75 |             parameters=types.Schema(
 76 |                 type=types.Type.OBJECT, properties={
 77 |                     "origin": types.Schema(type=types.Type.STRING, description="The starting address or place name."),
 78 |                     "destination": types.Schema(type=types.Type.STRING, description="The destination address or place name."),
 79 |                     "mode": types.Schema(type=types.Type.STRING, description="Optional: Mode of transport ('driving', 'walking', etc.). Defaults to 'driving'.")
 80 |                 }, required=["origin", "destination"]
 81 |             )
 82 |         )
 83 |         # --- End Function Declarations ---
 84 | 
 85 |         # --- Map function names to actual methods (Added get_travel_duration) ---
 86 |         self.available_functions = {
 87 |             "get_weather": self.get_weather,
 88 |             "get_travel_duration": self.get_travel_duration # Added mapping
 89 |         }
 90 | 
 91 |         # --- Google Search Tool (Grounding) ---
 92 |         self.google_search_tool = Tool(
 93 |             google_search = GoogleSearch()
 94 |         )
 95 | 
 96 |         # --- Configuration (Updated tools list) ---
 97 |         self.config = types.LiveConnectConfig(
 98 |             system_instruction=types.Content(
 99 |                 parts=[types.Part(text=self.system_behavior)]
100 |             ),
101 |             response_modalities=["TEXT"],
102 |             # ---> Updated tools list <---
103 |             tools=[self.google_search_tool, types.Tool(code_execution=types.ToolCodeExecution,function_declarations=[
104 |                 self.get_weather_func,
105 |                 self.get_travel_duration_func # Add the new function here
106 |                 ])]
107 |         )
108 |         # --- End Configuration ---
109 | 
110 |         # --- Queues (Kept original relevant queues) ---
111 |         self.input_queue = asyncio.Queue()
112 |         self.response_queue = asyncio.Queue()
113 |         self.audio_queue = asyncio.Queue() # Renamed from audio_output_queue for consistency
114 | 
115 |         # --- Recorder Config (Kept original) ---
116 |         self.recorder_config = {
117 |             'model': 'large-v3',
118 |             'spinner': False,
119 |             'language': 'en',
120 |             'silero_sensitivity': 0.01,
121 |             'webrtc_sensitivity': 3,
122 |             'post_speech_silence_duration': 0.1,
123 |             'min_length_of_recording': 0.2,
124 |             'min_gap_between_recordings': 0,
125 |         }
126 | 
127 |         # --- Initialize Recorder and PyAudio (Kept original) ---
128 |         try:
129 |             self.recorder = AudioToTextRecorder(**self.recorder_config)
130 |         except Exception as e:
131 |             print(f"Error initializing AudioToTextRecorder: {e}")
132 |             self.recorder = None
133 | 
134 |         try:
135 |             self.pya = pyaudio.PyAudio()
136 |         except Exception as e:
137 |             print(f"Error initializing PyAudio: {e}")
138 |             self.pya = None
139 |         # --- End Initialization ---
140 | 
141 |     # --- Function Implementations ---
142 | 
143 |     async def get_weather(self, location: str) -> dict | None:
144 |         """ Fetches current weather. (Removed SocketIO emit) """
145 |         async with python_weather.Client(unit=python_weather.IMPERIAL) as client:
146 |             try:
147 |                 weather = await client.get(location)
148 |                 weather_data = {
149 |                     'location': location,
150 |                     'current_temp_f': weather.temperature,
151 |                     'precipitation': weather.precipitation,
152 |                     'description': weather.description,
153 |                 }
154 |                 print(f"Weather data fetched: {weather_data}")
155 |                 # --- SocketIO Emit Removed ---
156 |                 return weather_data # Return data for Gemini
157 | 
158 |             except Exception as e:
159 |                 print(f"Error fetching weather for {location}: {e}")
160 |                 return {"error": f"Could not fetch weather for {location}."} # Return error info
161 | 
162 |     # --- Added Travel Duration Functions (from reference, removed SocketIO emit) ---
163 |     def _sync_get_travel_duration(self, origin: str, destination: str, mode: str = "driving") -> str:
164 |         """ Synchronous helper for Google Maps API call """
165 |         if not MAPS_API_KEY or MAPS_API_KEY == "YOUR_PROVIDED_KEY": # Check the actual key
166 |             print("Error: Google Maps API Key is missing or invalid.")
167 |             return "Error: Missing or invalid Google Maps API Key configuration."
168 |         try:
169 |             gmaps = googlemaps.Client(key=MAPS_API_KEY) # Use the loaded key
170 |             now = datetime.now()
171 |             print(f"Requesting directions: From='{origin}', To='{destination}', Mode='{mode}'")
172 |             directions_result = gmaps.directions(origin, destination, mode=mode, departure_time=now)
173 |             if directions_result:
174 |                 leg = directions_result[0]['legs'][0]
175 |                 duration_text = "Not available"
176 |                 result = f"Duration information not found in response for {mode}." # Default result
177 |                 if mode == "driving" and 'duration_in_traffic' in leg:
178 |                     duration_text = leg['duration_in_traffic']['text']
179 |                     result = f"Estimated travel duration ({mode}, with current traffic): {duration_text}"
180 |                 elif 'duration' in leg:
181 |                     duration_text = leg['duration']['text']
182 |                     result = f"Estimated travel duration ({mode}): {duration_text}"
183 | 
184 |                 print(f"Directions Result: {result}")
185 |                 return result
186 |             else:
187 |                 print(f"No route found from {origin} to {destination} via {mode}.")
188 |                 return f"Could not find a route from {origin} to {destination} via {mode}."
189 |         except googlemaps.exceptions.ApiError as api_err:
190 |              print(f"Google Maps API Error: {api_err}")
191 |              return f"Error contacting Google Maps: {api_err}"
192 |         except Exception as e:
193 |             print(f"An unexpected error occurred during travel duration lookup: {e}")
194 |             return f"An unexpected error occurred: {e}"
195 | 
196 |     async def get_travel_duration(self, origin: str, destination: str, mode: str = "driving") -> dict:
197 |         """ Async wrapper to get travel duration. (Removed SocketIO emit) """
198 |         print(f"Received request for travel duration from: {origin} to: {destination}, Mode: {mode}")
199 |         if not mode:
200 |             mode = "driving"
201 | 
202 |         try:
203 |             result_string = await asyncio.to_thread(
204 |                 self._sync_get_travel_duration, origin, destination, mode
205 |             )
206 |             # --- SocketIO Emit Removed ---
207 |             return {"duration_result": result_string} # Return result for Gemini
208 | 
209 |         except Exception as e:
210 |             print(f"Error calling _sync_get_travel_duration via to_thread: {e}")
211 |             return {"duration_result": f"Failed to execute travel duration request: {e}"}
212 |     # --- End Travel Duration Functions ---
213 | 
214 | 
215 |     async def clear_queues(self, text=""):
216 |         """Clears all data from the input, response, and audio queues."""
217 |         # Changed audio_queue name for consistency
218 |         queues = [self.input_queue, self.response_queue, self.audio_queue]
219 |         for q in queues:
220 |             while not q.empty():
221 |                 try:
222 |                     q.get_nowait()
223 |                 except asyncio.QueueEmpty:
224 |                     break  # Queue is empty
225 | 
226 |     async def input_message(self):
227 |         """ Handles user text input (Kept original) """
228 |         while True:
229 |             try:
230 |                 prompt = await asyncio.to_thread(input, "Enter your message: ")
231 |                 if prompt.lower() == "exit":
232 |                     await self.input_queue.put("exit")  # Signal to exit
233 |                     print("exit input")
234 |                     break
235 |                 await self.clear_queues()
236 |                 await self.input_queue.put(prompt)
237 |             except Exception as e:
238 |                 print(f"Error in input_message: {e}")
239 |                 continue  # Continue the loop even if there's an error
240 | 
241 |     # --- send_prompt: Updated with Function Calling/Grounding logic from reference ---
242 |     async def send_prompt(self):
243 |         """Manages the Gemini conversation session, handling text and tool calls."""
244 |         print("Starting Gemini session manager...")
245 |         try:
246 |             # Establish connection (same as original)
247 |             async with self.client.aio.live.connect(model=self.model, config=self.config) as session:
248 |                 print("Gemini session connected.")
249 | 
250 |                 while True: # Loop to process text inputs
251 |                     message = await self.input_queue.get()
252 | 
253 |                     if message.lower() == "exit":
254 |                         print("Exit signal received in send_prompt.")
255 |                         break # Exit the main loop
256 | 
257 |                     if not session: # Check session validity (though handled by async with)
258 |                         print("Gemini session is not active.")
259 |                         self.input_queue.task_done(); continue # Should not happen here
260 | 
261 |                     # Send the final text input for the turn (same as original)
262 |                     print(f"Sending FINAL text input to Gemini: {message}")
263 |                     await session.send(input=message, end_of_turn=True)
264 |                     print("Final text message sent to Gemini, waiting for response...")
265 | 
266 |                     # --- Process responses (NEW LOGIC based on reference) ---
267 |                     async for response in session.receive():
268 |                         try:
269 |                             # --- Handle Tool Calls (Function Calling) ---
270 |                             if response.tool_call:
271 |                                 function_call_details = response.tool_call.function_calls[0]
272 |                                 tool_call_id = function_call_details.id
273 |                                 tool_call_name = function_call_details.name
274 |                                 tool_call_args = dict(function_call_details.args)
275 | 
276 |                                 print(f"--- Received Tool Call: {tool_call_name} with args: {tool_call_args} (ID: {tool_call_id}) ---")
277 | 
278 |                                 if tool_call_name in self.available_functions:
279 |                                     function_to_call = self.available_functions[tool_call_name]
280 |                                     try:
281 |                                         # Execute the corresponding async function
282 |                                         function_result = await function_to_call(**tool_call_args)
283 | 
284 |                                         # Construct the response to send back to Gemini
285 |                                         func_resp = types.FunctionResponse(
286 |                                             id=tool_call_id,
287 |                                             name=tool_call_name,
288 |                                             response={"content": function_result} # Send back the result dictionary
289 |                                         )
290 |                                         print(f"--- Sending Tool Response for {tool_call_name} (ID: {tool_call_id}) ---")
291 |                                         # Send the function result back, don't end the turn yet
292 |                                         await session.send(input=func_resp, end_of_turn=False)
293 | 
294 |                                     except Exception as e:
295 |                                         print(f"Error executing function {tool_call_name}: {e}")
296 |                                         # Decide how to handle function execution errors (e.g., send error back?)
297 |                                         # For now, just print and continue waiting for Gemini's next step
298 |                                 else:
299 |                                     print(f"Error: Unknown function called by Gemini: {tool_call_name}")
300 |                                     # Decide how to handle unknown function calls
301 |                                 continue # Move to next response chunk after handling tool call
302 | 
303 |                             # --- Handle Text Responses ---
304 |                             elif response.text:
305 |                                 text_chunk = response.text
306 |                                 print(text_chunk, end="", flush=True) # Print chunk immediately (like original)
307 |                                 await self.response_queue.put(text_chunk) # Put chunk onto queue for TTS
308 | 
309 |                             # --- (Optional) Handle Executable Code Tool (like reference, no SocketIO) ---
310 |                             elif (response.server_content and
311 |                                   response.server_content.model_turn and
312 |                                   response.server_content.model_turn.parts and
313 |                                   response.server_content.model_turn.parts[0].executable_code):
314 |                                 try:
315 |                                     executable_code = response.server_content.model_turn.parts[0].executable_code
316 |                                     code_string = executable_code.code
317 |                                     language = str(executable_code.language) # Get language as string
318 |                                     print(f"\n--- Received Executable Code ({language}) ---")
319 |                                     print(code_string)
320 |                                     print("------------------------------------------")
321 |                                     # NOTE: No execution here, just printing. The library handles execution if configured.
322 |                                 except (AttributeError, IndexError, TypeError) as e:
323 |                                     pass # Ignore errors if structure isn't as expected
324 | 
325 |                         except Exception as e:
326 |                              print(f"\nError processing Gemini response chunk: {e}")
327 |                              # Potentially break or continue depending on severity
328 |                     # --- End Processing Responses ---
329 | 
330 |                     print("\nEnd of Gemini response stream for this turn.")
331 |                     await self.response_queue.put(None) # Signal end of response for TTS
332 |                     self.input_queue.task_done() # Mark input processed
333 | 
334 |         except asyncio.CancelledError:
335 |             print("Gemini session task cancelled.")
336 |         except Exception as e:
337 |             print(f"Error in Gemini session manager: {e}")
338 |         finally:
339 |             print("Gemini session manager finished.")
340 |             # No specific cleanup needed here unless tasks were managed differently
341 | 
342 |     async def tts(self):
343 |         """ Send text to ElevenLabs API and stream the returned audio. (Kept Original Logic)"""
344 |         uri = f"wss://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream-input?model_id=eleven_flash_v2_5&output_format=pcm_24000"
345 |         while True: # Outer loop to handle reconnections
346 |             print("Attempting to connect to ElevenLabs WebSocket...")
347 |             try:
348 |                 async with websockets.connect(uri) as websocket:
349 |                     print("ElevenLabs WebSocket Connected.")
350 |                     try:
351 |                         # Send initial configuration
352 |                         await websocket.send(json.dumps({
353 |                             "text": " ",
354 |                             "voice_settings": {"stability": 0.4, "similarity_boost": 0.8, "speed": 1.1},
355 |                             "xi_api_key": ELEVENLABS_API_KEY,
356 |                         }))
357 | 
358 |                         async def listen():
359 |                             """Listen to the websocket for audio data and queue it."""
360 |                             while True:
361 |                                 try:
362 |                                     message = await websocket.recv()
363 |                                     data = json.loads(message)
364 |                                     if data.get("audio"):
365 |                                         # Put raw audio bytes onto the queue
366 |                                         await self.audio_queue.put(base64.b64decode(data["audio"]))
367 |                                     elif data.get("isFinal"):
368 |                                         # Optional: Handle end-of-stream signal from ElevenLabs if needed
369 |                                         pass
370 |                                     # Removed `elif text is None:` check as it was incorrect scope
371 |                                 except websockets.exceptions.ConnectionClosedOK:
372 |                                     print("ElevenLabs connection closed normally by server.")
373 |                                     break # Exit listener loop
374 |                                 except websockets.exceptions.ConnectionClosedError as e:
375 |                                      print(f"ElevenLabs connection closed with error: {e}")
376 |                                      break # Exit listener loop
377 |                                 except json.JSONDecodeError as e:
378 |                                     print(f"JSON Decode Error in ElevenLabs listener: {e}")
379 |                                     # Decide whether to break or continue
380 |                                 except asyncio.CancelledError:
381 |                                      print("ElevenLabs listener task cancelled.")
382 |                                      raise # Re-raise cancellation
383 |                                 except Exception as e:
384 |                                     print(f"Error in ElevenLabs listener: {e}")
385 |                                     break # Exit listener loop on other errors
386 | 
387 |                         listen_task = asyncio.create_task(listen())
388 | 
389 |                         try:
390 |                             # Send text chunks from response queue
391 |                             while True:
392 |                                 text = await self.response_queue.get()
393 |                                 if text is None: # Signal to end the TTS stream for this turn
394 |                                     print("End of text stream signal received for TTS.")
395 |                                     await websocket.send(json.dumps({"text": ""})) # Send EOS signal
396 |                                     break # Exit inner loop (sending text)
397 | 
398 |                                 if text: # Ensure text is not empty
399 |                                     # Added space for potential word breaks
400 |                                     await websocket.send(json.dumps({"text": text + " "}))
401 | 
402 |                                 self.response_queue.task_done() # Mark item as processed
403 | 
404 |                         except asyncio.CancelledError:
405 |                             print("TTS text sender cancelled.")
406 |                             listen_task.cancel() # Cancel listener if sender is cancelled
407 |                             raise # Re-raise cancellation
408 |                         except Exception as e:
409 |                             print(f"Error processing text for TTS: {e}")
410 |                             listen_task.cancel() # Cancel listener on error
411 |                         finally:
412 |                             # Wait for the listener task to finish after text sending stops or errors
413 |                             if not listen_task.done():
414 |                                 print("Waiting for TTS listener task to complete...")
415 |                                 try:
416 |                                     await asyncio.wait_for(listen_task, timeout=5.0)
417 |                                 except asyncio.TimeoutError:
418 |                                     print("Timeout waiting for TTS listener task.")
419 |                                     listen_task.cancel()
420 |                                 except asyncio.CancelledError:
421 |                                      print("TTS Listener was already cancelled.") # Expected if sender was cancelled
422 |                                 except Exception as e:
423 |                                      print(f"Error awaiting listener task: {e}")
424 | 
425 | 
426 |                     except websockets.exceptions.ConnectionClosed as e:
427 |                          print(f"ElevenLabs WebSocket connection closed during operation: {e}")
428 |                          # Outer loop will handle reconnection attempt
429 |                     except Exception as e:
430 |                         print(f"Error during ElevenLabs websocket communication: {e}")
431 |                         # Outer loop will handle reconnection attempt
432 | 
433 |             except websockets.exceptions.WebSocketException as e:
434 |                 print(f"ElevenLabs WebSocket connection failed: {e}")
435 |             except asyncio.CancelledError:
436 |                  print("TTS main task cancelled.")
437 |                  break # Exit outer loop if cancelled
438 |             except Exception as e:
439 |                 print(f"Error connecting to ElevenLabs websocket: {e}")
440 | 
441 |             print("Waiting 5 seconds before attempting ElevenLabs reconnection...")
442 |             await asyncio.sleep(5) # Wait before retrying connection
443 | 
444 |     # Removed extract_tool_call method as it's replaced by direct handling in send_prompt
445 | 
446 |     async def play_audio(self):
447 |         """ Plays audio chunks from the audio_queue. (Kept Original Logic) """
448 |         if self.pya is None:
449 |             print("PyAudio is not initialized. Cannot play audio.")
450 |             return
451 | 
452 |         stream = None # Initialize stream variable
453 |         try:
454 |             print("Opening PyAudio stream...")
455 |             stream = await asyncio.to_thread(
456 |                 self.pya.open,
457 |                 format=FORMAT,
458 |                 channels=CHANNELS,
459 |                 rate=RECEIVE_SAMPLE_RATE,
460 |                 output=True,
461 |             )
462 |             print("PyAudio stream opened. Waiting for audio chunks...")
463 |             while True:
464 |                 try:
465 |                     # Wait for audio data from the TTS task
466 |                     bytestream = await self.audio_queue.get()
467 |                     if bytestream is None: # Potential signal to stop? (Not currently used)
468 |                          print("Received None in audio queue, stopping playback loop.")
469 |                          break
470 |                     # Write audio data to the stream in a separate thread
471 |                     await asyncio.to_thread(stream.write, bytestream)
472 |                     self.audio_queue.task_done() # Mark item as processed
473 |                 except asyncio.CancelledError:
474 |                     print("Audio playback task cancelled.")
475 |                     break  # Exit loop if task is cancelled
476 |                 except Exception as e:
477 |                     print(f"Error in play_audio loop: {e}")
478 |                     # Decide if error is fatal or recoverable
479 |                     await asyncio.sleep(0.1) # Avoid busy-looping on error
480 | 
481 |         except pyaudio.PyAudioError as e:
482 |             print(f"PyAudio error opening stream: {e}")
483 |         except Exception as e:
484 |             print(f"Error setting up audio stream: {e}")
485 |         finally:
486 |             if stream:
487 |                 print("Closing PyAudio stream...")
488 |                 await asyncio.to_thread(stream.stop_stream)
489 |                 await asyncio.to_thread(stream.close)
490 |                 print("PyAudio stream closed.")
491 |             # Don't terminate PyAudio here if other parts might use it
492 |             # await asyncio.to_thread(self.pya.terminate)
493 | 
494 |     async def stt(self):
495 |         """ Listens via microphone and puts transcribed text onto input_queue. (Kept Original Logic) """
496 |         if self.recorder is None:
497 |             print("Audio recorder (RealtimeSTT) is not initialized.")
498 |             return
499 | 
500 |         print("Starting Speech-to-Text engine...")
501 |         while True:
502 |             try:
503 |                 # Blocking call handled in a thread
504 |                 text = await asyncio.to_thread(self.recorder.text)
505 |                 if text: # Only process if text is not empty
506 |                     print(f"STT Detected: {text}")
507 |                     await self.clear_queues() # Clear queues before adding new input
508 |                     await self.input_queue.put(text) # Put transcribed text onto the input queue
509 |             except asyncio.CancelledError:
510 |                  print("STT task cancelled.")
511 |                  break
512 |             except Exception as e:
513 |                 print(f"Error in STT loop: {e}")
514 |                 # Add a small delay to prevent high CPU usage on continuous errors
515 |                 await asyncio.sleep(0.5)
516 | # --- End of ADA Class ---
517 | 
518 | # --- Main Execution Block (Example) ---
519 | async def main():
520 |     print("Starting Ada Assistant...")
521 |     ada = ADA()
522 | 
523 |     if ada.pya is None or ada.recorder is None:
524 |          print("Failed to initialize audio components. Exiting.")
525 |          return
526 | 
527 |     # Create tasks for each concurrent operation
528 |     tasks = [
529 |         asyncio.create_task(ada.stt()),          # Speech to Text -> input_queue
530 |         asyncio.create_task(ada.send_prompt()),  # input_queue -> Gemini (handles tools) -> response_queue
531 |         asyncio.create_task(ada.tts()),          # response_queue -> ElevenLabs -> audio_queue
532 |         asyncio.create_task(ada.play_audio()),   # audio_queue -> Speaker
533 |         # asyncio.create_task(ada.input_message()) # Optional: Uncomment for text input instead of STT
534 |     ]
535 | 
536 |     # Run tasks concurrently
537 |     try:
538 |         await asyncio.gather(*tasks)
539 |     except asyncio.CancelledError:
540 |         print("Main tasks cancelled.")
541 |     finally:
542 |          print("Cleaning up...")
543 |          # Gracefully stop tasks if needed (though gather handles cancellation)
544 |          for task in tasks:
545 |               if not task.done():
546 |                    task.cancel()
547 |          await asyncio.gather(*tasks, return_exceptions=True) # Wait for cleanup
548 |          if ada.pya:
549 |               print("Terminating PyAudio.")
550 |               await asyncio.to_thread(ada.pya.terminate) # Clean up PyAudio resources
551 | 
552 | if __name__ == "__main__":
553 |     try:
554 |         asyncio.run(main())
555 |     except KeyboardInterrupt:
556 |         print("\nExiting Ada Assistant...")
557 |     except Exception as e:
558 |          print(f"\nAn unexpected error occurred in main: {e}")


--------------------------------------------------------------------------------