├── Crow.py ├── CrowAssistant.py ├── CrowAssistant.spec ├── CrowBrain.py ├── CrowConfig.py ├── CrowSTT.py ├── LICENSE ├── README.md ├── Volume.py ├── config.json ├── crow.ico ├── images ├── crow-idle1.png ├── crow-idle2.png ├── crow-wingleft.png ├── crow-wingright.png ├── crowfly.png ├── crowhead-blink.png ├── crowhead-lookback.png ├── crowhead-tilt.png ├── crowhead-tiltold.png └── crowhead.png └── templates ├── index.html └── settings.html /Crow.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import sys 3 | import os 4 | import random 5 | import psutil 6 | import CrowConfig 7 | import webbrowser 8 | 9 | 10 | # For Windows, we need to set these environment variables 11 | # to make the window clickthrough 12 | if os.name == 'nt': 13 | import ctypes 14 | try: 15 | ctypes.windll.user32.SetProcessDPIAware() 16 | except AttributeError: 17 | pass 18 | import win32gui 19 | import win32con 20 | import win32api # Add this import 21 | import win32process 22 | # Get the directory of the script 23 | script_dir = os.path.dirname(os.path.abspath(__file__)) 24 | 25 | 26 | # Set the working directory to the script's directory 27 | os.chdir(script_dir) 28 | pygame_icon = pygame.image.load('crow.ico') 29 | pygame.display.set_icon(pygame_icon) 30 | 31 | def load_sprite_animation(name): 32 | sprites = [] 33 | index = 1 34 | while True: 35 | filename = f"{name}{index}.png" 36 | try: 37 | sprite = pygame.image.load(filename).convert_alpha() 38 | sprites.append(sprite) 39 | index += 1 40 | except FileNotFoundError: 41 | break 42 | return sprites 43 | 44 | def load_sprite_sheet(filename, sprite_size): 45 | sprite_sheet = pygame.image.load(filename).convert_alpha() 46 | sprite_width, sprite_height = sprite_size 47 | sheet_width, sheet_height = sprite_sheet.get_size() 48 | sprites = [] 49 | 50 | for y in range(0, sheet_height, sprite_height): 51 | for x in range(0, sheet_width, sprite_width): 52 | sprite = sprite_sheet.subsurface((x, y, sprite_width, sprite_height)) 53 | if not is_blank(sprite): 54 | sprites.append(sprite) 55 | 56 | return sprites 57 | 58 | def is_blank(sprite): 59 | for x in range(sprite.get_width()): 60 | for y in range(sprite.get_height()): 61 | if sprite.get_at((x, y)).a != 0: 62 | return False 63 | return True 64 | 65 | 66 | 67 | class CrowAnimationController: 68 | def __init__(self, crow): 69 | self.crow = crow 70 | self.head_index = 0 71 | self.idle_index = 0 72 | self.fly_index = 0 73 | self.blink_timer = 0 74 | self.lookback_timer = 0 75 | 76 | 77 | # Animation speeds 78 | self.head_animation_speed = 10 # frames per second 79 | self.idle_animation_speed = 1 # frames per second 80 | self.fly_animation_speed = 10 # frames per second 81 | self.blink_interval = 10 # frames between blinks 82 | self.lookback_interval = 120 # frames between lookbacks 83 | 84 | # Timers 85 | self.idle_timer = 0 86 | self.fly_timer = 0 87 | self.blink_cooldown = 0 88 | self.lookback_cooldown = 0 89 | 90 | def update(self): 91 | # Update head animation based on volume 92 | if self.crow.volume > 0: 93 | target_index = int(len(self.crow.head) * self.crow.volume) 94 | self.head_index = min(target_index, len(self.crow.head) - 1) 95 | else: 96 | self.head_index = 0 97 | 98 | # Update idle animation 99 | if not self.crow.flying: 100 | self.idle_timer += 1 101 | if self.idle_timer >= (60 / self.idle_animation_speed): 102 | self.idle_index = (self.idle_index + 1) % len(self.crow.idle) 103 | self.idle_timer = 0 104 | 105 | # Update fly animation 106 | if 
self.crow.flying: 107 | self.fly_timer += 1 108 | if self.fly_timer >= (60 / self.fly_animation_speed): 109 | self.fly_index = (self.fly_index + 1) % len(self.crow.fly) 110 | self.fly_timer = 0 111 | 112 | # Update blink timer 113 | if self.crow.volume == 0: 114 | self.blink_cooldown -= 1 115 | if self.blink_cooldown <= 0: 116 | self.blink_timer = self.blink_interval 117 | self.blink_cooldown = random.randint(self.blink_interval, self.blink_interval * 20) 118 | if random.random() < 0.1: # 10% chance to look back instead of blink 119 | self.lookback_timer = 120 120 | else: 121 | self.lookback_timer = 0 122 | 123 | # Decrement blink timer 124 | if self.blink_timer > 0: 125 | self.blink_timer -= 1 126 | 127 | # Update lookback timer 128 | if self.lookback_timer > 0: 129 | self.lookback_timer -= 1 130 | self.lookback_cooldown = self.lookback_interval 131 | 132 | # Update lookback cooldown 133 | if self.lookback_cooldown > 0: 134 | self.lookback_cooldown -= 1 135 | 136 | def render(self, screen): 137 | # Render body 138 | if self.crow.flying: 139 | screen.blit(self.crow.fly[self.fly_index], (0, 0)) 140 | else: 141 | screen.blit(self.crow.idle[self.idle_index], (0, 0)) 142 | 143 | # Render head 144 | if self.crow.listen and not self.crow.Sleeping: 145 | screen.blit(self.crow.headtilt, (0, 0)) 146 | elif not self.crow.flying and self.lookback_timer > 0 and self.crow.volume == 0: 147 | screen.blit(self.crow.headlookback, (0, 0)) 148 | elif self.blink_timer > 0 and self.crow.volume == 0: 149 | screen.blit(self.crow.headblink, (0, 0)) 150 | else: 151 | screen.blit(self.crow.head[self.head_index], (0, 0)) 152 | 153 | return screen 154 | 155 | def Init(): 156 | return DesktopPet.get_instance() 157 | 158 | class DesktopPet: 159 | _instance = None 160 | 161 | @classmethod 162 | def get_instance(cls): 163 | if cls._instance is None: 164 | cls._instance = cls() 165 | return cls._instance 166 | 167 | def __init__(self): 168 | if DesktopPet._instance is not None: 169 | raise Exception("This class is a singleton!") 170 | else: 171 | DesktopPet._instance = self 172 | #pygame.init() 173 | self.last_click_time = 0 174 | self.double_click_threshold = 500 # milliseconds 175 | self.config = CrowConfig.config() 176 | self.Sleeping = False 177 | self.SleepTimer = 0 178 | self.scale = self.config.config['scale'] 179 | # Set up the window 180 | 181 | self.screen = pygame.display.set_mode((64*self.scale, 64*self.scale), pygame.NOFRAME) 182 | pygame.display.set_caption("Crow") 183 | print("title set") 184 | 185 | self.right = False 186 | 187 | 188 | self.volume = 0 #a float that is a voice volume from 0 to 1 189 | self.listen = False 190 | self.flying = False 191 | 192 | # Load sprites 193 | self.head = load_sprite_sheet("images/crowhead.png",(64,64)) # a set of mouth animations the last is wide open, the first is closed. 
we should lerp to these from the volume 194 | self.headblink = pygame.image.load("images/crowhead-blink.png").convert_alpha() #blink randomly every once in a while as long as talking is 0 195 | self.headlookback = pygame.image.load("images/crowhead-lookback.png").convert_alpha() #rarely instead of blink 196 | self.headtilt = pygame.image.load("images/crowhead-tilt.png").convert_alpha() #if listen is true, this should render 197 | 198 | self.idle = load_sprite_animation("images/crow-idle") #when not moving 199 | self.fly = load_sprite_sheet("images/crowfly.png",(64,64)) #when moving to a new spot 200 | print("Sprites Loaded") 201 | 202 | 203 | 204 | # Set the window to be transparent 205 | self.screen.set_colorkey((0,0,0)) # Black will be transparent, any sprites can not be black 206 | self.screen.fill((0,0,0)) 207 | 208 | # For Windows, set the window to be clickthrough 209 | if os.name == 'nt': 210 | hwnd = pygame.display.get_wm_info()["window"] 211 | win32gui.SetWindowLong(hwnd, win32con.GWL_EXSTYLE, 212 | win32gui.GetWindowLong(hwnd, win32con.GWL_EXSTYLE) | win32con.WS_EX_LAYERED) 213 | win32gui.SetLayeredWindowAttributes(hwnd, win32api.RGB(0,0,0), 0, win32con.LWA_COLORKEY) 214 | 215 | 216 | self.clock = pygame.time.Clock() 217 | self.dragging = False 218 | self.hwnd = pygame.display.get_wm_info()["window"] 219 | self.set_always_on_top() 220 | self.current_window = None 221 | self.target_x = 0 222 | self.target_y = 0 223 | self.current_x = 0 224 | self.current_y = 0 225 | self.move_speed = 4 # Adjust this to change animation speed 226 | self.wincheck = 0 227 | 228 | self.running = True 229 | self.animation_controller = CrowAnimationController(self) 230 | self.wincheck = 0 231 | 232 | 233 | 234 | def set_always_on_top(self): 235 | win32gui.SetWindowPos( 236 | self.hwnd, 237 | win32con.HWND_TOPMOST, 238 | 0, 0, 0, 0, 239 | win32con.SWP_NOMOVE | win32con.SWP_NOSIZE 240 | ) 241 | 242 | 243 | def get_focused_window(self): 244 | focused = win32gui.GetForegroundWindow() 245 | if focused == self.hwnd: 246 | return None # Return None if our window is focused 247 | title = win32gui.GetWindowText(focused) 248 | if title: 249 | return focused 250 | return None 251 | 252 | def get_window_info(self, hwnd): 253 | if hwnd: 254 | try: 255 | rect = win32gui.GetWindowRect(hwnd) 256 | return rect 257 | except win32gui.error: 258 | return None 259 | return None 260 | 261 | def move_to_window(self, hwnd): 262 | if hwnd and hwnd != self.hwnd: # Only move if it's not our own window 263 | rect = self.get_window_info(hwnd) 264 | if rect: 265 | window_width = rect[2] - rect[0] 266 | window_bottom = rect[3] 267 | 268 | # Set new target x and y positions 269 | self.target_x = random.randint(rect[0], rect[2] - self.screen.get_width()) 270 | self.target_y = window_bottom - self.screen.get_height() 271 | else: 272 | # If we can't get window info, move to a default position 273 | self.target_x = 0 274 | self.target_y = win32api.GetSystemMetrics(win32con.SM_CYSCREEN) - self.screen.get_height() 275 | 276 | def move_to_taskbar_clock(self): 277 | # Get the screen size 278 | screen_width = win32api.GetSystemMetrics(win32con.SM_CXSCREEN) 279 | screen_height = win32api.GetSystemMetrics(win32con.SM_CYSCREEN) 280 | 281 | # Get the taskbar height 282 | taskbar_hwnd = win32gui.FindWindow("Shell_TrayWnd", None) 283 | if taskbar_hwnd: 284 | taskbar_rect = win32gui.GetWindowRect(taskbar_hwnd) 285 | taskbar_height = taskbar_rect[3] - taskbar_rect[1] 286 | else: 287 | # If we can't find the taskbar, assume a default height 288 | 
taskbar_height = 40 289 | 290 | # Calculate the position 291 | # We'll position it slightly to the left of the very corner to avoid overlapping with any system tray icons 292 | offset_from_right = 100 # Adjust this value as needed 293 | self.target_x = screen_width - self.screen.get_width() - offset_from_right 294 | self.target_y = screen_height - self.screen.get_height() - taskbar_height 295 | 296 | def update_position(self): 297 | # Calculate the distance between the current position and the target position 298 | dx = self.target_x - self.current_x 299 | dy = self.target_y - self.current_y 300 | 301 | # Calculate the length of the distance vector 302 | distance = (dx ** 2 + dy ** 2) ** 0.5 303 | 304 | # If the distance is very small, just snap to the target position 305 | if distance < self.move_speed: 306 | self.current_x = self.target_x 307 | self.current_y = self.target_y 308 | self.flying = False 309 | else: 310 | # Move the pet by a fraction of the distance each frame 311 | fraction = self.move_speed / distance 312 | self.current_x += dx * fraction 313 | self.current_y += dy * fraction 314 | self.flying = True 315 | if self.target_x > self.current_x: 316 | self.right = True 317 | else: 318 | self.right = False 319 | 320 | # Set new window position 321 | win32gui.SetWindowPos(self.hwnd, 0, 322 | int(self.current_x), 323 | int(self.current_y), 324 | 0, 0, win32con.SWP_NOSIZE | win32con.SWP_NOZORDER) 325 | 326 | 327 | 328 | 329 | def launch_webpage(self): 330 | url = "http://127.0.0.1:" + str(self.config.config['port']) 331 | webbrowser.open(url) 332 | 333 | 334 | def Update(self): 335 | #while running: 336 | for event in pygame.event.get(): 337 | if event.type == pygame.QUIT: 338 | self.running = False 339 | elif event.type == pygame.MOUSEBUTTONDOWN: 340 | if event.button == 1: # Left mouse button 341 | current_time = pygame.time.get_ticks() 342 | if current_time - self.last_click_time < self.double_click_threshold: 343 | self.launch_webpage() 344 | self.last_click_time = current_time 345 | 346 | 347 | 348 | self.wincheck+=1 349 | if self.wincheck > 300: 350 | self.wincheck = 0 351 | if(self.Sleeping): 352 | self.move_to_taskbar_clock() 353 | else: 354 | # Check for new focused window 355 | focused_window = self.get_focused_window() 356 | if focused_window and focused_window != self.current_window: 357 | self.current_window = focused_window 358 | self.move_to_window(self.current_window) 359 | self.last_window_info = self.get_window_info(self.current_window) 360 | 361 | # Check if current window has moved or resized 362 | if not self.Sleeping and self.current_window: 363 | current_window_info = self.get_window_info(self.current_window) 364 | if not self.flying and current_window_info != self.last_window_info: 365 | self.move_to_window(self.current_window) 366 | self.last_window_info = current_window_info 367 | 368 | # Update position for animation 369 | self.update_position() 370 | 371 | 372 | # Clear the screen 373 | self.screen.fill((0,0,0)) # Fill with the transparent color 374 | 375 | # Draw the current sprite 376 | temp_screen = pygame.Surface((64, 64), pygame.SRCALPHA) 377 | self.animation_controller.update() 378 | self.animation_controller.render(temp_screen) 379 | 380 | if self.right: 381 | temp_screen = pygame.transform.flip(temp_screen, True, False) 382 | 383 | scaled_screen = pygame.transform.scale(temp_screen, (64 * self.scale, 64 * self.scale)) 384 | 385 | self.screen.blit(scaled_screen, (0, 0)) 386 | # Update the display 387 | pygame.display.flip() 388 | 389 | self.SleepTimer
+= self.clock.get_time() 390 | self.clock.tick(60) 391 | 392 | def End(self): 393 | self.running = False 394 | print("crow end") 395 | pygame.quit() 396 | -------------------------------------------------------------------------------- /CrowAssistant.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | from queue import Queue 4 | import pyaudio 5 | from CrowSTT import AudioToTextRecorder 6 | import random 7 | import string 8 | import time 9 | import keyboard 10 | import threading 11 | import Crow 12 | import CrowBrain 13 | import wave 14 | import numpy as np 15 | import re 16 | import sys 17 | import shlex 18 | import requests 19 | import ctypes 20 | import Volume 21 | import CrowConfig 22 | # import zipfile 23 | # from urllib.parse import urlparse 24 | # import shutil 25 | 26 | 27 | ##At some point I want to make it so it downloads the things it needs for TTS etc... but for now we'll do it manually 28 | # def download_and_extract(target_file, url): 29 | # # Determine the filename from the URL 30 | # parsed_url = urlparse(url) 31 | # download_filename = os.path.basename(parsed_url.path) or 'downloaded_file' 32 | 33 | # # If the target file exists, we'll still proceed with the download 34 | # # as we want to update all files in the zip 35 | # if os.path.exists(target_file): 36 | # print(f"Note: {target_file} already exists, but we'll proceed with the download and update.") 37 | 38 | # print(f"Downloading from {url}") 39 | # try: 40 | # response = requests.get(url, stream=True) 41 | # response.raise_for_status() # Raises an HTTPError for bad requests 42 | 43 | # # Save the downloaded content 44 | # with open(download_filename, 'wb') as file: 45 | # for chunk in response.iter_content(chunk_size=8192): 46 | # file.write(chunk) 47 | # print(f"File {download_filename} has been downloaded successfully.") 48 | 49 | # # Process the downloaded file 50 | # if download_filename.lower().endswith('.zip'): 51 | # print(f"Extracting all files from {download_filename}") 52 | # try: 53 | # with zipfile.ZipFile(download_filename, 'r') as zip_ref: 54 | # # Extract all contents, overwriting existing files 55 | # zip_ref.extractall(path=".", members=None) 56 | # print("All files have been extracted successfully.") 57 | 58 | # # Verify if the target file was part of the extracted files 59 | # if os.path.exists(target_file): 60 | # print(f"Successfully obtained {target_file}") 61 | # else: 62 | # print(f"Warning: {target_file} was not found in the extracted files.") 63 | # except zipfile.BadZipFile: 64 | # print("Error: The downloaded file is not a valid zip file.") 65 | # return 66 | # else: 67 | # # If it's not a zip, just rename it to the target file 68 | # os.replace(download_filename, target_file) 69 | # print(f"Downloaded file renamed to {target_file}") 70 | 71 | # except requests.RequestException as e: 72 | # print(f"Error downloading file: {e}") 73 | # finally: 74 | # # Clean up the downloaded zip file if it exists 75 | # if os.path.exists(download_filename) and download_filename != target_file: 76 | # os.remove(download_filename) 77 | # print(f"Cleaned up {download_filename}") 78 | 79 | 80 | 81 | 82 | 83 | def delete_wav_files(): 84 | # Get the directory of the current script 85 | script_dir = os.path.dirname(os.path.abspath(__file__)) 86 | 87 | # Construct the path to the 'wav' folder 88 | wav_folder = os.path.join(script_dir, 'wav') 89 | 90 | # Check if the 'wav' folder exists 91 | if not os.path.exists(wav_folder): 92 | print(f"The folder 
{wav_folder} does not exist.") 93 | return 94 | 95 | try: 96 | # Iterate over all files in the 'wav' folder 97 | for filename in os.listdir(wav_folder): 98 | file_path = os.path.join(wav_folder, filename) 99 | 100 | # Check if it's a file (not a subdirectory) 101 | if os.path.isfile(file_path): 102 | # Delete the file 103 | os.remove(file_path) 104 | print(f"Deleted: {filename}") 105 | 106 | print("All files in the 'wav' folder have been deleted.") 107 | 108 | except Exception as e: 109 | print(f"An error occurred: {e}") 110 | 111 | os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE' 112 | # Fetch the absolute path of the script 113 | script_path = os.path.abspath(__file__) 114 | 115 | # Extract the directory from the absolute path 116 | script_dir = os.path.dirname(script_path) 117 | os.chdir(script_dir) 118 | 119 | 120 | #download_and_extract('piper.exe', 'https://github.com/rhasspy/piper/releases/download/2023.11.14-2/piper_windows_amd64.zip') 121 | 122 | 123 | # Global queue for TTS files 124 | tts_queue = Queue() 125 | inputque = Queue() 126 | 127 | is_playing = False 128 | is_talking = False 129 | outputdev = 12 130 | 131 | # Initialize PyAudio 132 | audio = pyaudio.PyAudio() 133 | 134 | voicenum = 113# config.config['voice'] 135 | 136 | 137 | 138 | # Initialize CrowBrain 139 | brain = None# CrowBrain.Init() 140 | current_conversation_id = 1 141 | 142 | saidname = False 143 | 144 | def handle_completed_sentence(sentence): 145 | global current_conversation_id 146 | global brain 147 | global voicenum 148 | if not is_playing: 149 | #wintts(sentence, "en_US-libritts_r-medium.onnx -s " + str(voicenum)) 150 | #return 151 | # Generate response using CrowBrain 152 | response = brain.generate(sentence, current_conversation_id) 153 | if 'error' not in response: 154 | ai_response = response['content'] 155 | wintts(ai_response, "en_US-libritts_r-medium.onnx -s " + str(voicenum)) 156 | else: 157 | print(f"Error in AI response: {response['error']}") 158 | wintts(response['error'], "en_US-libritts_r-medium.onnx -s " + str(voicenum)) 159 | 160 | def test_voice(vid): 161 | global voicenum 162 | voicenum = vid 163 | wintts("This is my voice for number " + str(vid), "en_US-libritts_r-medium.onnx -s " + str(vid)) 164 | 165 | def contains_word(text, word): 166 | pattern = r'\b' + re.escape(word) + r'\b' 167 | return bool(re.search(pattern, text, re.IGNORECASE)) 168 | 169 | 170 | def wintts(text, model): 171 | global config 172 | global saidname 173 | global stopplayback 174 | stopplayback=False 175 | saidname = contains_word(text, config.config['name']) 176 | 177 | # Clean up the text 178 | text = re.sub(r"\'", "", text) 179 | text = re.sub(r"\*", "", text) 180 | text = text.strip() 181 | remove_chars = "&<>[]|^%:\"" 182 | text = "".join(char for char in text if char not in remove_chars) 183 | 184 | # Split the text into sentences or lines 185 | sentences = re.split(r'(?<=[.!?])\s+|\n', text) 186 | 187 | # Process each non-empty sentence 188 | for sentence in sentences: 189 | sentence = sentence.strip() 190 | if not sentence: 191 | continue 192 | 193 | print(sentence) 194 | 195 | # Generate a random filename 196 | random_filename = ''.join(random.choices(string.ascii_letters + string.digits, k=10)) + ".wav" 197 | random_filename = os.path.join("wav", random_filename) 198 | 199 | # Use shell escaping for the sentence to handle special characters 200 | safe_sentence = shlex.quote(sentence) 201 | 202 | command = f"echo {safe_sentence} | piper -m {model} -f {random_filename}" 203 | os.system(command) 204 | 205 | # 
Add the file to the queue 206 | tts_queue.put(random_filename) 207 | 208 | def play_and_delete_wav(): 209 | global stopplayback 210 | global Running 211 | global is_playing 212 | global is_talking 213 | global recorder 214 | global saidname 215 | while Running: 216 | if not tts_queue.empty(): 217 | filename = tts_queue.get() 218 | print(filename) 219 | if(not stopplayback): 220 | is_talking=True 221 | play_wav(filename) # Assuming play_wav is a function to play WAV files 222 | os.remove(filename) # Delete the WAV file after playing 223 | tts_queue.task_done() 224 | else: 225 | if is_talking: 226 | 227 | #print("Playing Done") 228 | recorder.interrupt_stop_event.set() 229 | recorder.stop() 230 | time.sleep(0.1) 231 | is_talking=False 232 | saidname=False 233 | time.sleep(0.1) # Sleep for a short duration to avoid busy waiting 234 | print("WAVE THREAD END") 235 | 236 | wf = None 237 | vol = 0 238 | 239 | stopplayback = False 240 | 241 | 242 | def callback(in_data, frame_count, time_info, status): 243 | global vol 244 | global wf 245 | global stopplayback 246 | if stopplayback: 247 | return (None, pyaudio.paComplete) 248 | # Read data from file 249 | data = wf.readframes(frame_count) 250 | d = np.frombuffer(data, dtype=np.int16) 251 | v = np.average(np.abs(d)) 252 | if (not np.isnan(v)): 253 | vol = v * .0001 254 | else: 255 | vol = 0 256 | 257 | return (data, pyaudio.paContinue) 258 | 259 | 260 | 261 | def play_wav(wavefile): 262 | #print("wavstart") 263 | athread = threading.Thread(target=wavethread, args=(wavefile,)) 264 | athread.start() 265 | while is_playing: 266 | time.sleep(0.01) 267 | #print("end of fun") 268 | 269 | def wavethread(wavefile): 270 | global wf # Make wf global so it can be accessed by callback 271 | global is_playing 272 | global outputdev 273 | is_playing = True 274 | # Open the wav file 275 | wf = wave.open(wavefile, 'rb') 276 | 277 | p = pyaudio.PyAudio() 278 | RATE = wf.getframerate() 279 | CHUNK = int(RATE / 10) 280 | 281 | stream = p.open(format=p.get_format_from_width(wf.getsampwidth()), 282 | channels=wf.getnchannels(), 283 | rate=RATE, 284 | output=True, 285 | output_device_index=outputdev, 286 | frames_per_buffer=CHUNK, 287 | stream_callback=callback) 288 | 289 | # Start the stream 290 | stream.start_stream() 291 | 292 | # Keep the script running while the audio is playing 293 | while stream.is_active(): 294 | time.sleep(0.1) 295 | 296 | # Stop stream 297 | stream.stop_stream() 298 | stream.close() 299 | 300 | # Close PyAudio and wave file 301 | wf.close() 302 | p.terminate() 303 | is_playing=False 304 | 305 | 306 | def process_text(text): 307 | global mute 308 | global Running 309 | global Sleeping 310 | print(text) 311 | if not mute and not Sleeping and Running: 312 | handle_completed_sentence(text) 313 | 314 | mute = False 315 | recorder = None 316 | listening = False 317 | 318 | def my_start_callback(): 319 | global is_talking 320 | global listening 321 | global vc 322 | global lastvolume 323 | print("Recording started!") 324 | if(not is_talking): 325 | listening=True 326 | lastvolume = vc.get_volume() 327 | print(lastvolume) 328 | vc.set_volume(25) 329 | 330 | def my_stop_callback(): 331 | global listening 332 | listening=False 333 | vc.set_volume(lastvolume) 334 | print("Recording stopped!") 335 | 336 | def tupdate(text): 337 | global stopplayback 338 | global brain 339 | global saidname 340 | global current_conversation_id 341 | global Sleeping 342 | global crow 343 | global is_talking 344 | global listening 345 | print(text) 346 | if Sleeping: 347 | 
if(contains_word(text.lower(),config.config['name'].lower())): 348 | print("wake") 349 | Sleeping = False 350 | crow.SleepTimer = 0 351 | else: 352 | crow.SleepTimer = 0 353 | if is_talking: 354 | if(contains_word(text.lower(),config.config['name'].lower()) and not saidname): 355 | #stop current playback 356 | print("INTERUPT") 357 | stopplayback=True 358 | brain.addSystemMessage("[Interupted]",current_conversation_id) 359 | return 360 | else: 361 | listening=True 362 | 363 | def transcriptstart(): 364 | global crow 365 | crow.SleepTimer = 0 366 | 367 | 368 | 369 | whisperprompt = "" 370 | 371 | 372 | 373 | def aibrains(): 374 | global Running 375 | global mute 376 | global recorder 377 | print("BRAINS") 378 | try: 379 | print('Listening ... (press Shift + ESC to exit)') 380 | 381 | while Running: 382 | recorder.text(process_text) 383 | 384 | except Exception as e: 385 | print(f'An error occurred in aibrains: {e}') 386 | 387 | finally: 388 | print("end brain") 389 | Running = False 390 | 391 | 392 | recorder = None 393 | Sleeping = True 394 | Running = True 395 | crow = None 396 | config = None 397 | 398 | vc = Volume.VolumeControl(outputdev) 399 | lastvolume = vc.get_volume() 400 | 401 | def main(): 402 | global crow 403 | global Running 404 | global mute 405 | global voicenum 406 | global vol 407 | global is_playing 408 | global listening 409 | global Sleeping 410 | global recorder 411 | global outputdev 412 | #list_output_devices() 413 | 414 | print("MAIN") 415 | 416 | print(lastvolume) 417 | try: 418 | 419 | while Running: 420 | #Sleeping=False 421 | 422 | #update crow visuals 423 | crow.listen = listening 424 | crow.Sleeping = Sleeping 425 | if(is_playing): 426 | crow.volume = vol 427 | crow.SleepTimer= 0 428 | crow.Update() 429 | 430 | 431 | if(listening): 432 | crow.SleepTimer= 0 433 | 434 | if(not Sleeping and crow.SleepTimer>15000): 435 | print("Sleep Mode") 436 | Sleeping=True 437 | crow.SleepTimer = 0 438 | 439 | if keyboard.is_pressed('Esc') and keyboard.is_pressed('Shift'): 440 | print("Escape key pressed. Exiting loop.") 441 | break 442 | if(not crow.running): 443 | break 444 | 445 | except Exception as e: 446 | print(f'An error occurred in the main loop: {e}') 447 | finally: 448 | Running = False 449 | shutdown() 450 | 451 | def shutdown(): 452 | global Running, recorder, brain, crow 453 | print("Initiating shutdown...") 454 | 455 | # Stop the main loop 456 | Running = False 457 | 458 | # Stop the recorder 459 | if recorder: 460 | print("Shutting down recorder...") 461 | recorder.abort() 462 | recorder.shutdown() 463 | 464 | 465 | # Stop the brain server 466 | if brain and brain.server_thread: 467 | print("Shutting down brain server...") 468 | brain.app.config['TESTING'] = True # This should make the server more responsive to shutdown 469 | requests.get('http://localhost:5000/shutdown') # Assuming you add a /shutdown route 470 | brain.server_thread.join(timeout=5) 471 | 472 | # Stop Crow 473 | if crow: 474 | print("Shutting down Crow...") 475 | crow.End() 476 | 477 | # Close PyAudio 478 | #if 'audio' in globals(): 479 | print("Closing PyAudio...") 480 | audio.terminate() 481 | 482 | print("Forcing termination of remaining threads...") 483 | for thread in [brainthread, playback_thread]: 484 | if thread and thread.is_alive(): 485 | force_thread_termination(thread) 486 | 487 | print("Shutdown complete. 
Exiting...") 488 | os._exit(0) # Force exit the Python process 489 | 490 | print("Shutdown complete.") 491 | 492 | def force_thread_termination(thread): 493 | if thread.is_alive(): 494 | print(f"Force terminating thread: {thread.name}") 495 | tid = thread.ident 496 | if tid is not None: 497 | res = ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_long(tid), ctypes.py_object(SystemExit)) 498 | if res > 1: 499 | ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_long(tid), None) 500 | print("Exception raise failed") 501 | 502 | 503 | def name_to_index(device_name, is_input): 504 | p = pyaudio.PyAudio() 505 | for i in range(p.get_device_count()): 506 | device_info = p.get_device_info_by_index(i) 507 | if device_info['name'] == device_name: 508 | if (is_input and device_info['maxInputChannels'] > 0) or \ 509 | (not is_input and device_info['maxOutputChannels'] > 0): 510 | p.terminate() 511 | return i 512 | p.terminate() 513 | return None # Device not found 514 | 515 | def get_audio_output_devices(): 516 | """ 517 | Prints a list of audio output devices and their device indices on Windows. 518 | """ 519 | p = pyaudio.PyAudio() 520 | 521 | print("Audio Output Devices:") 522 | for i in range(p.get_device_count()): 523 | device_info = p.get_device_info_by_index(i) 524 | if device_info["maxOutputChannels"] > 0: 525 | print(f"{i}: {device_info['name']}") 526 | 527 | p.terminate() 528 | 529 | if __name__ == '__main__': 530 | 531 | get_audio_output_devices() 532 | delete_wav_files() 533 | config = CrowConfig.config() 534 | ainame = config.config['name'] 535 | whisperprompt = "Talking to " + ainame 536 | voicenum = config.config['voice'] 537 | micnum = name_to_index( config.config['mic'],True) 538 | outputdev = name_to_index( config.config['speaker'],False) 539 | print("Output Device: " + str(outputdev)) 540 | if(micnum is None): 541 | micnum=0 542 | print("Mic Not Set") 543 | 544 | recorder_config = { 545 | 'input_device_index': micnum, 546 | 'spinner': False, 547 | 'model': 'base.en', 548 | 'language': 'en', 549 | 'silero_sensitivity': 0.4, 550 | 'silero_use_onnx': True, 551 | 'webrtc_sensitivity': 2, 552 | 'device':'cuda', 553 | 'post_speech_silence_duration': 1.0, 554 | 'min_length_of_recording': 0, 555 | 'min_gap_between_recordings': 0, 556 | 'enable_realtime_transcription': True, 557 | 'realtime_processing_pause': 0.2, 558 | 'realtime_model_type': 'tiny.en', 559 | 'on_recording_start': my_start_callback, 560 | 'on_recording_stop': my_stop_callback, 561 | 'on_transcription_start': transcriptstart, 562 | 'on_realtime_transcription_update': tupdate, 563 | #'on_vad_detect_start': vadstart, 564 | #'on_vad_detect_stop': vadstop, 565 | #'on_realtime_transcription_stabilized': process_text, 566 | 'initial_prompt':whisperprompt, 567 | } 568 | recorder = AudioToTextRecorder(**recorder_config) 569 | brainthread = threading.Thread(target=aibrains) 570 | brainthread.start() 571 | time.sleep(1) 572 | playback_thread = threading.Thread(target=play_and_delete_wav) 573 | playback_thread.daemon = True # Daemonize thread 574 | playback_thread.start() 575 | brain = CrowBrain.Init() 576 | brain.config = config 577 | brain.set_test_voice_callback(test_voice) 578 | crow = Crow.Init() 579 | wintts("Crow is Online", "en_US-libritts_r-medium.onnx -s " + str(voicenum)) 580 | main() 581 | print("END") -------------------------------------------------------------------------------- /CrowAssistant.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8
-*- 2 | 3 | block_cipher = None 4 | 5 | a = Analysis( 6 | ['CrowAssistant.py'], 7 | pathex=[], 8 | binaries=[], 9 | datas=[], 10 | hiddenimports=['tiktoken_ext.openai_public', 'tiktoken_ext'], 11 | hookspath=[], 12 | hooksconfig={}, 13 | runtime_hooks=[], 14 | excludes=[], 15 | noarchive=False, 16 | optimize=0, 17 | win_no_prefer_redirects=False, 18 | win_private_assemblies=False, 19 | cipher=block_cipher 20 | ) 21 | pyz = PYZ(a.pure, a.zipped_data, 22 | cipher=block_cipher) 23 | 24 | exe = EXE( 25 | pyz, 26 | a.scripts, 27 | [], 28 | exclude_binaries=True, 29 | name='CrowAssistant', 30 | debug=False, 31 | bootloader_ignore_signals=False, 32 | strip=False, 33 | upx=True, 34 | console=True, 35 | disable_windowed_traceback=False, 36 | argv_emulation=False, 37 | target_arch=None, 38 | codesign_identity=None, 39 | entitlements_file=None, 40 | icon=['crow.ico'], 41 | ) 42 | coll = COLLECT( 43 | exe, 44 | a.binaries, 45 | a.datas, 46 | strip=False, 47 | upx=True, 48 | upx_exclude=[], 49 | name='CrowAssistant', 50 | ) 51 | -------------------------------------------------------------------------------- /CrowBrain.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from datetime import datetime 4 | from flask import Flask, request, jsonify, render_template 5 | from flask_sqlalchemy import SQLAlchemy 6 | from sqlalchemy import desc 7 | from openai import OpenAI 8 | import threading 9 | import tiktoken 10 | import CrowConfig 11 | import pyaudio 12 | 13 | class CrowBrain: 14 | _instance = None 15 | 16 | @classmethod 17 | def get_instance(cls): 18 | if cls._instance is None: 19 | cls._instance = cls() 20 | return cls._instance 21 | 22 | 23 | def set_test_voice_callback(self, callback): 24 | """Set the callback function for testing voice.""" 25 | self.test_voice_callback = callback 26 | 27 | def test_voice(self, voice_id): 28 | if self.test_voice_callback is None: 29 | print("Test voice callback not set") 30 | return 31 | 32 | # Call the callback function 33 | self.test_voice_callback(voice_id) 34 | 35 | def count_tokens(self, messages): 36 | """Count the number of tokens in a list of messages.""" 37 | num_tokens = 0 38 | for message in messages: 39 | num_tokens += 4 # Every message follows {role/name}\n{content}\n 40 | for key, value in message.items(): 41 | num_tokens += len(self.encoding.encode(value)) 42 | if key == "name": # If there's a name, the role is omitted 43 | num_tokens -= 1 # Role is always required and always 1 token 44 | num_tokens += 2 # Every reply is primed with assistant 45 | return num_tokens 46 | 47 | def trim_messages(self, messages, max_tokens): 48 | """Trim the messages to fit within max_tokens.""" 49 | while self.count_tokens(messages) > max_tokens: 50 | # Remove the second message (keeping the first system message) 51 | if len(messages) > 1: 52 | messages.pop(1) 53 | else: 54 | # If we're down to one message and still over the limit, truncate it 55 | content = messages[0]['content'] 56 | messages[0]['content'] = self.encoding.decode(self.encoding.encode(content)[:max_tokens]) 57 | break 58 | return messages 59 | 60 | def __init__(self): 61 | if CrowBrain._instance is not None: 62 | raise Exception("This class is a singleton!") 63 | else: 64 | CrowBrain._instance = self 65 | 66 | self.config = CrowConfig.config() 67 | #self.name = "Crow" 68 | self.thecrow = None 69 | #self.max_tokens = 32000 # Maximum context length for mixtral-8x7b-32768 70 | self.encoding = tiktoken.encoding_for_model("gpt-3.5-turbo") # This 
works for most models 71 | #self.max_messages = 100 # Maximum number of messages to retrieve from the database 72 | 73 | # Get the directory of the current script 74 | base_dir = os.path.abspath(os.path.dirname(__file__)) 75 | 76 | # Create the path for your database file 77 | db_path = os.path.join(base_dir, 'conversations.db') 78 | 79 | self.app = Flask(__name__) 80 | self.app.config['SQLALCHEMY_DATABASE_URI'] = f'sqlite:///{db_path}' 81 | self.app.config['TEMPLATES_AUTO_RELOAD'] = True # Enable template auto-reloading 82 | self.db = SQLAlchemy(self.app) 83 | 84 | self.client = OpenAI(api_key=self.config.config['api_key'], base_url=self.config.config['url']) 85 | 86 | class Conversation(self.db.Model): 87 | id = self.db.Column(self.db.Integer, primary_key=True) 88 | name = self.db.Column(self.db.String(100)) 89 | created_at = self.db.Column(self.db.DateTime, default=datetime.utcnow) 90 | 91 | class Message(self.db.Model): 92 | id = self.db.Column(self.db.Integer, primary_key=True) 93 | conversation_id = self.db.Column(self.db.Integer, self.db.ForeignKey('conversation.id')) 94 | role = self.db.Column(self.db.String(50)) 95 | content = self.db.Column(self.db.Text) 96 | timestamp = self.db.Column(self.db.DateTime, default=datetime.utcnow) 97 | conversation = self.db.relationship('Conversation', backref=self.db.backref('messages', lazy=True)) 98 | 99 | self.Conversation = Conversation 100 | self.Message = Message 101 | 102 | with self.app.app_context(): 103 | self.db.create_all() 104 | 105 | self.setup_routes() 106 | self.server_thread = None 107 | 108 | def run_server(self, host='0.0.0.0', port=5000): 109 | self.app.run(host=host, port=port, debug=False, use_reloader=False) 110 | 111 | def start_server_thread(self): 112 | if self.server_thread is None or not self.server_thread.is_alive(): 113 | self.server_thread = threading.Thread(target=self.run_server) 114 | self.server_thread.start() 115 | 116 | 117 | 118 | def load_config(self): 119 | self.config.load_config() 120 | 121 | def save_config(self): 122 | self.config.save_config() 123 | 124 | 125 | def new_conversation(self): 126 | with self.app.app_context(): 127 | conversation = self.Conversation(name=f"Conversation {self.Conversation.query.count() + 1}") 128 | self.db.session.add(conversation) 129 | self.db.session.commit() 130 | return {"conversation_id": conversation.id, "conversation_name": conversation.name} 131 | 132 | def get_conversations(self): 133 | with self.app.app_context(): 134 | conversations = self.Conversation.query.all() 135 | return {"conversations": [{"id": c.id, "name": c.name} for c in conversations]} 136 | 137 | def select_conversation(self, conversation_id): 138 | with self.app.app_context(): 139 | messages = self.Message.query.filter_by(conversation_id=conversation_id).order_by(self.Message.timestamp).all() 140 | return {"conversation_log": [{"role": m.role, "content": m.content} for m in messages]} 141 | 142 | def addSystemMessage(self, input_text, conversation_id): 143 | def add_message(): 144 | user_message = self.Message(conversation_id=conversation_id, role="system", content=input_text) 145 | self.db.session.add(user_message) 146 | self.db.session.commit() 147 | 148 | with self.app.app_context(): 149 | add_message() 150 | 151 | def generate(self, input_text, conversation_id): 152 | with self.app.app_context(): 153 | ainame = self.config.config['name'] 154 | personality = self.config.config['personality'] 155 | self.system_message = {"role": "system", "content": "You are a semi-sentient AI named "+ ainame +". 
"+personality+" You hear and talk with speech to text and text to speech, don't use descriptions of what you are doing. Be concise and direct in your responses."} 156 | 157 | conversation = self.Conversation.query.get(conversation_id) 158 | if not conversation: 159 | # Create a new conversation 160 | print("create new conversation") 161 | conversation = self.Conversation(name=f"Conversation {self.Conversation.query.count() + 1}") 162 | self.db.session.add(conversation) 163 | self.db.session.commit() 164 | conversation_id = conversation.id 165 | 166 | user_message = self.Message(conversation_id=conversation_id, role="user", content=input_text) 167 | self.db.session.add(user_message) 168 | self.db.session.commit() 169 | 170 | 171 | # Retrieve the most recent messages, including the new one 172 | recent_messages = self.Message.query.filter_by(conversation_id=conversation_id) \ 173 | .order_by(desc(self.Message.timestamp)) \ 174 | .limit(self.config.config['maxmsg']) \ 175 | .all() 176 | 177 | # Reverse the order to get chronological order 178 | recent_messages = recent_messages[::-1] 179 | 180 | messages_for_api = [{"role": m.role, "content": m.content} for m in recent_messages] 181 | 182 | # Calculate tokens for system message 183 | system_message_tokens = self.count_tokens([self.system_message]) 184 | 185 | # Trim messages to fit within token limit, leaving room for system message 186 | trimmed_messages = self.trim_messages(messages_for_api, self.config.config['maxtoken'] - system_message_tokens) 187 | 188 | # Add the system message at the beginning after trimming 189 | final_messages = [self.system_message] + trimmed_messages 190 | 191 | try: 192 | response = self.client.chat.completions.create( 193 | model=self.config.config['model'], 194 | messages=final_messages 195 | ) 196 | ai_message_content = response.choices[0].message.content 197 | except Exception as e: 198 | print(e) 199 | return {"error": str(e)} 200 | 201 | ai_message = self.Message(conversation_id=conversation_id, role="assistant", content=ai_message_content) 202 | self.db.session.add(ai_message) 203 | self.db.session.commit() 204 | return {"role": "assistant", "content": ai_message_content} 205 | 206 | def delete_conversation(self, conversation_id): 207 | with self.app.app_context(): 208 | conversation = self.Conversation.query.get(conversation_id) 209 | if conversation: 210 | self.Message.query.filter_by(conversation_id=conversation_id).delete() 211 | self.db.session.delete(conversation) 212 | self.db.session.commit() 213 | return {"status": "success"} 214 | else: 215 | return {"status": "error", "message": "Conversation not found"} 216 | 217 | def list_audio_input_names(self): 218 | p = pyaudio.PyAudio() 219 | input_devices = [] 220 | for i in range(p.get_device_count()): 221 | device_info = p.get_device_info_by_index(i) 222 | if device_info['maxInputChannels'] > 0: 223 | input_devices.append(device_info['name']) 224 | p.terminate() 225 | return input_devices 226 | 227 | def list_audio_output_names(self): 228 | p = pyaudio.PyAudio() 229 | output_devices = [] 230 | for i in range(p.get_device_count()): 231 | device_info = p.get_device_info_by_index(i) 232 | if device_info['maxOutputChannels'] > 0: 233 | output_devices.append(device_info['name']) 234 | p.terminate() 235 | return output_devices 236 | 237 | def setup_routes(self): 238 | @self.app.route('/') 239 | def index(): 240 | return render_template('index.html') 241 | 242 | @self.app.route('/new_conversation', methods=['POST']) 243 | def new_conversation_route(): 244 | 
return jsonify(self.new_conversation()) 245 | 246 | @self.app.route('/get_conversations', methods=['GET']) 247 | def get_conversations_route(): 248 | return jsonify(self.get_conversations()) 249 | 250 | @self.app.route('/select_conversation', methods=['GET']) 251 | def select_conversation_route(): 252 | conversation_id = request.args.get('conversation_id') 253 | return jsonify(self.select_conversation(conversation_id)) 254 | 255 | @self.app.route('/generate', methods=['POST']) 256 | def generate_route(): 257 | input_text = request.form['input_text'] 258 | conversation_id = request.form['conversation_id'] 259 | return jsonify(self.generate(input_text, conversation_id)) 260 | 261 | @self.app.route('/delete_conversation', methods=['POST']) 262 | def delete_conversation_route(): 263 | conversation_id = request.form['conversation_id'] 264 | return jsonify(self.delete_conversation(conversation_id)) 265 | 266 | @self.app.route('/shutdown', methods=['GET']) 267 | def shutdown(): 268 | func = request.environ.get('werkzeug.server.shutdown') 269 | if func is None: 270 | raise RuntimeError('Not running with the Werkzeug Server') 271 | func() 272 | return 'Server shutting down...' 273 | 274 | @self.app.route('/settings', methods=['GET', 'POST']) 275 | def settings_route(): 276 | if request.method == 'POST': 277 | # Update config with form data 278 | self.config.config['name'] = request.form['name'] 279 | self.config.config['personality'] = request.form['personality'] 280 | self.config.config['voice'] = int(request.form['voice']) 281 | self.config.config['url'] = request.form['url'] 282 | self.config.config['api_key'] = request.form['api_key'] 283 | self.config.config['model'] = request.form['model'] 284 | self.config.config['scale'] = int(request.form['scale']) 285 | self.config.config['mic'] = request.form['mic'] 286 | self.config.config['speaker'] = request.form['speaker'] 287 | self.config.config['maxtoken'] = int(request.form['maxtoken']) 288 | self.config.config['maxmsg'] = int(request.form['maxmsg']) 289 | self.save_config() 290 | return jsonify({"status": "success"}) 291 | else: 292 | input_devices = self.list_audio_input_names() 293 | output_devices = self.list_audio_output_names() 294 | return render_template('settings.html', config=self.config.config, input_devices=input_devices, output_devices=output_devices) 295 | 296 | @self.app.route('/test_voice', methods=['POST']) 297 | def test_voice_route(): 298 | voice_id = int(request.form['voice']) 299 | self.test_voice(voice_id) 300 | return jsonify({"status": "success"}) 301 | 302 | 303 | 304 | 305 | def Init(): 306 | brain = CrowBrain.get_instance() 307 | brain.start_server_thread() 308 | return CrowBrain.get_instance() 309 | -------------------------------------------------------------------------------- /CrowConfig.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | class config: 5 | 6 | def __init__(self): 7 | self.config = { 8 | "name": "Crow", 9 | "personality": "", 10 | "voice": 1, 11 | "url": "https://api.groq.com/openai/v1", 12 | "api_key": os.environ.get("API_KEY", ""), 13 | "model": "mixtral-8x7b-32768", 14 | "mic":"default", 15 | "speaker":"default", 16 | "scale":3, 17 | "port":5000, 18 | "maxtoken":32000, 19 | "maxmsg":100, 20 | } 21 | self.CONFIG_FILE = 'config.json' 22 | self.load_config() 23 | 24 | def load_config(self): 25 | print("Loading Config") 26 | if os.path.exists(self.CONFIG_FILE): 27 | with open(self.CONFIG_FILE, 'r') as f: 28 | self.config = 
json.load(f) 29 | else: 30 | print("no config") 31 | #we need to launch the settings window 32 | 33 | def save_config(self): 34 | with open(self.CONFIG_FILE, 'w') as f: 35 | json.dump(self.config, f, indent=2) 36 | 37 | 38 | -------------------------------------------------------------------------------- /CrowSTT.py: -------------------------------------------------------------------------------- 1 | from typing import Iterable, List, Optional, Union 2 | import torch.multiprocessing as mp 3 | import torch 4 | from typing import List, Union 5 | from ctypes import c_bool 6 | from scipy.signal import resample 7 | from scipy import signal 8 | import faster_whisper 9 | import collections 10 | import numpy as np 11 | import traceback 12 | import threading 13 | import webrtcvad 14 | # import itertools 15 | import platform 16 | import pyaudio 17 | import logging 18 | import struct 19 | import halo 20 | import time 21 | import copy 22 | import os 23 | import re 24 | import gc 25 | 26 | # Set OpenMP runtime duplicate library handling to OK (Use only for development!) 27 | #os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE' 28 | 29 | INIT_MODEL_TRANSCRIPTION = "tiny" 30 | INIT_MODEL_TRANSCRIPTION_REALTIME = "tiny" 31 | INIT_REALTIME_PROCESSING_PAUSE = 0.2 32 | INIT_SILERO_SENSITIVITY = 0.4 33 | INIT_WEBRTC_SENSITIVITY = 3 34 | INIT_POST_SPEECH_SILENCE_DURATION = 0.6 35 | INIT_MIN_LENGTH_OF_RECORDING = 0.5 36 | INIT_MIN_GAP_BETWEEN_RECORDINGS = 0 37 | INIT_WAKE_WORDS_SENSITIVITY = 0.6 38 | INIT_PRE_RECORDING_BUFFER_DURATION = 1.0 39 | INIT_WAKE_WORD_ACTIVATION_DELAY = 0.0 40 | INIT_WAKE_WORD_TIMEOUT = 5.0 41 | INIT_WAKE_WORD_BUFFER_DURATION = 0.1 42 | ALLOWED_LATENCY_LIMIT = 10 43 | 44 | TIME_SLEEP = 0.02 45 | SAMPLE_RATE = 16000 46 | BUFFER_SIZE = 512 47 | INT16_MAX_ABS_VALUE = 32768.0 48 | 49 | INIT_HANDLE_BUFFER_OVERFLOW = False 50 | if platform.system() != 'Darwin': 51 | INIT_HANDLE_BUFFER_OVERFLOW = True 52 | 53 | 54 | class AudioToTextRecorder: 55 | _instance = None 56 | 57 | @classmethod 58 | def get_instance(cls): 59 | if cls._instance is None: 60 | cls._instance = cls() 61 | return cls._instance 62 | 63 | 64 | def __init__(self, 65 | model: str = INIT_MODEL_TRANSCRIPTION, 66 | language: str = "", 67 | compute_type: str = "default", 68 | input_device_index: int = None, 69 | gpu_device_index: Union[int, List[int]] = 0, 70 | device: str = "cuda", 71 | on_recording_start=None, 72 | on_recording_stop=None, 73 | on_transcription_start=None, 74 | ensure_sentence_starting_uppercase=True, 75 | ensure_sentence_ends_with_period=True, 76 | use_microphone=True, 77 | spinner=True, 78 | level=logging.WARNING, 79 | 80 | # Realtime transcription parameters 81 | enable_realtime_transcription=False, 82 | realtime_model_type=INIT_MODEL_TRANSCRIPTION_REALTIME, 83 | realtime_processing_pause=INIT_REALTIME_PROCESSING_PAUSE, 84 | on_realtime_transcription_update=None, 85 | on_realtime_transcription_stabilized=None, 86 | 87 | # Voice activation parameters 88 | silero_sensitivity: float = INIT_SILERO_SENSITIVITY, 89 | silero_use_onnx: bool = False, 90 | webrtc_sensitivity: int = INIT_WEBRTC_SENSITIVITY, 91 | post_speech_silence_duration: float = ( 92 | INIT_POST_SPEECH_SILENCE_DURATION 93 | ), 94 | min_length_of_recording: float = ( 95 | INIT_MIN_LENGTH_OF_RECORDING 96 | ), 97 | min_gap_between_recordings: float = ( 98 | INIT_MIN_GAP_BETWEEN_RECORDINGS 99 | ), 100 | pre_recording_buffer_duration: float = ( 101 | INIT_PRE_RECORDING_BUFFER_DURATION 102 | ), 103 | on_vad_detect_start=None, 104 | on_vad_detect_stop=None, 105 | 
106 | on_recorded_chunk=None, 107 | debug_mode=False, 108 | handle_buffer_overflow: bool = INIT_HANDLE_BUFFER_OVERFLOW, 109 | beam_size: int = 5, 110 | beam_size_realtime: int = 3, 111 | buffer_size: int = BUFFER_SIZE, 112 | sample_rate: int = SAMPLE_RATE, 113 | initial_prompt: Optional[Union[str, Iterable[int]]] = None, 114 | suppress_tokens: Optional[List[int]] = [-1], 115 | ): 116 | print("STARTING STT") 117 | mp.freeze_support() 118 | if AudioToTextRecorder._instance is not None: 119 | raise Exception("This class is a singleton!") 120 | else: 121 | AudioToTextRecorder._instance = self 122 | 123 | self.language = language 124 | self.compute_type = compute_type 125 | self.input_device_index = input_device_index 126 | self.gpu_device_index = gpu_device_index 127 | self.device = device 128 | # self.wake_words = wake_words 129 | # self.wake_word_activation_delay = wake_word_activation_delay 130 | # self.wake_word_timeout = wake_word_timeout 131 | # self.wake_word_buffer_duration = wake_word_buffer_duration 132 | self.ensure_sentence_starting_uppercase = ( 133 | ensure_sentence_starting_uppercase 134 | ) 135 | self.ensure_sentence_ends_with_period = ( 136 | ensure_sentence_ends_with_period 137 | ) 138 | self.use_microphone = mp.Value(c_bool, use_microphone) 139 | self.min_gap_between_recordings = min_gap_between_recordings 140 | self.min_length_of_recording = min_length_of_recording 141 | self.pre_recording_buffer_duration = pre_recording_buffer_duration 142 | self.post_speech_silence_duration = post_speech_silence_duration 143 | self.on_recording_start = on_recording_start 144 | self.on_recording_stop = on_recording_stop 145 | # self.on_wakeword_detected = on_wakeword_detected 146 | # self.on_wakeword_timeout = on_wakeword_timeout 147 | self.on_vad_detect_start = on_vad_detect_start 148 | self.on_vad_detect_stop = on_vad_detect_stop 149 | # self.on_wakeword_detection_start = on_wakeword_detection_start 150 | # self.on_wakeword_detection_end = on_wakeword_detection_end 151 | self.on_recorded_chunk = on_recorded_chunk 152 | self.on_transcription_start = on_transcription_start 153 | self.enable_realtime_transcription = enable_realtime_transcription 154 | self.realtime_model_type = realtime_model_type 155 | self.realtime_processing_pause = realtime_processing_pause 156 | self.on_realtime_transcription_update = ( 157 | on_realtime_transcription_update 158 | ) 159 | self.on_realtime_transcription_stabilized = ( 160 | on_realtime_transcription_stabilized 161 | ) 162 | self.debug_mode = debug_mode 163 | self.handle_buffer_overflow = handle_buffer_overflow 164 | self.beam_size = beam_size 165 | self.beam_size_realtime = beam_size_realtime 166 | self.allowed_latency_limit = ALLOWED_LATENCY_LIMIT 167 | 168 | self.level = level 169 | self.audio_queue = mp.Queue() 170 | self.buffer_size = buffer_size 171 | self.sample_rate = sample_rate 172 | self.recording_start_time = 0 173 | self.recording_stop_time = 0 174 | self.wake_word_detect_time = 0 175 | self.silero_check_time = 0 176 | self.silero_working = False 177 | self.speech_end_silence_start = 0 178 | self.silero_sensitivity = silero_sensitivity 179 | self.listen_start = 0 180 | self.spinner = spinner 181 | self.halo = None 182 | self.state = "inactive" 183 | self.wakeword_detected = False 184 | self.text_storage = [] 185 | self.realtime_stabilized_text = "" 186 | self.realtime_stabilized_safetext = "" 187 | self.is_webrtc_speech_active = False 188 | self.is_silero_speech_active = False 189 | self.recording_thread = None 190 | self.realtime_thread = 
None 191 | self.audio_interface = None 192 | self.audio = None 193 | self.stream = None 194 | self.start_recording_event = threading.Event() 195 | self.stop_recording_event = threading.Event() 196 | self.last_transcription_bytes = None 197 | self.initial_prompt = initial_prompt 198 | self.suppress_tokens = suppress_tokens 199 | # self.use_wake_words = wake_words or wakeword_backend in {'oww', 'openwakeword', 'openwakewords'} 200 | 201 | # Initialize the logging configuration with the specified level 202 | log_format = 'RealTimeSTT: %(name)s - %(levelname)s - %(message)s' 203 | 204 | # Create a logger 205 | logger = logging.getLogger() 206 | logger.setLevel(level) # Set the root logger's level 207 | 208 | # Create a file handler and set its level 209 | file_handler = logging.FileHandler('realtimesst.log') 210 | file_handler.setLevel(logging.DEBUG) 211 | file_handler.setFormatter(logging.Formatter(log_format)) 212 | 213 | # Create a console handler and set its level 214 | console_handler = logging.StreamHandler() 215 | console_handler.setLevel(level) 216 | console_handler.setFormatter(logging.Formatter(log_format)) 217 | 218 | # Add the handlers to the logger 219 | logger.addHandler(file_handler) 220 | logger.addHandler(console_handler) 221 | 222 | self.is_shut_down = False 223 | self.shutdown_event = mp.Event() 224 | 225 | try: 226 | logging.debug("Explicitly setting the multiprocessing start method to 'spawn'") 227 | mp.set_start_method('spawn') 228 | except RuntimeError as e: 229 | logging.debug(f"Start method has already been set. Details: {e}") 230 | 231 | logging.info("Starting RealTimeSTT") 232 | 233 | self.interrupt_stop_event = mp.Event() 234 | self.was_interrupted = mp.Event() 235 | self.main_transcription_ready_event = mp.Event() 236 | self.parent_transcription_pipe, child_transcription_pipe = mp.Pipe() 237 | 238 | # Set device for model 239 | self.device = "cuda" if self.device == "cuda" and torch.cuda.is_available() else "cpu" 240 | 241 | self.transcript_process = self._start_thread( 242 | target=AudioToTextRecorder._transcription_worker, 243 | args=( 244 | child_transcription_pipe, 245 | model, 246 | self.compute_type, 247 | self.gpu_device_index, 248 | self.device, 249 | self.main_transcription_ready_event, 250 | self.shutdown_event, 251 | self.interrupt_stop_event, 252 | self.beam_size, 253 | self.initial_prompt, 254 | self.suppress_tokens 255 | ) 256 | ) 257 | 258 | # Start audio data reading process 259 | if self.use_microphone.value: 260 | logging.info("Initializing audio recording" 261 | " (creating pyAudio input stream," 262 | f" sample rate: {self.sample_rate}" 263 | f" buffer size: {self.buffer_size}" 264 | ) 265 | self.reader_process = self._start_thread( 266 | target=AudioToTextRecorder._audio_data_worker, 267 | args=( 268 | self.audio_queue, 269 | self.sample_rate, 270 | self.buffer_size, 271 | self.input_device_index, 272 | self.shutdown_event, 273 | self.interrupt_stop_event, 274 | self.use_microphone 275 | ) 276 | ) 277 | 278 | # Initialize the realtime transcription model 279 | if self.enable_realtime_transcription: 280 | try: 281 | logging.info("Initializing faster_whisper realtime " 282 | f"transcription model {self.realtime_model_type}" 283 | ) 284 | self.realtime_model_type = faster_whisper.WhisperModel( 285 | model_size_or_path=self.realtime_model_type, 286 | device=self.device, 287 | compute_type=self.compute_type, 288 | device_index=self.gpu_device_index 289 | ) 290 | 291 | except Exception as e: 292 | logging.exception("Error initializing faster_whisper 
" 293 | f"realtime transcription model: {e}" 294 | ) 295 | raise 296 | 297 | logging.debug("Faster_whisper realtime speech to text " 298 | "transcription model initialized successfully") 299 | 300 | 301 | # Setup voice activity detection model WebRTC 302 | try: 303 | logging.info("Initializing WebRTC voice with " 304 | f"Sensitivity {webrtc_sensitivity}" 305 | ) 306 | self.webrtc_vad_model = webrtcvad.Vad() 307 | self.webrtc_vad_model.set_mode(webrtc_sensitivity) 308 | 309 | except Exception as e: 310 | logging.exception("Error initializing WebRTC voice " 311 | f"activity detection engine: {e}" 312 | ) 313 | raise 314 | 315 | logging.debug("WebRTC VAD voice activity detection " 316 | "engine initialized successfully" 317 | ) 318 | 319 | # Setup voice activity detection model Silero VAD 320 | try: 321 | self.silero_vad_model, _ = torch.hub.load( 322 | repo_or_dir="snakers4/silero-vad", 323 | model="silero_vad", 324 | verbose=False, 325 | onnx=silero_use_onnx 326 | ) 327 | 328 | except Exception as e: 329 | logging.exception(f"Error initializing Silero VAD " 330 | f"voice activity detection engine: {e}" 331 | ) 332 | raise 333 | 334 | logging.debug("Silero VAD voice activity detection " 335 | "engine initialized successfully" 336 | ) 337 | 338 | self.audio_buffer = collections.deque( 339 | maxlen=int((self.sample_rate // self.buffer_size) * 340 | self.pre_recording_buffer_duration) 341 | ) 342 | self.frames = [] 343 | 344 | # Recording control flags 345 | self.is_recording = False 346 | self.is_running = True 347 | self.start_recording_on_voice_activity = False 348 | self.stop_recording_on_voice_deactivity = False 349 | 350 | # Start the recording worker thread 351 | self.recording_thread = threading.Thread(target=self._recording_worker) 352 | self.recording_thread.daemon = True 353 | self.recording_thread.start() 354 | 355 | # Start the realtime transcription worker thread 356 | self.realtime_thread = threading.Thread(target=self._realtime_worker) 357 | self.realtime_thread.daemon = True 358 | self.realtime_thread.start() 359 | 360 | # Wait for transcription models to start 361 | logging.debug('Waiting for main transcription model to start') 362 | self.main_transcription_ready_event.wait() 363 | logging.debug('Main transcription model ready') 364 | 365 | logging.debug('RealtimeSTT initialization completed successfully') 366 | 367 | def _start_thread(self, target=None, args=()): 368 | 369 | if (platform.system() == 'Linux'): 370 | thread = threading.Thread(target=target, args=args) 371 | thread.deamon = True 372 | thread.start() 373 | return thread 374 | else: 375 | thread = mp.Process(target=target, args=args) 376 | thread.start() 377 | return thread 378 | 379 | @staticmethod 380 | def _transcription_worker(conn, 381 | model_path, 382 | compute_type, 383 | gpu_device_index, 384 | device, 385 | ready_event, 386 | shutdown_event, 387 | interrupt_stop_event, 388 | beam_size, 389 | initial_prompt, 390 | suppress_tokens 391 | ): 392 | 393 | 394 | logging.info("Initializing faster_whisper " 395 | f"main transcription model {model_path}" 396 | ) 397 | 398 | try: 399 | model = faster_whisper.WhisperModel( 400 | model_size_or_path=model_path, 401 | device=device, 402 | compute_type=compute_type, 403 | device_index=gpu_device_index, 404 | ) 405 | 406 | except Exception as e: 407 | logging.exception("Error initializing main " 408 | f"faster_whisper transcription model: {e}" 409 | ) 410 | raise 411 | 412 | ready_event.set() 413 | 414 | logging.debug("Faster_whisper main speech to text " 415 | 
"transcription model initialized successfully" 416 | ) 417 | 418 | while not shutdown_event.is_set(): 419 | try: 420 | if conn.poll(0.5): 421 | audio, language = conn.recv() 422 | try: 423 | segments = model.transcribe( 424 | audio, 425 | language=language if language else None, 426 | beam_size=beam_size, 427 | initial_prompt=initial_prompt, 428 | suppress_tokens=suppress_tokens 429 | ) 430 | segments = segments[0] 431 | transcription = " ".join(seg.text for seg in segments) 432 | transcription = transcription.strip() 433 | conn.send(('success', transcription)) 434 | except Exception as e: 435 | logging.error(f"General transcription error: {e}") 436 | conn.send(('error', str(e))) 437 | else: 438 | # If there's no data, sleep / prevent busy waiting 439 | time.sleep(0.02) 440 | except KeyboardInterrupt: 441 | interrupt_stop_event.set() 442 | logging.debug("Transcription worker process " 443 | "finished due to KeyboardInterrupt" 444 | ) 445 | break 446 | 447 | @staticmethod 448 | def _audio_data_worker(audio_queue, 449 | sample_rate, 450 | buffer_size, 451 | input_device_index, 452 | shutdown_event, 453 | interrupt_stop_event, 454 | use_microphone): 455 | 456 | try: 457 | audio_interface = pyaudio.PyAudio() 458 | if input_device_index is None: 459 | default_device = audio_interface.get_default_input_device_info() 460 | input_device_index = default_device['index'] 461 | stream = audio_interface.open( 462 | rate=sample_rate, 463 | format=pyaudio.paInt16, 464 | channels=1, 465 | input=True, 466 | frames_per_buffer=buffer_size, 467 | input_device_index=input_device_index, 468 | ) 469 | 470 | except Exception as e: 471 | logging.exception("Error initializing pyaudio " 472 | f"audio recording: {e}" 473 | ) 474 | raise 475 | 476 | logging.debug("Audio recording (pyAudio input " 477 | "stream) initialized successfully" 478 | ) 479 | 480 | try: 481 | while not shutdown_event.is_set(): 482 | try: 483 | data = stream.read(buffer_size) 484 | 485 | except OSError as e: 486 | if e.errno == pyaudio.paInputOverflowed: 487 | logging.warning("Input overflowed. Frame dropped.") 488 | else: 489 | logging.error(f"Error during recording: {e}") 490 | tb_str = traceback.format_exc() 491 | print(f"Traceback: {tb_str}") 492 | print(f"Error: {e}") 493 | continue 494 | 495 | except Exception as e: 496 | logging.error(f"Error during recording: {e}") 497 | tb_str = traceback.format_exc() 498 | print(f"Traceback: {tb_str}") 499 | print(f"Error: {e}") 500 | continue 501 | 502 | if use_microphone.value: 503 | audio_queue.put(data) 504 | 505 | except KeyboardInterrupt: 506 | interrupt_stop_event.set() 507 | logging.debug("Audio data worker process " 508 | "finished due to KeyboardInterrupt" 509 | ) 510 | finally: 511 | stream.stop_stream() 512 | stream.close() 513 | audio_interface.terminate() 514 | 515 | def wakeup(self): 516 | 517 | self.listen_start = time.time() 518 | 519 | def abort(self): 520 | self.start_recording_on_voice_activity = False 521 | self.stop_recording_on_voice_deactivity = False 522 | self._set_state("inactive") 523 | self.interrupt_stop_event.set() 524 | self.was_interrupted.wait() 525 | self.was_interrupted.clear() 526 | 527 | def wait_audio(self): 528 | 529 | 530 | self.listen_start = time.time() 531 | 532 | # If not yet started recording, wait for voice activity to initiate. 
533 | if not self.is_recording and not self.frames: 534 | self._set_state("listening") 535 | self.start_recording_on_voice_activity = True 536 | 537 | # Wait until recording starts 538 | while not self.interrupt_stop_event.is_set(): 539 | if self.start_recording_event.wait(timeout=0.02): 540 | break 541 | 542 | # If recording is ongoing, wait for voice inactivity 543 | # to finish recording. 544 | if self.is_recording: 545 | self.stop_recording_on_voice_deactivity = True 546 | 547 | # Wait until recording stops 548 | while not self.interrupt_stop_event.is_set(): 549 | if (self.stop_recording_event.wait(timeout=0.02)): 550 | break 551 | 552 | # Convert recorded frames to the appropriate audio format. 553 | audio_array = np.frombuffer(b''.join(self.frames), dtype=np.int16) 554 | self.audio = audio_array.astype(np.float32) / INT16_MAX_ABS_VALUE 555 | self.frames.clear() 556 | 557 | # Reset recording-related timestamps 558 | self.recording_stop_time = 0 559 | self.listen_start = 0 560 | 561 | self._set_state("inactive") 562 | 563 | def transcribe(self): 564 | 565 | self._set_state("transcribing") 566 | audio_copy = copy.deepcopy(self.audio) 567 | self.parent_transcription_pipe.send((self.audio, self.language)) 568 | status, result = self.parent_transcription_pipe.recv() 569 | 570 | self._set_state("inactive") 571 | if status == 'success': 572 | self.last_transcription_bytes = audio_copy 573 | return self._preprocess_output(result) 574 | else: 575 | logging.error(result) 576 | raise Exception(result) 577 | 578 | def _process_wakeword(self, data): 579 | """ 580 | Processes audio data to detect wake words. 581 | """ 582 | if self.wakeword_backend in {'pvp', 'pvporcupine'}: 583 | pcm = struct.unpack_from( 584 | "h" * self.buffer_size, 585 | data 586 | ) 587 | porcupine_index = self.porcupine.process(pcm) 588 | if self.debug_mode: 589 | print (f"wake words porcupine_index: {porcupine_index}") 590 | return self.porcupine.process(pcm) 591 | 592 | elif self.wakeword_backend in {'oww', 'openwakeword', 'openwakewords'}: 593 | pcm = np.frombuffer(data, dtype=np.int16) 594 | prediction = self.owwModel.predict(pcm) 595 | max_score = -1 596 | max_index = -1 597 | wake_words_in_prediction = len(self.owwModel.prediction_buffer.keys()) 598 | self.wake_words_sensitivities 599 | if wake_words_in_prediction: 600 | for idx, mdl in enumerate(self.owwModel.prediction_buffer.keys()): 601 | scores = list(self.owwModel.prediction_buffer[mdl]) 602 | if scores[-1] >= self.wake_words_sensitivity and scores[-1] > max_score: 603 | max_score = scores[-1] 604 | max_index = idx 605 | if self.debug_mode: 606 | print (f"wake words oww max_index, max_score: {max_index} {max_score}") 607 | return max_index 608 | else: 609 | if self.debug_mode: 610 | print (f"wake words oww_index: -1") 611 | return -1 612 | 613 | if self.debug_mode: 614 | print("wake words no match") 615 | return -1 616 | 617 | def text(self, 618 | on_transcription_finished=None, 619 | ): 620 | 621 | 622 | self.interrupt_stop_event.clear() 623 | self.was_interrupted.clear() 624 | 625 | self.wait_audio() 626 | 627 | if self.is_shut_down or self.interrupt_stop_event.is_set(): 628 | if self.interrupt_stop_event.is_set(): 629 | self.was_interrupted.set() 630 | return "" 631 | 632 | if on_transcription_finished: 633 | threading.Thread(target=on_transcription_finished, 634 | args=(self.transcribe(),)).start() 635 | else: 636 | return self.transcribe() 637 | 638 | def start(self): 639 | 640 | 641 | # Ensure there's a minimum interval 642 | # between stopping and starting 
recording 643 | if (time.time() - self.recording_stop_time 644 | < self.min_gap_between_recordings): 645 | logging.info("Attempted to start recording " 646 | "too soon after stopping." 647 | ) 648 | return self 649 | 650 | logging.info("recording started") 651 | self._set_state("recording") 652 | self.text_storage = [] 653 | self.realtime_stabilized_text = "" 654 | self.realtime_stabilized_safetext = "" 655 | self.wakeword_detected = False 656 | self.wake_word_detect_time = 0 657 | self.frames = [] 658 | self.is_recording = True 659 | self.recording_start_time = time.time() 660 | self.is_silero_speech_active = False 661 | self.is_webrtc_speech_active = False 662 | self.stop_recording_event.clear() 663 | self.start_recording_event.set() 664 | 665 | if self.on_recording_start: 666 | self.on_recording_start() 667 | 668 | return self 669 | 670 | def stop(self): 671 | 672 | 673 | # Ensure there's a minimum interval 674 | # between starting and stopping recording 675 | if (time.time() - self.recording_start_time 676 | < self.min_length_of_recording): 677 | logging.info("Attempted to stop recording " 678 | "too soon after starting." 679 | ) 680 | return self 681 | 682 | logging.info("recording stopped") 683 | self.is_recording = False 684 | self.recording_stop_time = time.time() 685 | self.is_silero_speech_active = False 686 | self.is_webrtc_speech_active = False 687 | self.silero_check_time = 0 688 | self.start_recording_event.clear() 689 | self.stop_recording_event.set() 690 | 691 | if self.on_recording_stop: 692 | self.on_recording_stop() 693 | 694 | return self 695 | 696 | def feed_audio(self, chunk, original_sample_rate=16000): 697 | 698 | # Check if the buffer attribute exists, if not, initialize it 699 | if not hasattr(self, 'buffer'): 700 | self.buffer = bytearray() 701 | 702 | # Check if input is a NumPy array 703 | if isinstance(chunk, np.ndarray): 704 | # Handle stereo to mono conversion if necessary 705 | if chunk.ndim == 2: 706 | chunk = np.mean(chunk, axis=1) 707 | 708 | # Resample to 16000 Hz if necessary 709 | if original_sample_rate != 16000: 710 | num_samples = int(len(chunk) * 16000 / original_sample_rate) 711 | chunk = resample(chunk, num_samples) 712 | 713 | # Ensure data type is int16 714 | chunk = chunk.astype(np.int16) 715 | 716 | # Convert the NumPy array to bytes 717 | chunk = chunk.tobytes() 718 | 719 | # Append the chunk to the buffer 720 | self.buffer += chunk 721 | buf_size = 2 * self.buffer_size # silero complains if too short 722 | 723 | # Check if the buffer has reached or exceeded the buffer_size 724 | while len(self.buffer) >= buf_size: 725 | # Extract self.buffer_size amount of data from the buffer 726 | to_process = self.buffer[:buf_size] 727 | self.buffer = self.buffer[buf_size:] 728 | 729 | # Feed the extracted data to the audio_queue 730 | self.audio_queue.put(to_process) 731 | 732 | def set_microphone(self, microphone_on=True): 733 | """ 734 | Set the microphone on or off. 
735 | """ 736 | logging.info("Setting microphone to: " + str(microphone_on)) 737 | self.use_microphone.value = microphone_on 738 | 739 | def shutdown(self): 740 | 741 | 742 | # Force wait_audio() and text() to exit 743 | self.is_shut_down = True 744 | self.start_recording_event.set() 745 | self.stop_recording_event.set() 746 | 747 | self.shutdown_event.set() 748 | self.is_recording = False 749 | self.is_running = False 750 | 751 | logging.debug('Finishing recording thread') 752 | if self.recording_thread: 753 | self.recording_thread.join() 754 | 755 | logging.debug('Terminating reader process') 756 | 757 | # Give it some time to finish the loop and cleanup. 758 | if self.use_microphone: 759 | self.reader_process.join(timeout=10) 760 | 761 | if self.reader_process.is_alive(): 762 | logging.warning("Reader process did not terminate " 763 | "in time. Terminating forcefully." 764 | ) 765 | self.reader_process.terminate() 766 | 767 | logging.debug('Terminating transcription process') 768 | self.transcript_process.join(timeout=10) 769 | 770 | if self.transcript_process.is_alive(): 771 | logging.warning("Transcript process did not terminate " 772 | "in time. Terminating forcefully." 773 | ) 774 | self.transcript_process.terminate() 775 | 776 | self.parent_transcription_pipe.close() 777 | 778 | logging.debug('Finishing realtime thread') 779 | if self.realtime_thread: 780 | self.realtime_thread.join() 781 | 782 | if self.enable_realtime_transcription: 783 | if self.realtime_model_type: 784 | del self.realtime_model_type 785 | self.realtime_model_type = None 786 | gc.collect() 787 | 788 | def _recording_worker(self): 789 | 790 | 791 | logging.debug('Starting recording worker') 792 | 793 | try: 794 | was_recording = False 795 | delay_was_passed = False 796 | 797 | # Continuously monitor audio for voice activity 798 | while self.is_running: 799 | 800 | try: 801 | 802 | data = self.audio_queue.get() 803 | if self.on_recorded_chunk: 804 | self.on_recorded_chunk(data) 805 | 806 | if self.handle_buffer_overflow: 807 | # Handle queue overflow 808 | if (self.audio_queue.qsize() > 809 | self.allowed_latency_limit): 810 | logging.warning("Audio queue size exceeds " 811 | "latency limit. Current size: " 812 | f"{self.audio_queue.qsize()}. " 813 | "Discarding old audio chunks." 
814 | ) 815 | 816 | while (self.audio_queue.qsize() > 817 | self.allowed_latency_limit): 818 | 819 | data = self.audio_queue.get() 820 | 821 | except BrokenPipeError: 822 | print("BrokenPipeError _recording_worker") 823 | self.is_running = False 824 | break 825 | 826 | if not self.is_recording: 827 | # Handle not recording state 828 | time_since_listen_start = (time.time() - self.listen_start 829 | if self.listen_start else 0) 830 | 831 | 832 | 833 | # Set state and spinner text 834 | if not self.recording_stop_time: 835 | # if self.use_wake_words \ 836 | # and wake_word_activation_delay_passed \ 837 | # and not self.wakeword_detected: 838 | # self._set_state("wakeword") 839 | # else: 840 | if self.listen_start: 841 | self._set_state("listening") 842 | else: 843 | self._set_state("inactive") 844 | 845 | 846 | # Check for voice activity to 847 | # trigger the start of recording 848 | if (self.start_recording_on_voice_activity): 849 | 850 | if self._is_voice_active(): 851 | logging.info("voice activity detected") 852 | 853 | self.start() 854 | 855 | if self.is_recording: 856 | self.start_recording_on_voice_activity = False 857 | 858 | # Add the buffered audio 859 | # to the recording frames 860 | self.frames.extend(list(self.audio_buffer)) 861 | self.audio_buffer.clear() 862 | 863 | self.silero_vad_model.reset_states() 864 | else: 865 | data_copy = data[:] 866 | self._check_voice_activity(data_copy) 867 | 868 | self.speech_end_silence_start = 0 869 | 870 | else: 871 | # If we are currently recording 872 | 873 | # Stop the recording if silence is detected after speech 874 | if self.stop_recording_on_voice_deactivity: 875 | 876 | if not self._is_webrtc_speech(data, True): 877 | 878 | # Voice deactivity was detected, so we start 879 | # measuring silence time before stopping recording 880 | if self.speech_end_silence_start == 0: 881 | self.speech_end_silence_start = time.time() 882 | 883 | else: 884 | self.speech_end_silence_start = 0 885 | 886 | # Wait for silence to stop recording after speech 887 | if self.speech_end_silence_start and time.time() - \ 888 | self.speech_end_silence_start > \ 889 | self.post_speech_silence_duration: 890 | logging.info("voice deactivity detected") 891 | self.stop() 892 | 893 | if not self.is_recording and was_recording: 894 | # Reset after stopping recording to ensure clean state 895 | self.stop_recording_on_voice_deactivity = False 896 | 897 | if time.time() - self.silero_check_time > 0.1: 898 | self.silero_check_time = 0 899 | 900 | 901 | was_recording = self.is_recording 902 | 903 | if self.is_recording: 904 | self.frames.append(data) 905 | 906 | if not self.is_recording or self.speech_end_silence_start: 907 | self.audio_buffer.append(data) 908 | 909 | except Exception as e: 910 | if not self.interrupt_stop_event.is_set(): 911 | logging.error(f"Unhandled exeption in _recording_worker: {e}") 912 | raise 913 | 914 | def _realtime_worker(self): 915 | 916 | try: 917 | 918 | logging.debug('Starting realtime worker') 919 | 920 | # Return immediately if real-time transcription is not enabled 921 | if not self.enable_realtime_transcription: 922 | return 923 | 924 | # Continue running as long as the main process is active 925 | while self.is_running: 926 | 927 | # Check if the recording is active 928 | if self.is_recording: 929 | 930 | # Sleep for the duration of the transcription resolution 931 | time.sleep(self.realtime_processing_pause) 932 | 933 | # Convert the buffer frames to a NumPy array 934 | audio_array = np.frombuffer( 935 | b''.join(self.frames), 936 | 
dtype=np.int16 937 | ) 938 | 939 | # Normalize the array to a [-1, 1] range 940 | audio_array = audio_array.astype(np.float32) / \ 941 | INT16_MAX_ABS_VALUE 942 | 943 | # Perform transcription and assemble the text 944 | segments = self.realtime_model_type.transcribe( 945 | audio_array, 946 | language=self.language if self.language else None, 947 | beam_size=self.beam_size_realtime, 948 | initial_prompt=self.initial_prompt, 949 | suppress_tokens=self.suppress_tokens, 950 | ) 951 | 952 | # double check recording state 953 | # because it could have changed mid-transcription 954 | if self.is_recording and time.time() - \ 955 | self.recording_start_time > 0.5: 956 | 957 | logging.debug('Starting realtime transcription') 958 | self.realtime_transcription_text = " ".join( 959 | seg.text for seg in segments[0] 960 | ) 961 | self.realtime_transcription_text = \ 962 | self.realtime_transcription_text.strip() 963 | 964 | self.text_storage.append( 965 | self.realtime_transcription_text 966 | ) 967 | 968 | # Take the last two texts in storage, if they exist 969 | if len(self.text_storage) >= 2: 970 | last_two_texts = self.text_storage[-2:] 971 | 972 | # Find the longest common prefix 973 | # between the two texts 974 | prefix = os.path.commonprefix( 975 | [last_two_texts[0], last_two_texts[1]] 976 | ) 977 | 978 | # This prefix is the text that was transcripted 979 | # two times in the same way 980 | # Store as "safely detected text" 981 | if len(prefix) >= \ 982 | len(self.realtime_stabilized_safetext): 983 | 984 | # Only store when longer than the previous 985 | # as additional security 986 | self.realtime_stabilized_safetext = prefix 987 | 988 | # Find parts of the stabilized text 989 | # in the freshly transcripted text 990 | matching_pos = self._find_tail_match_in_text( 991 | self.realtime_stabilized_safetext, 992 | self.realtime_transcription_text 993 | ) 994 | 995 | if matching_pos < 0: 996 | if self.realtime_stabilized_safetext: 997 | self._on_realtime_transcription_stabilized( 998 | self._preprocess_output( 999 | self.realtime_stabilized_safetext, 1000 | True 1001 | ) 1002 | ) 1003 | else: 1004 | self._on_realtime_transcription_stabilized( 1005 | self._preprocess_output( 1006 | self.realtime_transcription_text, 1007 | True 1008 | ) 1009 | ) 1010 | else: 1011 | # We found parts of the stabilized text 1012 | # in the transcripted text 1013 | # We now take the stabilized text 1014 | # and add only the freshly transcripted part to it 1015 | output_text = self.realtime_stabilized_safetext + \ 1016 | self.realtime_transcription_text[matching_pos:] 1017 | 1018 | # This yields us the "left" text part as stabilized 1019 | # AND at the same time delivers fresh detected 1020 | # parts on the first run without the need for 1021 | # two transcriptions 1022 | self._on_realtime_transcription_stabilized( 1023 | self._preprocess_output(output_text, True) 1024 | ) 1025 | 1026 | # Invoke the callback with the transcribed text 1027 | self._on_realtime_transcription_update( 1028 | self._preprocess_output( 1029 | self.realtime_transcription_text, 1030 | True 1031 | ) 1032 | ) 1033 | 1034 | # If not recording, sleep briefly before checking again 1035 | else: 1036 | time.sleep(TIME_SLEEP) 1037 | 1038 | except Exception as e: 1039 | logging.error(f"Unhandled exeption in _realtime_worker: {e}") 1040 | raise 1041 | 1042 | def _is_silero_speech(self, chunk): 1043 | 1044 | if self.sample_rate != 16000: 1045 | pcm_data = np.frombuffer(chunk, dtype=np.int16) 1046 | data_16000 = signal.resample_poly( 1047 | pcm_data, 16000, 
self.sample_rate) 1048 | chunk = data_16000.astype(np.int16).tobytes() 1049 | 1050 | self.silero_working = True 1051 | audio_chunk = np.frombuffer(chunk, dtype=np.int16) 1052 | audio_chunk = audio_chunk.astype(np.float32) / INT16_MAX_ABS_VALUE 1053 | vad_prob = self.silero_vad_model( 1054 | torch.from_numpy(audio_chunk), 1055 | SAMPLE_RATE).item() 1056 | is_silero_speech_active = vad_prob > (1 - self.silero_sensitivity) 1057 | if is_silero_speech_active: 1058 | self.is_silero_speech_active = True 1059 | self.silero_working = False 1060 | return is_silero_speech_active 1061 | 1062 | def _is_webrtc_speech(self, chunk, all_frames_must_be_true=False): 1063 | 1064 | if self.sample_rate != 16000: 1065 | pcm_data = np.frombuffer(chunk, dtype=np.int16) 1066 | data_16000 = signal.resample_poly( 1067 | pcm_data, 16000, self.sample_rate) 1068 | chunk = data_16000.astype(np.int16).tobytes() 1069 | 1070 | # Number of audio frames per millisecond 1071 | frame_length = int(16000 * 0.01) # for 10ms frame 1072 | num_frames = int(len(chunk) / (2 * frame_length)) 1073 | speech_frames = 0 1074 | 1075 | for i in range(num_frames): 1076 | start_byte = i * frame_length * 2 1077 | end_byte = start_byte + frame_length * 2 1078 | frame = chunk[start_byte:end_byte] 1079 | if self.webrtc_vad_model.is_speech(frame, 16000): 1080 | speech_frames += 1 1081 | if not all_frames_must_be_true: 1082 | if self.debug_mode: 1083 | print(f"Speech detected in frame {i + 1}" 1084 | f" of {num_frames}") 1085 | return True 1086 | if all_frames_must_be_true: 1087 | if self.debug_mode and speech_frames == num_frames: 1088 | print(f"Speech detected in {speech_frames} of " 1089 | f"{num_frames} frames") 1090 | elif self.debug_mode: 1091 | print(f"Speech not detected in all {num_frames} frames") 1092 | return speech_frames == num_frames 1093 | else: 1094 | if self.debug_mode: 1095 | print(f"Speech not detected in any of {num_frames} frames") 1096 | return False 1097 | 1098 | def _check_voice_activity(self, data): 1099 | 1100 | self.is_webrtc_speech_active = self._is_webrtc_speech(data) 1101 | 1102 | # First quick performing check for voice activity using WebRTC 1103 | if self.is_webrtc_speech_active: 1104 | 1105 | if not self.silero_working: 1106 | self.silero_working = True 1107 | 1108 | # Run the intensive check in a separate thread 1109 | threading.Thread( 1110 | target=self._is_silero_speech, 1111 | args=(data,)).start() 1112 | 1113 | def _is_voice_active(self): 1114 | 1115 | return self.is_webrtc_speech_active and self.is_silero_speech_active 1116 | 1117 | def _set_state(self, new_state): 1118 | 1119 | # Check if the state has actually changed 1120 | if new_state == self.state: 1121 | return 1122 | 1123 | # Store the current state for later comparison 1124 | old_state = self.state 1125 | 1126 | # Update to the new state 1127 | self.state = new_state 1128 | 1129 | # Execute callbacks based on transitioning FROM a particular state 1130 | if old_state == "listening": 1131 | if self.on_vad_detect_stop: 1132 | self.on_vad_detect_stop() 1133 | elif old_state == "wakeword": 1134 | if self.on_wakeword_detection_end: 1135 | self.on_wakeword_detection_end() 1136 | 1137 | # Execute callbacks based on transitioning TO a particular state 1138 | if new_state == "listening": 1139 | if self.on_vad_detect_start: 1140 | self.on_vad_detect_start() 1141 | self._set_spinner("speak now") 1142 | if self.spinner and self.halo: 1143 | self.halo._interval = 250 1144 | elif new_state == "wakeword": 1145 | if self.on_wakeword_detection_start: 1146 | 
self.on_wakeword_detection_start() 1147 | self._set_spinner(f"say {self.wake_words}") 1148 | if self.spinner and self.halo: 1149 | self.halo._interval = 500 1150 | elif new_state == "transcribing": 1151 | if self.on_transcription_start: 1152 | self.on_transcription_start() 1153 | self._set_spinner("transcribing") 1154 | if self.spinner and self.halo: 1155 | self.halo._interval = 50 1156 | elif new_state == "recording": 1157 | self._set_spinner("recording") 1158 | if self.spinner and self.halo: 1159 | self.halo._interval = 100 1160 | elif new_state == "inactive": 1161 | if self.spinner and self.halo: 1162 | self.halo.stop() 1163 | self.halo = None 1164 | 1165 | def _set_spinner(self, text): 1166 | 1167 | if self.spinner: 1168 | # If the Halo spinner doesn't exist, create and start it 1169 | if self.halo is None: 1170 | self.halo = halo.Halo(text=text) 1171 | self.halo.start() 1172 | # If the Halo spinner already exists, just update the text 1173 | else: 1174 | self.halo.text = text 1175 | 1176 | def _preprocess_output(self, text, preview=False): 1177 | 1178 | text = re.sub(r'\s+', ' ', text.strip()) 1179 | 1180 | if self.ensure_sentence_starting_uppercase: 1181 | if text: 1182 | text = text[0].upper() + text[1:] 1183 | 1184 | # Ensure the text ends with a proper punctuation 1185 | # if it ends with an alphanumeric character 1186 | if not preview: 1187 | if self.ensure_sentence_ends_with_period: 1188 | if text and text[-1].isalnum(): 1189 | text += '.' 1190 | 1191 | return text 1192 | 1193 | def _find_tail_match_in_text(self, text1, text2, length_of_match=10): 1194 | 1195 | 1196 | # Check if either of the texts is too short 1197 | if len(text1) < length_of_match or len(text2) < length_of_match: 1198 | return -1 1199 | 1200 | # The end portion of the first text that we want to compare 1201 | target_substring = text1[-length_of_match:] 1202 | 1203 | # Loop through text2 from right to left 1204 | for i in range(len(text2) - length_of_match + 1): 1205 | # Extract the substring from text2 1206 | # to compare with the target_substring 1207 | current_substring = text2[len(text2) - i - length_of_match: 1208 | len(text2) - i] 1209 | 1210 | # Compare the current_substring with the target_substring 1211 | if current_substring == target_substring: 1212 | # Position in text2 where the match starts 1213 | return len(text2) - i 1214 | 1215 | return -1 1216 | 1217 | def _on_realtime_transcription_stabilized(self, text): 1218 | 1219 | if self.on_realtime_transcription_stabilized: 1220 | if self.is_recording: 1221 | self.on_realtime_transcription_stabilized(text) 1222 | 1223 | def _on_realtime_transcription_update(self, text): 1224 | 1225 | if self.on_realtime_transcription_update: 1226 | if self.is_recording: 1227 | self.on_realtime_transcription_update(text) 1228 | 1229 | def __enter__(self): 1230 | 1231 | return self 1232 | 1233 | def __exit__(self, exc_type, exc_value, traceback): 1234 | 1235 | self.shutdown() 1236 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 RobotTelevision 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit 
persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CrowAssistant 2 | Crow is a Desktop AI Assistant 3 | 4 | [![Crow Demo](https://img.youtube.com/vi/XdR7Uo3DPys/0.jpg)](https://www.youtube.com/watch?v=XdR7Uo3DPys) 5 | 6 | ## Features 7 | - Pixel Art Crow desktop friend flies and lands on the bottom of whatever window you're focused on. 8 | - Faster-Whisper Speech to Text and VAD pulled from: https://github.com/KoljaB/RealtimeSTT 9 | - Piper Text to Speech with over 900 voices to choose from. https://github.com/rhasspy/piper 10 | - Interruptible: by saying his name he can stop and listen. 11 | - Audio Ducking: lowers the volume while recording so you can play music while talking to Crow. 12 | - Automatically pauses the conversation after a long silence and waits to hear his name to start the conversation again. 13 | - Website Interface for conversation logs and settings 14 | 15 | ## How to Use 16 | 17 | Double-click on the Crow to open the web interface. Open the settings and get a free-plan API key from Groq. 18 | Set up your mic and speakers, save the settings, and then restart Crow. 19 | To start talking to Crow, just say his name and he should start listening. 20 | 21 | When Crow is not in conversation, he rests above the system tray. 22 | 23 | ## Running the Code 24 | 25 | You'll need to download a Windows release of Piper: https://github.com/rhasspy/piper/releases 26 | Put the exe and other files right in the base directory... I know it's a bit of a messy way to do things, but I'll try to clean it up in future releases. 27 | 28 | You'll also need to grab the libritts_r onnx and json files for the voice to work: https://huggingface.co/rhasspy/piper-voices/tree/main/en/en_US/libritts_r/medium 29 | 30 | And that should do it.
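If you want to sanity-check the bundled speech-to-text layer on its own, `CrowSTT.py` exposes the `AudioToTextRecorder` class (a trimmed-down fork of RealtimeSTT). The sketch below is only an illustration, not part of the project: it assumes the constructor's defaults (microphone input and a small faster-whisper model) are usable standalone on your machine, and it needs the same dependencies Crow itself uses (`faster-whisper`, `torch`, `pyaudio`, `webrtcvad`).

```python
# Hypothetical standalone test of CrowSTT's recorder -- Crow normally drives this for you.
from CrowSTT import AudioToTextRecorder

if __name__ == "__main__":  # needed: the recorder spawns worker processes ('spawn' start method)
    # Assumption: the default constructor arguments work as-is; pass your own
    # model/device settings here if they do not.
    with AudioToTextRecorder() as recorder:  # the context manager calls shutdown() on exit
        print("Speak into the default microphone (Ctrl+C to quit)...")
        while True:
            # text() blocks until voice activity is detected and then ends,
            # and returns the finished transcription as a string.
            print("You said:", recorder.text())
```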
31 | -------------------------------------------------------------------------------- /Volume.py: -------------------------------------------------------------------------------- 1 | import platform 2 | 3 | class VolumeControl: 4 | def __init__(self, device_index=None): 5 | self.system = platform.system() 6 | self.device_index = device_index 7 | 8 | if self.system == "Windows": 9 | from comtypes import CLSCTX_ALL 10 | from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume 11 | device = AudioUtilities.GetSpeakers() 12 | interface = device.Activate( 13 | IAudioEndpointVolume._iid_, CLSCTX_ALL, None) 14 | self.volume = interface.QueryInterface(IAudioEndpointVolume) 15 | elif self.system == "Linux": 16 | import pulsectl 17 | self.pulse = pulsectl.Pulse('volume-control') 18 | elif self.system == "Darwin": # macOS 19 | import subprocess 20 | 21 | def get_volume(self): 22 | if self.system == "Windows": 23 | return round(self.volume.GetMasterVolumeLevelScalar() * 100) 24 | elif self.system == "Linux": 25 | sinks = self.pulse.sink_list() 26 | if self.device_index is not None and 0 <= self.device_index < len(sinks): 27 | return round(sinks[self.device_index].volume.value_flat * 100) 28 | elif sinks: 29 | return round(sinks[0].volume.value_flat * 100) 30 | elif self.system == "Darwin": 31 | cmd = f"osascript -e 'output volume of (get volume settings)'" 32 | if self.device_index is not None: 33 | cmd = f"osascript -e 'tell application \"System Events\" to get volume settings of audio device \"{self.device_index}\"'" 34 | return int(subprocess.check_output(cmd, shell=True).strip().split(", ")[0].split(":")[1]) 35 | 36 | def set_volume(self, volume): 37 | volume = max(0, min(100, volume)) # Ensure volume is between 0 and 100 38 | if self.system == "Windows": 39 | self.volume.SetMasterVolumeLevelScalar(volume / 100, None) 40 | elif self.system == "Linux": 41 | sinks = self.pulse.sink_list() 42 | if self.device_index is not None and 0 <= self.device_index < len(sinks): 43 | self.pulse.volume_set_all_chans(sinks[self.device_index], volume / 100) 44 | elif sinks: 45 | self.pulse.volume_set_all_chans(sinks[0], volume / 100) 46 | elif self.system == "Darwin": 47 | cmd = f"osascript -e 'set volume output volume {volume}'" 48 | if self.device_index is not None: 49 | cmd = f"osascript -e 'tell application \"System Events\" to set volume of audio device \"{self.device_index}\" to {volume}'" 50 | subprocess.call(cmd, shell=True) 51 | 52 | @staticmethod 53 | def list_devices(): 54 | system = platform.system() 55 | if system == "Windows": 56 | from pycaw.pycaw import AudioUtilities 57 | return [device.FriendlyName for device in AudioUtilities.GetAllDevices()] 58 | elif system == "Linux": 59 | import pulsectl 60 | with pulsectl.Pulse('device-list') as pulse: 61 | return [sink.name for sink in pulse.sink_list()] 62 | elif system == "Darwin": 63 | import subprocess 64 | cmd = "system_profiler SPAudioDataType | grep -A 1 'Output:' | grep -v 'Output:' | awk -F: '{print $1}' | sed 's/^[ \t]*//'" 65 | return subprocess.check_output(cmd, shell=True).decode().strip().split('\n') 66 | -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "llama3-8b-8192", 3 | "name": "Crow", 4 | "voice": 113, 5 | "personality": "You are sarcastic, mischievous and slightly annoying, while still being helpful.", 6 | "url": "https://api.groq.com/openai/v1", 7 | "api_key": "Enter API Key Here", 8 | 
"mic": "Default", 9 | "speaker": "Default", 10 | "scale": 2, 11 | "port": 5000, 12 | "maxtoken": 32000, 13 | "maxmsg": 100 14 | } -------------------------------------------------------------------------------- /crow.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RobotTelevision/CrowAssistant/6cc0e6947eb770083373b9c3d98966cb76619e47/crow.ico -------------------------------------------------------------------------------- /images/crow-idle1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RobotTelevision/CrowAssistant/6cc0e6947eb770083373b9c3d98966cb76619e47/images/crow-idle1.png -------------------------------------------------------------------------------- /images/crow-idle2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RobotTelevision/CrowAssistant/6cc0e6947eb770083373b9c3d98966cb76619e47/images/crow-idle2.png -------------------------------------------------------------------------------- /images/crow-wingleft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RobotTelevision/CrowAssistant/6cc0e6947eb770083373b9c3d98966cb76619e47/images/crow-wingleft.png -------------------------------------------------------------------------------- /images/crow-wingright.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RobotTelevision/CrowAssistant/6cc0e6947eb770083373b9c3d98966cb76619e47/images/crow-wingright.png -------------------------------------------------------------------------------- /images/crowfly.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RobotTelevision/CrowAssistant/6cc0e6947eb770083373b9c3d98966cb76619e47/images/crowfly.png -------------------------------------------------------------------------------- /images/crowhead-blink.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RobotTelevision/CrowAssistant/6cc0e6947eb770083373b9c3d98966cb76619e47/images/crowhead-blink.png -------------------------------------------------------------------------------- /images/crowhead-lookback.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RobotTelevision/CrowAssistant/6cc0e6947eb770083373b9c3d98966cb76619e47/images/crowhead-lookback.png -------------------------------------------------------------------------------- /images/crowhead-tilt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RobotTelevision/CrowAssistant/6cc0e6947eb770083373b9c3d98966cb76619e47/images/crowhead-tilt.png -------------------------------------------------------------------------------- /images/crowhead-tiltold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RobotTelevision/CrowAssistant/6cc0e6947eb770083373b9c3d98966cb76619e47/images/crowhead-tiltold.png -------------------------------------------------------------------------------- /images/crowhead.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RobotTelevision/CrowAssistant/6cc0e6947eb770083373b9c3d98966cb76619e47/images/crowhead.png -------------------------------------------------------------------------------- /templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Crow Web Interface 7 | 8 | 9 | 10 | 121 | 122 | 123 | 124 | 138 |
139 |
140 | 141 |
142 | 143 |
144 | 145 | 307 | 308 | 309 | 310 | -------------------------------------------------------------------------------- /templates/settings.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | CrowSettings 7 | 8 | 86 | 87 | 88 | 140 |
141 |
142 | 143 |

Crow Settings

144 | Return to Conversations 145 |

146 | For Crow to work, you'll need to get and enter the API key and URL.
Click the link to get a Groq key; there is a free tier you can use. 147 | 148 | 149 | 150 | 151 |

Enter the name you'd like to call your AI assistant. Make sure it's something the Text To Speech system can understand easily.

152 | 153 | 154 | 155 |

Describe the personality traits you want your AI to exhibit. (e.g., You are sarcastic and tell puns constantly.)

156 | 157 | 158 | 159 |

Select a voice ID for text-to-speech. Changing this value lets you preview the voice.

160 | 161 | 162 | 163 |

Enter the API endpoint URL for the AI service. For Groq this is: https://api.groq.com/openai/v1

164 | 165 | 166 | 167 | Get your Groq API key 168 |

Enter your API key for authentication. Keep this private!

169 | 170 | 171 | 172 |

Specify the AI model to use. This needs to be the exact model name used by the API.
For Groq, try: llama-3.1-8b-instant

173 | 174 | 175 | 176 |

Adjust the size of the crow on screen.

177 | 178 | 179 | 184 |

Select the microphone device for voice input.

185 | 186 | 187 | 192 |

Choose the speaker device for audio output.

193 | 194 |
195 | If you don't know what these are, don't change them :D 196 | 197 | 198 |

Specify the port number for the application to run on.

199 | 200 | 201 | 202 |

Set the maximum number of tokens allowed in a conversation.

203 | 204 | 205 | 206 |

Set the maximum number of messages kept in the conversation history.

207 | 208 |
209 | 210 |
211 |
212 |
213 | 245 | 246 | 247 | --------------------------------------------------------------------------------