├── .gitignore ├── LICENSE ├── README.md ├── Requirements.txt ├── examples └── voice_chat.py ├── sesame_ai ├── __init__.py ├── api.py ├── config.py ├── exceptions.py ├── models.py ├── token_manager.py └── websocket.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | 8 | # Distribution / packaging 9 | build/ 10 | develop-eggs/ 11 | dist/ 12 | downloads/ 13 | eggs/ 14 | .eggs/ 15 | lib/ 16 | lib64/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | wheels/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Virtual environments 26 | venv/ 27 | env/ 28 | ENV/ 29 | .env/ 30 | .venv/ 31 | 32 | # Testing 33 | .coverage 34 | htmlcov/ 35 | .pytest_cache/ 36 | .tox/ 37 | .nox/ 38 | 39 | # Documentation 40 | docs/_build/ 41 | site/ 42 | 43 | # IDE specific files 44 | .idea/ 45 | .vscode/ 46 | *.swp 47 | *.swo 48 | .DS_Store 49 | .spyderproject 50 | .spyproject 51 | .ropeproject 52 | 53 | # Project specific 54 | token.json 55 | *.log 56 | *.db 57 | *.sqlite3 58 | 59 | # Jupyter Notebook 60 | .ipynb_checkpoints 61 | 62 | # mypy 63 | .mypy_cache/ 64 | .dmypy.json 65 | dmypy.json 66 | 67 | # Environments 68 | .env 69 | .venv 70 | env/ 71 | venv/ 72 | ENV/ 73 | env.bak/ 74 | venv.bak/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 ijub 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do 
so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sesame AI Python Client 2 | 3 | An unofficial Python client library for interacting with the [Sesame](https://www.sesame.com) voice conversation API. This package provides easy access to Sesame's voice-based AI characters, allowing developers to create applications with natural voice conversations. 4 | 5 | ## About Sesame 6 | 7 | Sesame is developing conversational AI with "voice presence" - the quality that makes spoken interactions feel real, understood, and valued. Their technology enables voice conversations with AI characters like Miles and Maya that feature emotional intelligence, natural conversational dynamics, and contextual awareness. 8 | 9 | ## Support 10 | 11 | If you find this project helpful, consider buying me a coffee! 12 | 13 | [![Buy Me A Coffee](https://img.shields.io/badge/Buy%20Me%20A%20Coffee-Support-yellow.svg)](https://buymeacoffee.com/ijub) 14 | 15 | ## Installation 16 | 17 | ```bash 18 | # From GitHub 19 | pip install git+https://github.com/ijub/sesame_ai.git 20 | 21 | # For development 22 | git clone https://github.com/ijub/sesame_ai.git 23 | cd sesame_ai 24 | pip install -e . 
25 | ``` 26 | 27 | ## Features 28 | 29 | - Authentication and account management 30 | - WebSocket-based real-time voice conversations 31 | - Token management and refresh 32 | - Support for multiple AI characters (Miles, Maya) 33 | - Voice activity detection 34 | - Simple and intuitive API 35 | 36 | ## Available Characters 37 | 38 | The API supports multiple AI characters: 39 | 40 | - **Miles**: A male character (default) 41 | - **Maya**: A female character 42 | 43 | ## Quick Start 44 | 45 | ### Authentication 46 | 47 | ```python 48 | from sesame_ai import SesameAI, TokenManager 49 | 50 | # Create API client 51 | client = SesameAI() 52 | 53 | # Create an anonymous account 54 | signup_response = client.create_anonymous_account() 55 | print(f"ID Token: {signup_response.id_token}") 56 | 57 | # Look up account information 58 | lookup_response = client.get_account_info(signup_response.id_token) 59 | print(f"User ID: {lookup_response.local_id}") 60 | 61 | # For easier token management, use TokenManager 62 | token_manager = TokenManager(client, token_file="token.json") 63 | id_token = token_manager.get_valid_token() 64 | ``` 65 | 66 | ### Voice Chat Example 67 | 68 | ```python 69 | from sesame_ai import SesameAI, SesameWebSocket, TokenManager 70 | import pyaudio 71 | import threading 72 | import time 73 | import numpy as np 74 | 75 | # Get authentication token using TokenManager 76 | api_client = SesameAI() 77 | token_manager = TokenManager(api_client, token_file="token.json") 78 | id_token = token_manager.get_valid_token() 79 | 80 | # Connect to WebSocket (choose character: "Miles" or "Maya") 81 | ws = SesameWebSocket(id_token=id_token, character="Maya") 82 | 83 | # Set up connection callbacks 84 | def on_connect(): 85 | print("Connected to SesameAI!") 86 | 87 | def on_disconnect(): 88 | print("Disconnected from SesameAI") 89 | 90 | ws.set_connect_callback(on_connect) 91 | ws.set_disconnect_callback(on_disconnect) 92 | 93 | # Connect to the server 94 | ws.connect() 95 | 
96 | # Audio settings 97 | CHUNK = 1024 98 | FORMAT = pyaudio.paInt16 99 | CHANNELS = 1 100 | RATE = 16000 101 | 102 | # Initialize PyAudio 103 | p = pyaudio.PyAudio() 104 | 105 | # Open microphone stream 106 | mic_stream = p.open(format=FORMAT, 107 | channels=CHANNELS, 108 | rate=RATE, 109 | input=True, 110 | frames_per_buffer=CHUNK) 111 | 112 | # Open speaker stream (using server's sample rate) 113 | speaker_stream = p.open(format=FORMAT, 114 | channels=CHANNELS, 115 | rate=ws.server_sample_rate, 116 | output=True) 117 | 118 | # Function to capture and send microphone audio 119 | def capture_microphone(): 120 | print("Microphone capture started...") 121 | try: 122 | while True: 123 | if ws.is_connected(): 124 | data = mic_stream.read(CHUNK, exception_on_overflow=False) 125 | ws.send_audio_data(data) 126 | else: 127 | time.sleep(0.1) 128 | except KeyboardInterrupt: 129 | print("Microphone capture stopped") 130 | 131 | # Function to play received audio 132 | def play_audio(): 133 | print("Audio playback started...") 134 | try: 135 | while True: 136 | audio_chunk = ws.get_next_audio_chunk(timeout=0.01) 137 | if audio_chunk: 138 | speaker_stream.write(audio_chunk) 139 | except KeyboardInterrupt: 140 | print("Audio playback stopped") 141 | 142 | # Start audio threads 143 | mic_thread = threading.Thread(target=capture_microphone) 144 | mic_thread.daemon = True 145 | mic_thread.start() 146 | 147 | playback_thread = threading.Thread(target=play_audio) 148 | playback_thread.daemon = True 149 | playback_thread.start() 150 | 151 | # Keep the main thread alive 152 | try: 153 | while True: 154 | time.sleep(1) 155 | except KeyboardInterrupt: 156 | print("Disconnecting...") 157 | ws.disconnect() 158 | mic_stream.stop_stream() 159 | mic_stream.close() 160 | speaker_stream.stop_stream() 161 | speaker_stream.close() 162 | p.terminate() 163 | ``` 164 | 165 | The package also includes a full-featured voice chat example that you can run: 166 | 167 | ```bash 168 | # Chat with Miles 
(default) 169 | python examples/voice_chat.py 170 | 171 | # Chat with Maya 172 | python examples/voice_chat.py --character Maya 173 | ``` 174 | 175 | Command-line options: 176 | - `--character`: Character to chat with (default: Miles, options: Miles, Maya) 177 | - `--input-device`: Input device index 178 | - `--output-device`: Output device index 179 | - `--list-devices`: List audio devices and exit 180 | - `--token-file`: Path to token storage file 181 | - `--debug`: Enable debug logging 182 | 183 | ## API Reference 184 | 185 | ### SesameAI 186 | 187 | The main API client for authentication. 188 | 189 | - `SesameAI(api_key=None)` - Create a new API client 190 | - `create_anonymous_account()` - Create an anonymous account 191 | - `get_account_info(id_token)` - Look up account information 192 | - `refresh_authentication_token(refresh_token)` - Refresh an ID token 193 | 194 | ### TokenManager 195 | 196 | Manages authentication tokens with automatic refresh and persistence. 197 | 198 | - `TokenManager(api_client=None, token_file=None)` - Create a token manager 199 | - `get_valid_token(force_new=False)` - Get a valid token, refreshing if needed 200 | - `clear_tokens()` - Clear stored tokens 201 | 202 | ### SesameWebSocket 203 | 204 | WebSocket client for real-time voice conversation. 
205 | 206 | - `SesameWebSocket(id_token, character="Miles", client_name="RP-Web")` - Create a new WebSocket client 207 | - `connect(blocking=True)` - Connect to the server 208 | - `send_audio_data(raw_audio_bytes)` - Send raw audio data 209 | - `get_next_audio_chunk(timeout=None)` - Get the next audio chunk 210 | - `disconnect()` - Disconnect from the server 211 | - `is_connected()` - Check if connected 212 | 213 | ## Error Handling 214 | 215 | The library provides several exception classes for error handling: 216 | 217 | - `SesameAIError` - Base exception class 218 | - `InvalidTokenError` - Invalid token errors 219 | - `APIError` - API errors with code and message 220 | - `NetworkError` - Network communication errors 221 | 222 | Example: 223 | 224 | ```python 225 | from sesame_ai import SesameAI, InvalidTokenError, APIError, NetworkError 226 | 227 | client = SesameAI() 228 | 229 | try: 230 | # Try to use an invalid token 231 | client.get_account_info("invalid_token") 232 | except InvalidTokenError: 233 | print("The token is invalid or expired") 234 | except APIError as e: 235 | print(f"API error: {e.code} - {e.message}") 236 | except NetworkError as e: 237 | print(f"Network error: {e}") 238 | ``` 239 | 240 | ## Troubleshooting 241 | 242 | ### Audio Device Problems 243 | 244 | If you encounter audio device issues: 245 | 246 | 1. Use `--list-devices` to see available audio devices 247 | 2. Specify input/output devices with `--input-device` and `--output-device` 248 | 3. Ensure PyAudio is properly installed with all dependencies 249 | 250 | ### Audio Feedback Issues 251 | 252 | Currently, the voice chat example doesn't block audio coming from the AI (through your speakers) from being picked up by your microphone, which can cause feedback loops. For the best experience: 253 | 254 | 1. Use headphones to prevent the AI from hearing itself 255 | 2. Keep speaker volume at a moderate level 256 | 3. 
Position your microphone away from speakers if not using headphones 257 | 258 | **Note:** I'm working on updating the `voice_chat.py` example to implement echo cancellation and audio filtering to address this issue in a future update. 259 | 260 | ### Connection Issues 261 | 262 | If you have trouble connecting: 263 | 264 | 1. Check your internet connection 265 | 2. Verify your authentication token is valid 266 | 3. Ensure the SesameAI service is available 267 | 268 | ## Legal Disclaimer 269 | 270 | This is an unofficial API wrapper and is not affiliated with, maintained, authorized, endorsed, or sponsored by Sesame or any of its affiliates. This wrapper is intended for personal, educational, and non-commercial use only. 271 | 272 | Users of this library assume all legal responsibility for its use. The author(s) are not responsible for any violations of Sesame's Terms of Service or applicable laws. 273 | 274 | ## License 275 | 276 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 277 | 278 | ## Support 279 | 280 | If you find this project helpful, consider buying me a coffee! 281 | 282 | Buy Me A Coffee -------------------------------------------------------------------------------- /Requirements.txt: -------------------------------------------------------------------------------- 1 | # Core dependencies 2 | requests>=2.25.0 3 | websocket-client>=1.2.0 4 | numpy>=1.19.0 5 | PyAudio>=0.2.11 6 | 7 | # Optional dependencies for development 8 | pytest>=6.0.0 9 | black>=21.5b2 10 | flake8>=3.9.0 -------------------------------------------------------------------------------- /examples/voice_chat.py: -------------------------------------------------------------------------------- 1 | # examples/voice_chat.py 2 | 3 | 4 | """ 5 | SesameAI Voice Chat Example 6 | 7 | This example demonstrates how to use the SesameAI API for real-time voice conversations. 
logger = logging.getLogger('sesame.examples.voice_chat')


class VoiceChat:
    """Voice chat application using SesameAI.

    Combines token-based authentication, a ``SesameWebSocket`` connection and
    two PyAudio streams (microphone in, speaker out). Once the WebSocket
    reports a connection, two daemon threads move audio in each direction for
    as long as ``self.running`` is true.
    """

    # Available characters
    AVAILABLE_CHARACTERS = ["Miles", "Maya"]

    def __init__(self, character="Miles", input_device=None, output_device=None,
                 token_file=None):
        """
        Initialize the voice chat application

        Args:
            character (str): Character to chat with ("Miles" or "Maya")
            input_device (int, optional): Input device index
            output_device (int, optional): Output device index
            token_file (str, optional): Path to token storage file. If None, tokens won't be saved.
        """
        # Unknown characters are allowed through, but the user is warned
        if character not in self.AVAILABLE_CHARACTERS:
            print(f"Warning: '{character}' is not in the list of known characters. Using anyway.")
            print(f"Known characters: {', '.join(self.AVAILABLE_CHARACTERS)}")

        self.character = character
        self.input_device_index = input_device
        self.output_device_index = output_device
        self.token_file = token_file

        # Audio settings
        self.chunk_size = 1024
        self.sample_format = pyaudio.paInt16
        self.channels = 1
        self.input_rate = 16000
        self.output_rate = 24000  # Will be updated from server

        # Voice activity detection
        self.amplitude_threshold = 500
        self.silence_counter = 0
        self.silence_limit = 50  # Number of consecutive silent chunks before sending silence

        # PyAudio instance (released in stop())
        self.p = pyaudio.PyAudio()

        # Streams are opened in setup_audio_streams() once connected
        self.input_stream = None
        self.output_stream = None

        # SesameAI client
        self.api_client = SesameAI()

        # Initialize token manager with token_file (which may be None)
        self.token_manager = TokenManager(self.api_client, token_file=self.token_file)

        self.id_token = None
        self.ws = None

        # Thread control
        self.running = False
        self.threads = []
        # stop() can be reached from the main thread and from the disconnect
        # callback; the lock makes the cleanup run exactly once.
        self._stop_lock = threading.RLock()

        logger.debug(f"VoiceChat initialized with character: {character}")

    def authenticate(self):
        """Authenticate with SesameAI and get a token.

        Returns:
            bool: True when a token was obtained, False on any handled failure.
        """
        logger.info("Authenticating with SesameAI...")
        try:
            # If no token file is specified, force a new token
            force_new = self.token_file is None

            # Get a valid token using the token manager
            self.id_token = self.token_manager.get_valid_token(force_new=force_new)
        except InvalidTokenError:
            logger.error("Authentication failed: Token expired and couldn't be refreshed")
            return False
        except (NetworkError, APIError) as e:
            logger.error(f"Authentication failed: {e}")
            return False

        logger.info("Authentication successful!")
        return True

    def list_audio_devices(self):
        """Print every audio device PyAudio reports, tagged as input and/or output."""
        logger.info("Listing available audio devices")
        print("\nAvailable audio devices:")
        print("-" * 60)

        for i in range(self.p.get_device_count()):
            dev_info = self.p.get_device_info_by_index(i)
            name = dev_info.get('name', 'Unknown')
            inputs = dev_info.get('maxInputChannels', 0)
            outputs = dev_info.get('maxOutputChannels', 0)

            # A device offering both directions is listed twice on purpose
            if inputs > 0:
                print(f"ID {i}: {name} (Input)")
            if outputs > 0:
                print(f"ID {i}: {name} (Output)")

        print("-" * 60)

    @staticmethod
    def _prompt_device_index(kind):
        """Ask on stdin for an ``input``/``output`` device ID; None means default device."""
        try:
            index = int(input(f"Select {kind} device ID: "))
        except ValueError:
            logger.warning("Invalid input. Using default device.")
            return None
        logger.debug(f"Selected {kind} device ID: {index}")
        return index

    def select_devices(self):
        """List devices, then prompt for any device not given to the constructor."""
        self.list_audio_devices()

        if self.input_device_index is None:
            self.input_device_index = self._prompt_device_index("input")

        if self.output_device_index is None:
            self.output_device_index = self._prompt_device_index("output")

    def on_connect(self):
        """Callback when WebSocket connection is established"""
        logger.info(f"Connected to {self.character}!")
        # Update output rate from server
        self.output_rate = self.ws.server_sample_rate
        logger.debug(f"Server sample rate: {self.output_rate}")

        # Streams are opened only now because the speaker stream needs the
        # server's sample rate.
        self.setup_audio_streams()

        # Start audio threads
        self.start_audio_threads()

    def on_disconnect(self):
        """Callback when WebSocket connection is disconnected"""
        logger.info(f"Disconnected from {self.character}")

        # Stop the application if it's still running
        if self.running:
            self.stop()

    def connect(self):
        """Create the WebSocket client, register callbacks and connect.

        Returns:
            bool: True if ``SesameWebSocket.connect()`` reported success.
        """
        logger.info(f"Connecting to SesameAI as character '{self.character}'...")

        # Create WebSocket client
        self.ws = SesameWebSocket(
            id_token=self.id_token,
            character=self.character
        )

        # Set up callbacks
        self.ws.set_connect_callback(self.on_connect)
        self.ws.set_disconnect_callback(self.on_disconnect)

        # Connect to server
        if not self.ws.connect():
            logger.error("Failed to connect to SesameAI")
            return False

        logger.debug("WebSocket connection established")
        return True

    def setup_audio_streams(self):
        """Open the microphone (input) and speaker (output) PyAudio streams."""
        logger.debug("Setting up audio streams")

        # Input stream (microphone)
        self.input_stream = self.p.open(
            format=self.sample_format,
            channels=self.channels,
            rate=self.input_rate,
            input=True,
            frames_per_buffer=self.chunk_size,
            input_device_index=self.input_device_index
        )

        # Output stream (speaker)
        self.output_stream = self.p.open(
            format=self.sample_format,
            channels=self.channels,
            rate=self.output_rate,
            output=True,
            output_device_index=self.output_device_index
        )

        logger.debug("Audio streams initialized")

    def capture_microphone(self):
        """Capture audio from microphone and send to SesameAI.

        Runs until ``self.running`` is cleared. Chunks whose RMS level exceeds
        ``amplitude_threshold`` count as voice; after ``silence_limit``
        consecutive quiet chunks, all-zero audio is sent instead of the
        captured chunk.
        """
        logger.debug("Microphone capture started")

        # Built once: the zeroed chunk sent during long silences
        silent_data = np.zeros(self.chunk_size, dtype=np.int16).tobytes()

        while self.running:
            if not self.ws.is_connected():
                time.sleep(0.1)
                continue

            try:
                # Read audio data from microphone
                data = self.input_stream.read(self.chunk_size, exception_on_overflow=False)

                # Check audio level for voice activity detection
                audio_samples = np.frombuffer(data, dtype=np.int16)
                rms_val = np.sqrt(np.mean(audio_samples.astype(np.float32) ** 2))

                if rms_val > self.amplitude_threshold:
                    self.silence_counter = 0
                else:
                    self.silence_counter += 1

                # Brief pauses still send the real audio; only sustained
                # silence is replaced by the zeroed chunk.
                if self.silence_counter >= self.silence_limit:
                    self.ws.send_audio_data(silent_data)
                else:
                    self.ws.send_audio_data(data)
            except Exception as e:
                if self.running:
                    logger.error(f"Error capturing microphone: {e}", exc_info=True)
                time.sleep(0.1)

    def play_audio(self):
        """Play audio received from SesameAI until ``self.running`` is cleared."""
        logger.debug("Audio playback started")

        while self.running:
            try:
                # Get audio chunk from WebSocket buffer with a short timeout
                audio_chunk = self.ws.get_next_audio_chunk(timeout=0.01)
                if audio_chunk:
                    # Play audio immediately when received
                    self.output_stream.write(audio_chunk)
            except Exception as e:
                if self.running:
                    logger.error(f"Error playing audio: {e}", exc_info=True)
                # Back off like capture_microphone() so a persistent failure
                # does not turn into a busy loop.
                time.sleep(0.1)

    def start_audio_threads(self):
        """Start the daemon threads for microphone capture and playback."""
        for target in (self.capture_microphone, self.play_audio):
            worker = threading.Thread(target=target)
            worker.daemon = True
            worker.start()
            self.threads.append(worker)

        logger.debug("Audio threads started")

    def start(self):
        """Authenticate, select devices and connect.

        Returns:
            bool: True if the chat started, False if authentication or the
            connection failed.
        """
        # Authenticate
        if not self.authenticate():
            return False

        # Select audio devices
        self.select_devices()

        # Set running flag before connecting: on_connect starts threads that
        # loop on it.
        self.running = True

        # Connect to WebSocket (will trigger on_connect callback)
        if not self.connect():
            self.running = False
            return False

        logger.info(f"Voice chat with {self.character} started! Press Ctrl+C to exit.")
        return True

    @staticmethod
    def _close_stream(stream):
        """Stop and close one PyAudio stream, logging rather than raising on failure."""
        try:
            stream.stop_stream()
            stream.close()
        except OSError as e:
            logger.warning(f"Error closing audio stream: {e}")

    def stop(self):
        """Stop the voice chat and release every resource.

        Safe to call more than once and from any thread. Unlike a plain
        "return if not running" guard, this also releases PyAudio when the
        chat never started (failed ``start()``, device listing only).
        """
        with self._stop_lock:
            was_running = self.running
            self.running = False
            if was_running:
                logger.info("Stopping voice chat...")

            # Disconnect from WebSocket
            if self.ws and self.ws.is_connected():
                self.ws.disconnect()

            # Give the audio threads a chance to leave their loops before the
            # streams they use are closed.
            current = threading.current_thread()
            for worker in self.threads:
                if worker is not current:
                    worker.join(timeout=1.0)
            self.threads = []

            # Close audio streams; clear the attributes first so a repeated
            # call finds nothing left to close.
            for attr in ("input_stream", "output_stream"):
                stream = getattr(self, attr)
                if stream is not None:
                    setattr(self, attr, None)
                    self._close_stream(stream)

            # Terminate PyAudio
            if self.p is not None:
                pa, self.p = self.p, None
                pa.terminate()

            if was_running:
                logger.info("Voice chat stopped")

    def run(self):
        """Run the voice chat until it stops or the user presses Ctrl+C."""
        try:
            if self.start():
                # Keep main thread alive
                while self.running:
                    time.sleep(0.1)
        except KeyboardInterrupt:
            logger.debug("Interrupted by user")
        finally:
            self.stop()


def main():
    """Parse command-line options and run (or just list devices for) a voice chat."""
    # Configure logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # Keep the websocket-client logger quiet (warnings and above only)
    logging.getLogger('websocket').setLevel(logging.WARNING)

    parser = argparse.ArgumentParser(description="SesameAI Voice Chat Example")
    parser.add_argument("--character", default="Miles", choices=VoiceChat.AVAILABLE_CHARACTERS,
                        help=f"Character to chat with (default: Miles, options: {', '.join(VoiceChat.AVAILABLE_CHARACTERS)})")
    parser.add_argument("--input-device", type=int, help="Input device index")
    parser.add_argument("--output-device", type=int, help="Output device index")
    parser.add_argument("--list-devices", action="store_true", help="List audio devices and exit")
    parser.add_argument("--token-file", help="Path to token storage file")
    parser.add_argument("--debug", action="store_true", help="Enable debug logging")

    args = parser.parse_args()

    # Set debug level if requested
    if args.debug:
        logging.getLogger('sesame').setLevel(logging.DEBUG)

    # Create voice chat instance
    voice_chat = VoiceChat(
        character=args.character,
        input_device=args.input_device,
        output_device=args.output_device,
        token_file=args.token_file
    )

    # List devices and exit if requested
    if args.list_devices:
        try:
            voice_chat.list_audio_devices()
        finally:
            # Nothing was started, but PyAudio was created and must be released
            voice_chat.stop()
        return

    # Run the voice chat
    voice_chat.run()


if __name__ == "__main__":
    main()
class SesameAI:
    """
    SesameAI API Client - Unofficial Python client for the SesameAI API

    Provides authentication and account management functionality for SesameAI services.
    """

    def __init__(self, api_key=None, timeout=30):
        """
        Initialize the SesameAI API client

        Args:
            api_key (str, optional): Firebase API key. If not provided,
                                     will use the default key from config.
            timeout (float or tuple, optional): Timeout in seconds passed to
                                     every HTTP request, so a stalled
                                     connection cannot block forever.
        """
        self.api_key = api_key
        self.timeout = timeout

    def _make_auth_request(self, request_type, payload, is_form_data=False):
        """
        Make a request to the Firebase Authentication API

        Args:
            request_type (str): Type of request ('signup', 'lookup', etc.)
            payload (dict): Request payload
            is_form_data (bool): Whether payload should be sent as form data

        Returns:
            dict: API response as JSON

        Raises:
            NetworkError: If a network error occurs, the server answers with a
                non-success status that carries no API error body, or the body
                is not valid JSON
            APIError: If the API returns an error response
            InvalidTokenError: If a token is invalid
        """
        # Copy so the content-type override below never touches a dict that
        # the config helper might hand out again.
        headers = dict(get_headers(request_type))
        params = get_params(request_type, self.api_key)
        url = get_endpoint_url(request_type)

        if is_form_data:
            # Keep the declared content type in step with the encoded body
            headers['content-type'] = 'application/x-www-form-urlencoded'
            body_kwargs = {'data': payload}
        else:
            body_kwargs = {'json': payload}

        try:
            response = requests.post(
                url,
                params=params,
                headers=headers,
                timeout=self.timeout,
                **body_kwargs,
            )
        except requests.exceptions.RequestException as e:
            raise NetworkError(f"Network error: {str(e)}") from e

        # Parse the body BEFORE looking at the status code: error responses
        # carry their details in the JSON body, and raise_for_status() would
        # otherwise hide them behind a generic NetworkError.
        try:
            response_json = response.json()
        except ValueError:
            response_json = None

        # Check for API errors
        if isinstance(response_json, dict) and 'error' in response_json:
            self._handle_api_error(response_json['error'])

        # Check for HTTP errors that came without an API error body
        try:
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            raise NetworkError(f"Network error: {str(e)}") from e

        if response_json is None:
            raise NetworkError("Network error: response body is not valid JSON")

        return response_json

    def _handle_api_error(self, error):
        """
        Handle API error responses

        Args:
            error (dict): Error information from API

        Raises:
            InvalidTokenError: If a token is invalid
            APIError: For other API errors
        """
        # Tolerate a bare string (or any non-dict) instead of crashing on .get
        if not isinstance(error, dict):
            raise APIError(400, str(error), [])

        error_code = error.get('code', 400)
        error_message = error.get('message', 'Unknown error')
        error_details = error.get('errors', [])

        # Handle specific error types
        if error_message in ('INVALID_ID_TOKEN', 'INVALID_REFRESH_TOKEN'):
            raise InvalidTokenError()

        # Generic API error
        raise APIError(error_code, error_message, error_details)

    def create_anonymous_account(self):
        """
        Create an anonymous account

        Returns:
            SignupResponse: Object containing authentication tokens

        Raises:
            NetworkError: If a network error occurs
            APIError: If the API returns an error response
        """
        payload = {
            'returnSecureToken': True,
        }
        response_json = self._make_auth_request('signup', payload)
        return SignupResponse(response_json)

    def refresh_authentication_token(self, refresh_token):
        """
        Refresh an ID token using a refresh token

        Args:
            refresh_token (str): Firebase refresh token

        Returns:
            RefreshTokenResponse: Object containing new tokens

        Raises:
            NetworkError: If a network error occurs
            APIError: If the API returns an error response
            InvalidTokenError: If the refresh token is invalid
        """
        payload = {
            'grant_type': 'refresh_token',
            'refresh_token': refresh_token
        }

        # The token endpoint is the only one called with a form-encoded body
        response_json = self._make_auth_request('refresh', payload, is_form_data=True)
        return RefreshTokenResponse(response_json)

    def get_account_info(self, id_token):
        """
        Get account information using an ID token

        Args:
            id_token (str): Firebase ID token

        Returns:
            LookupResponse: Object containing account information

        Raises:
            NetworkError: If a network error occurs
            APIError: If the API returns an error response
            InvalidTokenError: If the ID token is invalid
        """
        payload = {
            'idToken': id_token
        }

        response_json = self._make_auth_request('lookup', payload)
        return LookupResponse(response_json)
error occurs 151 | APIError: If the API returns an error response 152 | InvalidTokenError: If the ID token is invalid 153 | """ 154 | payload = { 155 | 'idToken': id_token 156 | } 157 | 158 | response_json = self._make_auth_request('lookup', payload) 159 | return LookupResponse(response_json) 160 | -------------------------------------------------------------------------------- /sesame_ai/config.py: -------------------------------------------------------------------------------- 1 | # sesame_ai/config.py 2 | 3 | import json 4 | import base64 5 | from datetime import datetime 6 | 7 | # Default Firebase API key 8 | DEFAULT_API_KEY = "AIzaSyDtC7Uwb5pGAsdmrH2T4Gqdk5Mga07jYPM" 9 | 10 | # API endpoints 11 | FIREBASE_AUTH_BASE_URL = "https://identitytoolkit.googleapis.com/v1/accounts" 12 | FIREBASE_TOKEN_URL = "https://securetoken.googleapis.com/v1/token" 13 | 14 | def get_firebase_client_header(): 15 | """ 16 | Generate the x-firebase-client header value 17 | 18 | Returns: 19 | str: Base64 encoded Firebase client info 20 | """ 21 | x_firebase_client = { 22 | "version": 2, 23 | "heartbeats": [ 24 | { 25 | "agent": "fire-core/0.11.1 fire-core-esm2017/0.11.1 fire-js/ fire-js-all-app/11.3.1 fire-auth/1.9.0 fire-auth-esm2017/1.9.0", 26 | "dates": [f"{datetime.now().strftime('%Y-%m-%d')}"] 27 | } 28 | ] 29 | } 30 | x_firebase_client_json = json.dumps(x_firebase_client, separators=(",", ":")) 31 | return base64.b64encode(x_firebase_client_json.encode()).decode() 32 | 33 | def get_user_agent(): 34 | """ 35 | Get the standard user agent string 36 | 37 | Returns: 38 | str: User agent string 39 | """ 40 | return 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36' 41 | 42 | def get_headers(request_type): 43 | """ 44 | Get headers for API requests 45 | 46 | Args: 47 | request_type (str): Type of request ('signup', 'lookup', 'refresh', etc.) 
def get_params(request_type, api_key=None):
    """
    Get URL parameters for API requests

    Args:
        request_type (str): Type of request ('signup', 'lookup', 'refresh', etc.)
        api_key (str, optional): API key to use. If None (or empty), uses default key.

    Returns:
        dict: URL parameters for the request (a new dict on every call)
    """
    # Every request type currently takes the same parameters, so there is no
    # per-type branching; request_type is kept so callers need not change and
    # type-specific parameters can be added here later.
    return {'key': api_key or DEFAULT_API_KEY}
class SesameAIError(Exception):
    """Base exception for SesameAI API errors"""


class AuthenticationError(SesameAIError):
    """Raised when authentication fails (invalid tokens, etc.)"""


class APIError(SesameAIError):
    """Raised when the API returns an error response"""

    def __init__(self, code, message, errors=None):
        """
        Initialize with error details

        Args:
            code (int): Error code
            message (str): Error message
            errors (list, optional): Detailed error information
        """
        self.code = code
        self.message = message
        self.errors = errors or []
        super().__init__(f"API Error {code}: {message}")


class InvalidTokenError(AuthenticationError):
    """Raised when an ID token is invalid or expired"""

    def __init__(self, message="Invalid or expired ID token"):
        """
        Initialize with an optional explanation

        Args:
            message (str, optional): Human-readable reason. Defaults to the
                generic "Invalid or expired ID token" text.
        """
        # The token manager raises this error with a specific reason
        # (e.g. "Token refresh failed"); a zero-argument constructor turned
        # those raises into TypeError instead of InvalidTokenError.
        super().__init__(message)


class NetworkError(SesameAIError):
    """Raised when network communication fails"""
class LookupResponse(BaseResponse):
    """Response from the account lookup endpoint"""

    def __init__(self, response_json):
        """
        Initialize with lookup response data

        The per-user attributes (local_id, last_login_at, created_at,
        last_refresh_at) are always defined; they are None when the
        response carries no user entry.

        Args:
            response_json (dict): Raw JSON response from API
        """
        super().__init__(response_json)
        self.kind = response_json.get('kind')

        # Only the first entry of 'users' is read. Falling back to an empty
        # dict keeps every attribute defined even when 'users' is missing,
        # null or empty, so callers never hit AttributeError.
        users = response_json.get('users') or []
        user = users[0] if users else {}

        self.local_id = user.get('localId')
        self.last_login_at = user.get('lastLoginAt')
        self.created_at = user.get('createdAt')
        self.last_refresh_at = user.get('lastRefreshAt')
def _load_tokens(self):
    """
    Load tokens from storage file

    Returns:
        dict: Token data, or an empty dict if no token file is configured,
            the file doesn't exist, it cannot be read or parsed, or its
            content is not a JSON object
    """
    if not self.token_file or not os.path.exists(self.token_file):
        return {}

    try:
        with open(self.token_file, 'r', encoding='utf-8') as f:
            logger.debug(f"Loading tokens from {self.token_file}")
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError,
        # so a corrupt file degrades to "no tokens" instead of crashing.
        logger.warning(f"Failed to load tokens: {e}")
        return {}

    # The rest of the class reads tokens with .get(); valid JSON that is not
    # an object (null, a list, a string) must not be accepted as token data.
    if not isinstance(data, dict):
        logger.warning(f"Failed to load tokens: {self.token_file} does not contain a JSON object")
        return {}

    return data
force_new (bool): If True, creates a new account regardless of existing tokens 97 | 98 | Returns: 99 | str: Valid ID token 100 | 101 | Raises: 102 | InvalidTokenError: If token refresh fails 103 | NetworkError: If a network error occurs 104 | APIError: If the API returns an error 105 | """ 106 | # If force_new is True, create a new account 107 | if force_new: 108 | logger.debug("Forcing creation of new account") 109 | return self._create_new_account() 110 | 111 | # Check if we have an existing ID token 112 | id_token = self.tokens.get('id_token') 113 | refresh_token = self.tokens.get('refresh_token') 114 | 115 | if id_token: 116 | # Check if the token is still valid 117 | try: 118 | logger.debug("Checking if existing token is valid") 119 | if not self._is_token_expired(id_token): 120 | logger.info("Using existing valid token") 121 | return id_token 122 | except (NetworkError, APIError) as e: 123 | logger.warning(f"Error checking token validity: {e}") 124 | # If we can't check, assume it's still valid 125 | return id_token 126 | 127 | # Token is expired, try to refresh 128 | if refresh_token: 129 | try: 130 | logger.info("Refreshing expired token") 131 | refresh_response = self.api_client.refresh_authentication_token(refresh_token) 132 | 133 | # Update tokens 134 | self.tokens = { 135 | 'id_token': refresh_response.id_token, 136 | 'refresh_token': refresh_response.refresh_token, 137 | 'user_id': refresh_response.user_id, 138 | 'expires_in': refresh_response.expires_in, 139 | 'timestamp': int(time.time()) 140 | } 141 | self._save_tokens() 142 | 143 | logger.info("Token refreshed successfully") 144 | return refresh_response.id_token 145 | except (InvalidTokenError, NetworkError, APIError) as e: 146 | logger.error(f"Token refresh failed: {e}") 147 | raise InvalidTokenError("Token refresh failed") 148 | else: 149 | logger.warning("Token expired and no refresh token available") 150 | raise InvalidTokenError("Token expired and no refresh token available") 151 | else: 152 
def _create_new_account(self):
    """
    Sign up a fresh anonymous account and store its tokens

    Returns:
        str: New ID token

    Raises:
        NetworkError: If a network error occurs
        APIError: If the API returns an error
    """
    logger.debug("Creating new anonymous account")
    account = self.api_client.create_anonymous_account()

    # Replace whatever tokens were held before, then persist the new set
    self.tokens = dict(
        id_token=account.id_token,
        refresh_token=account.refresh_token,
        user_id=account.local_id,
        expires_in=account.expires_in,
        timestamp=int(time.time()),
    )
    self._save_tokens()

    logger.debug("New account created successfully")
    return account.id_token
28 | client_name (str, optional): Client identifier. Defaults to "RP-Web". 29 | """ 30 | self.id_token = id_token 31 | self.character = character 32 | self.client_name = client_name 33 | 34 | # WebSocket connection 35 | self.ws = None 36 | self.session_id = None 37 | self.call_id = None 38 | 39 | # Audio settings 40 | self.client_sample_rate = 16000 41 | self.server_sample_rate = 24000 # Default, will be updated from server 42 | self.audio_codec = "none" 43 | 44 | # Connection state 45 | self.reconnect = False 46 | self.is_private = False 47 | self.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36" 48 | 49 | # Audio buffer for received audio 50 | self.audio_buffer = queue.Queue(maxsize=1000) 51 | 52 | # Message tracking 53 | self.last_sent_message_type = None 54 | self.received_since_last_sent = False 55 | self.first_audio_received = False 56 | 57 | # Event for tracking connection state 58 | self.connected_event = threading.Event() 59 | 60 | # Callbacks 61 | self.on_connect_callback = None 62 | self.on_disconnect_callback = None 63 | 64 | def connect(self, blocking=True): 65 | """ 66 | Connect to the SesameAI WebSocket server 67 | 68 | Args: 69 | blocking (bool, optional): If True, blocks until connected. Defaults to True. 
def _connect_websocket(self):
    """
    Internal method to establish WebSocket connection

    Builds the connect URL (ID token, client name, user context and
    character as query parameters), creates the WebSocketApp with this
    object's callbacks, and then blocks in run_forever() until the
    connection ends.
    """
    headers = {
        'Origin': 'https://www.sesame.com',
        'User-Agent': self.user_agent,
    }

    params = {
        'id_token': self.id_token,
        'client_name': self.client_name,
        'usercontext': json.dumps({"timezone": "America/Chicago"}),
        'character': self.character,
    }

    # Construct the WebSocket URL with query parameters
    base_url = 'wss://sesameai.app/agent-service-0/v1/connect'

    # quote_via=quote with safe='/' yields the same percent-encoding the
    # hand-built join produced, while also escaping keys and accepting
    # non-string values.
    query_string = urllib.parse.urlencode(
        params, safe='/', quote_via=urllib.parse.quote
    )
    ws_url = f"{base_url}?{query_string}"

    # Create WebSocket connection
    self.ws = websocket_module.WebSocketApp(
        ws_url,
        header=headers,
        on_open=self._on_open,
        on_message=self._on_message,
        on_error=self._on_error,
        on_close=self._on_close
    )

    # Run the WebSocket. The server certificate must be verified: the URL
    # carries the ID token, so skipping verification (ssl.CERT_NONE) would
    # let anyone able to intercept the connection read the credential.
    self.ws.run_forever(
        sslopt={"cert_reqs": ssl.CERT_REQUIRED},
        skip_utf8_validation=True,
        suppress_origin=False
    )
def _on_close(self, ws, close_status_code, close_msg):
    """
    Callback when the WebSocket connection is closed

    Clears the connected event, forgets the session and call IDs, and then
    invokes the disconnect callback if one is set.

    Args:
        ws: The WebSocketApp that closed (unused)
        close_status_code: Close status code reported for the connection
        close_msg: Close message reported for the connection
    """
    logger.debug(f"WebSocket closed: {close_status_code} - {close_msg}")
    self.connected_event.clear()

    # The session and call cannot outlive the socket. Without this reset,
    # is_connected() and the session/call guards in the send methods keep
    # treating the closed connection as live.
    self.session_id = None
    self.call_id = None

    # Call the disconnect callback if set
    if self.on_disconnect_callback:
        self.on_disconnect_callback()
def _handle_audio(self, data):
    """
    Handle audio message from server

    Decodes the Base64 payload found at data['content']['audio_data'] and
    queues it on self.audio_buffer. Messages without a payload are ignored.
    Any error while processing is logged rather than raised.
    """
    encoded = data.get('content', {}).get('audio_data', '')
    if not encoded:
        return

    try:
        decoded = base64.b64decode(encoded)

        # Never block the receive path: when the buffer is full, drop the
        # oldest chunk and retry once.
        try:
            self.audio_buffer.put_nowait(decoded)
        except queue.Full:
            try:
                self.audio_buffer.get_nowait()
                self.audio_buffer.put_nowait(decoded)
            except queue.Empty:
                pass

        if self.first_audio_received:
            return

        self.first_audio_received = True
        logger.debug("First audio received, sending initialization chunks")
        # Two identical all-"A" Base64 chunks (each decodes to 1280 zero
        # bytes) are sent once to initialize the audio stream.
        init_chunk = "".join(("A" * 1707, "="))
        for _ in range(2):
            self._send_audio(init_chunk)
    except Exception as err:
        logger.error(f"Error processing audio: {err}", exc_info=True)
"session_id": self.session_id, 249 | "call_id": self.call_id, 250 | "request_id": self._generate_request_id(), 251 | "content": "ping" 252 | } 253 | 254 | self._send_data(message) 255 | 256 | def _send_client_location_state(self): 257 | """Send client_location_state message to server""" 258 | if not self.session_id: 259 | return 260 | 261 | message = { 262 | "type": "client_location_state", 263 | "session_id": self.session_id, 264 | "call_id": None, 265 | "content": { 266 | "latitude": 0, 267 | "longitude": 0, 268 | "address": "", 269 | "timezone": "America/Chicago" 270 | } 271 | } 272 | self._send_data(message) 273 | 274 | def _send_audio(self, data): 275 | """ 276 | Send audio data to server 277 | 278 | Args: 279 | data (str): Base64-encoded audio data 280 | """ 281 | if not self.session_id or not self.call_id: 282 | return 283 | 284 | message = { 285 | "type": "audio", 286 | "session_id": self.session_id, 287 | "call_id": self.call_id, 288 | "content": { 289 | "audio_data": data 290 | } 291 | } 292 | 293 | self._send_data(message) 294 | 295 | def _send_call_connect(self): 296 | """Send call_connect message to server""" 297 | if not self.session_id: 298 | return 299 | 300 | message = { 301 | "type": "call_connect", 302 | "session_id": self.session_id, 303 | "call_id": None, 304 | "request_id": self._generate_request_id(), 305 | "content": { 306 | "sample_rate": self.client_sample_rate, 307 | "audio_codec": "none", 308 | "reconnect": self.reconnect, 309 | "is_private": self.is_private, 310 | "client_name": self.client_name, 311 | "settings": { 312 | "preset": f"{self.character}" 313 | }, 314 | "client_metadata": { 315 | "language": "en-US", 316 | "user_agent": self.user_agent, 317 | "mobile_browser": False, 318 | "media_devices": self._get_media_devices() 319 | } 320 | } 321 | } 322 | 323 | self._send_data(message) 324 | 325 | def send_audio_data(self, raw_audio_bytes): 326 | """ 327 | Send raw audio data to the AI 328 | 329 | Args: 330 | raw_audio_bytes (bytes): 
def disconnect(self):
    """
    Disconnect from the server

    Sends a call_disconnect request for the current call. Nothing is sent
    when there is no active session or call.

    Returns:
        bool: True if disconnect message was sent successfully
    """
    if not self.session_id or not self.call_id:
        logger.warning("Cannot disconnect: Not connected")
        return False

    message = {
        "type": "call_disconnect",
        "session_id": self.session_id,
        "call_id": self.call_id,
        "request_id": self._generate_request_id(),
        "content": {
            "reason": "user_request"
        }
    }

    logger.debug("Sending disconnect request")
    # _send_data reports whether the message actually went out; returning
    # True unconditionally claimed success even when the socket was already
    # closed or the send failed.
    return self._send_data(message)
def is_connected(self):
    """
    Report whether a session and a call are both currently established

    Returns:
        bool: True if both session_id and call_id are set (not None)
    """
    if self.session_id is None:
        return False
    return self.call_id is not None
setuptools import setup, find_packages 4 | 5 | with open("README.md", "r", encoding="utf-8") as fh: 6 | long_description = fh.read() 7 | 8 | setup( 9 | name="sesame_ai", 10 | version="0.1.0", 11 | author="ijub", 12 | author_email="ijubgithub@gmail.com", 13 | description="Unofficial Python API wrapper for SesameAI", 14 | long_description=long_description, 15 | long_description_content_type="text/markdown", 16 | url="https://github.com/ijub/sesame-ai", 17 | packages=find_packages(), 18 | classifiers=[ 19 | "Programming Language :: Python :: 3", 20 | "License :: OSI Approved :: MIT License", 21 | "Operating System :: OS Independent", 22 | "Development Status :: 3 - Alpha", 23 | "Intended Audience :: Developers", 24 | "Topic :: Software Development :: Libraries :: Python Modules", 25 | ], 26 | python_requires=">=3.6", 27 | install_requires=[ 28 | "requests>=2.25.0", 29 | "websocket-client>=1.2.0", 30 | "numpy>=1.19.0", 31 | "PyAudio>=0.2.11", 32 | ], 33 | extras_require={ 34 | "dev": [ 35 | "pytest>=6.0.0", 36 | "black>=21.5b2", 37 | "flake8>=3.9.0", 38 | ], 39 | }, 40 | keywords="sesame, ai, voice, api, wrapper, chatbot", 41 | ) --------------------------------------------------------------------------------