├── .gitignore
├── LICENSE
├── README.md
├── Requirements.txt
├── examples
    └── voice_chat.py
├── sesame_ai
    ├── __init__.py
    ├── api.py
    ├── config.py
    ├── exceptions.py
    ├── models.py
    ├── token_manager.py
    └── websocket.py
└── setup.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | *.so
 6 | .Python
 7 | 
 8 | # Distribution / packaging
 9 | build/
10 | develop-eggs/
11 | dist/
12 | downloads/
13 | eggs/
14 | .eggs/
15 | lib/
16 | lib64/
17 | parts/
18 | sdist/
19 | var/
20 | wheels/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 | 
25 | # Virtual environments
26 | venv/
27 | env/
28 | ENV/
29 | .env/
30 | .venv/
31 | 
32 | # Testing
33 | .coverage
34 | htmlcov/
35 | .pytest_cache/
36 | .tox/
37 | .nox/
38 | 
39 | # Documentation
40 | docs/_build/
41 | site/
42 | 
43 | # IDE specific files
44 | .idea/
45 | .vscode/
46 | *.swp
47 | *.swo
48 | .DS_Store
49 | .spyderproject
50 | .spyproject
51 | .ropeproject
52 | 
53 | # Project specific
54 | token.json
55 | *.log
56 | *.db
57 | *.sqlite3
58 | 
59 | # Jupyter Notebook
60 | .ipynb_checkpoints
61 | 
62 | # mypy
63 | .mypy_cache/
64 | .dmypy.json
65 | dmypy.json
66 | 
67 | # Environments
68 | .env
69 | .venv
70 | env/
71 | venv/
72 | ENV/
73 | env.bak/
74 | venv.bak/


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 ijub
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Sesame AI Python Client
  2 | 
  3 | An unofficial Python client library for interacting with the [Sesame](https://www.sesame.com) voice conversation API. This package provides easy access to Sesame's voice-based AI characters, allowing developers to create applications with natural voice conversations.
  4 | 
  5 | ## About Sesame
  6 | 
  7 | Sesame is developing conversational AI with "voice presence" - the quality that makes spoken interactions feel real, understood, and valued. Their technology enables voice conversations with AI characters like Miles and Maya that feature emotional intelligence, natural conversational dynamics, and contextual awareness.
  8 | 
  9 | ## Support
 10 | 
 11 | If you find this project helpful, consider buying me a coffee!
 12 | 
 13 | [![Buy Me A Coffee](https://img.shields.io/badge/Buy%20Me%20A%20Coffee-Support-yellow.svg)](https://buymeacoffee.com/ijub)
 14 | 
 15 | ## Installation
 16 | 
 17 | ```bash
 18 | # From GitHub
 19 | pip install git+https://github.com/ijub/sesame_ai.git
 20 | 
 21 | # For development
 22 | git clone https://github.com/ijub/sesame_ai.git
 23 | cd sesame_ai
 24 | pip install -e .
 25 | ```
 26 | 
 27 | ## Features
 28 | 
 29 | - Authentication and account management
 30 | - WebSocket-based real-time voice conversations
 31 | - Token management and refresh
 32 | - Support for multiple AI characters (Miles, Maya)
 33 | - Voice activity detection
 34 | - Simple and intuitive API
 35 | 
 36 | ## Available Characters
 37 | 
 38 | The API supports multiple AI characters:
 39 | 
 40 | - **Miles**: A male character (default)
 41 | - **Maya**: A female character
 42 | 
 43 | ## Quick Start
 44 | 
 45 | ### Authentication
 46 | 
 47 | ```python
 48 | from sesame_ai import SesameAI, TokenManager
 49 | 
 50 | # Create API client
 51 | client = SesameAI()
 52 | 
 53 | # Create an anonymous account
 54 | signup_response = client.create_anonymous_account()
 55 | print(f"ID Token: {signup_response.id_token}")
 56 | 
 57 | # Look up account information
 58 | lookup_response = client.get_account_info(signup_response.id_token)
 59 | print(f"User ID: {lookup_response.local_id}")
 60 | 
 61 | # For easier token management, use TokenManager
 62 | token_manager = TokenManager(client, token_file="token.json")
 63 | id_token = token_manager.get_valid_token()
 64 | ```
 65 | 
 66 | ### Voice Chat Example
 67 | 
 68 | ```python
 69 | from sesame_ai import SesameAI, SesameWebSocket, TokenManager
 70 | import pyaudio
 71 | import threading
 72 | import time
 73 | import numpy as np
 74 | 
 75 | # Get authentication token using TokenManager
 76 | api_client = SesameAI()
 77 | token_manager = TokenManager(api_client, token_file="token.json")
 78 | id_token = token_manager.get_valid_token()
 79 | 
 80 | # Connect to WebSocket (choose character: "Miles" or "Maya")
 81 | ws = SesameWebSocket(id_token=id_token, character="Maya")
 82 | 
 83 | # Set up connection callbacks
 84 | def on_connect():
 85 |     print("Connected to SesameAI!")
 86 | 
 87 | def on_disconnect():
 88 |     print("Disconnected from SesameAI")
 89 | 
 90 | ws.set_connect_callback(on_connect)
 91 | ws.set_disconnect_callback(on_disconnect)
 92 | 
 93 | # Connect to the server
 94 | ws.connect()
 95 | 
 96 | # Audio settings
 97 | CHUNK = 1024
 98 | FORMAT = pyaudio.paInt16
 99 | CHANNELS = 1
100 | RATE = 16000
101 | 
102 | # Initialize PyAudio
103 | p = pyaudio.PyAudio()
104 | 
105 | # Open microphone stream
106 | mic_stream = p.open(format=FORMAT,
107 |                     channels=CHANNELS,
108 |                     rate=RATE,
109 |                     input=True,
110 |                     frames_per_buffer=CHUNK)
111 | 
112 | # Open speaker stream (using server's sample rate)
113 | speaker_stream = p.open(format=FORMAT,
114 |                         channels=CHANNELS,
115 |                         rate=ws.server_sample_rate,
116 |                         output=True)
117 | 
118 | # Function to capture and send microphone audio
119 | def capture_microphone():
120 |     print("Microphone capture started...")
121 |     try:
122 |         while True:
123 |             if ws.is_connected():
124 |                 data = mic_stream.read(CHUNK, exception_on_overflow=False)
125 |                 ws.send_audio_data(data)
126 |             else:
127 |                 time.sleep(0.1)
128 |     except KeyboardInterrupt:
129 |         print("Microphone capture stopped")
130 | 
131 | # Function to play received audio
132 | def play_audio():
133 |     print("Audio playback started...")
134 |     try:
135 |         while True:
136 |             audio_chunk = ws.get_next_audio_chunk(timeout=0.01)
137 |             if audio_chunk:
138 |                 speaker_stream.write(audio_chunk)
139 |     except KeyboardInterrupt:
140 |         print("Audio playback stopped")
141 | 
142 | # Start audio threads
143 | mic_thread = threading.Thread(target=capture_microphone)
144 | mic_thread.daemon = True
145 | mic_thread.start()
146 | 
147 | playback_thread = threading.Thread(target=play_audio)
148 | playback_thread.daemon = True
149 | playback_thread.start()
150 | 
151 | # Keep the main thread alive
152 | try:
153 |     while True:
154 |         time.sleep(1)
155 | except KeyboardInterrupt:
156 |     print("Disconnecting...")
157 |     ws.disconnect()
158 |     mic_stream.stop_stream()
159 |     mic_stream.close()
160 |     speaker_stream.stop_stream()
161 |     speaker_stream.close()
162 |     p.terminate()
163 | ```
164 | 
165 | The package also includes a full-featured voice chat example that you can run:
166 | 
167 | ```bash
168 | # Chat with Miles (default)
169 | python examples/voice_chat.py
170 | 
171 | # Chat with Maya
172 | python examples/voice_chat.py --character Maya
173 | ```
174 | 
175 | Command-line options:
176 | - `--character`: Character to chat with (default: Miles, options: Miles, Maya)
177 | - `--input-device`: Input device index
178 | - `--output-device`: Output device index
179 | - `--list-devices`: List audio devices and exit
180 | - `--token-file`: Path to token storage file
181 | - `--debug`: Enable debug logging
182 | 
183 | ## API Reference
184 | 
185 | ### SesameAI
186 | 
187 | The main API client for authentication.
188 | 
189 | - `SesameAI(api_key=None)` - Create a new API client
190 | - `create_anonymous_account()` - Create an anonymous account
191 | - `get_account_info(id_token)` - Look up account information
192 | - `refresh_authentication_token(refresh_token)` - Refresh an ID token
193 | 
194 | ### TokenManager
195 | 
196 | Manages authentication tokens with automatic refresh and persistence.
197 | 
198 | - `TokenManager(api_client=None, token_file=None)` - Create a token manager
199 | - `get_valid_token(force_new=False)` - Get a valid token, refreshing if needed
200 | - `clear_tokens()` - Clear stored tokens
201 | 
202 | ### SesameWebSocket
203 | 
204 | WebSocket client for real-time voice conversation.
205 | 
206 | - `SesameWebSocket(id_token, character="Miles", client_name="RP-Web")` - Create a new WebSocket client
207 | - `connect(blocking=True)` - Connect to the server
208 | - `send_audio_data(raw_audio_bytes)` - Send raw audio data
209 | - `get_next_audio_chunk(timeout=None)` - Get the next audio chunk
210 | - `disconnect()` - Disconnect from the server
211 | - `is_connected()` - Check if connected
212 | 
213 | ## Error Handling
214 | 
215 | The library provides several exception classes for error handling:
216 | 
217 | - `SesameAIError` - Base exception class
218 | - `InvalidTokenError` - Invalid token errors
219 | - `APIError` - API errors with code and message
220 | - `NetworkError` - Network communication errors
221 | 
222 | Example:
223 | 
224 | ```python
225 | from sesame_ai import SesameAI, InvalidTokenError, APIError, NetworkError
226 | 
227 | client = SesameAI()
228 | 
229 | try:
230 |     # Try to use an invalid token
231 |     client.get_account_info("invalid_token")
232 | except InvalidTokenError:
233 |     print("The token is invalid or expired")
234 | except APIError as e:
235 |     print(f"API error: {e.code} - {e.message}")
236 | except NetworkError as e:
237 |     print(f"Network error: {e}")
238 | ```
239 | 
240 | ## Troubleshooting
241 | 
242 | ### Audio Device Problems
243 | 
244 | If you encounter audio device issues:
245 | 
246 | 1. Use `--list-devices` to see available audio devices
247 | 2. Specify input/output devices with `--input-device` and `--output-device`
248 | 3. Ensure PyAudio is properly installed with all dependencies
249 | 
250 | ### Audio Feedback Issues
251 | 
252 | Currently, the voice chat example doesn't block audio coming from the AI (through your speakers) from being picked up by your microphone, which can cause feedback loops. For the best experience:
253 | 
254 | 1. Use headphones to prevent the AI from hearing itself
255 | 2. Keep speaker volume at a moderate level
256 | 3. Position your microphone away from speakers if not using headphones
257 | 
258 | **Note:** I'm working on updating the `voice_chat.py` example to implement echo cancellation and audio filtering to address this issue in a future update.
259 | 
260 | ### Connection Issues
261 | 
262 | If you have trouble connecting:
263 | 
264 | 1. Check your internet connection
265 | 2. Verify your authentication token is valid
266 | 3. Ensure the SesameAI service is available
267 | 
268 | ## Legal Disclaimer
269 | 
270 | This is an unofficial API wrapper and is not affiliated with, maintained, authorized, endorsed, or sponsored by Sesame or any of its affiliates. This wrapper is intended for personal, educational, and non-commercial use only.
271 | 
272 | Users of this library assume all legal responsibility for its use. The author(s) are not responsible for any violations of Sesame Terms of Service or applicable laws.
273 | 
274 | ## License
275 | 
276 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
277 | 
278 | ## Support
279 | 
280 | If you find this project helpful, consider buying me a coffee!
281 | 
282 | <a href="https://buymeacoffee.com/ijub" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png" alt="Buy Me A Coffee" style="height: 60px !important;width: 217px !important;" ></a>


--------------------------------------------------------------------------------
/Requirements.txt:
--------------------------------------------------------------------------------
 1 | # Core dependencies
 2 | requests>=2.25.0
 3 | websocket-client>=1.2.0
 4 | numpy>=1.19.0
 5 | PyAudio>=0.2.11
 6 | 
 7 | # Optional dependencies for development
 8 | pytest>=6.0.0
 9 | black>=21.5b2
10 | flake8>=3.9.0


--------------------------------------------------------------------------------
/examples/voice_chat.py:
--------------------------------------------------------------------------------
  1 | # examples/voice_chat.py
  2 | 
  3 | 
  4 | """
  5 | SesameAI Voice Chat Example
  6 | 
  7 | This example demonstrates how to use the SesameAI API for real-time voice conversations.
  8 | It handles:
  9 | - Authentication
 10 | - WebSocket connection
 11 | - Microphone input
 12 | - Speaker output
 13 | - Voice activity detection
 14 | - Graceful disconnection
 15 | 
 16 | Available characters:
 17 | - Miles (default)
 18 | - Maya
 19 | """
 20 | 
 21 | import sys
 22 | import os
 23 | import time
 24 | import threading
 25 | import argparse
 26 | import queue
 27 | import logging
 28 | import numpy as np
 29 | import pyaudio
 30 | from sesame_ai import SesameAI, SesameWebSocket, TokenManager, InvalidTokenError, NetworkError, APIError
 31 | 
 32 | logger = logging.getLogger('sesame.examples.voice_chat')
 33 | 
 34 | class VoiceChat:
 35 |     """Voice chat application using SesameAI"""
 36 |     
 37 |     # Available characters
 38 |     AVAILABLE_CHARACTERS = ["Miles", "Maya"]
 39 |     
 40 |     def __init__(self, character="Miles", input_device=None, output_device=None, 
 41 |                  token_file=None):
 42 |         """
 43 |         Initialize the voice chat application
 44 |         
 45 |         Args:
 46 |             character (str): Character to chat with ("Miles" or "Maya")
 47 |             input_device (int, optional): Input device index
 48 |             output_device (int, optional): Output device index
 49 |             use_saved_token (bool): Whether to use a saved token
 50 |             force_new_token (bool): Whether to force creation of a new token
 51 |             token_file (str, optional): Path to token storage file. If None, tokens won't be saved.
 52 |         """
 53 |         # Validate character
 54 |         if character not in self.AVAILABLE_CHARACTERS:
 55 |             print(f"Warning: '{character}' is not in the list of known characters. Using anyway.")
 56 |             print(f"Known characters: {', '.join(self.AVAILABLE_CHARACTERS)}")
 57 |             
 58 |         self.character = character
 59 |         self.input_device_index = input_device
 60 |         self.output_device_index = output_device
 61 |         self.token_file = token_file
 62 |         
 63 |         # Audio settings
 64 |         self.chunk_size = 1024
 65 |         self.sample_format = pyaudio.paInt16
 66 |         self.channels = 1
 67 |         self.input_rate = 16000
 68 |         self.output_rate = 24000  # Will be updated from server
 69 |         
 70 |         # Voice activity detection
 71 |         self.amplitude_threshold = 500
 72 |         self.silence_counter = 0
 73 |         self.silence_limit = 50  # Number of consecutive silent chunks before sending silence
 74 |         
 75 |         # PyAudio instance
 76 |         self.p = pyaudio.PyAudio()
 77 |         
 78 |         # Streams
 79 |         self.input_stream = None
 80 |         self.output_stream = None
 81 |         
 82 |         # SesameAI client
 83 |         self.api_client = SesameAI()
 84 |         
 85 |         # Initialize token manager with token_file (which may be None)
 86 |         self.token_manager = TokenManager(self.api_client, token_file=self.token_file)
 87 |         
 88 |         self.id_token = None
 89 |         self.ws = None
 90 |         
 91 |         # Thread control
 92 |         self.running = False
 93 |         self.threads = []
 94 | 
 95 |         # Logging
 96 |         logger.debug(f"VoiceChat initialized with character: {character}")
 97 |     
 98 |     def authenticate(self):
 99 |         """Authenticate with SesameAI and get a token"""
100 |         logger.info("Authenticating with SesameAI...")
101 |         try:
102 |             # If no token file is specified, force a new token
103 |             force_new = self.token_file is None
104 |             
105 |             # Get a valid token using the token manager
106 |             self.id_token = self.token_manager.get_valid_token(force_new=force_new)
107 |             logger.info("Authentication successful!")
108 |             return True
109 |         except InvalidTokenError:
110 |             logger.error("Authentication failed: Token expired and couldn't be refreshed")
111 |             return False
112 |         except (NetworkError, APIError) as e:
113 |             logger.error(f"Authentication failed: {e}")
114 |             return False
115 |         
116 |     def list_audio_devices(self):
117 |         """List available audio devices"""
118 |         logger.info("Listing available audio devices")
119 |         print("\nAvailable audio devices:")
120 |         print("-" * 60)
121 |         
122 |         for i in range(self.p.get_device_count()):
123 |             dev_info = self.p.get_device_info_by_index(i)
124 |             name = dev_info.get('name', 'Unknown')
125 |             inputs = dev_info.get('maxInputChannels', 0)
126 |             outputs = dev_info.get('maxOutputChannels', 0)
127 |             
128 |             if inputs > 0:
129 |                 print(f"ID {i}: {name} (Input)")
130 |             if outputs > 0:
131 |                 print(f"ID {i}: {name} (Output)")
132 |         
133 |         print("-" * 60)
134 |     
135 |     def select_devices(self):
136 |         """Select input and output devices"""
137 |         self.list_audio_devices()
138 |         
139 |         # If devices weren't specified in constructor, ask user
140 |         if self.input_device_index is None:
141 |             try:
142 |                 self.input_device_index = int(input("Select input device ID: "))
143 |                 logger.debug(f"Selected input device ID: {self.input_device_index}")
144 |             except ValueError:
145 |                 logger.warning("Invalid input. Using default device.")
146 |                 self.input_device_index = None
147 |         
148 |         if self.output_device_index is None:
149 |             try:
150 |                 self.output_device_index = int(input("Select output device ID: "))
151 |                 logger.debug(f"Selected output device ID: {self.output_device_index}")
152 |             except ValueError:
153 |                 logger.warning("Invalid input. Using default device.")
154 |                 self.output_device_index = None
155 |     
156 |     def on_connect(self):
157 |         """Callback when WebSocket connection is established"""
158 |         logger.info(f"Connected to {self.character}!")
159 |         # Update output rate from server
160 |         self.output_rate = self.ws.server_sample_rate
161 |         logger.debug(f"Server sample rate: {self.output_rate}")
162 |         
163 |         # Initialize audio streams after connection
164 |         self.setup_audio_streams()
165 |         
166 |         # Start audio threads
167 |         self.start_audio_threads()
168 |     
169 |     def on_disconnect(self):
170 |         """Callback when WebSocket connection is disconnected"""
171 |         logger.info(f"Disconnected from {self.character}")
172 |         
173 |         # Stop the application if it's still running
174 |         if self.running:
175 |             self.stop()
176 |     
177 |     def connect(self):
178 |         """Connect to SesameAI WebSocket"""
179 |         logger.info(f"Connecting to SesameAI as character '{self.character}'...")
180 |         
181 |         # Create WebSocket client
182 |         self.ws = SesameWebSocket(
183 |             id_token=self.id_token,
184 |             character=self.character
185 |         )
186 |         
187 |         # Set up callbacks
188 |         self.ws.set_connect_callback(self.on_connect)
189 |         self.ws.set_disconnect_callback(self.on_disconnect)
190 |         
191 |         # Connect to server
192 |         if self.ws.connect():
193 |             logger.debug("WebSocket connection established")
194 |             return True
195 |         else:
196 |             logger.error("Failed to connect to SesameAI")
197 |             return False
198 |     
199 |     def setup_audio_streams(self):
200 |         """Set up audio input and output streams"""
201 |         logger.debug("Setting up audio streams")
202 |         
203 |         # Input stream (microphone)
204 |         self.input_stream = self.p.open(
205 |             format=self.sample_format,
206 |             channels=self.channels,
207 |             rate=self.input_rate,
208 |             input=True,
209 |             frames_per_buffer=self.chunk_size,
210 |             input_device_index=self.input_device_index
211 |         )
212 |         
213 |         # Output stream (speaker)
214 |         self.output_stream = self.p.open(
215 |             format=self.sample_format,
216 |             channels=self.channels,
217 |             rate=self.output_rate,
218 |             output=True,
219 |             output_device_index=self.output_device_index
220 |         )
221 |         
222 |         logger.debug("Audio streams initialized")
223 |     
224 |     def capture_microphone(self):
225 |         """Capture audio from microphone and send to SesameAI"""
226 |         logger.debug("Microphone capture started")
227 |         
228 |         while self.running:
229 |             if not self.ws.is_connected():
230 |                 time.sleep(0.1)
231 |                 continue
232 |             
233 |             try:
234 |                 # Read audio data from microphone
235 |                 data = self.input_stream.read(self.chunk_size, exception_on_overflow=False)
236 |                 
237 |                 # Check audio level for voice activity detection
238 |                 audio_samples = np.frombuffer(data, dtype=np.int16)
239 |                 rms_val = np.sqrt(np.mean(audio_samples.astype(np.float32) ** 2))
240 |                 
241 |                 if rms_val > self.amplitude_threshold:
242 |                     # Voice detected
243 |                     self.silence_counter = 0
244 |                     self.ws.send_audio_data(data)
245 |                 else:
246 |                     # Silence detected
247 |                     self.silence_counter += 1
248 |                     if self.silence_counter >= self.silence_limit:
249 |                         # Send completely silent audio after silence threshold
250 |                         # This is more efficient than sending the actual low-level audio
251 |                         silent_data = np.zeros(self.chunk_size, dtype=np.int16).tobytes()
252 |                         self.ws.send_audio_data(silent_data)
253 |                     else:
254 |                         # Continue sending actual audio during brief pauses
255 |                         self.ws.send_audio_data(data)
256 |             except Exception as e:
257 |                 if self.running:
258 |                     logger.error(f"Error capturing microphone: {e}", exc_info=True)
259 |                     time.sleep(0.1)
260 |     
261 |     def play_audio(self):
262 |         """Play audio received from SesameAI"""
263 |         logger.debug("Audio playback started")
264 |         
265 |         while self.running:
266 |             try:
267 |                 # Get audio chunk from WebSocket buffer with a short timeout
268 |                 audio_chunk = self.ws.get_next_audio_chunk(timeout=0.01)
269 |                 if audio_chunk:
270 |                     # Play audio immediately when received
271 |                     self.output_stream.write(audio_chunk)
272 |             except Exception as e:
273 |                 if self.running:
274 |                     logger.error(f"Error playing audio: {e}", exc_info=True)
275 |     
276 |     def start_audio_threads(self):
277 |         """Start audio capture and playback threads"""
278 |         # Microphone capture thread
279 |         mic_thread = threading.Thread(target=self.capture_microphone)
280 |         mic_thread.daemon = True
281 |         mic_thread.start()
282 |         self.threads.append(mic_thread)
283 |         
284 |         # Audio playback thread
285 |         playback_thread = threading.Thread(target=self.play_audio)
286 |         playback_thread.daemon = True
287 |         playback_thread.start()
288 |         self.threads.append(playback_thread)
289 |         
290 |         logger.debug("Audio threads started")
291 |     
292 |     def start(self):
293 |         """Start the voice chat"""
294 |         # Authenticate
295 |         if not self.authenticate():
296 |             return False
297 |         
298 |         # Select audio devices
299 |         self.select_devices()
300 |         
301 |         # Set running flag
302 |         self.running = True
303 |         
304 |         # Connect to WebSocket (will trigger on_connect callback)
305 |         if not self.connect():
306 |             self.running = False
307 |             return False
308 |         
309 |         logger.info(f"Voice chat with {self.character} started! Press Ctrl+C to exit.")
310 |         return True
311 |     
312 |     def stop(self):
313 |         """Stop the voice chat"""
314 |         if not self.running:
315 |             return
316 |         
317 |         self.running = False
318 |         logger.info("Stopping voice chat...")
319 |         
320 |         # Disconnect from WebSocket
321 |         if self.ws and self.ws.is_connected():
322 |             self.ws.disconnect()
323 |         
324 |         # Close audio streams
325 |         if self.input_stream:
326 |             self.input_stream.stop_stream()
327 |             self.input_stream.close()
328 |         
329 |         if self.output_stream:
330 |             self.output_stream.stop_stream()
331 |             self.output_stream.close()
332 |         
333 |         # Terminate PyAudio
334 |         self.p.terminate()
335 |         
336 |         logger.info("Voice chat stopped")
337 |     
338 |     def run(self):
339 |         """Run the voice chat application"""
340 |         try:
341 |             if self.start():
342 |                 # Keep main thread alive
343 |                 while self.running:
344 |                     time.sleep(0.1)
345 |         except KeyboardInterrupt:
346 |             logger.debug("Interrupted by user")
347 |         finally:
348 |             self.stop()
349 | 
350 | 
351 | 
def main():
    """Command-line entry point for the voice chat example.

    Configures logging, parses the command-line options, builds a VoiceChat
    instance and either lists the audio devices (``--list-devices``) or runs
    the chat until it ends.
    """
    # Configure logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # Limit the 'websocket' logger to WARNING and above
    logging.getLogger('websocket').setLevel(logging.WARNING)

    parser = argparse.ArgumentParser(description="SesameAI Voice Chat Example")
    parser.add_argument("--character", default="Miles", choices=VoiceChat.AVAILABLE_CHARACTERS,
                    help=f"Character to chat with (default: Miles, options: {', '.join(VoiceChat.AVAILABLE_CHARACTERS)})")
    parser.add_argument("--input-device", type=int, help="Input device index")
    parser.add_argument("--output-device", type=int, help="Output device index")
    parser.add_argument("--list-devices", action="store_true", help="List audio devices and exit")
    parser.add_argument("--token-file", help="Path to token storage file")
    parser.add_argument("--debug", action="store_true", help="Enable debug logging")

    args = parser.parse_args()

    # Set debug level if requested ('sesame' is the parent of this module's logger)
    if args.debug:
        logging.getLogger('sesame').setLevel(logging.DEBUG)

    # Create voice chat instance
    voice_chat = VoiceChat(
        character=args.character,
        input_device=args.input_device,
        output_device=args.output_device,
        token_file=args.token_file
    )

    # List devices and exit if requested
    if args.list_devices:
        try:
            voice_chat.list_audio_devices()
        finally:
            # run() is never reached on this path, so the PyAudio instance
            # opened by VoiceChat.__init__ has to be released here.
            voice_chat.p.terminate()
        return

    # Run the voice chat
    voice_chat.run()
394 | 
395 | if __name__ == "__main__":
396 |     main()


--------------------------------------------------------------------------------
/sesame_ai/__init__.py:
--------------------------------------------------------------------------------
 1 | # sesame_ai/__init__.py
 2 | 
 3 | from .api import SesameAI
 4 | from .websocket import SesameWebSocket
 5 | from .exceptions import SesameAIError, AuthenticationError, APIError, InvalidTokenError, NetworkError
 6 | from .models import SignupResponse, LookupResponse, RefreshTokenResponse
 7 | from .token_manager import TokenManager
 8 | 
 9 | 
10 | __version__ = "0.1.0"
11 | __author__ = "ijub"
12 | __license__ = "MIT"
13 | 
14 | # Export public classes and functions
15 | __all__ = [
16 |     'SesameAI',
17 |     'SesameWebSocket',
18 |     'TokenManager',
19 |     'SesameAIError',
20 |     'AuthenticationError',
21 |     'APIError',
22 |     'InvalidTokenError',
23 |     'NetworkError',
24 |     'SignupResponse',
25 |     'LookupResponse',
26 |     'RefreshTokenResponse',
27 | ]


--------------------------------------------------------------------------------
/sesame_ai/api.py:
--------------------------------------------------------------------------------
  1 | # sesame_ai/api.py
  2 | 
  3 | import requests
  4 | from .config import get_headers, get_params, get_endpoint_url
  5 | from .models import SignupResponse, LookupResponse, RefreshTokenResponse
  6 | from .exceptions import APIError, InvalidTokenError, NetworkError
  7 | 
  8 | class SesameAI:
  9 |     """
 10 |     SesameAI API Client - Unofficial Python client for the SesameAI API
 11 |     
 12 |     Provides authentication and account management functionality for SesameAI services.
 13 |     """
 14 |     
 15 |     def __init__(self, api_key=None):
 16 |         """
 17 |         Initialize the SesameAI API client
 18 |         
 19 |         Args:
 20 |             api_key (str, optional): Firebase API key. If not provided, 
 21 |                                      will use the default key from config.
 22 |         """
 23 |         self.api_key = api_key
 24 |     
 25 |     def _make_auth_request(self, request_type, payload, is_form_data=False):
 26 |         """
 27 |         Make a request to the Firebase Authentication API
 28 |         
 29 |         Args:
 30 |             request_type (str): Type of request ('signup', 'lookup', etc.)
 31 |             payload (dict): Request payload
 32 |             is_form_data (bool): Whether payload should be sent as form data
 33 |             
 34 |         Returns:
 35 |             dict: API response as JSON
 36 |             
 37 |         Raises:
 38 |             NetworkError: If a network error occurs
 39 |             APIError: If the API returns an error response
 40 |             InvalidTokenError: If a token is invalid
 41 |         """
 42 |         headers = get_headers(request_type)
 43 |         params = get_params(request_type, self.api_key)
 44 |         url = get_endpoint_url(request_type)
 45 |         
 46 |         try:
 47 |             if is_form_data:
 48 |                 response = requests.post(
 49 |                     url,
 50 |                     params=params,
 51 |                     headers=headers,
 52 |                     data=payload,
 53 |                 )
 54 |             else:
 55 |                 response = requests.post(
 56 |                     url,
 57 |                     params=params,
 58 |                     headers=headers,
 59 |                     json=payload,
 60 |                 )
 61 |             
 62 |             # Check for HTTP errors
 63 |             response.raise_for_status()
 64 |             
 65 |             # Parse the response
 66 |             response_json = response.json()
 67 |             
 68 |             # Check for API errors
 69 |             if 'error' in response_json:
 70 |                 self._handle_api_error(response_json['error'])
 71 |                 
 72 |             return response_json
 73 |             
 74 |         except requests.exceptions.RequestException as e:
 75 |             raise NetworkError(f"Network error: {str(e)}")
 76 |     
 77 |     def _handle_api_error(self, error):
 78 |         """
 79 |         Handle API error responses
 80 |         
 81 |         Args:
 82 |             error (dict): Error information from API
 83 |             
 84 |         Raises:
 85 |             InvalidTokenError: If a token is invalid
 86 |             APIError: For other API errors
 87 |         """
 88 |         error_code = error.get('code', 400)
 89 |         error_message = error.get('message', 'Unknown error')
 90 |         error_details = error.get('errors', [])
 91 |         
 92 |         # Handle specific error types
 93 |         if error_message in ('INVALID_ID_TOKEN', 'INVALID_REFRESH_TOKEN'):
 94 |             raise InvalidTokenError()
 95 |         
 96 |         # Generic API error
 97 |         raise APIError(error_code, error_message, error_details)
 98 |     
 99 |     def create_anonymous_account(self):
100 |         """
101 |         Create an anonymous account
102 |         
103 |         Returns:
104 |             SignupResponse: Object containing authentication tokens
105 |             
106 |         Raises:
107 |             NetworkError: If a network error occurs
108 |             APIError: If the API returns an error response
109 |         """
110 |         payload = {
111 |             'returnSecureToken': True,
112 |         }
113 |         response_json = self._make_auth_request('signup', payload)
114 |         return SignupResponse(response_json)
115 |     
116 |     def refresh_authentication_token(self, refresh_token):
117 |         """
118 |         Refresh an ID token using a refresh token
119 |         
120 |         Args:
121 |             refresh_token (str): Firebase refresh token
122 |             
123 |         Returns:
124 |             RefreshTokenResponse: Object containing new tokens
125 |             
126 |         Raises:
127 |             NetworkError: If a network error occurs
128 |             APIError: If the API returns an error response
129 |             InvalidTokenError: If the refresh token is invalid
130 |         """
131 |         payload = {
132 |             'grant_type': 'refresh_token',
133 |             'refresh_token': refresh_token
134 |         }
135 |         
136 |         response_json = self._make_auth_request('refresh', payload, is_form_data=True)
137 |         return RefreshTokenResponse(response_json)
138 |         
139 |     def get_account_info(self, id_token):
140 |         """
141 |         Get account information using an ID token
142 |         
143 |         Args:
144 |             id_token (str): Firebase ID token
145 |             
146 |         Returns:
147 |             LookupResponse: Object containing account information
148 |             
149 |         Raises:
150 |             NetworkError: If a network error occurs
151 |             APIError: If the API returns an error response
152 |             InvalidTokenError: If the ID token is invalid
153 |         """
154 |         payload = {
155 |             'idToken': id_token
156 |         }
157 |         
158 |         response_json = self._make_auth_request('lookup', payload)
159 |         return LookupResponse(response_json)
160 |     


--------------------------------------------------------------------------------
/sesame_ai/config.py:
--------------------------------------------------------------------------------
  1 | # sesame_ai/config.py
  2 | 
  3 | import json
  4 | import base64
  5 | from datetime import datetime
  6 | 
# Default Firebase API key
# (get_params() falls back to this value when the caller passes no key)
DEFAULT_API_KEY = "AIzaSyDtC7Uwb5pGAsdmrH2T4Gqdk5Mga07jYPM"

# API endpoints
# Base for the account endpoints; get_endpoint_url() appends ":<endpoint>" to it
FIREBASE_AUTH_BASE_URL = "https://identitytoolkit.googleapis.com/v1/accounts"
# URL that get_endpoint_url() returns for the 'refresh' request type
FIREBASE_TOKEN_URL = "https://securetoken.googleapis.com/v1/token"
 13 | 
 14 | def get_firebase_client_header():
 15 |     """
 16 |     Generate the x-firebase-client header value
 17 |     
 18 |     Returns:
 19 |         str: Base64 encoded Firebase client info
 20 |     """
 21 |     x_firebase_client = {
 22 |         "version": 2,
 23 |         "heartbeats": [
 24 |             {
 25 |                 "agent": "fire-core/0.11.1 fire-core-esm2017/0.11.1 fire-js/ fire-js-all-app/11.3.1 fire-auth/1.9.0 fire-auth-esm2017/1.9.0",
 26 |                 "dates": [f"{datetime.now().strftime('%Y-%m-%d')}"]
 27 |             }
 28 |         ]
 29 |     }
 30 |     x_firebase_client_json = json.dumps(x_firebase_client, separators=(",", ":"))
 31 |     return base64.b64encode(x_firebase_client_json.encode()).decode()
 32 | 
 33 | def get_user_agent():
 34 |     """
 35 |     Get the standard user agent string
 36 |     
 37 |     Returns:
 38 |         str: User agent string
 39 |     """
 40 |     return 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36'
 41 | 
 42 | def get_headers(request_type):
 43 |     """
 44 |     Get headers for API requests
 45 |     
 46 |     Args:
 47 |         request_type (str): Type of request ('signup', 'lookup', 'refresh', etc.)
 48 |         
 49 |     Returns:
 50 |         dict: Headers for the request
 51 |     """
 52 |     common_headers = {
 53 |         'accept': '*/*',
 54 |         'accept-language': 'en-US,en;q=0.9',
 55 |         'content-type': 'application/json',
 56 |         'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
 57 |         'x-firebase-client': get_firebase_client_header(),
 58 |         'x-client-data': 'COKQywE=',
 59 |         'x-client-version': 'Chrome/JsCore/11.3.1/FirebaseCore-web',
 60 |         'x-firebase-gmpid': '1:1072000975600:web:75b0bf3a9bb8d92e767835',
 61 |     }
 62 | 
 63 |     # Add request-specific headers if needed
 64 |     if request_type == 'signup':
 65 |         return common_headers
 66 |     elif request_type == 'lookup':
 67 |         return common_headers
 68 |     elif request_type == 'refresh':
 69 |         return common_headers
 70 |     else:
 71 |         return common_headers
 72 | 
 73 | def get_params(request_type, api_key=None):
 74 |     """
 75 |     Get URL parameters for API requests
 76 |     
 77 |     Args:
 78 |         request_type (str): Type of request ('signup', 'lookup', 'refresh', etc.)
 79 |         api_key (str, optional): API key to use. If None, uses default key.
 80 |         
 81 |     Returns:
 82 |         dict: URL parameters for the request
 83 |     """
 84 |     # Use provided API key or fall back to default
 85 |     key = api_key if api_key else DEFAULT_API_KEY
 86 |     
 87 |     common_params = {
 88 |         'key': key,
 89 |     }
 90 |     
 91 |     # Add request-specific parameters if needed
 92 |     if request_type == 'signup':
 93 |         return common_params
 94 |     elif request_type == 'lookup':
 95 |         return common_params
 96 |     elif request_type == 'refresh':
 97 |         return common_params
 98 |     else:
 99 |         return common_params
100 | 
def get_endpoint_url(request_type):
    """
    Resolve a request type to the URL it should be sent to

    Args:
        request_type (str): Type of request ('signup', 'lookup', 'refresh', etc.)

    Returns:
        str: Full URL for the request
    """
    # Token refresh has its own URL
    if request_type == 'refresh':
        return FIREBASE_TOKEN_URL

    # 'signup' maps to the camel-cased 'signUp' endpoint; any other request
    # type is used as the endpoint name verbatim
    if request_type == 'signup':
        return f"{FIREBASE_AUTH_BASE_URL}:signUp"
    return f"{FIREBASE_AUTH_BASE_URL}:{request_type}"


--------------------------------------------------------------------------------
/sesame_ai/exceptions.py:
--------------------------------------------------------------------------------
 1 | # sesame_ai/exceptions.py
 2 | 
 3 | class SesameAIError(Exception):
 4 |     """Base exception for SesameAI API errors"""
 5 |     pass
 6 | 
 7 | 
 8 | class AuthenticationError(SesameAIError):
 9 |     """Raised when authentication fails (invalid tokens, etc.)"""
10 |     pass
11 | 
12 | 
13 | class APIError(SesameAIError):
14 |     """Raised when the API returns an error response"""
15 |     
16 |     def __init__(self, code, message, errors=None):
17 |         """
18 |         Initialize with error details
19 |         
20 |         Args:
21 |             code (int): Error code
22 |             message (str): Error message
23 |             errors (list, optional): Detailed error information
24 |         """
25 |         self.code = code
26 |         self.message = message
27 |         self.errors = errors or []
28 |         super().__init__(f"API Error {code}: {message}")
29 | 
30 | 
31 | class InvalidTokenError(AuthenticationError):
32 |     """Raised when an ID token is invalid or expired"""
33 |     
34 |     def __init__(self):
35 |         super().__init__("Invalid or expired ID token")
36 | 
37 | 
38 | class NetworkError(SesameAIError):
39 |     """Raised when network communication fails"""
40 |     pass


--------------------------------------------------------------------------------
/sesame_ai/models.py:
--------------------------------------------------------------------------------
 1 | # sesame_ai/models.py
 2 | 
 3 | class BaseResponse:
 4 |     """Base class for API responses"""
 5 |     
 6 |     def __init__(self, response_json):
 7 |         """
 8 |         Initialize with raw JSON response
 9 |         
10 |         Args:
11 |             response_json (dict): Raw JSON response from API
12 |         """
13 |         self.raw_response = response_json
14 |     
15 |     def __repr__(self):
16 |         """String representation of the response object"""
17 |         class_name = self.__class__.__name__
18 |         attributes = ', '.join(f"{k}={v}" for k, v in self.__dict__.items() 
19 |                               if k != 'raw_response' and not k.startswith('_'))
20 |         return f"{class_name}({attributes})"
21 | 
22 | 
23 | 
24 | class SignupResponse(BaseResponse):
25 |     """Response from the signup endpoint"""
26 |     
27 |     def __init__(self, response_json):
28 |         """
29 |         Initialize with signup response data
30 |         
31 |         Args:
32 |             response_json (dict): Raw JSON response from API
33 |         """
34 |         super().__init__(response_json)
35 |         self.kind = response_json.get('kind')
36 |         self.id_token = response_json.get('idToken')
37 |         self.refresh_token = response_json.get('refreshToken')
38 |         self.expires_in = response_json.get('expiresIn')
39 |         self.local_id = response_json.get('localId')
40 | 
41 | class RefreshTokenResponse(BaseResponse):
42 |     """Response from the token refresh endpoint"""
43 |     
44 |     def __init__(self, response_json):
45 |         """
46 |         Initialize with refresh token response data
47 |         
48 |         Args:
49 |             response_json (dict): Raw JSON response from API
50 |         """
51 |         super().__init__(response_json)
52 |         self.access_token = response_json.get('access_token')
53 |         self.expires_in = response_json.get('expires_in')
54 |         self.token_type = response_json.get('token_type')
55 |         self.refresh_token = response_json.get('refresh_token')
56 |         self.id_token = response_json.get('id_token')
57 |         self.user_id = response_json.get('user_id')
58 |         self.project_id = response_json.get('project_id')
59 | 
60 | class LookupResponse(BaseResponse):
61 |     """Response from the account lookup endpoint"""
62 |     
63 |     def __init__(self, response_json):
64 |         """
65 |         Initialize with lookup response data
66 |         
67 |         Args:
68 |             response_json (dict): Raw JSON response from API
69 |         """
70 |         super().__init__(response_json)
71 |         self.kind = response_json.get('kind')
72 |         
73 |         # Extract users data if available
74 |         users = response_json.get('users', [])
75 |         if users and len(users) > 0:
76 |             user = users[0]
77 |             self.local_id = user.get('localId')
78 |             self.last_login_at = user.get('lastLoginAt')
79 |             self.created_at = user.get('createdAt')
80 |             self.last_refresh_at = user.get('lastRefreshAt')


--------------------------------------------------------------------------------
/sesame_ai/token_manager.py:
--------------------------------------------------------------------------------
  1 | # sesame_ai/token_manager.py
  2 | 
  3 | import os
  4 | import json
  5 | import time
  6 | import logging
  7 | from .api import SesameAI
  8 | from .exceptions import InvalidTokenError, NetworkError, APIError
  9 | 
 10 | logger = logging.getLogger('sesame.token_manager')
 11 | 
 12 | class TokenManager:
 13 |     """
 14 |     Manages authentication tokens for SesameAI API
 15 |     
 16 |     Handles:
 17 |     - Token storage and retrieval
 18 |     - Token validation
 19 |     - Automatic token refresh
 20 |     """
 21 |     
 22 |     def __init__(self, api_client=None, token_file=None):
 23 |         """
 24 |         Initialize the token manager
 25 |         
 26 |         Args:
 27 |             api_client (SesameAI, optional): API client instance. If None, creates a new one.
 28 |             token_file (str, optional): Path to token storage file.
 29 |         """
 30 |         self.api_client = api_client if api_client else SesameAI()
 31 |         self.token_file = token_file if token_file else None
 32 |         self.tokens = self._load_tokens()
 33 |     
 34 |     def _load_tokens(self):
 35 |         """
 36 |         Load tokens from storage file
 37 |         
 38 |         Returns:
 39 |             dict: Token data or empty dict if file doesn't exist
 40 |         """
 41 |         if self.token_file and os.path.exists(self.token_file):
 42 |             try:
 43 |                 with open(self.token_file, 'r') as f:
 44 |                     logger.debug(f"Loading tokens from {self.token_file}")
 45 |                     return json.load(f)
 46 |             except (json.JSONDecodeError, IOError) as e:
 47 |                 logger.warning(f"Failed to load tokens: {e}")
 48 |                 return {}
 49 |         return {}
 50 |     
 51 |     def _save_tokens(self):
 52 |         """Save tokens to storage file"""
 53 |         try:
 54 |             # If no token file is specified, return early
 55 |             if self.token_file is None:
 56 |                 return
 57 |                 
 58 |             # Make sure the directory exists
 59 |             directory = os.path.dirname(self.token_file)
 60 |             if directory:  # Only try to create directory if there is one
 61 |                 os.makedirs(directory, exist_ok=True)
 62 |             
 63 |             # Write the tokens to the file
 64 |             with open(self.token_file, 'w') as f:
 65 |                 logger.debug(f"Saving tokens to {self.token_file}")
 66 |                 json.dump(self.tokens, f)
 67 |                 logger.debug(f"Tokens successfully saved to {self.token_file}")
 68 |         except Exception as e:
 69 |             logger.warning(f"Could not save tokens: {e}", exc_info=True)
 70 |     
 71 |     def _is_token_expired(self, id_token):
 72 |         """
 73 |         Check if an ID token is expired
 74 |         
 75 |         Args:
 76 |             id_token (str): Firebase ID token
 77 |             
 78 |         Returns:
 79 |             bool: True if token is expired or invalid
 80 |         """
 81 |         try:
 82 |             # Try to look up the token
 83 |             self.api_client.get_account_info(id_token)
 84 |             return False
 85 |         except InvalidTokenError:
 86 |             return True
 87 |         except (NetworkError, APIError) as e:
 88 |             # If lookup fails, raise the error
 89 |             raise e
 90 |     
 91 |     def get_valid_token(self, force_new=False):
 92 |         """
 93 |         Get a valid ID token, refreshing if necessary
 94 |         
 95 |         Args:
 96 |             force_new (bool): If True, creates a new account regardless of existing tokens
 97 |             
 98 |         Returns:
 99 |             str: Valid ID token
100 |             
101 |         Raises:
102 |             InvalidTokenError: If token refresh fails
103 |             NetworkError: If a network error occurs
104 |             APIError: If the API returns an error
105 |         """
106 |         # If force_new is True, create a new account
107 |         if force_new:
108 |             logger.debug("Forcing creation of new account")
109 |             return self._create_new_account()
110 |             
111 |         # Check if we have an existing ID token
112 |         id_token = self.tokens.get('id_token')
113 |         refresh_token = self.tokens.get('refresh_token')
114 |         
115 |         if id_token:
116 |             # Check if the token is still valid
117 |             try:
118 |                 logger.debug("Checking if existing token is valid")
119 |                 if not self._is_token_expired(id_token):
120 |                     logger.info("Using existing valid token")
121 |                     return id_token
122 |             except (NetworkError, APIError) as e:
123 |                 logger.warning(f"Error checking token validity: {e}")
124 |                 # If we can't check, assume it's still valid
125 |                 return id_token
126 |             
127 |             # Token is expired, try to refresh
128 |             if refresh_token:
129 |                 try:
130 |                     logger.info("Refreshing expired token")
131 |                     refresh_response = self.api_client.refresh_authentication_token(refresh_token)
132 |                     
133 |                     # Update tokens
134 |                     self.tokens = {
135 |                         'id_token': refresh_response.id_token,
136 |                         'refresh_token': refresh_response.refresh_token,
137 |                         'user_id': refresh_response.user_id,
138 |                         'expires_in': refresh_response.expires_in,
139 |                         'timestamp': int(time.time())
140 |                     }
141 |                     self._save_tokens()
142 |                     
143 |                     logger.info("Token refreshed successfully")
144 |                     return refresh_response.id_token
145 |                 except (InvalidTokenError, NetworkError, APIError) as e:
146 |                     logger.error(f"Token refresh failed: {e}")
147 |                     raise InvalidTokenError("Token refresh failed")
148 |             else:
149 |                 logger.warning("Token expired and no refresh token available")
150 |                 raise InvalidTokenError("Token expired and no refresh token available")
151 |         else:
152 |             # No existing token, create a new account
153 |             logger.debug("No existing token, creating new account")
154 |             return self._create_new_account()
155 | 
156 |     def _create_new_account(self):
157 |         """
158 |         Create a new anonymous account
159 |         
160 |         Returns:
161 |             str: New ID token
162 |             
163 |         Raises:
164 |             NetworkError: If a network error occurs
165 |             APIError: If the API returns an error
166 |         """
167 |         logger.debug("Creating new anonymous account")
168 |         signup_response = self.api_client.create_anonymous_account()
169 |         
170 |         # Save the new tokens
171 |         self.tokens = {
172 |             'id_token': signup_response.id_token,
173 |             'refresh_token': signup_response.refresh_token,
174 |             'user_id': signup_response.local_id,
175 |             'expires_in': signup_response.expires_in,
176 |             'timestamp': int(time.time())
177 |         }
178 |         self._save_tokens()
179 |         
180 |         logger.debug("New account created successfully")
181 |         return signup_response.id_token
182 | 
183 |     def clear_tokens(self):
184 |         """Clear stored tokens"""
185 |         logger.info("Clearing stored tokens")
186 |         self.tokens = {}
187 |         self._save_tokens()


--------------------------------------------------------------------------------
/sesame_ai/websocket.py:
--------------------------------------------------------------------------------
  1 | # sesame_ai/websocket.py
  2 | 
  3 | import json
  4 | import base64
  5 | import uuid
  6 | import ssl
  7 | import urllib.parse
  8 | import threading
  9 | import queue
 10 | import time
 11 | import logging
 12 | import websocket as websocket_module
 13 | 
 14 | logger = logging.getLogger('sesame.websocket')
 15 | 
 16 | class SesameWebSocket:
 17 |     """
 18 |     WebSocket client for real-time communication with SesameAI
 19 |     """
 20 |     
 21 |     def __init__(self, id_token, character="Miles", client_name="RP-Web"):
 22 |         """
 23 |         Initialize the WebSocket client
 24 |         
 25 |         Args:
 26 |             id_token (str): Firebase ID token for authentication
 27 |             character (str, optional): Character to interact with. Defaults to "Miles".
 28 |             client_name (str, optional): Client identifier. Defaults to "RP-Web".
 29 |         """
 30 |         self.id_token = id_token
 31 |         self.character = character
 32 |         self.client_name = client_name
 33 |         
 34 |         # WebSocket connection
 35 |         self.ws = None
 36 |         self.session_id = None
 37 |         self.call_id = None
 38 |         
 39 |         # Audio settings
 40 |         self.client_sample_rate = 16000
 41 |         self.server_sample_rate = 24000  # Default, will be updated from server
 42 |         self.audio_codec = "none"
 43 |         
 44 |         # Connection state
 45 |         self.reconnect = False
 46 |         self.is_private = False
 47 |         self.user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36"
 48 |         
 49 |         # Audio buffer for received audio
 50 |         self.audio_buffer = queue.Queue(maxsize=1000)
 51 |         
 52 |         # Message tracking
 53 |         self.last_sent_message_type = None
 54 |         self.received_since_last_sent = False
 55 |         self.first_audio_received = False
 56 |         
 57 |         # Event for tracking connection state
 58 |         self.connected_event = threading.Event()
 59 |         
 60 |         # Callbacks
 61 |         self.on_connect_callback = None
 62 |         self.on_disconnect_callback = None
 63 |     
 64 |     def connect(self, blocking=True):
 65 |         """
 66 |         Connect to the SesameAI WebSocket server
 67 |         
 68 |         Args:
 69 |             blocking (bool, optional): If True, blocks until connected. Defaults to True.
 70 |             
 71 |         Returns:
 72 |             bool: True if connection was successful
 73 |         """
 74 |         # Reset connection state
 75 |         self.connected_event.clear()
 76 |         
 77 |         # Start connection in a separate thread
 78 |         connection_thread = threading.Thread(target=self._connect_websocket)
 79 |         connection_thread.daemon = True
 80 |         connection_thread.start()
 81 |         
 82 |         if blocking:
 83 |             # Wait for connection to be established
 84 |             return self.connected_event.wait(timeout=10)
 85 |         
 86 |         return True
 87 |     
 88 |     def _connect_websocket(self):
 89 |         """Internal method to establish WebSocket connection"""
 90 |         headers = {
 91 |             'Origin': 'https://www.sesame.com',
 92 |             'User-Agent': self.user_agent,
 93 |         }
 94 | 
 95 |         params = {
 96 |             'id_token': self.id_token,
 97 |             'client_name': self.client_name,
 98 |             'usercontext': json.dumps({"timezone": "America/Chicago"}),
 99 |             'character': self.character,
100 |         }
101 | 
102 |         # Construct the WebSocket URL with query parameters
103 |         base_url = 'wss://sesameai.app/agent-service-0/v1/connect'
104 |         
105 |         # Convert params to URL query string
106 |         query_string = '&'.join([f"{key}={urllib.parse.quote(value)}" for key, value in params.items()])
107 |         ws_url = f"{base_url}?{query_string}"
108 |         
109 |         # Create WebSocket connection
110 |         self.ws = websocket_module.WebSocketApp(
111 |             ws_url,
112 |             header=headers,
113 |             on_open=self._on_open,
114 |             on_message=self._on_message,
115 |             on_error=self._on_error,
116 |             on_close=self._on_close
117 |         )
118 | 
119 |         # Run the WebSocket
120 |         self.ws.run_forever(
121 |             sslopt={"cert_reqs": ssl.CERT_NONE}, 
122 |             skip_utf8_validation=True,
123 |             suppress_origin=False
124 |         )
125 |     
126 |     def _on_open(self, ws):
127 |         """Callback when WebSocket connection is opened"""
128 |         logger.debug("WebSocket connection opened")
129 |     
130 |     def _on_message(self, ws, message):
131 |         """Callback when a message is received from the WebSocket"""
132 |         try:
133 |             # Parse the message as JSON
134 |             data = json.loads(message)
135 |             
136 |             # Handle different message types
137 |             message_type = data.get('type')
138 |             
139 |             if message_type == 'initialize':
140 |                 self._handle_initialize(data)
141 |             elif message_type == 'call_connect_response':
142 |                 self._handle_call_connect_response(data)
143 |             elif message_type == 'ping_response':
144 |                 self._handle_ping_response(data)
145 |             elif message_type == 'audio':
146 |                 self._handle_audio(data)
147 |             elif message_type == 'call_disconnect_response':
148 |                 self._handle_call_disconnect_response(data)
149 |             else:
150 |                 logger.debug(f"Received message type: {message_type}")
151 |                 
152 |         except json.JSONDecodeError:
153 |             logger.warning(f"Received non-JSON message: {message}")
154 |         except Exception as e:
155 |             logger.error(f"Error handling message: {e}", exc_info=True)
156 |     
157 |     def _on_error(self, ws, error):
158 |             """Callback when a WebSocket error occurs"""
159 |             logger.error(f"WebSocket error: {error}")
160 |             self.connected_event.clear()
161 |     
162 |     def _on_close(self, ws, close_status_code, close_msg):
163 |         """Callback when the WebSocket connection is closed"""
164 |         logger.debug(f"WebSocket closed: {close_status_code} - {close_msg}")
165 |         self.connected_event.clear()
166 |         
167 |         # Call the disconnect callback if set
168 |         if self.on_disconnect_callback:
169 |             self.on_disconnect_callback()
170 |     
171 |     # Message handlers
172 |     def _handle_initialize(self, data):
173 |         """Handle initialize message from server"""
174 |         self.session_id = data.get('session_id')
175 |         logger.debug(f"Session ID: {self.session_id}")
176 | 
177 |         # Send location and call_connect
178 |         self._send_client_location_state()
179 |         self._send_call_connect()
180 |     
181 |     def _handle_call_connect_response(self, data):
182 |         """Handle call_connect_response message from server"""
183 |         self.session_id = data.get('session_id')
184 |         self.call_id = data.get('call_id')
185 |         content = data.get('content', {})
186 |         self.server_sample_rate = content.get('sample_rate', self.server_sample_rate)
187 |         self.audio_codec = content.get('audio_codec', 'none')
188 | 
189 |         logger.debug(f"Connected: Session ID: {self.session_id}, Call ID: {self.call_id}")
190 |         
191 |         # Signal that we're connected
192 |         self.connected_event.set()
193 |         
194 |         # Call the connect callback if set
195 |         if self.on_connect_callback:
196 |             self.on_connect_callback()
197 |     
198 |     
199 |     def _handle_ping_response(self, data):
200 |         """Handle ping_response message from server"""
201 |         pass
202 |     
203 |     def _handle_audio(self, data):
204 |         """Handle audio message from server"""
205 |         audio_data = data.get('content', {}).get('audio_data', '')
206 |         if audio_data:
207 |             try:
208 |                 audio_bytes = base64.b64decode(audio_data)
209 |                 # Use put_nowait to avoid blocking if buffer is full
210 |                 # This prevents audio processing delays
211 |                 try:
212 |                     self.audio_buffer.put_nowait(audio_bytes)
213 |                 except queue.Full:
214 |                     # If buffer is full, discard oldest audio to make room
215 |                     try:
216 |                         self.audio_buffer.get_nowait()
217 |                         self.audio_buffer.put_nowait(audio_bytes)
218 |                     except queue.Empty:
219 |                         pass
220 |                 
221 |                 if not self.first_audio_received:
222 |                     self.first_audio_received = True
223 |                     logger.debug("First audio received, sending initialization chunks")
224 |                     # Send 2 all-A chunks to initialize audio stream
225 |                     chunk_of_As = "A" * 1707 + "="
226 |                     self._send_audio(chunk_of_As)
227 |                     self._send_audio(chunk_of_As)
228 |             except Exception as e:
229 |                 logger.error(f"Error processing audio: {e}", exc_info=True)
230 |     
231 |     def _handle_call_disconnect_response(self, data):
232 |         """Handle call_disconnect_response message from server"""
233 |         logger.debug("Call disconnected")
234 |         self.call_id = None
235 |         
236 |         # Call the disconnect callback if set
237 |         if self.on_disconnect_callback:
238 |             self.on_disconnect_callback()
239 |     
240 |     # Methods to send messages
241 |     def _send_ping(self):
242 |         """Send ping message to server"""
243 |         if not self.session_id:
244 |             return
245 | 
246 |         message = {
247 |             "type": "ping",
248 |             "session_id": self.session_id,
249 |             "call_id": self.call_id,
250 |             "request_id": self._generate_request_id(),
251 |             "content": "ping"
252 |         }
253 | 
254 |         self._send_data(message)
255 |     
256 |     def _send_client_location_state(self):
257 |         """Send client_location_state message to server"""
258 |         if not self.session_id:
259 |             return
260 | 
261 |         message = {
262 |             "type": "client_location_state",
263 |             "session_id": self.session_id,
264 |             "call_id": None,
265 |             "content": {
266 |                 "latitude": 0,
267 |                 "longitude": 0,
268 |                 "address": "",
269 |                 "timezone": "America/Chicago"
270 |             }
271 |         }
272 |         self._send_data(message)
273 |     
274 |     def _send_audio(self, data):
275 |         """
276 |         Send audio data to server
277 |         
278 |         Args:
279 |             data (str): Base64-encoded audio data
280 |         """
281 |         if not self.session_id or not self.call_id:
282 |             return
283 | 
284 |         message = {
285 |             "type": "audio",
286 |             "session_id": self.session_id,
287 |             "call_id": self.call_id,
288 |             "content": {
289 |                 "audio_data": data
290 |             }
291 |         }
292 | 
293 |         self._send_data(message)
294 |     
295 |     def _send_call_connect(self):
296 |         """Send call_connect message to server"""
297 |         if not self.session_id:
298 |             return
299 |             
300 |         message = {
301 |             "type": "call_connect",
302 |             "session_id": self.session_id,
303 |             "call_id": None,
304 |             "request_id": self._generate_request_id(),
305 |             "content": {
306 |                 "sample_rate": self.client_sample_rate,
307 |                 "audio_codec": "none",
308 |                 "reconnect": self.reconnect,
309 |                 "is_private": self.is_private,
310 |                 "client_name": self.client_name,
311 |                 "settings": {
312 |                     "preset": f"{self.character}"
313 |                 },
314 |                 "client_metadata": {
315 |                     "language": "en-US",
316 |                     "user_agent": self.user_agent,
317 |                     "mobile_browser": False,
318 |                     "media_devices": self._get_media_devices()
319 |                 }
320 |             }
321 |         }
322 |         
323 |         self._send_data(message)
324 |     
325 |     def send_audio_data(self, raw_audio_bytes):
326 |         """
327 |         Send raw audio data to the AI
328 |         
329 |         Args:
330 |             raw_audio_bytes (bytes): Raw audio data (16-bit PCM)
331 |             
332 |         Returns:
333 |             bool: True if audio was sent successfully
334 |         """
335 |         if not self.session_id or not self.call_id:
336 |             return False
337 |             
338 |         # Encode the raw audio data in base64
339 |         encoded_data = base64.b64encode(raw_audio_bytes).decode('utf-8')
340 |         self._send_audio(encoded_data)
341 |         return True
342 |     
343 |     def disconnect(self):
344 |         """
345 |         Disconnect from the server
346 |         
347 |         Returns:
348 |             bool: True if disconnect message was sent successfully
349 |         """
350 |         if not self.session_id or not self.call_id:
351 |             logger.warning("Cannot disconnect: Not connected")
352 |             return False
353 |             
354 |         message = {
355 |             "type": "call_disconnect",
356 |             "session_id": self.session_id,
357 |             "call_id": self.call_id,
358 |             "request_id": self._generate_request_id(),
359 |             "content": {
360 |                 "reason": "user_request"
361 |             }
362 |         }
363 |         
364 |         logger.debug("Sending disconnect request")
365 |         self._send_data(message)
366 |         return True
367 |     
368 |     def _send_message(self, message):
369 |         """Send a raw message to the WebSocket"""
370 |         if self.ws and self.ws.sock and self.ws.sock.connected:
371 |             message_str = json.dumps(message)
372 |             self.ws.send(message_str)
373 |             return True
374 |         else:
375 |             logger.warning("WebSocket is not connected")
376 |             return False
377 |     
378 |     def _send_data(self, message):
379 |         """Send data with proper ping handling"""
380 |         try:
381 |             data_type = message.get("type")
382 | 
383 |             # Send pings for non-control messages after connection is established
384 |             if self.call_id is not None and data_type not in ["ping", "call_connect", "call_disconnect"]:
385 |                 if (self.last_sent_message_type is None 
386 |                     or self.received_since_last_sent 
387 |                     or (data_type != self.last_sent_message_type)):
388 |                     self._send_ping()
389 |                     
390 |                 self.last_sent_message_type = data_type
391 |                 self.received_since_last_sent = False
392 | 
393 |             return self._send_message(message)
394 |             
395 |         except Exception as e:
396 |             logger.error(f"Error sending data: {e}", exc_info=True)
397 |             return False
398 |     
399 |     def _generate_request_id(self):
400 |         """Generate a unique request ID"""
401 |         return str(uuid.uuid4())
402 |     
403 |     def _get_media_devices(self):
404 |         """Get a list of media devices for the client metadata"""
405 |         # Simplified version - in a real implementation, this would detect actual devices
406 |         return [
407 |             {
408 |                 "deviceId": "default",
409 |                 "kind": "audioinput",
410 |                 "label": "Default - Microphone",
411 |                 "groupId": "default"
412 |             },
413 |             {
414 |                 "deviceId": "default",
415 |                 "kind": "audiooutput",
416 |                 "label": "Default - Speaker",
417 |                 "groupId": "default"
418 |             }
419 |         ]
420 |     
421 |     def get_next_audio_chunk(self, timeout=None):
422 |         """
423 |         Get the next audio chunk from the buffer
424 |         
425 |         Args:
426 |             timeout (float, optional): Timeout in seconds. None means block indefinitely.
427 |             
428 |         Returns:
429 |             bytes: Audio data, or None if timeout occurred
430 |         """
431 |         try:
432 |             return self.audio_buffer.get(timeout=timeout)
433 |         except queue.Empty:
434 |             return None
435 |     
436 |     def set_connect_callback(self, callback):
437 |         """
438 |         Set callback for connection established events
439 |         
440 |         Args:
441 |             callback (callable): Function with no arguments
442 |         """
443 |         self.on_connect_callback = callback
444 |     
445 |     def set_disconnect_callback(self, callback):
446 |         """
447 |         Set callback for disconnection events
448 |         
449 |         Args:
450 |             callback (callable): Function with no arguments
451 |         """
452 |         self.on_disconnect_callback = callback
453 |     
454 |     def is_connected(self):
455 |         """
456 |         Check if the WebSocket is connected
457 |         
458 |         Returns:
459 |             bool: True if connected
460 |         """
461 |         return self.session_id is not None and self.call_id is not None


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | # setup.py
 2 | 
 3 | from setuptools import setup, find_packages
 4 | 
 5 | with open("README.md", "r", encoding="utf-8") as fh:
 6 |     long_description = fh.read()
 7 | 
 8 | setup(
 9 |     name="sesame_ai",
10 |     version="0.1.0",
11 |     author="ijub",
12 |     author_email="ijubgithub@gmail.com",
13 |     description="Unofficial Python API wrapper for SesameAI",
14 |     long_description=long_description,
15 |     long_description_content_type="text/markdown",
16 |     url="https://github.com/ijub/sesame-ai",
17 |     packages=find_packages(),
18 |     classifiers=[
19 |         "Programming Language :: Python :: 3",
20 |         "License :: OSI Approved :: MIT License",
21 |         "Operating System :: OS Independent",
22 |         "Development Status :: 3 - Alpha",
23 |         "Intended Audience :: Developers",
24 |         "Topic :: Software Development :: Libraries :: Python Modules",
25 |     ],
26 |     python_requires=">=3.6",
27 |     install_requires=[
28 |         "requests>=2.25.0",
29 |         "websocket-client>=1.2.0",
30 |         "numpy>=1.19.0",
31 |         "PyAudio>=0.2.11",
32 |     ],
33 |     extras_require={
34 |         "dev": [
35 |             "pytest>=6.0.0",
36 |             "black>=21.5b2",
37 |             "flake8>=3.9.0",
38 |         ],
39 |     },
40 |     keywords="sesame, ai, voice, api, wrapper, chatbot",
41 | )


--------------------------------------------------------------------------------