├── assets └── sample_terminal.png ├── requirements.txt ├── .gitignore ├── .sample_env ├── main.py ├── start_uttertype.sh ├── LICENSE ├── utils.py ├── key_listener.py ├── table_interface.py ├── README.md └── transcriber.py /assets/sample_terminal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dhruvyad/uttertype/HEAD/assets/sample_terminal.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | PyAudio 3 | PyAutoGUI 4 | pynput 5 | pyperclip 6 | python-dotenv 7 | rich 8 | webrtcvad 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # macOS 2 | .DS_Store 3 | .AppleDouble 4 | .LSOverride 5 | 6 | # Ignore audio.wav 7 | audio.wav 8 | 9 | # Ignore env variables file 10 | .env 11 | 12 | __pycache__/ 13 | dist 14 | build 15 | main.spec -------------------------------------------------------------------------------- /.sample_env: -------------------------------------------------------------------------------- 1 | # Defaults for OpenAI API: 2 | # OPENAI_API_KEY="sk-" 3 | # OPENAI_BASE_URL="https://api.openai.com/v1" 4 | # OPENAI_MODEL_NAME="whisper-1" 5 | 6 | 7 | # Defaults for local faster_whisper_server: 8 | OPENAI_API_KEY="sk-" 9 | OPENAI_BASE_URL="http://localhost:7000/v1" 10 | 11 | OPENAI_MODEL_NAME="Systran/faster-distil-whisper-large-v3" 12 | # OPENAI_MODEL_NAME="deepdml/faster-whisper-large-v3-turbo-ct2" 13 | 14 | UTTERTYPE_RECORD_HOTKEYS="++v" 15 | # UTTERTYPE_RECORD_HOTKEYS="+" 16 | 17 | # Minimum duration of speech to send to API in case of silence 18 | UTTERTYPE_MIN_TRANSCRIPTION_SIZE_MS=10000 # defaults to: 1500 -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from pynput import keyboard 3 | from transcriber import WhisperAPITranscriber 4 | from table_interface import ConsoleTable 5 | from key_listener import create_keylistener 6 | from dotenv import load_dotenv 7 | from utils import manual_type 8 | 9 | 10 | async def main(): 11 | load_dotenv() 12 | 13 | transcriber = WhisperAPITranscriber.create() 14 | hotkey = create_keylistener(transcriber) 15 | 16 | keyboard.Listener(on_press=hotkey.press, on_release=hotkey.release).start() 17 | console_table = ConsoleTable() 18 | with console_table: 19 | async for transcription, audio_duration_ms in transcriber.get_transcriptions(): 20 | manual_type(transcription.strip()) 21 | console_table.insert( 22 | transcription, 23 | round(0.0001 * audio_duration_ms / 1000, 6), 24 | ) 25 | 26 | 27 | if __name__ == "__main__": 28 | asyncio.run(main()) 29 | -------------------------------------------------------------------------------- /start_uttertype.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Get directory of the script 4 | SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 5 | 6 | # Check if tmux is installed 7 | if ! command -v tmux &> /dev/null; then 8 | echo "tmux is not installed. Please install it first." 9 | exit 1 10 | fi 11 | 12 | # Check if pipenv is installed and create virtual environment if needed 13 | if command -v pipenv &> /dev/null; then 14 | cd "$SCRIPT_DIR" 15 | # Create/update virtual environment if needed 16 | pipenv install --quiet 17 | # Get the path to the virtual environment's Python 18 | VENV_PYTHON=$(pipenv --py) 19 | else 20 | echo "pipenv is not installed. Using system Python." 21 | VENV_PYTHON=$(which python) 22 | fi 23 | 24 | # Create new tmux session if it doesn't exist 25 | if ! tmux has-session -t uttertype 2>/dev/null; then 26 | tmux new-session -s uttertype -d 27 | tmux send-keys -t uttertype "cd '$SCRIPT_DIR' && '$VENV_PYTHON' main.py" C-m 28 | fi -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Dhruv Yadav 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import pyperclip 4 | import pyautogui 5 | from typing import List 6 | from pynput import keyboard 7 | 8 | keyboard_writer = keyboard.Controller() 9 | 10 | 11 | def clipboard_type(text): 12 | """ 13 | Instead of typing each key, just copy to clipboard and paste 14 | Probably won't work for some fields that don't accept pasting 15 | """ 16 | original_clipboard_content = pyperclip.paste() 17 | pyperclip.copy(text) 18 | print("Pasting:", text) 19 | pyautogui.hotkey("command" if sys.platform == "darwin" else "ctrl", "v") 20 | pyperclip.copy(original_clipboard_content) 21 | 22 | 23 | def manual_type(text: str, delay: float = 0.0042): 24 | """ 25 | Type each key manually with delay to prevent overwhelming the target 26 | Copied from keyboard.Controller.type() to add delay 27 | """ 28 | for i, character in enumerate(text): 29 | key = keyboard._CONTROL_CODES.get(character, character) 30 | try: 31 | keyboard_writer.press(key) 32 | keyboard_writer.release(key) 33 | time.sleep(delay) 34 | except (ValueError, keyboard_writer.InvalidKeyException): 35 | raise keyboard_writer.InvalidCharacterException(i, character) 36 | 37 | 38 | def transcription_concat(transcriptions: List[str]) -> str: 39 | return " ".join([_t.strip() for _t in transcriptions]) # Simple concat for now 40 | -------------------------------------------------------------------------------- /key_listener.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from pynput.keyboard import HotKey 4 | 5 | 6 | class HoldHotKey(HotKey): 7 | def __init__(self, keys, on_activate, on_deactivate): 8 | self.active = False 9 | 10 | def _mod_on_activate(): 11 | self.active = True 12 | on_activate() 13 | 14 | def _mod_on_deactivate(): 15 | self.active = False 16 | on_deactivate() 17 | 18 | super().__init__(keys, _mod_on_activate) 19 | self._on_deactivate = _mod_on_deactivate 20 | 21 | def release(self, key): 22 | super().release(key) 23 | if self.active and self._state != self._keys: 24 | self._on_deactivate() 25 | 26 | 27 | class HoldGlobeKey: 28 | """ 29 | For macOS only, globe key requires special handling 30 | """ 31 | 32 | def __init__(self, on_activate, on_deactivate): 33 | self.held = False 34 | self._on_activate = on_activate 35 | self._on_deactivate = on_deactivate 36 | 37 | def press(self, key): 38 | if hasattr(key, "vk") and key.vk == 63: 39 | if self.held: # hold ended 40 | self._on_deactivate() 41 | else: # hold started 42 | self._on_activate() 43 | self.held = not self.held 44 | 45 | def release(self, key): 46 | """Press and release signals are mixed for globe key""" 47 | self.press(key) 48 | 49 | 50 | def create_keylistener(transcriber, env_var="UTTERTYPE_RECORD_HOTKEYS"): 51 | key_code = os.getenv(env_var, "") 52 | 53 | if (sys.platform == "darwin") and (key_code in ["", ""]): 54 | return HoldGlobeKey( 55 | on_activate=transcriber.start_recording, 56 | on_deactivate=transcriber.stop_recording, 57 | ) 58 | 59 | key_code = key_code if key_code else "++v" 60 | 61 | return HoldHotKey( 62 | HoldHotKey.parse(key_code), 63 | on_activate=transcriber.start_recording, 64 | on_deactivate=transcriber.stop_recording, 65 | ) 66 | -------------------------------------------------------------------------------- /table_interface.py: -------------------------------------------------------------------------------- 1 | from rich import box 2 | from rich.align import Align 3 | from rich.console import Console 4 | from rich.live import Live 5 | from rich.table import Table 6 | from rich.text import Text 7 | from datetime import datetime 8 | 9 | 10 | class ConsoleTable: 11 | def __init__(self, total_cost_decimals: int = 6): 12 | self.console = Console() 13 | self.table = Table(show_footer=False) 14 | self.total_cost = 0 15 | self.total_cost_decimals = total_cost_decimals 16 | 17 | def _update_cost(self, cost: float): 18 | self.total_cost += cost 19 | self.table.columns[2].footer = ( 20 | f"${round(self.total_cost, self.total_cost_decimals)}" 21 | ) 22 | 23 | def _setup_table(self): 24 | self.centered_table = Align.center(self.table) 25 | self.console.clear() 26 | self.table.add_column("Date", no_wrap=True) 27 | self.table.add_column( 28 | "Transcription", Text.from_markup("[b]Total:", justify="right") 29 | ) 30 | self.table.add_column( 31 | "Cost", Text.from_markup("[b]$0", justify="right"), no_wrap=True 32 | ) 33 | self.table.show_footer = True 34 | 35 | self.table.columns[0].header_style = "bold green" 36 | self.table.columns[0].style = "green" 37 | self.table.columns[1].header_style = "bold blue" 38 | self.table.columns[1].style = "blue" 39 | self.table.columns[1].footer = "Total" 40 | self.table.columns[2].header_style = "bold cyan" 41 | self.table.columns[2].style = "cyan" 42 | self.table.row_styles = ["none", "dim"] 43 | self.table.box = box.SIMPLE_HEAD 44 | 45 | def __enter__(self): 46 | self._setup_table() 47 | self.live_rendering = Live( 48 | self.centered_table, 49 | console=self.console, 50 | screen=False, 51 | refresh_per_second=5, 52 | vertical_overflow="visible", 53 | ) 54 | self.live_rendering.__enter__() 55 | 56 | def __exit__(self, *args, **kwargs): 57 | self.live_rendering.__exit__(*args, **kwargs) 58 | 59 | def insert(self, transcription: str, cost: float): 60 | current_datetime = datetime.now() 61 | formatted_datetime = current_datetime.strftime("%dth %B, %I:%M%p") 62 | self.table.add_row(formatted_datetime, transcription, f"${cost}") 63 | self._update_cost(cost) 64 | # Text("API Error", style="bold red") 65 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # uttertype ([demo](https://www.youtube.com/watch?v=eSDYIFzU_fY)) 2 | 3 | alt text 4 | 5 | ## Setup 6 | 7 | ### 1. [Install PortAudio/PyAudio](https://people.csail.mit.edu/hubert/pyaudio/) 8 | #### macOS 9 | Installing portaudio on macOS can be somewhat tricky, especially on M1+ chips (Apple Silicon). In general, using conda seems to be the safest way to install portaudio 10 | ``` 11 | conda install portaudio 12 | ``` 13 | If that doesn't work, try installing using Homebrew 14 | ```sh 15 | brew install portaudio 16 | ``` 17 | 18 | #### Windows 19 | ``` 20 | python -m pip install pyaudio 21 | ``` 22 | #### Linux 23 | ``` 24 | sudo apt-get install python3-pyaudio 25 | ``` 26 | ### 2. Add a HotKey 27 | For macOS, the hotkey is automatically set to the globe key by default (🌐 bottom left key). For Windows and Linux, you can configure the hotkey by setting the `UTTERTYPE_RECORD_HOTKEYS` environment variable in `.env`: 28 | ```env 29 | UTTERTYPE_RECORD_HOTKEYS="++v" 30 | ``` 31 | 32 | For more context, view the [pynput documentation for using HotKeys](https://pynput.readthedocs.io/en/latest/keyboard.html#global-hotkeys) (HoldHotKey is extended from this class). 33 | 34 | ### 3. Install Dependencies 35 | Choose one of the following methods to install the required dependencies: 36 | 37 | #### Option A: Using pip 38 | ```shell 39 | python -m pip install -r requirements.txt 40 | ``` 41 | 42 | #### Option B: Using pipenv 43 | First, install pipenv if you haven't already: 44 | ```shell 45 | pip install pipenv 46 | ``` 47 | 48 | Then, install dependencies using pipenv: 49 | ```shell 50 | pipenv install 51 | ``` 52 | 53 | This will create a virtual environment and install all dependencies from the Pipfile. To activate the environment: 54 | ```shell 55 | pipenv shell 56 | ``` 57 | 58 | 59 | If during/after installation on Linux you see error similar to: 60 | ``` 61 | ImportError: /home/soul/anaconda3/lib/libstdc++.so.6: version `GLIBCXX_3.4.32' not found (required by /lib/x86_64-linux-gnu/libjack.so.0) 62 | ``` 63 | Check out [StackOverflow](https://stackoverflow.com/questions/72540359/glibcxx-3-4-30-not-found-for-librosa-in-conda-virtual-environment-after-tryin) and [Berkley](https://bcourses.berkeley.edu/courses/1478831/pages/glibcxx-missing) 64 | 65 | 66 | ### 4. Configure OpenAI Settings 67 | 68 | You can configure uttertype to work with either OpenAI's official API or a local Whisper server. There are two ways to set this up: 69 | 70 | #### Option A: Using a .env file (Recommended) 71 | Create a `.env` file in the project directory with these settings: 72 | 73 | ```env 74 | # 1. Required: Your API key 75 | OPENAI_API_KEY="sk-your-key-here" 76 | 77 | # 2. Optional: Choose your API endpoint 78 | # For OpenAI's official API (default): 79 | OPENAI_BASE_URL="https://api.openai.com/v1" 80 | # OR for a local [Faster Whisper server](https://github.com/fedirz/faster-whisper-server): 81 | OPENAI_BASE_URL="http://localhost:7000/v1" 82 | 83 | # 3. Optional: Select your preferred model 84 | # For OpenAI's official API: 85 | OPENAI_MODEL_NAME="whisper-1" 86 | # OR for local Whisper server, some options include: 87 | OPENAI_MODEL_NAME="Systran/faster-whisper-small" 88 | OPENAI_MODEL_NAME="Systran/faster-distil-whisper-large-v3" 89 | OPENAI_MODEL_NAME="deepdml/faster-whisper-large-v3-turbo-ct2" 90 | ``` 91 | 92 | #### Option B: Using Environment Variables 93 | You can also set these values directly in your terminal: 94 | 95 | For Linux/macOS: 96 | ```shell 97 | export OPENAI_API_KEY="sk-your-key-here" 98 | export OPENAI_BASE_URL="https://api.openai.com/v1" # optional 99 | export OPENAI_MODEL_NAME="whisper-1" # optional 100 | ``` 101 | 102 | For Windows: 103 | ```shell 104 | $env:OPENAI_API_KEY = "sk-your-key-here" 105 | $env:OPENAI_BASE_URL = "https://api.openai.com/v1" # optional 106 | $env:OPENAI_MODEL_NAME = "whisper-1" # optional 107 | ``` 108 | 109 | See [`.sample_env`](.sample_env) in the repository for example configurations. 110 | 111 | #### Using a Local Whisper Server 112 | For faster and cheaper transcription, you can set up a local [faster-whisper-server](https://github.com/fedirz/faster-whisper-server). When using a local server: 113 | 114 | 1. Set `OPENAI_BASE_URL` to your server's address (e.g., `http://localhost:7000/v1`) 115 | 2. Choose from supported local models like: 116 | - `Systran/faster-whisper-small` (fastest) 117 | - `Systran/faster-distil-whisper-large-v3` (most accurate) 118 | - `deepdml/faster-whisper-large-v3-turbo-ct2` (almost as good, but faster) 119 | 120 | ### 5. Final run and permissions 121 | Finally, run main.py 122 | ```shell 123 | python main.py 124 | ``` 125 | OR 126 | ```shell 127 | ./start_uttertype.sh # installed and configured pipenv environment would be needed 128 | ``` 129 | 130 | When the program first runs, you will likely need to give it sufficient permissions. On macOS, this will include adding terminal to accessibility under `Privacy and Security > Accessibility`, giving it permission to monitor the keyboard, and finally giving it permission to record using the microphone. 131 | 132 | ## Usage 133 | To start transcription, press and hold the registered hotkey to start recording. To stop the recording, lift your registered hotkey. On macOS, the registered hotkey is the globe icon by default. For other operating systems, this will have to by manually configured in `main.py` as described earlier. 134 | -------------------------------------------------------------------------------- /transcriber.py: -------------------------------------------------------------------------------- 1 | import os 2 | import io 3 | from typing import List, Tuple 4 | import pyaudio 5 | import wave 6 | from openai import OpenAI 7 | import asyncio 8 | from threading import Thread, Event 9 | import webrtcvad 10 | from utils import transcription_concat 11 | import tempfile 12 | 13 | FORMAT = pyaudio.paInt16 # Audio format 14 | CHANNELS = 1 # Mono audio 15 | RATE = 16000 # Sample rate 16 | CHUNK_DURATION_MS = 30 # Frame duration in milliseconds 17 | CHUNK = int(RATE * CHUNK_DURATION_MS / 1000) 18 | MIN_TRANSCRIPTION_SIZE_MS = int( 19 | os.getenv('UTTERTYPE_MIN_TRANSCRIPTION_SIZE_MS', 1500) # Minimum duration of speech to send to API in case of silence 20 | ) 21 | 22 | 23 | class AudioTranscriber: 24 | def __init__(self): 25 | self.audio = pyaudio.PyAudio() 26 | self.recording_finished = Event() # Threading event to end recording 27 | self.recording_finished.set() # Initialize as finished 28 | self.frames = [] 29 | self.audio_duration = 0 30 | self.rolling_transcriptions: List[Tuple[int, str]] = [] # (idx, transcription) 31 | self.rolling_requests: List[Thread] = [] # list of pending requests 32 | self.event_loop = asyncio.get_event_loop() 33 | self.vad = webrtcvad.Vad(1) # Voice Activity Detector, mode can be 0 to 3 34 | self.transcriptions = asyncio.Queue() 35 | 36 | def start_recording(self): 37 | """Start recording audio from the microphone.""" 38 | 39 | # Start a new recording in the background, do not block 40 | def _record(): 41 | self.recording_finished = Event() 42 | stream = self.audio.open( 43 | format=FORMAT, 44 | channels=CHANNELS, 45 | rate=RATE, 46 | input=True, 47 | frames_per_buffer=CHUNK, 48 | ) 49 | intermediate_trancriptions_idx = 0 50 | while ( 51 | not self.recording_finished.is_set() 52 | ): # Keep recording until interrupted 53 | data = stream.read(CHUNK) 54 | self.audio_duration += CHUNK_DURATION_MS 55 | is_speech = self.vad.is_speech(data, RATE) 56 | current_audio_duration = len(self.frames) * CHUNK_DURATION_MS 57 | if ( 58 | not is_speech 59 | and current_audio_duration >= MIN_TRANSCRIPTION_SIZE_MS 60 | ): # silence 61 | rolling_request = Thread( 62 | target=self._intermediate_transcription, 63 | args=( 64 | intermediate_trancriptions_idx, 65 | self._frames_to_wav(), 66 | ), 67 | ) 68 | self.frames = [] 69 | self.rolling_requests.append(rolling_request) 70 | rolling_request.start() 71 | intermediate_trancriptions_idx += 1 72 | self.frames.append(data) 73 | 74 | # start recording in a new non-blocking thread 75 | Thread(target=_record).start() 76 | 77 | def stop_recording(self): 78 | """Stop the recording and reset variables""" 79 | self.recording_finished.set() 80 | self._finish_transcription() 81 | self.frames = [] 82 | self.audio_duration = 0 83 | self.rolling_requests = [] 84 | self.rolling_transcriptions = [] 85 | 86 | def _intermediate_transcription(self, idx, audio): 87 | intermediate_transcription = self.transcribe_audio(audio) 88 | self.rolling_transcriptions.append((idx, intermediate_transcription)) 89 | 90 | def _finish_transcription(self): 91 | transcription = self.transcribe_audio( 92 | self._frames_to_wav() 93 | ) # Last transcription 94 | for request in self.rolling_requests: # Wait for rolling requests 95 | request.join() 96 | self.rolling_transcriptions.append( 97 | (len(self.rolling_transcriptions), transcription) 98 | ) 99 | sorted(self.rolling_transcriptions, key=lambda x: x[0]) # Sort by idx 100 | transcriptions = [ 101 | t[1] for t in self.rolling_transcriptions 102 | ] # Get ordered transcriptions 103 | self.event_loop.call_soon_threadsafe( # Put final combined result in finished queue 104 | self.transcriptions.put_nowait, 105 | (transcription_concat(transcriptions), self.audio_duration), 106 | ) 107 | 108 | def _frames_to_wav(self): 109 | buffer = io.BytesIO() 110 | buffer.name = "tmp.wav" 111 | wf = wave.open(buffer, "wb") 112 | wf.setnchannels(CHANNELS) 113 | wf.setsampwidth(self.audio.get_sample_size(FORMAT)) 114 | wf.setframerate(RATE) 115 | wf.writeframes(b"".join(self.frames)) 116 | wf.close() 117 | return buffer 118 | 119 | def transcribe_audio(self, audio: io.BytesIO) -> str: 120 | raise NotImplementedError("Please use a subclass of AudioTranscriber") 121 | 122 | async def get_transcriptions(self): 123 | """ 124 | Asynchronously get transcriptions from the queue. 125 | Returns (transcription string, audio duration in ms). 126 | """ 127 | while True: 128 | transcription = await self.transcriptions.get() 129 | yield transcription 130 | self.transcriptions.task_done() 131 | 132 | 133 | class WhisperAPITranscriber(AudioTranscriber): 134 | def __init__(self, base_url, model_name, *args, **kwargs): 135 | super().__init__(*args, **kwargs) 136 | 137 | self.model_name = model_name 138 | self.client = OpenAI(base_url=base_url) 139 | 140 | @staticmethod 141 | def create(*args, **kwargs): 142 | base_url = os.getenv('OPENAI_BASE_URL', 'https://api.openai.com/v1') 143 | model_name = os.getenv('OPENAI_MODEL_NAME', 'whisper-1') 144 | 145 | return WhisperAPITranscriber(base_url, model_name) 146 | 147 | def transcribe_audio(self, audio: io.BytesIO) -> str: 148 | try: 149 | transcription = self.client.audio.transcriptions.create( 150 | model=self.model_name, 151 | file=audio, 152 | response_format="text", 153 | language="en", 154 | prompt="The following is normal speech or technical speech from an engineer.", 155 | ) 156 | return transcription 157 | except Exception as e: 158 | print(f"Encountered Error: {e}") 159 | return "" 160 | 161 | 162 | class WhisperLocalMLXTranscriber(AudioTranscriber): 163 | def __init__(self, model_type="distil-medium.en", *args, **kwargs): 164 | super().__init__(*args, **kwargs) 165 | from lightning_whisper_mlx import LightningWhisperMLX 166 | 167 | self.model = LightningWhisperMLX(model_type) 168 | 169 | def transcribe_audio(self, audio: io.BytesIO) -> str: 170 | try: 171 | with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile: 172 | tmpfile.write(audio.getvalue()) 173 | transcription = self.model.transcribe(tmpfile.name)["text"] 174 | os.unlink(tmpfile.name) 175 | return transcription 176 | except Exception as e: 177 | print(f"Encountered Error: {e}") 178 | return "" 179 | --------------------------------------------------------------------------------