├── .env.example ├── .gitignore ├── README.md ├── main.py ├── requirements.txt └── twilio_transcriber.py /.env.example: -------------------------------------------------------------------------------- 1 | NGROK_AUTHTOKEN=your-token-here 2 | TWILIO_ACCOUNT_SID=your-sid-here 3 | TWILIO_API_KEY_SID=your-sid-here 4 | TWILIO_API_SECRET=your-secretkey-here 5 | ASSEMBLYAI_API_KEY=your-key-here 6 | TWILIO_NUMBER=your-number-here -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | venv 3 | __pycache__ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Environment and Credentials Setup 2 | 3 | 1. Create a virtual environment 4 | 5 | ```shell 6 | # Mac/Linux 7 | python3 -m venv venv 8 | . venv/bin/activate 9 | 10 | # Windows 11 | python -m venv venv 12 | .\venv\Scripts\activate.bat 13 | ``` 14 | 15 | 2. Install required packages 16 | 17 | ```shell 18 | pip install -r requirements.txt 19 | ``` 20 | 21 | 3. Change the filename of `.env.example` to `.env` and replace `your-key-here` with your corresponding API key, authtoken, etc. for each line. **Make sure to not share this file with anyone or upload it to GitHub**. You will need: 22 | 1. An [AssemblyAI API Key](https://www.assemblyai.com/dashboard/signup) with funds added to access realtime transcription 23 | 2. A [Twilio account](https://www.twilio.com/) for your account SID as well as an API Key SID and secret 24 | 3. A [Twilio number](https://www.twilio.com/docs/phone-numbers). The value for `TWILIO_NUMBER` should be formatted as a sequence of digits with country code e.g. `+12345678910`. 25 | 4. 
An [ngrok](https://ngrok.com/) account authtoken 26 | 27 | 28 | # Run the application 29 | 30 | Execute `python main.py` or `python3 main.py` in the project directory to start the application. Then, call your Twilio phone number and begin speaking. You will see your speech transcribed in the console. -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import json 3 | import os 4 | 5 | from flask import Flask, request, Response 6 | from flask_sock import Sock 7 | import ngrok 8 | from twilio.rest import Client 9 | from dotenv import load_dotenv 10 | load_dotenv() 11 | 12 | from twilio_transcriber import TwilioTranscriber 13 | 14 | # Flask settings 15 | PORT = 5000 16 | DEBUG = False 17 | INCOMING_CALL_ROUTE = '/' 18 | WEBSOCKET_ROUTE = '/realtime' 19 | 20 | # Twilio authentication 21 | account_sid = os.environ['TWILIO_ACCOUNT_SID'] 22 | api_key = os.environ['TWILIO_API_KEY_SID'] 23 | api_secret = os.environ['TWILIO_API_SECRET'] 24 | client = Client(api_key, api_secret, account_sid) 25 | 26 | # Twilio phone number to call 27 | TWILIO_NUMBER = os.environ['TWILIO_NUMBER'] 28 | 29 | # ngrok authentication 30 | ngrok.set_auth_token(os.getenv("NGROK_AUTHTOKEN")) 31 | app = Flask(__name__) 32 | sock = Sock(app) 33 | 34 | @app.route(INCOMING_CALL_ROUTE, methods=['GET', 'POST']) 35 | def receive_call(): 36 | if request.method == 'POST': 37 | xml = f""" 38 | 39 | 40 | Speak to see your speech transcribed in the console 41 | 42 | 43 | 44 | 45 | 46 | """.strip() 47 | return Response(xml, mimetype='text/xml') 48 | else: 49 | return f"Real-time phone call transcription app" 50 | 51 | @sock.route(WEBSOCKET_ROUTE) 52 | def transcription_websocket(ws): 53 | while True: 54 | data = json.loads(ws.receive()) 55 | match data['event']: 56 | case "connected": 57 | transcriber = TwilioTranscriber() 58 | transcriber.connect() 59 | print('transcriber 
connected') 60 | case "start": 61 | print('twilio started') 62 | case "media": 63 | payload_b64 = data['media']['payload'] 64 | payload_mulaw = base64.b64decode(payload_b64) 65 | transcriber.stream(payload_mulaw) 66 | case "stop": 67 | print('twilio stopped') 68 | transcriber.close() 69 | print('transcriber closed') 70 | 71 | 72 | if __name__ == "__main__": 73 | try: 74 | # Open Ngrok tunnel 75 | listener = ngrok.forward(f"http://localhost:{PORT}") 76 | print(f"Ngrok tunnel opened at {listener.url()} for port {PORT}") 77 | NGROK_URL = listener.url() 78 | 79 | # Set ngrok URL to ne the webhook for the appropriate Twilio number 80 | twilio_numbers = client.incoming_phone_numbers.list() 81 | twilio_number_sid = [num.sid for num in twilio_numbers if num.phone_number == TWILIO_NUMBER][0] 82 | client.incoming_phone_numbers(twilio_number_sid).update(account_sid, voice_url=f"{NGROK_URL}{INCOMING_CALL_ROUTE}") 83 | 84 | # run the app 85 | app.run(port=PORT, debug=DEBUG) 86 | finally: 87 | # Always disconnect the ngrok tunnel 88 | ngrok.disconnect() 89 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | assemblyai>=0.21.0 2 | Flask>=3.0.2 3 | flask-sock>=0.7.0 4 | ngrok>=1.0.0 5 | python-dotenv>=1.0.1 6 | twilio>=8.13.0 -------------------------------------------------------------------------------- /twilio_transcriber.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import assemblyai as aai 4 | from dotenv import load_dotenv 5 | load_dotenv() 6 | 7 | aai.settings.api_key = os.getenv('ASSEMBLYAI_API_KEY') 8 | 9 | TWILIO_SAMPLE_RATE = 8000 # Hz 10 | 11 | 12 | def on_open(session_opened: aai.RealtimeSessionOpened): 13 | "Called when the connection has been established." 
14 | print("Session ID:", session_opened.session_id) 15 | 16 | 17 | def on_data(transcript: aai.RealtimeTranscript): 18 | "Called when a new transcript has been received." 19 | if not transcript.text: 20 | return 21 | 22 | if isinstance(transcript, aai.RealtimeFinalTranscript): 23 | print(transcript.text, end="\r\n") 24 | else: 25 | print(transcript.text, end="\r") 26 | 27 | 28 | def on_error(error: aai.RealtimeError): 29 | "Called when the connection has been closed." 30 | print("An error occured:", error) 31 | 32 | 33 | def on_close(): 34 | "Called when the connection has been closed." 35 | print("Closing Session") 36 | 37 | 38 | class TwilioTranscriber(aai.RealtimeTranscriber): 39 | def __init__(self): 40 | super().__init__( 41 | on_data=on_data, 42 | on_error=on_error, 43 | on_open=on_open, # optional 44 | on_close=on_close, # optional 45 | sample_rate=TWILIO_SAMPLE_RATE, 46 | encoding=aai.AudioEncoding.pcm_mulaw 47 | ) 48 | --------------------------------------------------------------------------------