"""Deepgram <-> Twilio live-streaming Python demo.

This code sample enables you to create an integration between Deepgram and
Twilio that allows multiple client subscribers to watch live transcripts from
ongoing Twilio calls.

For full instructions, follow the accompanying tutorial "Transcribing Twilio
Voice Calls in Real-Time with Deepgram":
https://deepgram.com/learn/deepgram-twilio-streaming

Screenshot:
https://github.com/deepgram-devs/deepgram-twilio-streaming-python/assets/135150417/bd98f2bc-4e4f-4eba-9fbe-fa5bb4718d57

The server exposes two websocket paths (see ``router``):

* ``/twilio`` - receives the media stream of a call from Twilio and relays
  the audio to Deepgram.
* ``/client`` - lets a subscriber pick an active call and receive every
  Deepgram message produced for it.
"""

import asyncio
import base64
import json
import sys
import websockets
import ssl
from pydub import AudioSegment

# Maps a twilio callsid to the list of asyncio.Queue objects (one per
# subscribed client) that should receive the deepgram results of that call.
subscribers = {}


def deepgram_connect():
    """Return a (not yet awaited) websocket connection to Deepgram.

    The connection is configured for two-channel 8000 Hz mulaw audio; the
    caller is expected to use the result with ``async with``.
    """
    extra_headers = {
        'Authorization': 'Token INSERT_YOUR_DEEPGRAM_API_KEY'
    }
    deepgram_ws = websockets.connect(
        'wss://api.deepgram.com/v1/listen?encoding=mulaw&sample_rate=8000&channels=2&multichannel=true',
        extra_headers=extra_headers,
    )

    return deepgram_ws


def _silence_padding(latest_timestamp, timestamp):
    """Return mulaw silence covering packets dropped between two frames.

    Each twilio frame carries 20 ms of audio, so the next frame is expected at
    ``latest_timestamp + 20``; any later ``timestamp`` means audio is missing.
    """
    missing_ms = timestamp - (latest_timestamp + 20)
    if missing_ms <= 0:
        return b''
    # NOTE: 0xff is silence for mulaw audio
    # and there are 8 bytes per ms of data for our format (8 bit, 8000 Hz)
    return b'\xff' * (8 * missing_ms)


def _unsubscribe(callsid, client_queue):
    """Remove ``client_queue`` from a call's subscriber list, if still there."""
    clients = subscribers.get(callsid)
    if clients is not None and client_queue in clients:
        clients.remove(client_queue)


async def twilio_handler(twilio_ws):
    """Relay one twilio media stream to deepgram and fan out the results.

    Three cooperating tasks are run until all of them have finished:
    ``twilio_receiver`` buffers and mixes the call audio, ``deepgram_sender``
    forwards it to deepgram, and ``deepgram_receiver`` pushes every deepgram
    message onto the queues of the clients subscribed to the call.

    Args:
        twilio_ws: the websocket connection opened by twilio.
    """
    audio_queue = asyncio.Queue()
    callsid_queue = asyncio.Queue()

    async with deepgram_connect() as deepgram_ws:

        async def deepgram_sender(deepgram_ws):
            print('deepgram_sender started')
            while True:
                chunk = await audio_queue.get()
                await deepgram_ws.send(chunk)
                if not chunk:
                    # the empty chunk is the end-of-stream marker queued by
                    # twilio_receiver; nothing more will arrive, so stop here
                    # instead of waiting on the queue forever
                    break

        async def deepgram_receiver(deepgram_ws):
            print('deepgram_receiver started')
            # we will wait until the twilio ws connection figures out the callsid
            # then we will initialize our subscribers list for this callsid
            callsid = await callsid_queue.get()
            if callsid is None:
                # twilio went away before announcing a call: nothing to relay
                return
            subscribers[callsid] = []

            try:
                # for each deepgram result received, forward it on to all
                # queues subscribed to the twilio callsid
                async for message in deepgram_ws:
                    for client in subscribers[callsid]:
                        client.put_nowait(message)
            finally:
                # once the twilio call is over (or the deepgram connection
                # failed), tell all subscribed clients to close
                # and remove the subscriber list for this callsid
                for client in subscribers.pop(callsid, []):
                    client.put_nowait('close')

        async def twilio_receiver(twilio_ws):
            print('twilio_receiver started')
            # twilio sends audio data as 160 byte messages containing 20ms of audio each
            # we will buffer 20 twilio messages corresponding to 0.4 seconds of audio to improve throughput performance
            BUFFER_SIZE = 20 * 160
            # the algorithm to deal with mixing the two channels is somewhat complex
            # here we implement an algorithm which fills in silence for channels if that channel is either
            #   A) not currently streaming (e.g. the outbound channel when the inbound channel starts ringing it)
            #   B) packets are dropped (this happens, and sometimes the timestamps which come back for subsequent packets are not aligned)
            inbuffer = bytearray(b'')
            outbuffer = bytearray(b'')
            inbound_chunks_started = False
            latest_inbound_timestamp = 0
            latest_outbound_timestamp = 0
            callsid_published = False
            try:
                async for message in twilio_ws:
                    try:
                        data = json.loads(message)
                        event = data['event']
                        if event == 'start':
                            callsid_queue.put_nowait(data['start']['callSid'])
                            callsid_published = True
                        elif event == 'connected':
                            continue
                        elif event == 'media':
                            media = data['media']
                            chunk = base64.b64decode(media['payload'])
                            timestamp = int(media['timestamp'])
                            if media['track'] == 'inbound':
                                if inbound_chunks_started:
                                    # fills in silence if there have been dropped packets
                                    inbuffer.extend(_silence_padding(latest_inbound_timestamp, timestamp))
                                else:
                                    # make it known that inbound chunks have started arriving
                                    inbound_chunks_started = True
                                    # this basically sets the starting point for outbound timestamps
                                    latest_outbound_timestamp = timestamp - 20
                                latest_inbound_timestamp = timestamp
                                # extend the inbound audio buffer with data
                                inbuffer.extend(chunk)
                            if media['track'] == 'outbound':
                                # fills in silence if there have been dropped packets
                                outbuffer.extend(_silence_padding(latest_outbound_timestamp, timestamp))
                                latest_outbound_timestamp = timestamp
                                # extend the outbound audio buffer with data
                                outbuffer.extend(chunk)
                        elif event == 'stop':
                            break

                        # check if our buffer is ready to send to our audio_queue (and, thus, then to deepgram)
                        while len(inbuffer) >= BUFFER_SIZE and len(outbuffer) >= BUFFER_SIZE:
                            asinbound = AudioSegment(inbuffer[:BUFFER_SIZE], sample_width=1, frame_rate=8000, channels=1)
                            asoutbound = AudioSegment(outbuffer[:BUFFER_SIZE], sample_width=1, frame_rate=8000, channels=1)
                            mixed = AudioSegment.from_mono_audiosegments(asinbound, asoutbound)

                            # sending to deepgram via the audio_queue
                            audio_queue.put_nowait(mixed.raw_data)

                            # clearing buffers
                            inbuffer = inbuffer[BUFFER_SIZE:]
                            outbuffer = outbuffer[BUFFER_SIZE:]
                    except Exception as exc:
                        # a malformed message ends the relay (best effort),
                        # but say why instead of failing silently
                        print('twilio_receiver stopping after error:', repr(exc))
                        break
            finally:
                # unblock deepgram_receiver if the call never announced a callsid
                if not callsid_published:
                    callsid_queue.put_nowait(None)
                # the async for loop will end if the ws connection from twilio dies
                # and if this happens, we should forward an empty byte to deepgram
                # to signal deepgram to send back remaining messages before closing
                audio_queue.put_nowait(b'')

        await asyncio.wait([
            asyncio.ensure_future(deepgram_sender(deepgram_ws)),
            asyncio.ensure_future(deepgram_receiver(deepgram_ws)),
            asyncio.ensure_future(twilio_receiver(twilio_ws))
        ])

        await twilio_ws.close()


async def client_handler(client_ws):
    """Stream the deepgram results of one call to a subscribed client.

    The client is first sent a JSON list of the active callsids, then must
    answer with the callsid it wants to follow. The connection is closed
    when the callsid is unknown, when the call ends, or when sending fails.

    Args:
        client_ws: the websocket connection opened by the client.
    """
    client_queue = asyncio.Queue()

    # first tell the client all active calls
    await client_ws.send(json.dumps(list(subscribers.keys())))

    # then receive from the client which call they would like to subscribe to
    try:
        # you may want to parse a proper json input here
        # instead of grabbing the entire message as the callsid verbatim
        callsid = await client_ws.recv()
        callsid = callsid.strip()
    except Exception:
        await client_ws.close()
        return

    if callsid not in subscribers:
        # nothing will ever be queued for an unknown call, so stop here
        await client_ws.close()
        return

    # add our client's queue to the subscriber list for that call
    subscribers[callsid].append(client_queue)

    while True:
        message = await client_queue.get()
        if message == 'close':
            break
        try:
            await client_ws.send(message)
        except Exception:
            # if there was an error, remove this client queue
            # (the call may already be over, hence the tolerant removal)
            _unsubscribe(callsid, client_queue)
            break

    await client_ws.close()


async def router(websocket, path):
    """Dispatch an incoming websocket connection based on its request path."""
    if path == '/client':
        print('client connection incoming')
        await client_handler(websocket)
    elif path == '/twilio':
        print('twilio connection incoming')
        await twilio_handler(websocket)


def main():
    """Start the websocket server on localhost:5000 and serve forever."""
    # use this if using ssl
    # ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
    # ssl_context.load_cert_chain('cert.pem', 'key.pem')
    # server = websockets.serve(router, '0.0.0.0', 443, ssl=ssl_context)

    # use this if not using ssl
    server = websockets.serve(router, 'localhost', 5000)

    asyncio.get_event_loop().run_until_complete(server)
    asyncio.get_event_loop().run_forever()


if __name__ == '__main__':
    sys.exit(main() or 0)