├── README.md
└── twilio.py


/README.md:
--------------------------------------------------------------------------------
1 | # Deepgram <-> Twilio live-streaming Python demo
2 | 
3 | This code sample enables you to create an integration between Deepgram and Twilio that allows multiple client subscribers to watch live transcripts from ongoing Twilio calls.
4 | 
5 | For full instructions, follow the accompanying tutorial [Transcribing Twilio Voice Calls in Real-Time with Deepgram](https://deepgram.com/learn/deepgram-twilio-streaming).
6 | 
7 | ![image](https://github.com/deepgram-devs/deepgram-twilio-streaming-python/assets/135150417/bd98f2bc-4e4f-4eba-9fbe-fa5bb4718d57)
8 | 


--------------------------------------------------------------------------------
/twilio.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import base64
  3 | import json
  4 | import sys
  5 | import websockets
  6 | import ssl
  7 | from pydub import AudioSegment
  8 | 
  9 | subscribers = {}
 10 | 
 11 | def deepgram_connect():
 12 | 	extra_headers = {
 13 | 		'Authorization': 'Token INSERT_YOUR_DEEPGRAM_API_KEY'
 14 | 	}
 15 | 	deepgram_ws = websockets.connect('wss://api.deepgram.com/v1/listen?encoding=mulaw&sample_rate=8000&channels=2&multichannel=true', extra_headers = extra_headers)
 16 | 
 17 | 	return deepgram_ws
 18 | 
 19 | async def twilio_handler(twilio_ws):
 20 | 	audio_queue = asyncio.Queue()
 21 | 	callsid_queue = asyncio.Queue()
 22 | 
 23 | 	async with deepgram_connect() as deepgram_ws:
 24 | 
 25 | 		async def deepgram_sender(deepgram_ws):
 26 | 			print('deepgram_sender started')
 27 | 			while True:
 28 | 				chunk = await audio_queue.get()
 29 | 				await deepgram_ws.send(chunk)
 30 | 
 31 | 		async def deepgram_receiver(deepgram_ws):
 32 | 			print('deepgram_receiver started')
 33 | 			# we will wait until the twilio ws connection figures out the callsid
 34 | 			# then we will initialize our subscribers list for this callsid
 35 | 			callsid = await callsid_queue.get()
 36 | 			subscribers[callsid] = []
 37 | 			# for each deepgram result received, forward it on to all
 38 | 			# queues subscribed to the twilio callsid
 39 | 			async for message in deepgram_ws:
 40 | 				for client in subscribers[callsid]:
 41 | 					client.put_nowait(message)
 42 | 
 43 | 			# once the twilio call is over, tell all subscribed clients to close
 44 | 			# and remove the subscriber list for this callsid
 45 | 			for client in subscribers[callsid]:
 46 | 				client.put_nowait('close')
 47 | 
 48 | 			del subscribers[callsid]
 49 | 
 50 | 		async def twilio_receiver(twilio_ws):
 51 | 			print('twilio_receiver started')
 52 | 			# twilio sends audio data as 160 byte messages containing 20ms of audio each
 53 | 			# we will buffer 20 twilio messages corresponding to 0.4 seconds of audio to improve throughput performance
 54 | 			BUFFER_SIZE = 20 * 160
 55 | 			# the algorithm to deal with mixing the two channels is somewhat complex
 56 | 			# here we implement an algorithm which fills in silence for channels if that channel is either
 57 | 			#   A) not currently streaming (e.g. the outbound channel when the inbound channel starts ringing it)
 58 | 			#   B) packets are dropped (this happens, and sometimes the timestamps which come back for subsequent packets are not aligned)
 59 | 			inbuffer = bytearray(b'')
 60 | 			outbuffer = bytearray(b'')
 61 | 			inbound_chunks_started = False
 62 | 			outbound_chunks_started = False
 63 | 			latest_inbound_timestamp = 0
 64 | 			latest_outbound_timestamp = 0
 65 | 			async for message in twilio_ws:
 66 | 				try:
 67 | 					data = json.loads(message)
 68 | 					if data['event'] == 'start':
 69 | 						start = data['start']
 70 | 						callsid = start['callSid']
 71 | 						callsid_queue.put_nowait(callsid)
 72 | 					if data['event'] == 'connected':
 73 | 						continue
 74 | 					if data['event'] == 'media':
 75 | 						media = data['media']
 76 | 						chunk = base64.b64decode(media['payload'])
 77 | 						if media['track'] == 'inbound':
 78 | 							# fills in silence if there have been dropped packets
 79 | 							if inbound_chunks_started:
 80 | 								if latest_inbound_timestamp + 20 < int(media['timestamp']):
 81 | 									bytes_to_fill = 8 * (int(media['timestamp']) - (latest_inbound_timestamp + 20))
 82 | 									# NOTE: 0xff is silence for mulaw audio
 83 | 									# and there are 8 bytes per ms of data for our format (8 bit, 8000 Hz)
 84 | 									inbuffer.extend(b'\xff' * bytes_to_fill)
 85 | 							else:
 86 | 								# make it known that inbound chunks have started arriving
 87 | 								inbound_chunks_started = True
 88 | 								latest_inbound_timestamp = int(media['timestamp'])
 89 | 								# this basically sets the starting point for outbound timestamps
 90 | 								latest_outbound_timestamp = int(media['timestamp']) - 20
 91 | 							latest_inbound_timestamp = int(media['timestamp'])
 92 | 							# extend the inbound audio buffer with data
 93 | 							inbuffer.extend(chunk)
 94 | 						if media['track'] == 'outbound':
 95 | 							# make it known that outbound chunks have started arriving
 96 | 							outbound_chunked_started = True
 97 | 							# fills in silence if there have been dropped packets
 98 | 							if latest_outbound_timestamp + 20 < int(media['timestamp']):
 99 | 								bytes_to_fill = 8 * (int(media['timestamp']) - (latest_outbound_timestamp + 20))
100 | 								# NOTE: 0xff is silence for mulaw audio
101 | 								# and there are 8 bytes per ms of data for our format (8 bit, 8000 Hz)
102 | 								outbuffer.extend(b'\xff' * bytes_to_fill)
103 | 							latest_outbound_timestamp = int(media['timestamp'])
104 | 							# extend the outbound audio buffer with data
105 | 							outbuffer.extend(chunk)
106 | 					if data['event'] == 'stop':
107 | 						break
108 | 
109 | 					# check if our buffer is ready to send to our audio_queue (and, thus, then to deepgram)
110 | 					while len(inbuffer) >= BUFFER_SIZE and len(outbuffer) >= BUFFER_SIZE:
111 | 						asinbound = AudioSegment(inbuffer[:BUFFER_SIZE], sample_width=1, frame_rate=8000, channels=1)
112 | 						asoutbound = AudioSegment(outbuffer[:BUFFER_SIZE], sample_width=1, frame_rate=8000, channels=1)
113 | 						mixed = AudioSegment.from_mono_audiosegments(asinbound, asoutbound)
114 | 
115 | 						# sending to deepgram via the audio_queue
116 | 						audio_queue.put_nowait(mixed.raw_data)
117 | 
118 | 						# clearing buffers
119 | 						inbuffer = inbuffer[BUFFER_SIZE:]
120 | 						outbuffer = outbuffer[BUFFER_SIZE:]
121 | 				except:
122 | 					break
123 | 
124 | 			# the async for loop will end if the ws connection from twilio dies
125 | 			# and if this happens, we should forward an empty byte to deepgram
126 | 			# to signal deepgram to send back remaining messages before closing
127 | 			audio_queue.put_nowait(b'')
128 | 
129 | 		await asyncio.wait([
130 | 			asyncio.ensure_future(deepgram_sender(deepgram_ws)),
131 | 			asyncio.ensure_future(deepgram_receiver(deepgram_ws)),
132 | 			asyncio.ensure_future(twilio_receiver(twilio_ws))
133 | 		])
134 | 
135 | 		await twilio_ws.close()
136 | 
async def client_handler(client_ws):
	"""Serve one transcript subscriber.

	The client is first sent a JSON list of the callsids currently present in
	subscribers. It then replies with the callsid it wants to follow; every
	message queued for that call is forwarded to the client until the call ends
	(the 'close' marker arrives) or sending to the client fails.

	client_ws is the subscriber's websocket connection. It is always closed
	before this coroutine returns.
	"""
	client_queue = asyncio.Queue()

	# first tell the client all active calls
	await client_ws.send(json.dumps(list(subscribers.keys())))

	# then receive from the client which call they would like to subscribe to
	try:
		# you may want to parse a proper json input here
		# instead of grabbing the entire message as the callsid verbatim
		callsid = await client_ws.recv()
		callsid = callsid.strip()
	except Exception:
		await client_ws.close()
		# nothing was subscribed, so there is nothing to wait for
		return

	if callsid not in subscribers:
		await client_ws.close()
		# unknown call: returning here avoids blocking forever on a queue nobody feeds
		return

	# add our client's queue to the subscriber list for that call
	subscribers[callsid].append(client_queue)

	try:
		while True:
			message = await client_queue.get()
			if message == 'close':
				break
			try:
				await client_ws.send(message)
			except Exception:
				# if there was an error, stop serving this client
				break
	finally:
		# remove this client queue; the list may already be gone if the call has ended
		queues = subscribers.get(callsid)
		if queues is not None and client_queue in queues:
			queues.remove(client_queue)
		await client_ws.close()
174 | 
async def router(websocket, path):
	"""Hand an incoming websocket connection to the handler matching its path.

	'/twilio' connections go to twilio_handler and '/client' connections go to
	client_handler; any other path is ignored and the coroutine simply returns.
	"""
	if path == '/twilio':
		print('twilio connection incoming')
		await twilio_handler(websocket)
		return

	if path == '/client':
		print('client connection incoming')
		await client_handler(websocket)
		return
182 | 
def main():
	"""Start the websocket server on localhost:5000 and serve until the process is stopped."""
	# create and install the event loop explicitly: relying on asyncio.get_event_loop()
	# to create one implicitly is deprecated and fails on newer Python versions
	loop = asyncio.new_event_loop()
	asyncio.set_event_loop(loop)

	# use this if using ssl
	# ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
	# ssl_context.load_cert_chain('cert.pem', 'key.pem')
	# server = websockets.serve(router, '0.0.0.0', 443, ssl=ssl_context)

	# use this if not using ssl
	server = websockets.serve(router, 'localhost', 5000)

	# start listening, then keep the loop alive to handle connections
	loop.run_until_complete(server)
	loop.run_forever()
194 | 
if __name__ == '__main__':
	# main() has no return value in normal operation, so fall back to exit status 0
	exit_status = main() or 0
	sys.exit(exit_status)
197 | 


--------------------------------------------------------------------------------