├── LICENSE ├── README.md └── app.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Smitha Kolan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AssemblyAI-AI-Voice-Bot 2 | 3 | 🔑 **[Sign up for AssemblyAI's API](https://www.assemblyai.com/?utm_source=github&utm_medium=referral&utm_campaign=smitha)** to get a free API key and $50 in credits. 4 | 5 | Build your very own AI-powered dental assistant! This project walks you through creating a real-time AI voice bot using Python, AssemblyAI, and ElevenLabs. The bot is designed to handle incoming calls, transcribe speech, generate intelligent responses, and provide a human-like conversational experience. 
Ideal for call centers, customer support, and virtual receptionist applications. 6 | 7 | ## Features: 8 | - **Real-Time Transcription:** Leveraging AssemblyAI's Speech-to-Text API for accurate real-time transcription. 9 | - **Natural Language Processing:** Using OpenAI's language models to generate context-aware responses. 10 | - **AI Voice Synthesis:** Implementing ElevenLabs' voice synthesis to convert text responses into natural-sounding audio. 11 | 12 | ## How to Use: 13 | Follow the step-by-step guide in `app.py` (Steps 1–4, starting with the install commands in its header docstring), or the video walkthrough linked below, to set up your environment, integrate the APIs, and run the AI voice assistant. 14 | 15 | [![Watch the video walkthrough on YouTube](https://img.youtube.com/vi/Nyo5m_glZXs/0.jpg)](https://www.youtube.com/watch?v=Nyo5m_glZXs) 16 | 17 | 18 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | ''' 2 | +-------------------+ +-----------------------+ +------------------+ +------------------------+ 3 | | Step 1: Install | | Step 2: Real-Time | | Step 3: Pass | | Step 4: Live Audio | 4 | | Python Libraries| | Transcription with | | Real-Time | | Stream from ElevenLabs| 5 | +-------------------+ | AssemblyAI | | Transcript to | | | 6 | | | +-----------------------+ | OpenAI | +------------------------+ 7 | | - assemblyai | | +------------------+ | 8 | | - openai | | | | 9 | | - elevenlabs | v v v 10 | | - mpv | +-----------------------+ +------------------+ +------------------------+ 11 | | - portaudio | | | | | | | 12 | +-------------------+ | AssemblyAI performs |--------> OpenAI generates|--------> ElevenLabs streams | 13 | | real-time speech-to- | | response based | | response as live | 14 | | text transcription | | on transcription| | audio to the user | 15 | | | | | | | 16 | +-----------------------+ +------------------+ +------------------------+ 17 | 18 | ###### Step 1: Install Python libraries ###### 19 | 20 | brew install portaudio 21 | pip
install "assemblyai[extras]" 22 | pip install elevenlabs==0.3.0b0 23 | brew install mpv 24 | pip install --upgrade openai 25 | ''' 26 | 27 | import assemblyai as aai 28 | from elevenlabs import generate, stream 29 | from openai import OpenAI 30 | 31 | class AI_Assistant: 32 | def __init__(self): 33 | aai.settings.api_key = "ASSEMBLYAI-API-KEY" 34 | self.openai_client = OpenAI(api_key = "OPENAI-API-KEY") 35 | self.elevenlabs_api_key = "ELEVENLABS-API-KEY" 36 | 37 | self.transcriber = None 38 | 39 | # Prompt 40 | self.full_transcript = [ 41 | {"role":"system", "content":"You are a receptionist at a dental clinic. Be resourceful and efficient."}, 42 | ] 43 | 44 | ###### Step 2: Real-Time Transcription with AssemblyAI ###### 45 | 46 | def start_transcription(self): 47 | self.transcriber = aai.RealtimeTranscriber( 48 | sample_rate = 16000, 49 | on_data = self.on_data, 50 | on_error = self.on_error, 51 | on_open = self.on_open, 52 | on_close = self.on_close, 53 | end_utterance_silence_threshold = 1000 54 | ) 55 | 56 | self.transcriber.connect() 57 | microphone_stream = aai.extras.MicrophoneStream(sample_rate =16000) 58 | self.transcriber.stream(microphone_stream) 59 | 60 | def stop_transcription(self): 61 | if self.transcriber: 62 | self.transcriber.close() 63 | self.transcriber = None 64 | 65 | def on_open(self, session_opened: aai.RealtimeSessionOpened): 66 | print("Session ID:", session_opened.session_id) 67 | return 68 | 69 | 70 | def on_data(self, transcript: aai.RealtimeTranscript): 71 | if not transcript.text: 72 | return 73 | 74 | if isinstance(transcript, aai.RealtimeFinalTranscript): 75 | self.generate_ai_response(transcript) 76 | else: 77 | print(transcript.text, end="\r") 78 | 79 | 80 | def on_error(self, error: aai.RealtimeError): 81 | print("An error occured:", error) 82 | return 83 | 84 | 85 | def on_close(self): 86 | #print("Closing Session") 87 | return 88 | 89 | ###### Step 3: Pass real-time transcript to OpenAI ###### 90 | 91 | def 
generate_ai_response(self, transcript): 92 | 93 | self.stop_transcription() 94 | 95 | self.full_transcript.append({"role":"user", "content": transcript.text}) 96 | print(f"\nPatient: {transcript.text}", end="\r\n") 97 | 98 | response = self.openai_client.chat.completions.create( 99 | model = "gpt-3.5-turbo", 100 | messages = self.full_transcript 101 | ) 102 | 103 | ai_response = response.choices[0].message.content 104 | 105 | self.generate_audio(ai_response) 106 | 107 | self.start_transcription() 108 | print(f"\nReal-time transcription: ", end="\r\n") 109 | 110 | 111 | ###### Step 4: Generate audio with ElevenLabs ###### 112 | 113 | def generate_audio(self, text): 114 | 115 | self.full_transcript.append({"role":"assistant", "content": text}) 116 | print(f"\nAI Receptionist: {text}") 117 | 118 | audio_stream = generate( 119 | api_key = self.elevenlabs_api_key, 120 | text = text, 121 | voice = "Rachel", 122 | stream = True 123 | ) 124 | 125 | stream(audio_stream) 126 | 127 | greeting = "Thank you for calling Vancouver dental clinic. My name is Sandy, how may I assist you?" 128 | ai_assistant = AI_Assistant() 129 | ai_assistant.generate_audio(greeting) 130 | ai_assistant.start_transcription() 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | --------------------------------------------------------------------------------