├── CBLogo.png
├── README.md
├── Screenshot 2023-05-31.png
└── msaicb_Final.py

--------------------------------------------------------------------------------
/CBLogo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jjmlovesgit/AzureStudioChatGPTVoiceBot/d2a07e24e45b9f088aacc60f8459f376674eec53/CBLogo.png

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# AzureStudioChatGPTVoiceBot

https://youtu.be/IAQYzez7-k4
#
![image](https://github.com/jjmlovesgit/AzureStudioChatGPTVoiceBot/assets/47751509/ff4548f8-bf68-41d3-8def-403c1cde1c49)
# ChatGPT Turbo 3.5 Voice BOT with Azure Speech Studio

Note: Be sure to add a local `config.py` file with your API keys before running the code.
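
The script reads three values from that module: `azure_api_key`, `azure_region`, and `openai_api_key` (the exact names `msaicb_Final.py` imports). A minimal sketch, with placeholder values you must replace:

```python
# config.py -- keep this file local and out of version control.
# The values below are placeholders, not real keys.
azure_api_key = "<your-azure-speech-key>"
azure_region = "<your-azure-region>"  # e.g. "eastus"
openai_api_key = "<your-openai-api-key>"
```

With the keys in place, launch the app the standard Streamlit way: `streamlit run msaicb_Final.py`.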
#
![Flow Chart](https://github.com/jjmlovesgit/AzureStudioChatGPTVoiceBot/assets/47751509/a6ccd132-928b-4ebd-864b-557493ce2ee2)

--------------------------------------------------------------------------------
/Screenshot 2023-05-31.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jjmlovesgit/AzureStudioChatGPTVoiceBot/d2a07e24e45b9f088aacc60f8459f376674eec53/Screenshot 2023-05-31.png

--------------------------------------------------------------------------------
/msaicb_Final.py:
--------------------------------------------------------------------------------
import base64
import os
import tempfile
import textwrap
import winsound  # Windows-only; used for the "ready" beep

import azure.cognitiveservices.speech as speechsdk
import openai
import streamlit as st

import config  # local file providing azure_api_key, azure_region, openai_api_key

# Provide the path to your logo image file
logo = "https://media.discordapp.net/attachments/995431274267279440/1108765311802556456/YellowArrow_PDFs_to_Custom_Chat_Bot_app_illustration_for_a_cove_e8cb1bda-8b00-48df-b8a3-7f41b7ea4d03.png"

# Provide the path to your video file
video_file_path = "C:/Projects/MS/Studio/VoAIce-main/VoAIce-main/MSCB.mp4"

# Read the video file as bytes
with open(video_file_path, "rb") as video_file:
    video_bytes = video_file.read()

# Encode the video bytes as base64 so the clip can be embedded inline
video_base64 = base64.b64encode(video_bytes).decode("utf-8")

# Generate the HTML video tag with auto-play (the original markup was lost
# during extraction; this is a minimal reconstruction that embeds the
# base64-encoded bytes as an inline MP4 source)
video_tag = f"""
<video autoplay muted playsinline>
    <source src="data:video/mp4;base64,{video_base64}" type="video/mp4">
</video>
"""

# Display the video using Streamlit
st.write(video_tag, unsafe_allow_html=True)

# Apply CSS styling to center the video (the original rules were lost
# during extraction; the block below is an assumed equivalent)
st.markdown(
    """
    <style>
    video {
        display: block;
        margin-left: auto;
        margin-right: auto;
    }
    </style>
    """,
    unsafe_allow_html=True
)

# Show the logo in the sidebar (markup reconstructed around the original
# .format(logo) call; the original styling was lost during extraction)
st.sidebar.markdown(
    """
    <div style="display: flex; justify-content: center;">
        <img src="{}" width="300">
    </div>
    """.format(logo),
    unsafe_allow_html=True,
)

# Define the robot emoji (the original character was mis-encoded)
robot_emoji = "🤖"

def transcribe_audio(speech_config):
    # Capture a single utterance from the default microphone and return its text
    audio_config = speechsdk.AudioConfig(use_default_microphone=True)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

    result = speech_recognizer.recognize_once_async().get()
    return result.text.strip()

def generate_response(input_text, conversation_history):
    messages = [
        {"role": "system", "content": "You are a helpful assistant. Please respond to all input in 50 words or less."},
    ]

    messages.extend(conversation_history)
    messages.append({"role": "user", "content": input_text})

    # Legacy (openai < 1.0) chat completion call
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        max_tokens=1000,
        n=1,
        stop=None,
        temperature=1.3,
    )

    return response['choices'][0]['message']['content']

def synthesize_and_save_speech(speech_config, response_text, file_path):
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)
    result = speech_synthesizer.speak_text_async(response_text).get()

    with open(file_path, "wb") as f:
        f.write(result.audio_data)

def play_audio(audio_file_path):
    # IPython.display (used originally) has no effect inside a Streamlit
    # script run, so the clip is rendered with Streamlit's own player
    with open(audio_file_path, "rb") as audio_file:
        st.audio(audio_file.read(), format="audio/wav")

def remove_temp_files(file_path):
    os.remove(file_path)

def main(stop_keyword="stop", exit_keyword="exit"):
    st.title("🤖 ChatGPT Voice Assistant Powered by Azure Speech Studio")

    # Define speech config
    azure_api_key = config.azure_api_key
    azure_region = config.azure_region
    voice = "en-US-ChristopherNeural"
    speech_config = speechsdk.SpeechConfig(subscription=azure_api_key, region=azure_region)
    speech_config.speech_synthesis_voice_name = voice
    openai.api_key = config.openai_api_key

    conversation_history = []

    # Increase font size (the original <style> rules were lost during
    # extraction; the rule below is an assumed equivalent)
    st.markdown("<style> body { font-size: 18px; } </style>", unsafe_allow_html=True)

    # Sidebar button to start the program
    st.sidebar.write("Press 'Start Program', then ask me a question and I will respond...")
    if st.sidebar.button("Start Program"):
        st.sidebar.write("Note: You can start your question over by saying 'Stop' during question input...")
        st.sidebar.write("You can end the chat session by saying 'Exit'")
        while True:
            st.text(robot_emoji + " Listening...")
            winsound.Beep(800, 200)  # Play a beep sound when ready for input

            input_text = transcribe_audio(speech_config)
            wrapped_input = textwrap.fill(input_text, width=90)
            # Wrap each line in a <div> so st.markdown keeps the line breaks
            # (the original inline styling was lost during extraction)
            indented_input = "\n".join(["<div>" + line + "</div>" for line in wrapped_input.splitlines()])

            st.markdown(f"<div>"
                        f"{indented_input}"
                        f"</div>",
                        unsafe_allow_html=True)

            if stop_keyword.lower() in input_text.lower():
                st.text("Restarting prompt...")
                conversation_history = []
                continue

            if exit_keyword.lower() in input_text.lower():
                st.markdown("<div>"
                            "Goodbye for now...</div>",
                            unsafe_allow_html=True)
                break

            response_text = generate_response(input_text, conversation_history)
            wrapped_response = textwrap.fill(response_text, width=70)
            indented_response = "\n".join(["<div>" + line + "</div>" for line in wrapped_response.splitlines()])

            st.markdown(f"<div>"
                        f"{indented_response}</div>",
                        unsafe_allow_html=True)

            # Reserve a temporary .wav path for the synthesized reply
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                audio_file_path = f.name

            try:
                synthesize_and_save_speech(speech_config, response_text, audio_file_path)
                play_audio(audio_file_path)
                remove_temp_files(audio_file_path)
            except Exception as e:
                st.error(f"Error: Failed to generate or play audio - {e}")

            conversation_history.append({"role": "user", "content": input_text})
            conversation_history.append({"role": "assistant", "content": response_text})

if __name__ == "__main__":
    main(stop_keyword="stop", exit_keyword="exit")

--------------------------------------------------------------------------------