├── CBLogo.png
├── README.md
├── Screenshot 2023-05-31.png
└── msaicb_Final.py
/CBLogo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jjmlovesgit/AzureStudioChatGPTVoiceBot/d2a07e24e45b9f088aacc60f8459f376674eec53/CBLogo.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# AzureStudioChatGPTVoiceBot

Demo video: https://youtu.be/IAQYzez7-k4

## ChatGPT (gpt-3.5-turbo) Voice Bot with Azure Speech Studio

Note: Be sure to add a local `config.py` file with your API keys before running the code.
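The script reads `config.azure_api_key`, `config.azure_region`, and `config.openai_api_key`, so a minimal `config.py` looks like the sketch below (the placeholder values are illustrative, not real keys or a required region):

```python
# config.py -- keep this file local and out of version control
azure_api_key = "YOUR_AZURE_SPEECH_KEY"    # key for your Azure Speech resource
azure_region = "eastus"                    # region of your Azure Speech resource
openai_api_key = "YOUR_OPENAI_API_KEY"     # key for your OpenAI account
```

With `config.py` in place and Streamlit installed, start the app with `streamlit run msaicb_Final.py`.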
--------------------------------------------------------------------------------
/Screenshot 2023-05-31.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jjmlovesgit/AzureStudioChatGPTVoiceBot/d2a07e24e45b9f088aacc60f8459f376674eec53/Screenshot 2023-05-31.png
--------------------------------------------------------------------------------
/msaicb_Final.py:
--------------------------------------------------------------------------------
import base64
import os
import tempfile
import textwrap
import winsound  # Windows-only: used for the "ready to listen" beep

import azure.cognitiveservices.speech as speechsdk
import openai
import streamlit as st

import config  # local file holding azure_api_key, azure_region, openai_api_key

# URL of the logo image shown in the sidebar
logo = "https://media.discordapp.net/attachments/995431274267279440/1108765311802556456/YellowArrow_PDFs_to_Custom_Chat_Bot_app_illustration_for_a_cove_e8cb1bda-8b00-48df-b8a3-7f41b7ea4d03.png"

# Provide the path to your video file
video_file_path = "C:/Projects/MS/Studio/VoAIce-main/VoAIce-main/MSCB.mp4"

# Read the video file as bytes
with open(video_file_path, "rb") as video_file:
    video_bytes = video_file.read()

# Encode the video bytes as base64 so the video can be embedded inline
video_base64 = base64.b64encode(video_bytes).decode("utf-8")

# Generate the HTML video tag with auto-play.
# NOTE: the exact markup is an assumption (the original tag was stripped from
# this file); any HTML5 <video> tag that embeds the base64 data will work.
video_tag = f"""
<video width="640" autoplay muted playsinline>
    <source src="data:video/mp4;base64,{video_base64}" type="video/mp4">
</video>
"""

# Display the video using Streamlit
st.write(video_tag, unsafe_allow_html=True)

# Apply CSS styling to center the video (the original CSS was stripped; the
# rule below is an assumed stand-in)
st.markdown(
    """
    <style>
    video {
        display: block;
        margin-left: auto;
        margin-right: auto;
    }
    </style>
    """,
    unsafe_allow_html=True,
)

# Sidebar logo.
# NOTE: the original HTML was stripped; an <img> tag filled in via
# .format(logo) matches the surrounding code and is assumed here.
st.sidebar.markdown(
    """
    <img src="{}" width="100%">
    """.format(logo),
    unsafe_allow_html=True,
)

# Define the robot emoji
robot_emoji = "🤖"
def transcribe_audio(speech_config):
    # Listen on the default microphone and return a single recognized utterance
    audio_config = speechsdk.AudioConfig(use_default_microphone=True)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

    result = speech_recognizer.recognize_once_async().get()
    return result.text.strip()

def generate_response(input_text, conversation_history):
    # Build the prompt from the system message, prior turns, and the new question
    messages = [
        {"role": "system", "content": "You are a helpful assistant. Please respond to all input in 50 words or less."},
    ]

    messages.extend(conversation_history)
    messages.append({"role": "user", "content": input_text})

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        max_tokens=1000,
        n=1,
        stop=None,
        temperature=1.3,
    )

    return response['choices'][0]['message']['content']

def synthesize_and_save_speech(speech_config, response_text, file_path):
    # Speak the reply through the default output device and save the audio to file_path
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)
    result = speech_synthesizer.speak_text_async(response_text).get()

    with open(file_path, "wb") as f:
        f.write(result.audio_data)

def play_audio(audio_file_path):
    # Render an audio player for the saved reply. st.audio works inside a
    # Streamlit script, whereas IPython.display.Audio (used originally) does not.
    with open(audio_file_path, "rb") as f:
        st.audio(f.read(), format="audio/wav")

def remove_temp_files(file_path):
    # Delete the temporary .wav file once it has been played
    os.remove(file_path)

def main(stop_keyword="stop", exit_keyword="exit"):
    st.title("🤖 ChatGPT Voice Assistant Powered by Azure Speech Studio")

    # Define speech config
    azure_api_key = config.azure_api_key
    azure_region = config.azure_region
    voice = "en-US-ChristopherNeural"
    speech_config = speechsdk.SpeechConfig(subscription=azure_api_key, region=azure_region)
    speech_config.speech_synthesis_voice_name = voice
    openai.api_key = config.openai_api_key

    conversation_history = []

    # Increase font size (the original CSS was stripped from this file; the
    # rule below is an assumed stand-in)
    st.markdown("<style> div.stMarkdown p { font-size: 1.15rem; } </style>", unsafe_allow_html=True)

    # Sidebar button to start the program
    st.sidebar.write("Press 'Start Program', then ask me a question and I will respond...")  # Instruction section
    if st.sidebar.button("Start Program"):
        # st.sidebar.write("Please ask me a question and I will respond...")  # Instruction section
        st.sidebar.write("Note: You can start your question over by saying 'Stop' during question input...")  # Instruction section
        st.sidebar.write("You can end the chat session by saying 'Exit'")  # Instruction section
        while True:
            st.text(robot_emoji + " Listening...")
            winsound.Beep(800, 200)  # Play a beep sound when ready for input

            input_text = transcribe_audio(speech_config)
            wrapped_input = textwrap.fill(input_text, width=90)
            # Join the wrapped lines with <br> so they render as separate lines
            # in the HTML block below
            indented_input = "\n".join([line + "<br>" for line in wrapped_input.splitlines()])

            # Show the transcribed question (the original inline HTML was
            # stripped; a plain <div> wrapper is assumed)
            st.markdown(f"<div>"
                        f"{indented_input}"
                        f"</div>",
                        unsafe_allow_html=True)
            if stop_keyword.lower() in input_text.lower():
                st.text("Restarting prompt...")
                conversation_history = []
                continue

            if exit_keyword.lower() in input_text.lower():
                st.markdown("<div>Goodbye for now...</div>",  # wrapper assumed, as above
                            unsafe_allow_html=True)
                break
            response_text = generate_response(input_text, conversation_history)
            wrapped_response = textwrap.fill(response_text, width=70)
            indented_response = "\n".join([line + "<br>" for line in wrapped_response.splitlines()])

            st.markdown(f"<div>{indented_response}</div>",  # wrapper assumed, as above
                        unsafe_allow_html=True)
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                audio_file_path = f.name

            try:
                synthesize_and_save_speech(speech_config, response_text, audio_file_path)
                play_audio(audio_file_path)
                remove_temp_files(audio_file_path)
            except Exception as e:
                st.error(f"Error: Failed to generate or play audio - {e}")

            conversation_history.append({"role": "user", "content": input_text})
            conversation_history.append({"role": "assistant", "content": response_text})

if __name__ == "__main__":
    main(stop_keyword="stop", exit_keyword="exit")
--------------------------------------------------------------------------------