├── audios
│   └── tempfile.mp3
├── images
│   └── Masahiro.png
├── videos
│   ├── Masahiro.mp4
│   └── tempfile.mp4
├── requirements.txt
├── README.md
├── .gitattributes
├── azure_utils.py
├── polly_utils.py
└── app.py
/audios/tempfile.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alitrack/Chat-GPT-LangChain/HEAD/audios/tempfile.mp3
--------------------------------------------------------------------------------
/images/Masahiro.png:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:215bfaa1bdb0ee4852988b29d480e2d1c2d9669eaa907ba25cc2d3dfa6ebfa4e
3 | size 4392529
4 |
--------------------------------------------------------------------------------
/videos/Masahiro.mp4:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:ca886517414fae8bcea5a5130ae1d01ef3ead7aed437203abebf032217fd0be6
3 | size 2425212
4 |
--------------------------------------------------------------------------------
/videos/tempfile.mp4:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:4579f43ce3f39906372a8f52b0858510321807df2ed98d4a539a991986ab0cc7
3 | size 103305
4 |
--------------------------------------------------------------------------------
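The image and video assets above are stored as Git LFS pointers rather than raw bytes: each pointer records the pointer-spec version, the sha256 object id of the actual file, and its size in bytes. As an illustrative sketch only (this helper is not part of the repo), a downloaded asset can be checked against its pointer:

import hashlib

def verify_lfs_asset(path, expected_oid, expected_size):
    # Hash the local file and compare it to the oid/size recorded in the LFS pointer.
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

# For example, for videos/Masahiro.mp4:
# verify_lfs_asset("videos/Masahiro.mp4",
#                  "ca886517414fae8bcea5a5130ae1d01ef3ead7aed437203abebf032217fd0be6",
#                  2425212)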
/requirements.txt:
--------------------------------------------------------------------------------
1 | openai==0.27.0
2 | gradio==3.19.1
3 | google-search-results
4 | google-api-python-client==2.80.0
5 | wolframalpha
6 | langchain==0.0.98
7 | requests==2.28.2
8 | git+https://github.com/openai/whisper.git
9 | boto3==1.26.82
10 | faiss-cpu
11 |
--------------------------------------------------------------------------------
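These pins matter: app.py imports openai.error (removed in the 1.0 OpenAI SDK) and LangChain paths (langchain.llms, langchain.agents, langchain.vectorstores.faiss) that later releases relocated, so substantially newer versions of those packages will break the imports. A minimal sanity check, offered as a sketch rather than part of the repo, that an environment matches the pins:

from importlib.metadata import PackageNotFoundError, version

PINS = {"openai": "0.27.0", "gradio": "3.19.1", "langchain": "0.0.98",
        "requests": "2.28.2", "boto3": "1.26.82"}

for pkg, pinned in PINS.items():
    try:
        installed = version(pkg)
    except PackageNotFoundError:
        print(f"{pkg}: not installed (expected {pinned})")
        continue
    print(f"{pkg}: {installed}", "ok" if installed == pinned else f"(expected {pinned})")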
/README.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: GPT+WolframAlpha+Whisper
3 | emoji: 👀
4 | colorFrom: red
5 | colorTo: gray
6 | sdk: gradio
7 | sdk_version: 3.16.1
8 | app_file: app.py
9 | pinned: false
10 | license: apache-2.0
11 | ---
12 |
13 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.7z filter=lfs diff=lfs merge=lfs -text
2 | *.arrow filter=lfs diff=lfs merge=lfs -text
3 | *.bin filter=lfs diff=lfs merge=lfs -text
4 | *.bz2 filter=lfs diff=lfs merge=lfs -text
5 | *.ckpt filter=lfs diff=lfs merge=lfs -text
6 | *.ftz filter=lfs diff=lfs merge=lfs -text
7 | *.gz filter=lfs diff=lfs merge=lfs -text
8 | *.h5 filter=lfs diff=lfs merge=lfs -text
9 | *.joblib filter=lfs diff=lfs merge=lfs -text
10 | *.lfs.* filter=lfs diff=lfs merge=lfs -text
11 | *.mlmodel filter=lfs diff=lfs merge=lfs -text
12 | *.model filter=lfs diff=lfs merge=lfs -text
13 | *.msgpack filter=lfs diff=lfs merge=lfs -text
14 | *.npy filter=lfs diff=lfs merge=lfs -text
15 | *.npz filter=lfs diff=lfs merge=lfs -text
16 | *.onnx filter=lfs diff=lfs merge=lfs -text
17 | *.ot filter=lfs diff=lfs merge=lfs -text
18 | *.parquet filter=lfs diff=lfs merge=lfs -text
19 | *.pb filter=lfs diff=lfs merge=lfs -text
20 | *.pickle filter=lfs diff=lfs merge=lfs -text
21 | *.pkl filter=lfs diff=lfs merge=lfs -text
22 | *.pt filter=lfs diff=lfs merge=lfs -text
23 | *.pth filter=lfs diff=lfs merge=lfs -text
24 | *.rar filter=lfs diff=lfs merge=lfs -text
25 | *.safetensors filter=lfs diff=lfs merge=lfs -text
26 | saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27 | *.tar.* filter=lfs diff=lfs merge=lfs -text
28 | *.tflite filter=lfs diff=lfs merge=lfs -text
29 | *.tgz filter=lfs diff=lfs merge=lfs -text
30 | *.wasm filter=lfs diff=lfs merge=lfs -text
31 | *.xz filter=lfs diff=lfs merge=lfs -text
32 | *.zip filter=lfs diff=lfs merge=lfs -text
33 | *.zst filter=lfs diff=lfs merge=lfs -text
34 | *tfevents* filter=lfs diff=lfs merge=lfs -text
35 | *.mp4 filter=lfs diff=lfs merge=lfs -text
36 | *.png filter=lfs diff=lfs merge=lfs -text
37 |
--------------------------------------------------------------------------------
/azure_utils.py:
--------------------------------------------------------------------------------
1 | # This class stores Azure voice data. Specifically, the class stores several records containing
2 | # language, azure_voice and gender. The class also has a method to return the azure_voice
3 | # given a language and gender.
4 |
5 | NEURAL_ENGINE = "neural"
6 | STANDARD_ENGINE = "standard"
7 |
8 |
9 | class AzureVoiceData:
10 | def get_voice(self, language, gender):
11 | for voice in self.voice_data:
12 | if voice['language'] == language and voice['gender'] == gender:
13 | return voice['azure_voice']
14 | return None
15 |
16 | def __init__(self):
17 | self.voice_data = [
18 | {'language': 'Arabic',
19 | 'azure_voice': 'ar-EG-ShakirNeural',
20 | 'gender': 'Male'},
21 | {'language': 'Arabic (Gulf)',
22 | 'azure_voice': 'ar-KW-FahedNeural',
23 | 'gender': 'Male'},
24 | {'language': 'Catalan',
25 | 'azure_voice': 'ca-ES-EnricNeural',
26 | 'gender': 'Male'},
27 | {'language': 'Chinese (Cantonese)',
28 | 'azure_voice': 'yue-CN-YunSongNeural',
29 | 'gender': 'Male'},
30 | {'language': 'Chinese (Mandarin)',
31 | 'azure_voice': 'zh-CN-YunxiNeural',
32 | 'gender': 'Male'},
33 | {'language': 'Danish',
34 | 'azure_voice': 'da-DK-JeppeNeural',
35 | 'gender': 'Male'},
36 | {'language': 'Dutch',
37 | 'azure_voice': 'nl-NL-MaartenNeural',
38 | 'gender': 'Male'},
39 | {'language': 'English (Australian)',
40 | 'azure_voice': 'en-AU-KenNeural',
41 | 'gender': 'Male'},
42 | {'language': 'English (British)',
43 | 'azure_voice': 'en-GB-RyanNeural',
44 | 'gender': 'Male'},
45 | {'language': 'English (Indian)',
46 | 'azure_voice': 'en-IN-PrabhatNeural',
47 | 'gender': 'Male'},
48 | {'language': 'English (New Zealand)',
49 | 'azure_voice': 'en-NZ-MitchellNeural',
50 | 'gender': 'Male'},
51 | {'language': 'English (South African)',
52 | 'azure_voice': 'en-ZA-LukeNeural',
53 | 'gender': 'Male'},
54 | {'language': 'English (US)',
55 | 'azure_voice': 'en-US-ChristopherNeural',
56 | 'gender': 'Male'},
57 | {'language': 'English (Welsh)',
58 | 'azure_voice': 'cy-GB-AledNeural',
59 | 'gender': 'Male'},
60 | {'language': 'Finnish',
61 | 'azure_voice': 'fi-FI-HarriNeural',
62 | 'gender': 'Male'},
63 | {'language': 'French',
64 | 'azure_voice': 'fr-FR-HenriNeural',
65 | 'gender': 'Male'},
66 | {'language': 'French (Canadian)',
67 | 'azure_voice': 'fr-CA-AntoineNeural',
68 | 'gender': 'Male'},
69 | {'language': 'German',
70 | 'azure_voice': 'de-DE-KlausNeural',
71 | 'gender': 'Male'},
72 | {'language': 'German (Austrian)',
73 | 'azure_voice': 'de-AT-JonasNeural',
74 | 'gender': 'Male'},
75 | {'language': 'Hindi',
76 | 'azure_voice': 'hi-IN-MadhurNeural',
77 | 'gender': 'Male'},
78 | {'language': 'Icelandic',
79 | 'azure_voice': 'is-IS-GunnarNeural',
80 | 'gender': 'Male'},
81 | {'language': 'Italian',
82 | 'azure_voice': 'it-IT-GianniNeural',
83 | 'gender': 'Male'},
84 | {'language': 'Japanese',
85 | 'azure_voice': 'ja-JP-KeitaNeural',
86 | 'gender': 'Male'},
87 | {'language': 'Korean',
88 | 'azure_voice': 'ko-KR-GookMinNeural',
89 | 'gender': 'Male'},
90 | {'language': 'Norwegian',
91 | 'azure_voice': 'nb-NO-FinnNeural',
92 | 'gender': 'Male'},
93 | {'language': 'Polish',
94 | 'azure_voice': 'pl-PL-MarekNeural',
95 | 'gender': 'Male'},
96 | {'language': 'Portuguese (Brazilian)',
97 | 'azure_voice': 'pt-BR-NicolauNeural',
98 | 'gender': 'Male'},
99 | {'language': 'Portuguese (European)',
100 | 'azure_voice': 'pt-PT-DuarteNeural',
101 | 'gender': 'Male'},
102 | {'language': 'Romanian',
103 | 'azure_voice': 'ro-RO-EmilNeural',
104 | 'gender': 'Male'},
105 | {'language': 'Russian',
106 | 'azure_voice': 'ru-RU-DmitryNeural',
107 | 'gender': 'Male'},
108 | {'language': 'Spanish (European)',
109 | 'azure_voice': 'es-ES-TeoNeural',
110 | 'gender': 'Male'},
111 | {'language': 'Spanish (Mexican)',
112 | 'azure_voice': 'es-MX-LibertoNeural',
113 | 'gender': 'Male'},
114 | {'language': 'Spanish (US)',
115 | 'azure_voice': 'es-US-AlonsoNeural"',
116 | 'gender': 'Male'},
117 | {'language': 'Swedish',
118 | 'azure_voice': 'sv-SE-MattiasNeural',
119 | 'gender': 'Male'},
120 | {'language': 'Turkish',
121 | 'azure_voice': 'tr-TR-AhmetNeural',
122 | 'gender': 'Male'},
123 | {'language': 'Welsh',
124 | 'azure_voice': 'cy-GB-AledNeural',
125 | 'gender': 'Male'},
126 | ]
127 |
128 |
129 | # Run from the command-line
130 | if __name__ == '__main__':
131 | azure_voice_data = AzureVoiceData()
132 |
133 | azure_voice = azure_voice_data.get_voice('English (US)', 'Male')
134 | print('English (US)', 'Male', azure_voice)
135 |
136 | azure_voice = azure_voice_data.get_voice('English (US)', 'Female')
137 | print('English (US)', 'Female', azure_voice)
138 |
139 | azure_voice = azure_voice_data.get_voice('French', 'Female')
140 | print('French', 'Female', azure_voice)
141 |
142 | azure_voice = azure_voice_data.get_voice('French', 'Male')
143 | print('French', 'Male', azure_voice)
144 |
145 | azure_voice = azure_voice_data.get_voice('Japanese', 'Female')
146 | print('Japanese', 'Female', azure_voice)
147 |
148 | azure_voice = azure_voice_data.get_voice('Japanese', 'Male')
149 | print('Japanese', 'Male', azure_voice)
150 |
151 | azure_voice = azure_voice_data.get_voice('Hindi', 'Female')
152 | print('Hindi', 'Female', azure_voice)
153 |
154 | azure_voice = azure_voice_data.get_voice('Hindi', 'Male')
155 | print('Hindi', 'Male', azure_voice)
156 |
--------------------------------------------------------------------------------
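Every record in AzureVoiceData above is 'Male', so get_voice returns None for any 'Female' query, as the __main__ demo shows; callers need a fallback. app.py's do_html_video_speak falls back to 'en-US-ChristopherNeural', and a small sketch of the same pattern (the wrapper name is hypothetical) looks like this:

from azure_utils import AzureVoiceData

AZURE_VOICE_DATA = AzureVoiceData()

def pick_azure_voice(language, gender="Male", fallback="en-US-ChristopherNeural"):
    # get_voice returns None on a miss; substitute the fallback voice id.
    return AZURE_VOICE_DATA.get_voice(language, gender) or fallback

print(pick_azure_voice("Japanese"))            # ja-JP-KeitaNeural
print(pick_azure_voice("Japanese", "Female"))  # en-US-ChristopherNeural (fallback)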
/polly_utils.py:
--------------------------------------------------------------------------------
1 | # This class stores Polly voice data. Specifically, the class stores several records containing
2 | # language, lang_code, whisper_lang_code, gender, voice_id, and whether the neural and standard engines are supported.
3 | # It has methods to return the voice_id, lang_code and engine given a language and gender, and the whisper_lang_code given a language.
4 |
5 | NEURAL_ENGINE = "neural"
6 | STANDARD_ENGINE = "standard"
7 |
8 |
9 | class PollyVoiceData:
10 | def get_voice(self, language, gender):
11 | for voice in self.voice_data:
12 | if voice['language'] == language and voice['gender'] == gender:
13 | if voice['neural'] == 'Yes':
14 | return voice['voice_id'], voice['lang_code'], NEURAL_ENGINE
15 | for voice in self.voice_data:
16 | if voice['language'] == language and voice['gender'] == gender:
17 | if voice['standard'] == 'Yes':
18 | return voice['voice_id'], voice['lang_code'], STANDARD_ENGINE
19 | return None, None, None
20 |
21 | def get_whisper_lang_code(self, language):
22 | for voice in self.voice_data:
23 | if voice['language'] == language:
24 | return voice['whisper_lang_code']
25 | return "en"
26 |
27 | def __init__(self):
28 | self.voice_data = [
29 | {'language': 'Arabic',
30 | 'lang_code': 'arb',
31 | 'whisper_lang_code': 'ar',
32 | 'voice_id': 'Zeina',
33 | 'gender': 'Female',
34 | 'neural': 'No',
35 | 'standard': 'Yes'},
36 | {'language': 'Arabic (Gulf)',
37 | 'lang_code': 'ar-AE',
38 | 'whisper_lang_code': 'ar',
39 | 'voice_id': 'Hala',
40 | 'gender': 'Female',
41 | 'neural': 'Yes',
42 | 'standard': 'No'},
43 | {'language': 'Catalan',
44 | 'lang_code': 'ca-ES',
45 | 'whisper_lang_code': 'ca',
46 | 'voice_id': 'Arlet',
47 | 'gender': 'Female',
48 | 'neural': 'Yes',
49 | 'standard': 'No'},
50 | {'language': 'Chinese (Cantonese)',
51 | 'lang_code': 'yue-CN',
52 | 'whisper_lang_code': 'zh',
53 | 'voice_id': 'Hiujin',
54 | 'gender': 'Female',
55 | 'neural': 'Yes',
56 | 'standard': 'No'},
57 | {'language': 'Chinese (Mandarin)',
58 | 'lang_code': 'cmn-CN',
59 | 'whisper_lang_code': 'zh',
60 | 'voice_id': 'Zhiyu',
61 | 'gender': 'Female',
62 | 'neural': 'Yes',
63 | 'standard': 'No'},
64 | {'language': 'Danish',
65 | 'lang_code': 'da-DK',
66 | 'whisper_lang_code': 'da',
67 | 'voice_id': 'Naja',
68 | 'gender': 'Female',
69 | 'neural': 'No',
70 | 'standard': 'Yes'},
71 | {'language': 'Danish',
72 | 'lang_code': 'da-DK',
73 | 'whisper_lang_code': 'da',
74 | 'voice_id': 'Mads',
75 | 'gender': 'Male',
76 | 'neural': 'No',
77 | 'standard': 'Yes'},
78 | {'language': 'Dutch',
79 | 'lang_code': 'nl-NL',
80 | 'whisper_lang_code': 'nl',
81 | 'voice_id': 'Laura',
82 | 'gender': 'Female',
83 | 'neural': 'Yes',
84 | 'standard': 'No'},
85 | {'language': 'Dutch',
86 | 'lang_code': 'nl-NL',
87 | 'whisper_lang_code': 'nl',
88 | 'voice_id': 'Lotte',
89 | 'gender': 'Female',
90 | 'neural': 'No',
91 | 'standard': 'Yes'},
92 | {'language': 'Dutch',
93 | 'lang_code': 'nl-NL',
94 | 'whisper_lang_code': 'nl',
95 | 'voice_id': 'Ruben',
96 | 'gender': 'Male',
97 | 'neural': 'No',
98 | 'standard': 'Yes'},
99 | {'language': 'English (Australian)',
100 | 'lang_code': 'en-AU',
101 | 'whisper_lang_code': 'en',
102 | 'voice_id': 'Nicole',
103 | 'gender': 'Female',
104 | 'neural': 'No',
105 | 'standard': 'Yes'},
106 | {'language': 'English (Australian)',
107 | 'lang_code': 'en-AU',
108 | 'whisper_lang_code': 'en',
109 | 'voice_id': 'Olivia',
110 | 'gender': 'Female',
111 | 'neural': 'Yes',
112 | 'standard': 'No'},
113 | {'language': 'English (Australian)',
114 | 'lang_code': 'en-AU',
115 | 'whisper_lang_code': 'en',
116 | 'voice_id': 'Russell',
117 | 'gender': 'Male',
118 | 'neural': 'No',
119 | 'standard': 'Yes'},
120 | {'language': 'English (British)',
121 | 'lang_code': 'en-GB',
122 | 'whisper_lang_code': 'en',
123 | 'voice_id': 'Amy',
124 | 'gender': 'Female',
125 | 'neural': 'Yes',
126 | 'standard': 'Yes'},
127 | {'language': 'English (British)',
128 | 'lang_code': 'en-GB',
129 | 'whisper_lang_code': 'en',
130 | 'voice_id': 'Emma',
131 | 'gender': 'Female',
132 | 'neural': 'Yes',
133 | 'standard': 'Yes'},
134 | {'language': 'English (British)',
135 | 'lang_code': 'en-GB',
136 | 'whisper_lang_code': 'en',
137 | 'voice_id': 'Brian',
138 | 'gender': 'Male',
139 | 'neural': 'Yes',
140 | 'standard': 'Yes'},
141 | {'language': 'English (British)',
142 | 'lang_code': 'en-GB',
143 | 'whisper_lang_code': 'en',
144 | 'voice_id': 'Arthur',
145 | 'gender': 'Male',
146 | 'neural': 'Yes',
147 | 'standard': 'No'},
148 | {'language': 'English (Indian)',
149 | 'lang_code': 'en-IN',
150 | 'whisper_lang_code': 'en',
151 | 'voice_id': 'Aditi',
152 | 'gender': 'Female',
153 | 'neural': 'No',
154 | 'standard': 'Yes'},
155 | {'language': 'English (Indian)',
156 | 'lang_code': 'en-IN',
157 | 'whisper_lang_code': 'en',
158 | 'voice_id': 'Raveena',
159 | 'gender': 'Female',
160 | 'neural': 'No',
161 | 'standard': 'Yes'},
162 | {'language': 'English (Indian)',
163 | 'lang_code': 'en-IN',
164 | 'whisper_lang_code': 'en',
165 | 'voice_id': 'Kajal',
166 | 'gender': 'Female',
167 | 'neural': 'Yes',
168 | 'standard': 'No'},
169 | {'language': 'English (New Zealand)',
170 | 'lang_code': 'en-NZ',
171 | 'whisper_lang_code': 'en',
172 | 'voice_id': 'Aria',
173 | 'gender': 'Female',
174 | 'neural': 'Yes',
175 | 'standard': 'No'},
176 | {'language': 'English (South African)',
177 | 'lang_code': 'en-ZA',
178 | 'whisper_lang_code': 'en',
179 | 'voice_id': 'Ayanda',
180 | 'gender': 'Female',
181 | 'neural': 'Yes',
182 | 'standard': 'No'},
183 | {'language': 'English (US)',
184 | 'lang_code': 'en-US',
185 | 'whisper_lang_code': 'en',
186 | 'voice_id': 'Ivy',
187 | 'gender': 'Female (child)',
188 | 'neural': 'Yes',
189 | 'standard': 'Yes'},
190 | {'language': 'English (US)',
191 | 'lang_code': 'en-US',
192 | 'whisper_lang_code': 'en',
193 | 'voice_id': 'Joanna',
194 | 'gender': 'Female',
195 | 'neural': 'Yes',
196 | 'standard': 'Yes'},
197 | {'language': 'English (US)',
198 | 'lang_code': 'en-US',
199 | 'whisper_lang_code': 'en',
200 | 'voice_id': 'Kendra',
201 | 'gender': 'Female',
202 | 'neural': 'Yes',
203 | 'standard': 'Yes'},
204 | {'language': 'English (US)',
205 | 'lang_code': 'en-US',
206 | 'whisper_lang_code': 'en',
207 | 'voice_id': 'Kimberly',
208 | 'gender': 'Female',
209 | 'neural': 'Yes',
210 | 'standard': 'Yes'},
211 | {'language': 'English (US)',
212 | 'lang_code': 'en-US',
213 | 'whisper_lang_code': 'en',
214 | 'voice_id': 'Salli',
215 | 'gender': 'Female',
216 | 'neural': 'Yes',
217 | 'standard': 'Yes'},
218 | {'language': 'English (US)',
219 | 'lang_code': 'en-US',
220 | 'whisper_lang_code': 'en',
221 | 'voice_id': 'Joey',
222 | 'gender': 'Male',
223 | 'neural': 'Yes',
224 | 'standard': 'Yes'},
225 | {'language': 'English (US)',
226 | 'lang_code': 'en-US',
227 | 'whisper_lang_code': 'en',
228 | 'voice_id': 'Justin',
229 | 'gender': 'Male (child)',
230 | 'neural': 'Yes',
231 | 'standard': 'Yes'},
232 | {'language': 'English (US)',
233 | 'lang_code': 'en-US',
234 | 'whisper_lang_code': 'en',
235 | 'voice_id': 'Kevin',
236 | 'gender': 'Male (child)',
237 | 'neural': 'Yes',
238 | 'standard': 'No'},
239 | {'language': 'English (US)',
240 | 'lang_code': 'en-US',
241 | 'whisper_lang_code': 'en',
242 | 'voice_id': 'Matthew',
243 | 'gender': 'Male',
244 | 'neural': 'Yes',
245 | 'standard': 'Yes'},
246 | {'language': 'English (Welsh)',
247 | 'lang_code': 'en-GB-WLS',
248 | 'whisper_lang_code': 'en',
249 | 'voice_id': 'Geraint',
250 | 'gender': 'Male',
251 | 'neural': 'No',
252 | 'standard': 'Yes'},
253 | {'language': 'Finnish',
254 | 'lang_code': 'fi-FI',
255 | 'whisper_lang_code': 'fi',
256 | 'voice_id': 'Suvi',
257 | 'gender': 'Female',
258 | 'neural': 'Yes',
259 | 'standard': 'No'},
260 | {'language': 'French',
261 | 'lang_code': 'fr-FR',
262 | 'whisper_lang_code': 'fr',
263 | 'voice_id': 'Celine',
264 | 'gender': 'Female',
265 | 'neural': 'No',
266 | 'standard': 'Yes'},
267 | {'language': 'French',
268 | 'lang_code': 'fr-FR',
269 | 'whisper_lang_code': 'fr',
270 | 'voice_id': 'Lea',
271 | 'gender': 'Female',
272 | 'neural': 'Yes',
273 | 'standard': 'Yes'},
274 | {'language': 'French',
275 | 'lang_code': 'fr-FR',
276 | 'whisper_lang_code': 'fr',
277 | 'voice_id': 'Mathieu',
278 | 'gender': 'Male',
279 | 'neural': 'No',
280 | 'standard': 'Yes'},
281 | {'language': 'French (Canadian)',
282 | 'lang_code': 'fr-CA',
283 | 'whisper_lang_code': 'fr',
284 | 'voice_id': 'Chantal',
285 | 'gender': 'Female',
286 | 'neural': 'No',
287 | 'standard': 'Yes'},
288 | {'language': 'French (Canadian)',
289 | 'lang_code': 'fr-CA',
290 | 'whisper_lang_code': 'fr',
291 | 'voice_id': 'Gabrielle',
292 | 'gender': 'Female',
293 | 'neural': 'Yes',
294 | 'standard': 'No'},
295 | {'language': 'French (Canadian)',
296 | 'lang_code': 'fr-CA',
297 | 'whisper_lang_code': 'fr',
298 | 'voice_id': 'Liam',
299 | 'gender': 'Male',
300 | 'neural': 'Yes',
301 | 'standard': 'No'},
302 | {'language': 'German',
303 | 'lang_code': 'de-DE',
304 | 'whisper_lang_code': 'de',
305 | 'voice_id': 'Marlene',
306 | 'gender': 'Female',
307 | 'neural': 'No',
308 | 'standard': 'Yes'},
309 | {'language': 'German',
310 | 'lang_code': 'de-DE',
311 | 'whisper_lang_code': 'de',
312 | 'voice_id': 'Vicki',
313 | 'gender': 'Female',
314 | 'neural': 'Yes',
315 | 'standard': 'Yes'},
316 | {'language': 'German',
317 | 'lang_code': 'de-DE',
318 | 'whisper_lang_code': 'de',
319 | 'voice_id': 'Hans',
320 | 'gender': 'Male',
321 | 'neural': 'No',
322 | 'standard': 'Yes'},
323 | {'language': 'German',
324 | 'lang_code': 'de-DE',
325 | 'whisper_lang_code': 'de',
326 | 'voice_id': 'Daniel',
327 | 'gender': 'Male',
328 | 'neural': 'Yes',
329 | 'standard': 'No'},
330 | {'language': 'German (Austrian)',
331 | 'lang_code': 'de-AT',
332 | 'whisper_lang_code': 'de',
333 | 'voice_id': 'Hannah',
334 | 'gender': 'Female',
335 | 'neural': 'Yes',
336 | 'standard': 'No'},
337 | {'language': 'Hindi',
338 | 'lang_code': 'hi-IN',
339 | 'whisper_lang_code': 'hi',
340 | 'voice_id': 'Aditi',
341 | 'gender': 'Female',
342 | 'neural': 'No',
343 | 'standard': 'Yes'},
344 | {'language': 'Hindi',
345 | 'lang_code': 'hi-IN',
346 | 'whisper_lang_code': 'hi',
347 | 'voice_id': 'Kajal',
348 | 'gender': 'Female',
349 | 'neural': 'Yes',
350 | 'standard': 'No'},
351 | {'language': 'Icelandic',
352 | 'lang_code': 'is-IS',
353 | 'whisper_lang_code': 'is',
354 | 'voice_id': 'Dora',
355 | 'gender': 'Female',
356 | 'neural': 'No',
357 | 'standard': 'Yes'},
358 | {'language': 'Icelandic',
359 | 'lang_code': 'is-IS',
360 | 'whisper_lang_code': 'is',
361 | 'voice_id': 'Karl',
362 | 'gender': 'Male',
363 | 'neural': 'No',
364 | 'standard': 'Yes'},
365 | {'language': 'Italian',
366 | 'lang_code': 'it-IT',
367 | 'whisper_lang_code': 'it',
368 | 'voice_id': 'Carla',
369 | 'gender': 'Female',
370 | 'neural': 'No',
371 | 'standard': 'Yes'},
372 | {'language': 'Italian',
373 | 'lang_code': 'it-IT',
374 | 'whisper_lang_code': 'it',
375 | 'voice_id': 'Bianca',
376 | 'gender': 'Female',
377 | 'neural': 'Yes',
378 | 'standard': 'Yes'},
379 | {'language': 'Japanese',
380 | 'lang_code': 'ja-JP',
381 | 'whisper_lang_code': 'ja',
382 | 'voice_id': 'Mizuki',
383 | 'gender': 'Female',
384 | 'neural': 'No',
385 | 'standard': 'Yes'},
386 | {'language': 'Japanese',
387 | 'lang_code': 'ja-JP',
388 | 'whisper_lang_code': 'ja',
389 | 'voice_id': 'Takumi',
390 | 'gender': 'Male',
391 | 'neural': 'Yes',
392 | 'standard': 'Yes'},
393 | {'language': 'Korean',
394 | 'lang_code': 'ko-KR',
395 | 'whisper_lang_code': 'ko',
396 | 'voice_id': 'Seoyeon',
397 | 'gender': 'Female',
398 | 'neural': 'Yes',
399 | 'standard': 'Yes'},
400 | {'language': 'Norwegian',
401 | 'lang_code': 'nb-NO',
402 | 'whisper_lang_code': 'no',
403 | 'voice_id': 'Liv',
404 | 'gender': 'Female',
405 | 'neural': 'No',
406 | 'standard': 'Yes'},
407 | {'language': 'Norwegian',
408 | 'lang_code': 'nb-NO',
409 | 'whisper_lang_code': 'no',
410 | 'voice_id': 'Ida',
411 | 'gender': 'Female',
412 | 'neural': 'Yes',
413 | 'standard': 'No'},
414 | {'language': 'Polish',
415 | 'lang_code': 'pl-PL',
416 | 'whisper_lang_code': 'pl',
417 | 'voice_id': 'Ewa',
418 | 'gender': 'Female',
419 | 'neural': 'No',
420 | 'standard': 'Yes'},
421 | {'language': 'Polish',
422 | 'lang_code': 'pl-PL',
423 | 'whisper_lang_code': 'pl',
424 | 'voice_id': 'Maja',
425 | 'gender': 'Female',
426 | 'neural': 'No',
427 | 'standard': 'Yes'},
428 | {'language': 'Polish',
429 | 'lang_code': 'pl-PL',
430 | 'whisper_lang_code': 'pl',
431 | 'voice_id': 'Jacek',
432 | 'gender': 'Male',
433 | 'neural': 'No',
434 | 'standard': 'Yes'},
435 | {'language': 'Polish',
436 | 'lang_code': 'pl-PL',
437 | 'whisper_lang_code': 'pl',
438 | 'voice_id': 'Jan',
439 | 'gender': 'Male',
440 | 'neural': 'No',
441 | 'standard': 'Yes'},
442 | {'language': 'Polish',
443 | 'lang_code': 'pl-PL',
444 | 'whisper_lang_code': 'pl',
445 | 'voice_id': 'Ola',
446 | 'gender': 'Female',
447 | 'neural': 'Yes',
448 | 'standard': 'No'},
449 | {'language': 'Portuguese (Brazilian)',
450 | 'lang_code': 'pt-BR',
451 | 'whisper_lang_code': 'pt',
452 | 'voice_id': 'Camila',
453 | 'gender': 'Female',
454 | 'neural': 'Yes',
455 | 'standard': 'Yes'},
456 | {'language': 'Portuguese (Brazilian)',
457 | 'lang_code': 'pt-BR',
458 | 'whisper_lang_code': 'pt',
459 | 'voice_id': 'Vitoria',
460 | 'gender': 'Female',
461 | 'neural': 'Yes',
462 | 'standard': 'Yes'},
463 | {'language': 'Portuguese (Brazilian)',
464 | 'lang_code': 'pt-BR',
465 | 'whisper_lang_code': 'pt',
466 | 'voice_id': 'Ricardo',
467 | 'gender': 'Male',
468 | 'neural': 'No',
469 | 'standard': 'Yes'},
470 | {'language': 'Portuguese (European)',
471 | 'lang_code': 'pt-PT',
472 | 'whisper_lang_code': 'pt',
473 | 'voice_id': 'Ines',
474 | 'gender': 'Female',
475 | 'neural': 'Yes',
476 | 'standard': 'Yes'},
477 | {'language': 'Portuguese (European)',
478 | 'lang_code': 'pt-PT',
479 | 'whisper_lang_code': 'pt',
480 | 'voice_id': 'Cristiano',
481 | 'gender': 'Male',
482 | 'neural': 'No',
483 | 'standard': 'Yes'},
484 | {'language': 'Romanian',
485 | 'lang_code': 'ro-RO',
486 | 'whisper_lang_code': 'ro',
487 | 'voice_id': 'Carmen',
488 | 'gender': 'Female',
489 | 'neural': 'No',
490 | 'standard': 'Yes'},
491 | {'language': 'Russian',
492 | 'lang_code': 'ru-RU',
493 | 'whisper_lang_code': 'ru',
494 | 'voice_id': 'Tatyana',
495 | 'gender': 'Female',
496 | 'neural': 'No',
497 | 'standard': 'Yes'},
498 | {'language': 'Russian',
499 | 'lang_code': 'ru-RU',
500 | 'whisper_lang_code': 'ru',
501 | 'voice_id': 'Maxim',
502 | 'gender': 'Male',
503 | 'neural': 'No',
504 | 'standard': 'Yes'},
505 | {'language': 'Spanish (European)',
506 | 'lang_code': 'es-ES',
507 | 'whisper_lang_code': 'es',
508 | 'voice_id': 'Conchita',
509 | 'gender': 'Female',
510 | 'neural': 'No',
511 | 'standard': 'Yes'},
512 | {'language': 'Spanish (European)',
513 | 'lang_code': 'es-ES',
514 | 'whisper_lang_code': 'es',
515 | 'voice_id': 'Lucia',
516 | 'gender': 'Female',
517 | 'neural': 'Yes',
518 | 'standard': 'Yes'},
519 | {'language': 'Spanish (European)',
520 | 'lang_code': 'es-ES',
521 | 'whisper_lang_code': 'es',
522 | 'voice_id': 'Enrique',
523 | 'gender': 'Male',
524 | 'neural': 'No',
525 | 'standard': 'Yes'},
526 | {'language': 'Spanish (Mexican)',
527 | 'lang_code': 'es-MX',
528 | 'whisper_lang_code': 'es',
529 | 'voice_id': 'Mia',
530 | 'gender': 'Female',
531 | 'neural': 'Yes',
532 | 'standard': 'Yes'},
533 | {'language': 'Spanish (US)',
534 | 'lang_code': 'es-US',
535 | 'whisper_lang_code': 'es',
536 | 'voice_id': 'Lupe',
537 | 'gender': 'Female',
538 | 'neural': 'Yes',
539 | 'standard': 'Yes'},
540 | {'language': 'Spanish (US)',
541 | 'lang_code': 'es-US',
542 | 'whisper_lang_code': 'es',
543 | 'voice_id': 'Penelope',
544 | 'gender': 'Female',
545 | 'neural': 'No',
546 | 'standard': 'Yes'},
547 | {'language': 'Spanish (US)',
548 | 'lang_code': 'es-US',
549 | 'whisper_lang_code': 'es',
550 | 'voice_id': 'Miguel',
551 | 'gender': 'Male',
552 | 'neural': 'No',
553 | 'standard': 'Yes'},
554 | {'language': 'Spanish (US)',
555 | 'lang_code': 'es-US',
556 | 'whisper_lang_code': 'es',
557 | 'voice_id': 'Pedro',
558 | 'gender': 'Male',
559 | 'neural': 'Yes',
560 | 'standard': 'No'},
561 | {'language': 'Swedish',
562 | 'lang_code': 'sv-SE',
563 | 'whisper_lang_code': 'sv',
564 | 'voice_id': 'Astrid',
565 | 'gender': 'Female',
566 | 'neural': 'No',
567 | 'standard': 'Yes'},
568 | {'language': 'Swedish',
569 | 'lang_code': 'sv-SE',
570 | 'whisper_lang_code': 'sv',
571 | 'voice_id': 'Elin',
572 | 'gender': 'Female',
573 | 'neural': 'Yes',
574 | 'standard': 'No'},
575 | {'language': 'Turkish',
576 | 'lang_code': 'tr-TR',
577 | 'whisper_lang_code': 'tr',
578 | 'voice_id': 'Filiz',
579 | 'gender': 'Female',
580 | 'neural': 'No',
581 | 'standard': 'Yes'},
582 | {'language': 'Welsh',
583 | 'lang_code': 'cy-GB',
584 | 'whisper_lang_code': 'cy',
585 | 'voice_id': 'Gwyneth',
586 | 'gender': 'Female',
587 | 'neural': 'No',
588 | 'standard': 'Yes'}
589 | ]
590 |
591 |
592 | # Run from the command-line
593 | if __name__ == '__main__':
594 | polly_voice_data = PollyVoiceData()
595 |
596 | voice_id, language_code, engine = polly_voice_data.get_voice('English (US)', 'Male')
597 | print('English (US)', 'Male', voice_id, language_code, engine)
598 |
599 | voice_id, language_code, engine = polly_voice_data.get_voice('English (US)', 'Female')
600 | print('English (US)', 'Female', voice_id, language_code, engine)
601 |
602 | voice_id, language_code, engine = polly_voice_data.get_voice('French', 'Female')
603 | print('French', 'Female', voice_id, language_code, engine)
604 |
605 | voice_id, language_code, engine = polly_voice_data.get_voice('French', 'Male')
606 | print('French', 'Male', voice_id, language_code, engine)
607 |
608 | voice_id, language_code, engine = polly_voice_data.get_voice('Japanese', 'Female')
609 | print('Japanese', 'Female', voice_id, language_code, engine)
610 |
611 | voice_id, language_code, engine = polly_voice_data.get_voice('Japanese', 'Male')
612 | print('Japanese', 'Male', voice_id, language_code, engine)
613 |
614 | voice_id, language_code, engine = polly_voice_data.get_voice('Hindi', 'Female')
615 | print('Hindi', 'Female', voice_id, language_code, engine)
616 |
617 | voice_id, language_code, engine = polly_voice_data.get_voice('Hindi', 'Male')
618 | print('Hindi', 'Male', voice_id, language_code, engine)
619 |
620 | whisper_lang_code = polly_voice_data.get_whisper_lang_code('English (US)')
621 | print('English (US) whisper_lang_code:', whisper_lang_code)
622 |
623 | whisper_lang_code = polly_voice_data.get_whisper_lang_code('Chinese (Mandarin)')
624 | print('Chinese (Mandarin) whisper_lang_code:', whisper_lang_code)
625 |
626 | whisper_lang_code = polly_voice_data.get_whisper_lang_code('Norwegian')
627 | print('Norwegian whisper_lang_code:', whisper_lang_code)
628 |
629 | whisper_lang_code = polly_voice_data.get_whisper_lang_code('Dutch')
630 | print('Dutch whisper_lang_code:', whisper_lang_code)
631 |
632 | whisper_lang_code = polly_voice_data.get_whisper_lang_code('Foo')
633 | print('Foo whisper_lang_code:', whisper_lang_code)
634 |
635 |
636 |
--------------------------------------------------------------------------------
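Note the engine preference in PollyVoiceData.get_voice: the first pass returns a matching voice only if it supports the neural engine, and the second pass falls back to a standard voice; get_whisper_lang_code maps unknown languages to "en". A short usage sketch (expected outputs derived from the table above):

from polly_utils import PollyVoiceData

polly = PollyVoiceData()

# Amy is the first British female voice flagged neural, so the first pass returns it.
print(polly.get_voice('English (British)', 'Female'))  # ('Amy', 'en-GB', 'neural')

# No male French voice supports neural, so the second pass returns Mathieu with the standard engine.
print(polly.get_voice('French', 'Male'))               # ('Mathieu', 'fr-FR', 'standard')

# A language that is not in the table falls back to 'en'.
print(polly.get_whisper_lang_code('Klingon'))          # en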
/app.py:
--------------------------------------------------------------------------------
1 | import io
2 | import os
3 | import ssl
4 | from contextlib import closing
5 | from typing import Optional, Tuple
6 | import datetime
7 |
8 | import boto3
9 | import gradio as gr
10 | import requests
11 |
12 | # UNCOMMENT TO USE WHISPER
13 | import warnings
14 | import whisper
15 |
16 | from langchain import ConversationChain, LLMChain
17 |
18 | from langchain.agents import load_tools, initialize_agent
19 | from langchain.chains.conversation.memory import ConversationBufferMemory
20 | from langchain.llms import OpenAI
21 | from threading import Lock
22 |
23 | # Console to variable
24 | from io import StringIO
25 | import sys
26 | import re
27 |
28 | from openai.error import AuthenticationError, InvalidRequestError, RateLimitError
29 |
30 | # Pertains to Express-inator functionality
31 | from langchain.prompts import PromptTemplate
32 |
33 | from polly_utils import PollyVoiceData, NEURAL_ENGINE
34 | from azure_utils import AzureVoiceData
35 |
36 | # Pertains to question answering functionality
37 | from langchain.embeddings.openai import OpenAIEmbeddings
38 | from langchain.text_splitter import CharacterTextSplitter
39 | from langchain.vectorstores.faiss import FAISS
40 | from langchain.docstore.document import Document
41 | from langchain.chains.question_answering import load_qa_chain
42 |
43 | news_api_key = os.environ["NEWS_API_KEY"]
44 | tmdb_bearer_token = os.environ["TMDB_BEARER_TOKEN"]
45 |
46 | TOOLS_LIST = ['serpapi', 'wolfram-alpha', 'pal-math', 'pal-colored-objects'] #'google-search','news-api','tmdb-api','open-meteo-api'
47 | TOOLS_DEFAULT_LIST = ['serpapi', 'pal-math']
48 | BUG_FOUND_MSG = "Congratulations, you've found a bug in this application!"
49 | # AUTH_ERR_MSG = "Please paste your OpenAI key from openai.com to use this application. It is not necessary to hit a button or key after pasting it."
50 | AUTH_ERR_MSG = "Please paste your OpenAI key from openai.com to use this application. "
51 | MAX_TOKENS = 512
52 |
53 | LOOPING_TALKING_HEAD = "videos/Masahiro.mp4"
54 | TALKING_HEAD_WIDTH = "192"
55 | MAX_TALKING_HEAD_TEXT_LENGTH = 155
56 |
57 | # Pertains to Express-inator functionality
58 | NUM_WORDS_DEFAULT = 0
59 | MAX_WORDS = 400
60 | FORMALITY_DEFAULT = "N/A"
61 | TEMPERATURE_DEFAULT = 0.5
62 | EMOTION_DEFAULT = "N/A"
63 | LANG_LEVEL_DEFAULT = "N/A"
64 | TRANSLATE_TO_DEFAULT = "N/A"
65 | LITERARY_STYLE_DEFAULT = "N/A"
66 | PROMPT_TEMPLATE = PromptTemplate(
67 | input_variables=["original_words", "num_words", "formality", "emotions", "lang_level", "translate_to",
68 | "literary_style"],
69 | template="Restate {num_words}{formality}{emotions}{lang_level}{translate_to}{literary_style}the following: \n{original_words}\n",
70 | )
71 |
72 | POLLY_VOICE_DATA = PollyVoiceData()
73 | AZURE_VOICE_DATA = AzureVoiceData()
74 |
75 | # Pertains to WHISPER functionality
76 | WHISPER_DETECT_LANG = "Detect language"
77 |
78 |
79 | # UNCOMMENT TO USE WHISPER
80 | warnings.filterwarnings("ignore")
81 | WHISPER_MODEL = whisper.load_model("tiny")
82 | print("WHISPER_MODEL", WHISPER_MODEL)
83 |
84 |
85 | # UNCOMMENT TO USE WHISPER
86 | def transcribe(aud_inp, whisper_lang):
87 | if aud_inp is None:
88 | return ""
89 | aud = whisper.load_audio(aud_inp)
90 | aud = whisper.pad_or_trim(aud)
91 | mel = whisper.log_mel_spectrogram(aud).to(WHISPER_MODEL.device)
92 | _, probs = WHISPER_MODEL.detect_language(mel)
93 | options = whisper.DecodingOptions()
94 | if whisper_lang != WHISPER_DETECT_LANG:
95 | whisper_lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
96 | options = whisper.DecodingOptions(language=whisper_lang_code)
97 | result = whisper.decode(WHISPER_MODEL, mel, options)
98 | print("result.text", result.text)
99 | result_text = ""
100 | if result and result.text:
101 | result_text = result.text
102 | return result_text
103 |
104 |
105 | # Temporarily address Wolfram Alpha SSL certificate issue
106 | ssl._create_default_https_context = ssl._create_unverified_context
107 |
108 |
109 | # TEMPORARY FOR TESTING
110 | def transcribe_dummy(aud_inp_tb, whisper_lang):
111 | if aud_inp_tb is None:
112 | return ""
113 | # aud = whisper.load_audio(aud_inp)
114 | # aud = whisper.pad_or_trim(aud)
115 | # mel = whisper.log_mel_spectrogram(aud).to(WHISPER_MODEL.device)
116 | # _, probs = WHISPER_MODEL.detect_language(mel)
117 | # options = whisper.DecodingOptions()
118 | # options = whisper.DecodingOptions(language="ja")
119 | # result = whisper.decode(WHISPER_MODEL, mel, options)
120 | result_text = "Whisper will detect language"
121 | if whisper_lang != WHISPER_DETECT_LANG:
122 | whisper_lang_code = POLLY_VOICE_DATA.get_whisper_lang_code(whisper_lang)
123 | result_text = f"Whisper will use lang code: {whisper_lang_code}"
124 | print("result_text", result_text)
125 | return aud_inp_tb
126 |
127 |
128 | # Pertains to Express-inator functionality
129 | def transform_text(desc, express_chain, num_words, formality,
130 | anticipation_level, joy_level, trust_level,
131 | fear_level, surprise_level, sadness_level, disgust_level, anger_level,
132 | lang_level, translate_to, literary_style):
133 | num_words_prompt = ""
134 | if num_words and int(num_words) != 0:
135 | num_words_prompt = "using up to " + str(num_words) + " words, "
136 |
137 | # Change some arguments to lower case
138 | formality = formality.lower()
139 | anticipation_level = anticipation_level.lower()
140 | joy_level = joy_level.lower()
141 | trust_level = trust_level.lower()
142 | fear_level = fear_level.lower()
143 | surprise_level = surprise_level.lower()
144 | sadness_level = sadness_level.lower()
145 | disgust_level = disgust_level.lower()
146 | anger_level = anger_level.lower()
147 |
148 | formality_str = ""
149 | if formality != "n/a":
150 | formality_str = "in a " + formality + " manner, "
151 |
152 | # put all emotions into a list
153 | emotions = []
154 | if anticipation_level != "n/a":
155 | emotions.append(anticipation_level)
156 | if joy_level != "n/a":
157 | emotions.append(joy_level)
158 | if trust_level != "n/a":
159 | emotions.append(trust_level)
160 | if fear_level != "n/a":
161 | emotions.append(fear_level)
162 | if surprise_level != "n/a":
163 | emotions.append(surprise_level)
164 | if sadness_level != "n/a":
165 | emotions.append(sadness_level)
166 | if disgust_level != "n/a":
167 | emotions.append(disgust_level)
168 | if anger_level != "n/a":
169 | emotions.append(anger_level)
170 |
171 | emotions_str = ""
172 | if len(emotions) > 0:
173 | if len(emotions) == 1:
174 | emotions_str = "with emotion of " + emotions[0] + ", "
175 | else:
176 | emotions_str = "with emotions of " + ", ".join(emotions[:-1]) + " and " + emotions[-1] + ", "
177 |
178 | lang_level_str = ""
179 | if lang_level != LANG_LEVEL_DEFAULT:
180 | lang_level_str = "at a level that a person in " + lang_level + " can easily comprehend, " if translate_to == TRANSLATE_TO_DEFAULT else ""
181 |
182 | translate_to_str = ""
183 | if translate_to != TRANSLATE_TO_DEFAULT:
184 | translate_to_str = "translated to " + translate_to + (
185 | "" if lang_level == TRANSLATE_TO_DEFAULT else " at a level that a person in " + lang_level + " can easily comprehend") + ", "
186 |
187 | literary_style_str = ""
188 | if literary_style != LITERARY_STYLE_DEFAULT:
189 | if literary_style == "Prose":
190 | literary_style_str = "as prose, "
191 | if literary_style == "Story":
192 | literary_style_str = "as a story, "
193 | elif literary_style == "Summary":
194 | literary_style_str = "as a summary, "
195 | elif literary_style == "Outline":
196 | literary_style_str = "as an outline numbers and lower case letters, "
197 | elif literary_style == "Bullets":
198 | literary_style_str = "as bullet points using bullets, "
199 | elif literary_style == "Poetry":
200 | literary_style_str = "as a poem, "
201 | elif literary_style == "Haiku":
202 | literary_style_str = "as a haiku, "
203 | elif literary_style == "Limerick":
204 | literary_style_str = "as a limerick, "
205 | elif literary_style == "Rap":
206 | literary_style_str = "as a rap, "
207 | elif literary_style == "Joke":
208 | literary_style_str = "as a very funny joke with a setup and punchline, "
209 | elif literary_style == "Knock-knock":
210 | literary_style_str = "as a very funny knock-knock joke, "
211 | elif literary_style == "FAQ":
212 | literary_style_str = "as a FAQ with several questions and answers, "
213 |
214 | formatted_prompt = PROMPT_TEMPLATE.format(
215 | original_words=desc,
216 | num_words=num_words_prompt,
217 | formality=formality_str,
218 | emotions=emotions_str,
219 | lang_level=lang_level_str,
220 | translate_to=translate_to_str,
221 | literary_style=literary_style_str
222 | )
223 |
224 | trans_instr = num_words_prompt + formality_str + emotions_str + lang_level_str + translate_to_str + literary_style_str
225 | if express_chain and len(trans_instr.strip()) > 0:
226 | generated_text = express_chain.run(
227 | {'original_words': desc, 'num_words': num_words_prompt, 'formality': formality_str,
228 | 'emotions': emotions_str, 'lang_level': lang_level_str, 'translate_to': translate_to_str,
229 | 'literary_style': literary_style_str}).strip()
230 | else:
231 | print("Not transforming text")
232 | generated_text = desc
233 |
234 | # Replace all newlines with double newlines in generated_text
235 | generated_text = generated_text.replace("\n", "\n\n")
236 |
237 | prompt_plus_generated = "GPT prompt: " + formatted_prompt + "\n\n" + generated_text
238 |
239 | print("\n==== date/time: " + str(datetime.datetime.now() - datetime.timedelta(hours=5)) + " ====")
240 | print("prompt_plus_generated: " + prompt_plus_generated)
241 |
242 | return generated_text
243 |
244 |
245 | def load_chain(tools_list, llm):
246 | chain = None
247 | express_chain = None
248 | memory = None
249 | if llm:
250 | print("\ntools_list", tools_list)
251 | tool_names = tools_list
252 | tools = load_tools(tool_names, llm=llm, news_api_key=news_api_key, tmdb_bearer_token=tmdb_bearer_token)
253 |
254 | memory = ConversationBufferMemory(memory_key="chat_history")
255 |
256 | chain = initialize_agent(tools, llm, agent="conversational-react-description", verbose=True, memory=memory)
257 | express_chain = LLMChain(llm=llm, prompt=PROMPT_TEMPLATE, verbose=True)
258 | return chain, express_chain, memory
259 |
260 |
261 | def set_openai_api_key(api_key):
262 | """Set the api key and return chain.
263 | If no api_key, then None is returned.
264 | """
265 | if api_key and api_key.startswith("sk-") and len(api_key) > 50:
266 | os.environ["OPENAI_API_KEY"] = api_key
267 | print("\n\n ++++++++++++++ Setting OpenAI API key ++++++++++++++ \n\n")
268 | print(str(datetime.datetime.now()) + ": Before OpenAI, OPENAI_API_KEY length: " + str(
269 | len(os.environ["OPENAI_API_KEY"])))
270 | llm = OpenAI(temperature=0, max_tokens=MAX_TOKENS)
271 | print(str(datetime.datetime.now()) + ": After OpenAI, OPENAI_API_KEY length: " + str(
272 | len(os.environ["OPENAI_API_KEY"])))
273 | chain, express_chain, memory = load_chain(TOOLS_DEFAULT_LIST, llm)
274 |
275 | # Pertains to question answering functionality
276 | embeddings = OpenAIEmbeddings()
277 | qa_chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
278 |
279 | print(str(datetime.datetime.now()) + ": After load_chain, OPENAI_API_KEY length: " + str(
280 | len(os.environ["OPENAI_API_KEY"])))
281 | os.environ["OPENAI_API_KEY"] = ""
282 | return chain, express_chain, llm, embeddings, qa_chain, memory
283 | return None, None, None, None, None, None
284 |
285 |
286 | def run_chain(chain, inp, capture_hidden_text):
287 | output = ""
288 | hidden_text = None
289 | if capture_hidden_text:
290 | error_msg = None
291 | tmp = sys.stdout
292 | hidden_text_io = StringIO()
293 | sys.stdout = hidden_text_io
294 |
295 | try:
296 | output = chain.run(input=inp)
297 | except AuthenticationError as ae:
298 | error_msg = AUTH_ERR_MSG + str(datetime.datetime.now()) + ". " + str(ae)
299 | print("error_msg", error_msg)
300 | except RateLimitError as rle:
301 | error_msg = "\n\nRateLimitError: " + str(rle)
302 | except ValueError as ve:
303 | error_msg = "\n\nValueError: " + str(ve)
304 | except InvalidRequestError as ire:
305 | error_msg = "\n\nInvalidRequestError: " + str(ire)
306 | except Exception as e:
307 | error_msg = "\n\n" + BUG_FOUND_MSG + ":\n\n" + str(e)
308 |
309 | sys.stdout = tmp
310 | hidden_text = hidden_text_io.getvalue()
311 |
312 | # remove escape characters from hidden_text
313 | hidden_text = re.sub(r'\x1b[^m]*m', '', hidden_text)
314 |
315 | # remove "Entering new AgentExecutor chain..." from hidden_text
316 | hidden_text = re.sub(r"Entering new AgentExecutor chain...\n", "", hidden_text)
317 |
318 | # remove "Finished chain." from hidden_text
319 | hidden_text = re.sub(r"Finished chain.", "", hidden_text)
320 |
321 | # Add newline after "Thought:" "Action:" "Observation:" "Input:" and "AI:"
322 | hidden_text = re.sub(r"Thought:", "\n\nThought:", hidden_text)
323 | hidden_text = re.sub(r"Action:", "\n\nAction:", hidden_text)
324 | hidden_text = re.sub(r"Observation:", "\n\nObservation:", hidden_text)
325 | hidden_text = re.sub(r"Input:", "\n\nInput:", hidden_text)
326 | hidden_text = re.sub(r"AI:", "\n\nAI:", hidden_text)
327 |
328 | if error_msg:
329 | hidden_text += error_msg
330 |
331 | print("hidden_text: ", hidden_text)
332 | else:
333 | try:
334 | output = chain.run(input=inp)
335 | except AuthenticationError as ae:
336 | output = AUTH_ERR_MSG + str(datetime.datetime.now()) + ". " + str(ae)
337 | print("output", output)
338 | except RateLimitError as rle:
339 | output = "\n\nRateLimitError: " + str(rle)
340 | except ValueError as ve:
341 | output = "\n\nValueError: " + str(ve)
342 | except InvalidRequestError as ire:
343 | output = "\n\nInvalidRequestError: " + str(ire)
344 | except Exception as e:
345 | output = "\n\n" + BUG_FOUND_MSG + ":\n\n" + str(e)
346 |
347 | return output, hidden_text
348 |
349 |
350 | def reset_memory(history, memory):
351 | memory.clear()
352 | history = []
353 | return history, history, memory
354 |
355 |
356 | class ChatWrapper:
357 |
358 | def __init__(self):
359 | self.lock = Lock()
360 |
361 | def __call__(
362 | self, api_key: str, inp: str, history: Optional[Tuple[str, str]], chain: Optional[ConversationChain],
363 | trace_chain: bool, speak_text: bool, talking_head: bool, monologue: bool, express_chain: Optional[LLMChain],
364 | num_words, formality, anticipation_level, joy_level, trust_level,
365 | fear_level, surprise_level, sadness_level, disgust_level, anger_level,
366 | lang_level, translate_to, literary_style, qa_chain, docsearch, use_embeddings
367 | ):
368 | """Execute the chat functionality."""
369 | self.lock.acquire()
370 | try:
371 | print("\n==== date/time: " + str(datetime.datetime.now()) + " ====")
372 | print("inp: " + inp)
373 | print("trace_chain: ", trace_chain)
374 | print("speak_text: ", speak_text)
375 | print("talking_head: ", talking_head)
376 | print("monologue: ", monologue)
377 | history = history or []
378 | # If chain is None, that is because no API key was provided.
379 | output = "Please paste your OpenAI key from openai.com to use this app. " + str(datetime.datetime.now())
380 | hidden_text = output
381 |
382 | if chain:
383 | # Set OpenAI key
384 | import openai
385 | openai.api_key = api_key
386 | if not monologue:
387 | if use_embeddings:
388 | if inp and inp.strip() != "":
389 | if docsearch:
390 | docs = docsearch.similarity_search(inp)
391 | output = str(qa_chain.run(input_documents=docs, question=inp))
392 | else:
393 | output, hidden_text = "Please supply some text in the the Embeddings tab.", None
394 | else:
395 | output, hidden_text = "What's on your mind?", None
396 | else:
397 | output, hidden_text = run_chain(chain, inp, capture_hidden_text=trace_chain)
398 | else:
399 | output, hidden_text = inp, None
400 |
401 | output = transform_text(output, express_chain, num_words, formality, anticipation_level, joy_level,
402 | trust_level,
403 | fear_level, surprise_level, sadness_level, disgust_level, anger_level,
404 | lang_level, translate_to, literary_style)
405 |
406 | text_to_display = output
407 | if trace_chain:
408 | text_to_display = hidden_text + "\n\n" + output
409 | history.append((inp, text_to_display))
410 |
411 | html_video, temp_file, html_audio, temp_aud_file = None, None, None, None
412 | if speak_text:
413 | if talking_head:
414 | if len(output) <= MAX_TALKING_HEAD_TEXT_LENGTH:
415 | html_video, temp_file = do_html_video_speak(output, translate_to)
416 | else:
417 | temp_file = LOOPING_TALKING_HEAD
418 | html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
419 | html_audio, temp_aud_file = do_html_audio_speak(output, translate_to)
420 | else:
421 | html_audio, temp_aud_file = do_html_audio_speak(output, translate_to)
422 | else:
423 | if talking_head:
424 | temp_file = LOOPING_TALKING_HEAD
425 | html_video = create_html_video(temp_file, TALKING_HEAD_WIDTH)
426 | else:
427 | # html_audio, temp_aud_file = do_html_audio_speak(output, translate_to)
428 | # html_video = create_html_video(temp_file, "128")
429 | pass
430 |
431 | except Exception as e:
432 | raise e
433 | finally:
434 | self.lock.release()
435 | return history, history, html_video, temp_file, html_audio, temp_aud_file, ""
436 | # return history, history, html_audio, temp_aud_file, ""
437 |
438 |
439 | chat = ChatWrapper()
440 |
441 |
442 | def do_html_audio_speak(words_to_speak, polly_language):
443 | polly_client = boto3.Session(
444 | aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
445 | aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
446 | region_name=os.environ["AWS_DEFAULT_REGION"]
447 | ).client('polly')
448 |
449 | # voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(polly_language, "Female")
450 | voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(polly_language, "Male")
451 | if not voice_id:
452 | # voice_id = "Joanna"
453 | voice_id = "Matthew"
454 | language_code = "en-US"
455 | engine = NEURAL_ENGINE
456 | response = polly_client.synthesize_speech(
457 | Text=words_to_speak,
458 | OutputFormat='mp3',
459 | VoiceId=voice_id,
460 | LanguageCode=language_code,
461 | Engine=engine
462 | )
463 |
464 | html_audio = '<pre>no audio</pre>'
465 |
466 | # Save the audio stream returned by Amazon Polly on Lambda's temp directory
467 | if "AudioStream" in response:
468 | with closing(response["AudioStream"]) as stream:
469 | # output = os.path.join("/tmp/", "speech.mp3")
470 |
471 | try:
472 | with open('audios/tempfile.mp3', 'wb') as f:
473 | f.write(stream.read())
474 | temp_aud_file = gr.File("audios/tempfile.mp3")
475 | temp_aud_file_url = "/file=" + temp_aud_file.value['name']
476 | html_audio = f'<audio autoplay><source src={temp_aud_file_url} type="audio/mp3"></audio>'
477 | except IOError as error:
478 | # Could not write to file, exit gracefully
479 | print(error)
480 | return None, None
481 | else:
482 | # The response didn't contain audio data, exit gracefully
483 | print("Could not stream audio")
484 | return None, None
485 |
486 | return html_audio, "audios/tempfile.mp3"
487 |
488 |
489 | def create_html_video(file_name, width):
490 | temp_file_url = "/file=" + tmp_file.value['name']
491 | html_video = f'<video width={width} height={width} autoplay muted loop><source src={temp_file_url} type="video/mp4"></video>'
492 | return html_video
493 |
494 |
495 | def do_html_video_speak(words_to_speak, azure_language):
496 | azure_voice = AZURE_VOICE_DATA.get_voice(azure_language, "Male")
497 | if not azure_voice:
498 | azure_voice = "en-US-ChristopherNeural"
499 |
500 | headers = {"Authorization": f"Bearer {os.environ['EXHUMAN_API_KEY']}"}
501 | body = {
502 | 'bot_name': 'Masahiro',
503 | 'bot_response': words_to_speak,
504 | 'azure_voice': azure_voice,
505 | 'azure_style': 'friendly',
506 | 'animation_pipeline': 'high_speed',
507 | }
508 | api_endpoint = "https://api.exh.ai/animations/v1/generate_lipsync"
509 | res = requests.post(api_endpoint, json=body, headers=headers)
510 | print("res.status_code: ", res.status_code)
511 |
512 | html_video = '<pre>no video</pre>'
513 | if isinstance(res.content, bytes):
514 | response_stream = io.BytesIO(res.content)
515 | print("len(res.content): ", len(res.content))
516 |
517 | with open('videos/tempfile.mp4', 'wb') as f:
518 | f.write(response_stream.read())
519 | temp_file = gr.File("videos/tempfile.mp4")
520 | temp_file_url = "/file=" + temp_file.value['name']
521 | html_video = f'<video width={TALKING_HEAD_WIDTH} height={TALKING_HEAD_WIDTH} autoplay><source src={temp_file_url} type="video/mp4"></video>'
522 | else:
523 | print('video url unknown')
524 | return html_video, "videos/tempfile.mp4"
525 |
526 |
527 | def update_selected_tools(widget, state, llm):
528 | if widget:
529 | state = widget
530 | chain, express_chain, memory = load_chain(state, llm)
531 | return state, llm, chain, express_chain
532 |
533 |
534 | def update_talking_head(widget, state):
535 | if widget:
536 | state = widget
537 |
538 | video_html_talking_head = create_html_video(LOOPING_TALKING_HEAD, TALKING_HEAD_WIDTH)
539 | return state, video_html_talking_head
540 | else:
541 | # return state, create_html_video(LOOPING_TALKING_HEAD, "32")
542 | return None, ""
543 |
544 |
545 | def update_foo(widget, state):
546 | if widget:
547 | state = widget
548 | return state
549 |
550 |
551 | # Pertains to question answering functionality
552 | def update_embeddings(embeddings_text, embeddings, qa_chain):
553 | if embeddings_text:
554 | text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
555 | texts = text_splitter.split_text(embeddings_text)
556 |
557 | docsearch = FAISS.from_texts(texts, embeddings)
558 | print("Embeddings updated")
559 | return docsearch
560 |
561 |
562 | # Pertains to question answering functionality
563 | def update_use_embeddings(widget, state):
564 | if widget:
565 | state = widget
566 | return state
567 |
568 |
569 | with gr.Blocks(css=".gradio-container {background-color: lightgray}") as block:
570 | llm_state = gr.State()
571 | history_state = gr.State()
572 | chain_state = gr.State()
573 | express_chain_state = gr.State()
574 | tools_list_state = gr.State(TOOLS_DEFAULT_LIST)
575 | trace_chain_state = gr.State(False)
576 | speak_text_state = gr.State(False)
577 | talking_head_state = gr.State(True)
578 | monologue_state = gr.State(False)  # Takes the input and repeats it back to the user, optionally transforming it.
579 | memory_state = gr.State()
580 |
581 | # Pertains to Express-inator functionality
582 | num_words_state = gr.State(NUM_WORDS_DEFAULT)
583 | formality_state = gr.State(FORMALITY_DEFAULT)
584 | anticipation_level_state = gr.State(EMOTION_DEFAULT)
585 | joy_level_state = gr.State(EMOTION_DEFAULT)
586 | trust_level_state = gr.State(EMOTION_DEFAULT)
587 | fear_level_state = gr.State(EMOTION_DEFAULT)
588 | surprise_level_state = gr.State(EMOTION_DEFAULT)
589 | sadness_level_state = gr.State(EMOTION_DEFAULT)
590 | disgust_level_state = gr.State(EMOTION_DEFAULT)
591 | anger_level_state = gr.State(EMOTION_DEFAULT)
592 | lang_level_state = gr.State(LANG_LEVEL_DEFAULT)
593 | translate_to_state = gr.State(TRANSLATE_TO_DEFAULT)
594 | literary_style_state = gr.State(LITERARY_STYLE_DEFAULT)
595 |
596 | # Pertains to WHISPER functionality
597 | whisper_lang_state = gr.State(WHISPER_DETECT_LANG)
598 |
599 | # Pertains to question answering functionality
600 | embeddings_state = gr.State()
601 | qa_chain_state = gr.State()
602 | docsearch_state = gr.State()
603 | use_embeddings_state = gr.State(False)
604 |
605 | with gr.Tab("Chat"):
606 | with gr.Row():
607 | with gr.Column():
608 | gr.HTML(
609 | """
[app.py lines 610-828 (the chat UI layout and embedded HTML markup) were lost in extraction; only the tail of the footer text survives:]
This application, developed by James L. Weaver,
829 | demonstrates a conversational agent implemented with OpenAI GPT-3.5 and LangChain.
830 | When necessary, it leverages tools for complex math, searching the internet, and accessing news and weather.
831 | Uses talking heads from Ex-Human.
832 | For faster inference without waiting in queue, you may duplicate the space.
833 | """)
834 |
835 | gr.HTML("""
836 |
[app.py lines 837-843 (embedded HTML markup) were lost in extraction]
844 | """)
845 |
846 | gr.HTML("""