├── README.md ├── screenshots ├── ai1.PNG ├── ai1.gif ├── ai2.PNG └── ai2.gif ├── sox-dll ├── libmad.dll └── libmp3lame.dll └── src └── ai.py /README.md: -------------------------------------------------------------------------------- 1 | # Python-Voice-Assistant 2 | 3 | A Python-based Voice Assistant like Siri, using [SpeechRecognition](https://pypi.org/project/SpeechRecognition/), [Baidu Speech](https://cloud.baidu.com/doc/SPEECH/index.html), [Turing](https://www.kancloud.cn/turing/www-tuling123-com/718218) and [PyAudio](http://people.csail.mit.edu/hubert/pyaudio/). 4 | 5 | Nothing special, just for fun. 6 | 7 | ![ai](https://github.com/rollingstarky/Python-Voice-Assistant/blob/master/screenshots/ai1.PNG?raw=true) 8 | 9 | ### Requirements 10 | * [PyAudio](https://pypi.org/project/PyAudio/) 11 | * [SpeechRecognition](https://pypi.org/project/SpeechRecognition/) 12 | * [baidu-aip](https://pypi.org/project/baidu-aip/) 13 | * [requests](https://pypi.org/project/requests/) 14 | * [SoX](http://sox.sourceforge.net/) with mp3 support -------------------------------------------------------------------------------- /screenshots/ai1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rollingstarky/Python-Voice-Assistant/31f45540502238f61d31bec9fe488400f6ec0d3c/screenshots/ai1.PNG -------------------------------------------------------------------------------- /screenshots/ai1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rollingstarky/Python-Voice-Assistant/31f45540502238f61d31bec9fe488400f6ec0d3c/screenshots/ai1.gif -------------------------------------------------------------------------------- /screenshots/ai2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rollingstarky/Python-Voice-Assistant/31f45540502238f61d31bec9fe488400f6ec0d3c/screenshots/ai2.PNG 
# Binary assets listed in the repository dump just before this module:
#   /screenshots/ai2.gif:
#     https://raw.githubusercontent.com/rollingstarky/Python-Voice-Assistant/31f45540502238f61d31bec9fe488400f6ec0d3c/screenshots/ai2.gif
#   /sox-dll/libmad.dll:
#     https://raw.githubusercontent.com/rollingstarky/Python-Voice-Assistant/31f45540502238f61d31bec9fe488400f6ec0d3c/sox-dll/libmad.dll
#   /sox-dll/libmp3lame.dll:
#     https://raw.githubusercontent.com/rollingstarky/Python-Voice-Assistant/31f45540502238f61d31bec9fe488400f6ec0d3c/sox-dll/libmp3lame.dll
#
# /src/ai.py
"""Voice assistant: record speech, transcribe it, ask a chatbot, speak the reply.

Each round of the main loop runs rec() -> listen() -> robot() -> speak()
-> play(). Intermediate audio is exchanged through files in the current
working directory (recording.wav, audio.mp3, audio.wav).
"""
import json
import os
import subprocess
import time
import wave

import pyaudio
import requests
import speech_recognition as sr
from aip import AipSpeech

# Baidu Speech API, replace with your personal key
APP_ID = 'Your AppID'
API_KEY = 'Your API Key'
SECRET_KEY = 'Your Secret Key'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


# Turing API, replace with your personal key
TURING_KEY = "Your appkey"
URL = "http://openapi.tuling123.com/openapi/api/v2"
HEADERS = {'Content-Type': 'application/json;charset=UTF-8'}

# Seconds to wait for the chatbot before giving up on the current round;
# without a timeout a stalled connection would freeze the assistant forever.
REQUEST_TIMEOUT = 10

# Files used to hand audio from one stage to the next.
RECORDING_FILE = "recording.wav"
AUDIO_MP3_FILE = "audio.mp3"
AUDIO_WAV_FILE = "audio.wav"


# Use SpeechRecognition to record
def rec(rate=16000):
    """Record one utterance from the microphone into ``recording.wav``.

    Args:
        rate: Sample rate passed to ``sr.Microphone``. Pass the same value
            to ``listen`` so the recognizer is told the real rate.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone(sample_rate=rate) as source:
        print("please say something")
        audio = recognizer.listen(source)

    with open(RECORDING_FILE, "wb") as f:
        f.write(audio.get_wav_data())


# Use Baidu Speech as STT engine
def listen(rate=16000):
    """Transcribe ``recording.wav`` with Baidu Speech and return the text.

    Args:
        rate: Sample rate of the recording; must match the value given to
            ``rec``. Defaults to 16000, the value previously hard-coded.

    Returns:
        The first transcription candidate.

    Raises:
        RuntimeError: If the service response carries no transcription
            (presumably an error reply -- the whole response is included
            in the message so the cause is visible).
    """
    with open(RECORDING_FILE, "rb") as f:
        audio_data = f.read()

    result = client.asr(audio_data, 'wav', rate, {
        'dev_pid': 1536,
    })

    # Do not index blindly: a failed recognition has no usable "result"
    # entry, and a bare KeyError would hide what the service reported.
    candidates = result.get("result") if isinstance(result, dict) else None
    if not candidates:
        raise RuntimeError("speech recognition failed: {!r}".format(result))

    result_text = candidates[0]
    print("you said: " + result_text)
    return result_text


# The Turing chatbot
def robot(text=""):
    """Send ``text`` to the Turing chatbot and return its textual answer.

    Args:
        text: The user's utterance.

    Returns:
        The ``text`` value of the first result in the chatbot response.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status.
        RuntimeError: If the response body does not have the expected
            ``results[0].values.text`` shape.
    """
    data = {
        "reqType": 0,
        "perception": {
            "inputText": {
                "text": text
            },
            "selfInfo": {
                "location": {
                    "city": "杭州",
                    "street": "网商路"
                }
            }
        },
        "userInfo": {
            "apiKey": TURING_KEY,
            "userId": "starky"
        }
    }

    response = requests.post(URL, json=data, headers=HEADERS,
                             timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    try:
        result = response.json()["results"][0]["values"]["text"]
    except (ValueError, KeyError, IndexError, TypeError) as err:
        raise RuntimeError(
            "unexpected chatbot response: " + response.text) from err

    print("the AI said: " + result)
    return result


# Baidu Speech as TTS engine
def speak(text=""):
    """Synthesize ``text`` with Baidu Speech into ``audio.mp3``.

    Args:
        text: The sentence to synthesize.

    Returns:
        True if audio was written; False if the service returned a dict
        instead of audio data (treated as an error reply, which is
        printed). Callers should skip playback on False, otherwise the
        audio left over from an earlier round would be played.
    """
    result = client.synthesis(text, 'zh', 1, {
        'spd': 4,
        'vol': 5,
        'per': 4,
    })

    if isinstance(result, dict):
        print("speech synthesis failed: {!r}".format(result))
        return False

    with open(AUDIO_MP3_FILE, "wb") as f:
        f.write(result)
    return True


# Pyaudio to play mp3 file
def play():
    """Convert ``audio.mp3`` to WAV with SoX and play it through PyAudio.

    Blocks until playback has finished.

    Raises:
        FileNotFoundError: If the ``sox`` executable cannot be found.
        subprocess.CalledProcessError: If SoX exits with a non-zero status.
    """
    # An argument list avoids the shell, and check=True stops us from
    # playing a stale audio.wav when the conversion itself failed.
    subprocess.run(["sox", AUDIO_MP3_FILE, AUDIO_WAV_FILE], check=True)

    with wave.open(AUDIO_WAV_FILE, "rb") as wf:

        def callback(in_data, frame_count, time_info, status):
            # Feed the output stream straight from the WAV file.
            return (wf.readframes(frame_count), pyaudio.paContinue)

        p = pyaudio.PyAudio()
        try:
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            output=True,
                            stream_callback=callback)
            try:
                stream.start_stream()
                # Playback runs via the callback; poll until it is done.
                while stream.is_active():
                    time.sleep(0.1)
                stream.stop_stream()
            finally:
                stream.close()
        finally:
            # Always release the audio device, even if playback failed.
            p.terminate()


def main():
    """Run the assistant until interrupted with Ctrl+C.

    A failed recognition or chatbot request only skips the current round
    instead of terminating the program.
    """
    try:
        while True:
            rec()
            try:
                request = listen()
                response = robot(request)
            except (RuntimeError, requests.RequestException) as err:
                print("error: " + str(err))
                continue
            if speak(response):
                play()
    except KeyboardInterrupt:
        # Exit quietly instead of dumping a traceback.
        print()


if __name__ == "__main__":
    main()