├── README.md
├── screenshots
    ├── ai1.PNG
    ├── ai1.gif
    ├── ai2.PNG
    └── ai2.gif
├── sox-dll
    ├── libmad.dll
    └── libmp3lame.dll
└── src
    └── ai.py


/README.md:
--------------------------------------------------------------------------------
 1 | # Python-Voice-Assistant
 2 | 
 3 | A Python-based voice assistant like Siri, built using [SpeechRecognition](https://pypi.org/project/SpeechRecognition/), [Baidu Speech](https://cloud.baidu.com/doc/SPEECH/index.html), [Turing](https://www.kancloud.cn/turing/www-tuling123-com/718218) and [PyAudio](http://people.csail.mit.edu/hubert/pyaudio/).
 4 | 
 5 | Nothing special, just for fun.
 6 | 
 7 | ![ai](https://github.com/rollingstarky/Python-Voice-Assistant/blob/master/screenshots/ai1.PNG?raw=true)
 8 | 
 9 | ### Requirements
10 | * [PyAudio](https://pypi.org/project/PyAudio/)
11 | * [SpeechRecognition](https://pypi.org/project/SpeechRecognition/)
12 | * [baidu-aip](https://pypi.org/project/baidu-aip/)
13 | * [SoX](http://sox.sourceforge.net/) with mp3 support


--------------------------------------------------------------------------------
/screenshots/ai1.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rollingstarky/Python-Voice-Assistant/31f45540502238f61d31bec9fe488400f6ec0d3c/screenshots/ai1.PNG


--------------------------------------------------------------------------------
/screenshots/ai1.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rollingstarky/Python-Voice-Assistant/31f45540502238f61d31bec9fe488400f6ec0d3c/screenshots/ai1.gif


--------------------------------------------------------------------------------
/screenshots/ai2.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rollingstarky/Python-Voice-Assistant/31f45540502238f61d31bec9fe488400f6ec0d3c/screenshots/ai2.PNG


--------------------------------------------------------------------------------
/screenshots/ai2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rollingstarky/Python-Voice-Assistant/31f45540502238f61d31bec9fe488400f6ec0d3c/screenshots/ai2.gif


--------------------------------------------------------------------------------
/sox-dll/libmad.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rollingstarky/Python-Voice-Assistant/31f45540502238f61d31bec9fe488400f6ec0d3c/sox-dll/libmad.dll


--------------------------------------------------------------------------------
/sox-dll/libmp3lame.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rollingstarky/Python-Voice-Assistant/31f45540502238f61d31bec9fe488400f6ec0d3c/sox-dll/libmp3lame.dll


--------------------------------------------------------------------------------
/src/ai.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import os
  3 | import pyaudio
  4 | import wave
  5 | 
  6 | import speech_recognition as sr
  7 | from aip import AipSpeech
  8 | 
  9 | import requests
 10 | import json
 11 | 
 12 | # Baidu Speech API, replace with your personal key
 13 | APP_ID = 'Your AppID'
 14 | API_KEY = 'Your API Key'
 15 | SECRET_KEY = 'Your Secret Key'
 16 | 
 17 | client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
 18 | 
 19 | 
 20 | # Turing API, replace with your personal key
 21 | TURING_KEY = "Your appkey"
 22 | URL = "http://openapi.tuling123.com/openapi/api/v2"
 23 | HEADERS = {'Content-Type': 'application/json;charset=UTF-8'}
 24 | 
 25 | 
 26 | # Use SpeechRecognition to record
 27 | def rec(rate=16000):
 28 |     r = sr.Recognizer()
 29 |     with sr.Microphone(sample_rate=rate) as source:
 30 |         print("please say something")
 31 |         audio = r.listen(source)
 32 | 
 33 |     with open("recording.wav", "wb") as f:
 34 |         f.write(audio.get_wav_data())
 35 | 
 36 | 
 37 | # Use Baidu Speech as STT engine
 38 | def listen():
 39 |     with open('recording.wav', 'rb') as f:
 40 |         audio_data = f.read()
 41 | 
 42 |     result = client.asr(audio_data, 'wav', 16000, {
 43 |         'dev_pid': 1536,
 44 |     })
 45 | 
 46 |     result_text = result["result"][0]
 47 | 
 48 |     print("you said: " + result_text)
 49 | 
 50 |     return result_text
 51 | 
 52 | 
 53 | # The Turing chatbot
 54 | def robot(text=""):
 55 |     data = {
 56 |         "reqType": 0,
 57 |         "perception": {
 58 |             "inputText": {
 59 |                 "text": ""
 60 |             },
 61 |             "selfInfo": {
 62 |                 "location": {
 63 |                     "city": "杭州",
 64 |                     "street": "网商路"
 65 |                 }
 66 |             }
 67 |         },
 68 |         "userInfo": {
 69 |             "apiKey": TURING_KEY,
 70 |             "userId": "starky"
 71 |         }
 72 |     }
 73 | 
 74 |     data["perception"]["inputText"]["text"] = text
 75 |     response = requests.request("post", URL, json=data, headers=HEADERS)
 76 |     response_dict = json.loads(response.text)
 77 | 
 78 |     result = response_dict["results"][0]["values"]["text"]
 79 |     print("the AI said: " + result)
 80 |     return result
 81 | 
 82 | 
 83 | # Baidu Speech as TTS engine
 84 | def speak(text=""):
 85 |     result = client.synthesis(text, 'zh', 1, {
 86 |         'spd': 4,
 87 |         'vol': 5,
 88 |         'per': 4,
 89 |     })
 90 | 
 91 |     if not isinstance(result, dict):
 92 |         with open('audio.mp3', 'wb') as f:
 93 |             f.write(result)
 94 | 
 95 | 
 96 | # Pyaudio to play mp3 file
 97 | def play():
 98 |     os.system('sox audio.mp3 audio.wav')
 99 |     wf = wave.open('audio.wav', 'rb')
100 |     p = pyaudio.PyAudio()
101 | 
102 |     def callback(in_data, frame_count, time_info, status):
103 |         data = wf.readframes(frame_count)
104 |         return (data, pyaudio.paContinue)
105 | 
106 |     stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
107 |                     channels=wf.getnchannels(),
108 |                     rate=wf.getframerate(),
109 |                     output=True,
110 |                     stream_callback=callback)
111 | 
112 |     stream.start_stream()
113 | 
114 |     while stream.is_active():
115 |         time.sleep(0.1)
116 | 
117 |     stream.stop_stream()
118 |     stream.close()
119 |     wf.close()
120 | 
121 |     p.terminate()
122 | 
123 | 
# Conversation loop: record, transcribe, ask the chatbot, synthesize the
# answer and play it back, then start over. Runs until the process is
# interrupted or one of the steps raises.
while True:
    rec()
    request = listen()
    response = robot(text=request)
    speak(text=response)
    play()
130 | 


--------------------------------------------------------------------------------