├── sox-dll ├── libmad.dll └── libmp3lame.dll ├── README.md ├── Amber.py └── src └── ai.py /sox-dll/libmad.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinli2/Ai-assistent-Amber/HEAD/sox-dll/libmad.dll -------------------------------------------------------------------------------- /sox-dll/libmp3lame.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinli2/Ai-assistent-Amber/HEAD/sox-dll/libmp3lame.dll -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python-Voice-Assistant-Amber 2 | 3 | A Python-based voice assistant like Siri, using [SpeechRecognition](https://pypi.org/project/SpeechRecognition/), [Baidu Speech](https://cloud.baidu.com/doc/SPEECH/index.html), [Turing](https://www.kancloud.cn/turing/www-tuling123-com/718218), and [PyAudio](http://people.csail.mit.edu/hubert/pyaudio/). 4 | 5 | Nothing special, just for fun. 
# README.md (continued)
#
# ### Requirements
# * [PyAudio](https://pypi.org/project/PyAudio/)
# * [SpeechRecognition](https://pypi.org/project/SpeechRecognition/)
# * [baidu-aip](https://pypi.org/project/baidu-aip/)
# * [SoX](http://sox.sourceforge.net/) with mp3 support
#
# --------------------------------------------------------------------------------
# /Amber.py:
# --------------------------------------------------------------------------------
import json

import requests

# Old version
userid = 'Eliza'
apikey = '#########'

# Seconds to wait for the chatbot API; without a timeout a stalled
# connection would block the prompt forever.
REQUEST_TIMEOUT = 10

# Typing exactly this word ends the conversation.
STOP_WORD = 'bye'


def robot(content):
    """Send one line of user text to the chatbot API and print the reply.

    Args:
        content: The text typed by the user.

    Returns:
        The reply text extracted from the API response (also printed).

    Raises:
        requests.RequestException: On network failure or timeout.
        KeyError, IndexError: If the response does not have the expected
            ``results[0].values.text`` shape.
    """
    # Amber api
    api = r'#########'
    # Request payload: the user's text plus the credentials identifying us.
    data = {
        "perception": {
            "inputText": {
                "text": content,
            },
        },
        "userInfo": {
            "apiKey": apikey,
            "userId": userid,
        },
    }
    # The body is sent as a pre-serialized JSON string, as before.
    response = requests.post(api, data=json.dumps(data),
                             timeout=REQUEST_TIMEOUT)
    robot_res = json.loads(response.content)
    reply = robot_res["results"][0]['values']['text']
    print(reply)
    return reply


def main():
    """Run the interactive chat loop until the user types the stop word.

    The stop word itself is still forwarded to the chatbot before leaving.
    End-of-input or Ctrl-C also ends the loop.
    """
    while True:
        try:
            content = input("talk:")
        except (EOFError, KeyboardInterrupt):
            break
        robot(content)
        if content == STOP_WORD:
            break


if __name__ == "__main__":
    main()

# --------------------------------------------------------------------------------
# /src/ai.py:
# --------------------------------------------------------------------------------
import time
import os
import pyaudio
import wave

import speech_recognition as sr
from aip import AipSpeech

import requests
import json
# Baidu Speech API, replace with your personal key
APP_ID = '#######'
API_KEY = '######'
SECRET_KEY = '######'

client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


# Turing API, replace with your personal key.
# NOTE(review): a real-looking key is committed here. It is kept as the
# fallback so existing setups keep working, but prefer supplying your own
# via the TURING_KEY environment variable and rotating this one.
TURING_KEY = os.environ.get("TURING_KEY", "4687764e6cb14d0ca1b351fb10e4c595")
URL = "http://openapi.tuling123.com/openapi/api/v2"
HEADERS = {'Content-Type': 'application/json;charset=UTF-8'}

# Seconds to wait for the Turing API before giving up on a request.
REQUEST_TIMEOUT = 10


# Use SpeechRecognition to record
def rec(rate=16000):
    """Record one utterance from the microphone into ``recording.wav``.

    Args:
        rate: Sample rate, in Hz, requested from the microphone.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone(sample_rate=rate) as source:
        print("please say something")
        audio = recognizer.listen(source)

    with open("recording.wav", "wb") as f:
        f.write(audio.get_wav_data())


# Use Baidu Speech as STT engine
def listen(rate=16000):
    """Transcribe ``recording.wav`` with Baidu Speech.

    Args:
        rate: Sample rate of the recording, in Hz. It should match the
            rate that was passed to ``rec``.

    Returns:
        The first recognition candidate, or ``""`` when the service
        returned no result (for example on silence or an API error).
    """
    with open('recording.wav', 'rb') as f:
        audio_data = f.read()

    # NOTE(review): dev_pid 1536 is presumably Baidu's Mandarin model;
    # confirm against the Baidu Speech documentation.
    result = client.asr(audio_data, 'wav', rate, {
        'dev_pid': 1536,
    })

    # Error payloads carry no "result" list, so indexing blindly would
    # raise KeyError and take the whole assistant down.
    candidates = result.get("result")
    if not candidates:
        print("speech recognition failed: "
              + str(result.get("err_msg", result)))
        return ""

    result_text = candidates[0]
    print("you said: " + result_text)
    return result_text


# The Turing chatbot
def robot(text=""):
    """Send ``text`` to the Turing chatbot and return its reply.

    Args:
        text: What the user said.

    Returns:
        The reply text from the first entry of the response's ``results``.

    Raises:
        requests.RequestException: On network failure or timeout.
        KeyError, IndexError: If the response lacks the expected
            ``results[0].values.text`` shape.
    """
    data = {
        "reqType": 0,
        "perception": {
            "inputText": {
                "text": text,
            },
            "selfInfo": {
                "location": {
                    "city": "Tucson",
                    "street": "Helen Street",
                },
            },
        },
        "userInfo": {
            "apiKey": TURING_KEY,
            "userId": "starky",
        },
    }

    response = requests.post(URL, json=data, headers=HEADERS,
                             timeout=REQUEST_TIMEOUT)
    response_dict = response.json()

    result = response_dict["results"][0]["values"]["text"]
    print("the AI said: " + result)
    return result


# Baidu Speech as TTS engine
def speak(text=""):
    """Synthesize ``text`` with Baidu Speech into ``audio.mp3``.

    Args:
        text: The sentence to speak.

    Returns:
        True if ``audio.mp3`` was written, False if synthesis failed.
        On failure the file is left untouched, so callers should not play it.
    """
    # NOTE(review): 'spd', 'vol' and 'per' are presumably speed, volume and
    # voice persona; confirm against the Baidu Speech documentation.
    result = client.synthesis(text, 'zh', 1, {
        'spd': 4,
        'vol': 5,
        'per': 4,
    })

    # A dict return value is the failure case; anything else is audio data.
    if isinstance(result, dict):
        print("speech synthesis failed: "
              + str(result.get("err_msg", result)))
        return False

    with open('audio.mp3', 'wb') as f:
        f.write(result)
    return True


# Pyaudio to play mp3 file
def play():
    """Convert ``audio.mp3`` to ``audio.wav`` with sox and play it.

    Blocks until playback finishes. If sox fails, nothing is played, so a
    stale ``audio.wav`` from an earlier turn is not replayed.
    """
    if os.system('sox audio.mp3 audio.wav') != 0:
        print("sox failed to convert audio.mp3; nothing to play")
        return

    with wave.open('audio.wav', 'rb') as wf:
        p = pyaudio.PyAudio()

        def callback(in_data, frame_count, time_info, status):
            # Feed the next chunk of the wave file to the output stream.
            data = wf.readframes(frame_count)
            return (data, pyaudio.paContinue)

        try:
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            output=True,
                            stream_callback=callback)
            try:
                stream.start_stream()
                # Playback runs through the callback; poll until it ends.
                while stream.is_active():
                    time.sleep(0.1)
                stream.stop_stream()
            finally:
                stream.close()
        finally:
            # Always release the audio backend, even if opening failed.
            p.terminate()


def main():
    """Run the record / recognize / reply / speak loop until Ctrl-C."""
    try:
        while True:
            rec()
            request = listen()
            if not request:
                # Nothing was recognized; ask the user to speak again.
                continue
            response = robot(request)
            # Only play when fresh audio was actually produced.
            if speak(response):
                play()
    except KeyboardInterrupt:
        print("bye")


if __name__ == "__main__":
    main()