├── .gitignore ├── LICENSE ├── README.md ├── audio_player.py ├── audio_recorder.py ├── gpt_service_api.py ├── gva_main.py ├── paddle_service_api.py ├── requirements.txt ├── resource ├── ppn │ ├── hello-go_en_linux_v2_1_0.ppn │ ├── hello-model_en_raspberry-pi_v2_1_0.ppn │ ├── hello-model_en_windows_v2_1_0.ppn │ └── hello-siri_en_windows_v2_1_0.ppn ├── settings.json └── wav │ ├── wait │ └── sikaoyixia.wav │ └── wakeup_audio │ ├── gansha.wav │ └── zaine.wav ├── utils.py └── wakeup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | settings_me.json 132 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 csensor 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | https://user-images.githubusercontent.com/7485678/219089373-18906d4f-a5ab-4a92-936b-ee8126028525.mov 2 | # GPT_Voice_Assistant 3 | 基于openAI GPT实现的智能交互语音助手,产品主要涉及四个任务:语音唤醒,语音识别,GPT对话,文本转语音 4 | 5 | # 实现的功能 6 | 通过唤醒词唤醒后,进行语音对话 7 | 8 | # 可玩性 9 | 1. 可在PC(笔记本、台式机)实现语音助手功能 10 | 2. 运行在树莓派(Raspberry Pi)增加麦克风+喇叭可实现完整语音助手功能 11 | 12 | # 安装方法 13 | 1. `pip install -r requirements.txt` 14 | 15 | # 配置 16 | 按需修改 `resource/settings.json` 中的配置 17 | ``` 18 | { 19 | "porcupine": { 20 | "access_key": "", // porcupine的ak 21 | "keywords": [], // 可使用 porcupine 内置的唤醒词,否则使用keyword_paths 22 | "keyword_paths": ["ppn/hello-model_en_windows_v2_1_0.ppn"], // 项目下自带了唤醒词模型,在resource/ppn 目录下可供选择 23 | "sensitivities": [0.5] // 唤醒词灵敏度(0~1),值越高recall越高(漏检更少,但误唤醒更多),反之precision越高 24 | }, 25 | "nlp_service": { 26 | "paddlepaddle": { 27 | "asr_url": "http://x.x.x.x:8090/paddlespeech/asr", // 自己搭建的paddlespeech的asr服务, x 换成ip地址 28 | "tts_url": "http://x.x.x.x:8090/paddlespeech/tts" // 自己搭建的paddlespeech的tts服务, x 换成ip地址 29 | } 30 | }, 31 | "openai": { 32 | "api_key": "xxxxxxxxxxxx" // openai 上的api key 33 | } 34 | } 35 | ``` 36 | 37 | # 运行方法 38 | 1. `python3 gva_main.py` 39 | 40 | # 常见问题 41 | > 如何生成自己的唤醒词? 
def play_audio(wave_input_path):
    """Play a WAV file on the default output device, blocking until it ends.

    Args:
        wave_input_path: Path of the WAV file to play.

    Raises:
        OSError / wave.Error: If the file cannot be opened or parsed.
    """
    # Open the file first so a bad path fails before the audio backend is
    # initialised; the context manager guarantees the file handle is closed.
    with wave.open(wave_input_path, 'rb') as wf:
        p = pyaudio.PyAudio()
        try:
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            output=True)
            try:
                data = wf.readframes(CHUNK)
                while len(data) > 0:
                    stream.write(data)
                    data = wf.readframes(CHUNK)
                stream.stop_stream()
            finally:
                # Release the stream even when playback is interrupted.
                stream.close()
        finally:
            p.terminate()


def _play_random_clip(sub_dir):
    """Play one randomly chosen ``.wav`` file from ``resource/<sub_dir>``.

    Non-WAV entries in the directory are ignored. These prompts are only
    audible cues, so an empty directory is skipped instead of raising.
    """
    clip_dir = os.path.join(resource_path, sub_dir)
    clips = [name for name in os.listdir(clip_dir)
             if name.lower().endswith('.wav')]
    if not clips:
        return
    play_audio(os.path.join(clip_dir, random.choice(clips)))


def play_wakeup():
    """Play a random acknowledgement clip from ``resource/wav/wakeup_audio``."""
    _play_random_clip('wav/wakeup_audio')


def play_waiting():
    """Play a random "please wait" clip from ``resource/wav/wait``."""
    _play_random_clip('wav/wait')
def record_audio(wave_out_path, record_second):
    """Record from the default input device into a WAV file.

    The stream is opened with the module-level FORMAT, CHANNELS, RATE and
    CHUNK settings, and the same parameters are written to the WAV header.

    Args:
        wave_out_path: Path of the WAV file to create (overwritten if present).
        record_second: Recording length in seconds. Only whole chunks are
            captured, so the result can be up to one chunk shorter.
    """
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            # The context manager finalises the WAV header and closes the file
            # even if reading from the device fails part-way through.
            with wave.open(wave_out_path, 'wb') as wf:
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(p.get_sample_size(FORMAT))
                wf.setframerate(RATE)

                for _ in range(int(RATE * record_second / CHUNK)):
                    wf.writeframes(stream.read(CHUNK))
            stream.stop_stream()
        finally:
            stream.close()
    finally:
        # Always hand the audio backend back, success or failure.
        p.terminate()
class Pipeline():
    """Voice-assistant loop: wake word -> record -> ASR -> GPT -> TTS -> play."""

    def __init__(self, settings):
        """Build the wake-word, speech and GPT services from ``settings``.

        Args:
            settings: Dict with the ``porcupine``, ``nlp_service`` and
                ``openai`` sections.
        """
        self.wakeup_handler = WakeupHandler(settings['porcupine'])
        self.nlp_service = NLPService(settings['nlp_service'])
        self.gpt_bot = GPTBot(settings['openai'])

    def run(self):
        """Run forever, serving one conversation round per detected wake word."""
        logger.info('running...')
        while True:
            keyword = self.wakeup_handler.run_detect_wakeup_word()
            if not keyword:
                continue

            temp_audio_path = 'temp.wav'
            temp_response_audio_path = 'temp_res.wav'
            try:
                self._handle_request(temp_audio_path,
                                     temp_response_audio_path)
            finally:
                # 7. Clean up temporary files. Either file may be missing
                # when a step failed or was skipped, so absence is expected.
                for path in (temp_audio_path, temp_response_audio_path):
                    try:
                        os.remove(path)
                    except FileNotFoundError:
                        pass
            logger.info('listening...')

    def _handle_request(self, temp_audio_path, temp_response_audio_path):
        """Serve one round; stop early when a step yields no usable result."""
        # 1. Acknowledge the wake word
        logger.info('wakeup & listening...')
        play_wakeup()

        # 2. Record your voice
        record_audio(temp_audio_path, 5)

        # 3. Call Automatic Speech Recognition api
        query_text = self.nlp_service.get_asr_result(temp_audio_path)
        if not query_text:
            # ASR reports failure with a falsy value; never forward it to GPT.
            logger.info("play not get result")
            return
        logger.info('query text: {}'.format(query_text))

        # 4. Wait for GPT reply
        play_waiting()
        response_text = self.gpt_bot.feed_prompt(query_text)
        logger.info('GPT response: {}'.format(response_text))
        if not response_text:
            logger.info('empty GPT response, nothing to speak')
            return

        # 5. Call Text to speech api
        logger.info('gen speech...')
        if not self.nlp_service.get_tts_result(
                response_text, temp_response_audio_path):
            # TTS returns False on failure and the audio file is not written.
            logger.info('text to speech failed, skip playback')
            return

        # 6. Play the speech
        logger.info('play response...')
        play_audio(temp_response_audio_path)
wav_base64 = res['result']['audio'] 60 | audio_data_byte = base64.b64decode(wav_base64) 61 | samples, sample_rate = soundfile.read( 62 | io.BytesIO(audio_data_byte), dtype='float32') 63 | soundfile.write(output_path, samples, sample_rate) 64 | return True 65 | except Exception as e: 66 | traceback.print_exc() 67 | return False 68 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csensor/GPT_Voice_Assistant/d6edb9283d542ae97356e7f0013048fe8f1c250e/requirements.txt -------------------------------------------------------------------------------- /resource/ppn/hello-go_en_linux_v2_1_0.ppn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csensor/GPT_Voice_Assistant/d6edb9283d542ae97356e7f0013048fe8f1c250e/resource/ppn/hello-go_en_linux_v2_1_0.ppn -------------------------------------------------------------------------------- /resource/ppn/hello-model_en_raspberry-pi_v2_1_0.ppn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csensor/GPT_Voice_Assistant/d6edb9283d542ae97356e7f0013048fe8f1c250e/resource/ppn/hello-model_en_raspberry-pi_v2_1_0.ppn -------------------------------------------------------------------------------- /resource/ppn/hello-model_en_windows_v2_1_0.ppn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csensor/GPT_Voice_Assistant/d6edb9283d542ae97356e7f0013048fe8f1c250e/resource/ppn/hello-model_en_windows_v2_1_0.ppn -------------------------------------------------------------------------------- /resource/ppn/hello-siri_en_windows_v2_1_0.ppn: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/csensor/GPT_Voice_Assistant/d6edb9283d542ae97356e7f0013048fe8f1c250e/resource/ppn/hello-siri_en_windows_v2_1_0.ppn -------------------------------------------------------------------------------- /resource/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "porcupine": { 3 | "access_key": "xxxxxxxxxxxx", 4 | "keywords": [], 5 | "keyword_paths": ["ppn/hello-model_en_windows_v2_1_0.ppn"], 6 | "sensitivities": [0.5] 7 | }, 8 | "nlp_service": { 9 | "paddlepaddle": { 10 | "asr_url": "http://x.x.x.x:8090/paddlespeech/asr", 11 | "tts_url": "http://x.x.x.x:8090/paddlespeech/tts" 12 | } 13 | }, 14 | "openai": { 15 | "api_key": "xxxxxxxxxxxx" 16 | } 17 | } -------------------------------------------------------------------------------- /resource/wav/wait/sikaoyixia.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csensor/GPT_Voice_Assistant/d6edb9283d542ae97356e7f0013048fe8f1c250e/resource/wav/wait/sikaoyixia.wav -------------------------------------------------------------------------------- /resource/wav/wakeup_audio/gansha.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csensor/GPT_Voice_Assistant/d6edb9283d542ae97356e7f0013048fe8f1c250e/resource/wav/wakeup_audio/gansha.wav -------------------------------------------------------------------------------- /resource/wav/wakeup_audio/zaine.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csensor/GPT_Voice_Assistant/d6edb9283d542ae97356e7f0013048fe8f1c250e/resource/wav/wakeup_audio/zaine.wav -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | def get_json(file_path): 4 | with 
class WakeupHandler():
    """Detect wake words on the microphone using a Porcupine engine."""

    def __init__(self, settings):
        """Create the Porcupine engine and open the microphone stream.

        Args:
            settings: The ``porcupine`` settings dict. ``access_key`` is
                required. ``keywords`` (built-in wake words) takes precedence
                over ``keyword_paths`` (model files relative to the
                ``resource`` directory). ``sensitivities`` is optional and
                defaults to 0.5 per wake word.

        Raises:
            ValueError: If neither ``keywords`` nor ``keyword_paths`` is set.
        """
        resource_path = os.path.join(os.path.dirname(__file__), 'resource')
        builtin_keywords = settings.get('keywords') or None
        model_names = settings.get('keyword_paths') or []

        if builtin_keywords:
            # Only pass the built-in keywords so the engine's keyword order
            # matches the labels returned by run_detect_wakeup_word().
            keyword_paths = None
            self.keywords = list(builtin_keywords)
        elif model_names:
            keyword_paths = [os.path.join(resource_path, model_name)
                             for model_name in model_names]
            self.keywords = [self._keyword_label(model_name)
                             for model_name in model_names]
        else:
            raise ValueError(
                "settings must define 'keywords' or 'keyword_paths'")

        # One sensitivity per wake word is required.
        sensitivities = (settings.get('sensitivities')
                         or [0.5] * len(self.keywords))

        # Unused options are passed as None (not False).
        self.porcupine = pvporcupine.create(
            access_key=settings['access_key'],
            keywords=builtin_keywords,
            keyword_paths=keyword_paths,
            sensitivities=sensitivities
        )
        self.pa = pyaudio.PyAudio()
        self.audio_stream = self.pa.open(
            rate=self.porcupine.sample_rate,
            channels=1,
            format=pyaudio.paInt16,
            input=True,
            frames_per_buffer=self.porcupine.frame_length
        )

    @staticmethod
    def _keyword_label(model_name):
        """Return the wake-word label encoded in a ``.ppn`` model file name.

        The label is the file name up to the first underscore, without any
        directory part, e.g. ``ppn/hello-model_en_...ppn`` -> ``hello-model``.
        """
        return os.path.basename(model_name).split('_')[0]

    def run_detect_wakeup_word(self):
        """Read one audio frame and check it for a wake word.

        Returns:
            The label of the detected wake word, or ``False`` if none was
            detected in this frame.
        """
        frame_length = self.porcupine.frame_length
        # The stream is not read while a conversation is being served, so
        # input may overflow; drop the lost samples instead of raising.
        pcm = self.audio_stream.read(frame_length,
                                     exception_on_overflow=False)
        pcm = struct.unpack_from('h' * frame_length, pcm)
        keyword_index = self.porcupine.process(pcm)
        if keyword_index >= 0:
            return self.keywords[keyword_index]
        return False

    def release(self):
        """Close the microphone stream and free the audio and engine handles."""
        try:
            self.audio_stream.close()
        finally:
            try:
                self.pa.terminate()
            finally:
                self.porcupine.delete()