├── .gitignore
├── LICENSE
├── README.md
├── audio_player.py
├── audio_recorder.py
├── gpt_service_api.py
├── gva_main.py
├── paddle_service_api.py
├── requirements.txt
├── resource
    ├── ppn
    │   ├── hello-go_en_linux_v2_1_0.ppn
    │   ├── hello-model_en_raspberry-pi_v2_1_0.ppn
    │   ├── hello-model_en_windows_v2_1_0.ppn
    │   └── hello-siri_en_windows_v2_1_0.ppn
    ├── settings.json
    └── wav
    │   ├── wait
    │       └── sikaoyixia.wav
    │   └── wakeup_audio
    │       ├── gansha.wav
    │       └── zaine.wav
├── utils.py
└── wakeup.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | settings_me.json
132 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 csensor
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | https://user-images.githubusercontent.com/7485678/219089373-18906d4f-a5ab-4a92-936b-ee8126028525.mov
 2 | # GPT_Voice_Assistant
 3 | 基于openAI GPT实现的智能交互语音助手,产品主要涉及四个任务：语音唤醒，语音识别，GPT对话，文本转语音
 4 | 
 5 | # 实现的功能
 6 | 通过唤醒词唤醒后，进行语音对话
 7 | 
 8 | # 可玩性
 9 | 1. 可在PC（笔记本、台式机）实现语音助手功能
10 | 2. 运行在树莓派（Raspberry Pi）增加麦克风+喇叭可实现完整语音助手功能
11 | 
12 | # 安装方法
13 | 1. `pip install -r requirements.txt`
14 | 
15 | # 配置
16 | 按需修改 `resource/settings.json` 中的配置（下方示例中的 `//` 注释仅作说明，实际 JSON 文件中不能包含注释）
17 | ```
18 | {
19 |     "porcupine": {
20 |         "access_key": "",                // porcupine的ak
21 |         "keywords": [],                  // 可使用 porcupine 内置的唤醒词，否则使用keyword_paths
22 |         "keyword_paths": ["ppn/hello-model_en_windows_v2_1_0.ppn"], // 项目下自带了唤醒词模型，在resource/ppn 目录下可供选择
23 |         "sensitivities": [0.5]           // 唤醒词阈值，值越高precision越高，反之recall越高
24 |     },
25 |     "nlp_service": {
26 |         "paddlepaddle": {               
27 |             "asr_url": "http://x.x.x.x:8090/paddlespeech/asr",  // 自己搭建的paddlespeech的asr服务, x 换成ip地址
28 |             "tts_url": "http://x.x.x.x:8090/paddlespeech/tts"   // 自己搭建的paddlespeech的tts服务, x 换成ip地址
29 |         }
30 |     },
31 |     "openai": {
32 |         "api_key": "xxxxxxxxxxxx" // openai 上的api key
33 |     } 
34 | }
35 | ```
36 | 
37 | # 运行方法
38 | 1. `python3 gva_main.py`
39 | 
40 | # 常见问题
41 | > 如何生成自己的唤醒词？
42 | 
43 | 见 https://console.picovoice.ai/
44 | 
45 | > 如何搭建paddlespeech 服务
46 | 
47 | 见 https://github.com/PaddlePaddle/PaddleSpeech
48 | 
49 | > 安装依赖时报错： "fatal error: portaudio.h: No such file or directory"
50 | 
51 | `sudo apt install portaudio19-dev` 可解决
52 | 


--------------------------------------------------------------------------------
/audio_player.py:
--------------------------------------------------------------------------------
 1 | import pyaudio
 2 | import wave
 3 | import os
 4 | import random
 5 | CHUNK = 2048
 6 | resource_path = os.path.join(os.path.dirname(__file__), 'resource')
 7 | 
 8 | 
 9 | def play_audio(wave_input_path):
10 |     p = pyaudio.PyAudio()
11 |     wf = wave.open(wave_input_path, 'rb')
12 |     stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
13 |                     channels=wf.getnchannels(),
14 |                     rate=wf.getframerate(),
15 |                     output=True)
16 |     data = wf.readframes(CHUNK)
17 |     while len(data) > 0:
18 |         stream.write(data)
19 |         data = wf.readframes(CHUNK)
20 | 
21 |     stream.stop_stream()
22 |     stream.close()
23 |     p.terminate()
24 | 
25 | 
def _play_random_wav(sub_dir):
    """Play one randomly chosen ``.wav`` file from ``resource/<sub_dir>``.

    Only ``.wav`` entries are considered, so stray files in the folder are
    never handed to the player.

    Raises:
        FileNotFoundError: If the folder contains no ``.wav`` file.
    """
    wav_dir = os.path.join(resource_path, sub_dir)
    candidates = [name for name in os.listdir(wav_dir)
                  if name.lower().endswith('.wav')]
    if not candidates:
        raise FileNotFoundError('no .wav files found in {}'.format(wav_dir))
    play_audio(os.path.join(wav_dir, random.choice(candidates)))


def play_wakeup():
    """Play a random acknowledgement clip from ``resource/wav/wakeup_audio``."""
    _play_random_wav('wav/wakeup_audio')


def play_waiting():
    """Play a random "please wait" clip from ``resource/wav/wait``."""
    _play_random_wav('wav/wait')
42 | 


--------------------------------------------------------------------------------
/audio_recorder.py:
--------------------------------------------------------------------------------
 1 | import pyaudio
 2 | import wave
 3 | 
 4 | FORMAT = pyaudio.paInt16
 5 | CHANNELS = 1
 6 | RATE = 16000
 7 | CHUNK = 2048
 8 | 
 9 | 
def record_audio(wave_out_path, record_second):
    """Record from the default input device into a WAV file.

    The capture format is taken from the module-level ``FORMAT``,
    ``CHANNELS`` and ``RATE`` constants.

    Args:
        wave_out_path: Path of the WAV file to create (overwritten if present).
        record_second: Recording duration in seconds.

    The input stream, the PyAudio instance and the WAV file are released even
    when the capture fails part-way.
    """
    # Count frames rather than whole chunks so the trailing partial chunk is
    # recorded too and the file really lasts ``record_second`` seconds.
    remaining = int(RATE * record_second)
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            with wave.open(wave_out_path, 'wb') as wf:
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(p.get_sample_size(FORMAT))
                wf.setframerate(RATE)
                while remaining > 0:
                    frames = min(CHUNK, remaining)
                    wf.writeframes(stream.read(frames))
                    remaining -= frames
            stream.stop_stream()
        finally:
            stream.close()
    finally:
        p.terminate()
30 | 


--------------------------------------------------------------------------------
/gpt_service_api.py:
--------------------------------------------------------------------------------
 1 | import openai
 2 | 
 3 | 
 4 | class GPTBot():
 5 |     def __init__(self, settings):
 6 |         self.settings = settings
 7 |         openai.api_key = settings['api_key']
 8 | 
 9 |     def feed_prompt(self, text):
10 |         completions = openai.Completion.create(
11 |             engine="text-davinci-003",
12 |             prompt=text,
13 |             max_tokens=1024,
14 |             n=1,
15 |             stop=None,
16 |             temperature=0.5,
17 |         )
18 |         message = completions.choices[0].text.strip()
19 |         if '？' in message:
20 |             message = message.split('？')[-1]
21 |         return message
22 | 


--------------------------------------------------------------------------------
/gva_main.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from wakeup import WakeupHandler
 3 | from audio_player import play_audio, play_wakeup, play_waiting
 4 | from audio_recorder import record_audio
 5 | from paddle_service_api import NLPService
 6 | from gpt_service_api import GPTBot
 7 | from utils import get_json
 8 | import logging
 9 | logging.basicConfig(level=logging.INFO,
10 |                     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
11 | logger = logging.getLogger(__name__)
12 | 
13 | 
class Pipeline():
    """Voice assistant loop: wake word -> record -> ASR -> GPT -> TTS -> play."""

    def __init__(self, settings):
        """Build the wake-word, speech and GPT components.

        Args:
            settings: Mapping with the keys ``'porcupine'``, ``'nlp_service'``
                and ``'openai'``; each value is handed to the matching
                component.
        """
        self.wakeup_handler = WakeupHandler(settings['porcupine'])
        self.nlp_service = NLPService(settings['nlp_service'])
        self.gpt_bot = GPTBot(settings['openai'])

    def run(self):
        """Listen for the wake word forever and answer one query per wake-up.

        A failure inside a single interaction is logged and the loop goes
        back to listening. The wake-word handler is released when the loop is
        left (for example on Ctrl-C).
        """
        logger.info('running...')
        try:
            while True:
                # 1. Wait for the wake word
                keyword = self.wakeup_handler.run_detect_wakeup_word()
                if not keyword:
                    continue
                try:
                    self._handle_interaction()
                except Exception:
                    # Boundary of one interaction: a network or audio error
                    # must not take the whole assistant down.
                    logger.exception('interaction failed')
                logger.info('listening...')
        finally:
            self.wakeup_handler.release()

    def _handle_interaction(self):
        """Run one record -> ASR -> GPT -> TTS -> playback round trip."""
        temp_audio_path = 'temp.wav'
        temp_response_audio_path = 'temp_res.wav'
        try:
            logger.info('wakeup & listening...')
            play_wakeup()

            # 2. Record your voice
            record_audio(temp_audio_path, 5)

            # 3. Call Automatic Speech Recognition api
            query_text = self.nlp_service.get_asr_result(temp_audio_path)
            if not query_text:
                # Nothing was recognised: do not send an empty/False prompt.
                logger.info('did not get ASR result')
                return
            logger.info('query text: {}'.format(query_text))

            # 4. Wait for GPT reply
            play_waiting()
            response_text = self.gpt_bot.feed_prompt(query_text)
            logger.info('GPT response: {}'.format(response_text))
            if not response_text:
                logger.info('empty GPT response, nothing to say')
                return

            # 5. Call Text to speech api
            logger.info('gen speech...')
            if not self.nlp_service.get_tts_result(
                    response_text, temp_response_audio_path):
                # No audio file was produced, so there is nothing to play.
                logger.warning('TTS failed, nothing to play')
                return

            # 6. Play the speech
            logger.info('play response...')
            play_audio(temp_response_audio_path)
        finally:
            # 7. Clean up temporary files (either may be missing after an
            # early return or an error).
            for path in (temp_audio_path, temp_response_audio_path):
                try:
                    os.remove(path)
                except FileNotFoundError:
                    pass
59 | 
60 | 
if __name__ == '__main__':
    # Locate the settings next to this script so the current working
    # directory does not matter.
    base_dir = os.path.dirname(os.path.abspath(__file__))
    settings_file = os.path.join(base_dir, 'resource/settings.json')
    settings = get_json(settings_file)
    pipeline = Pipeline(settings)
    pipeline.run()
67 | 


--------------------------------------------------------------------------------
/paddle_service_api.py:
--------------------------------------------------------------------------------
 1 | import base64
 2 | import json
 3 | import requests
 4 | import soundfile
 5 | import io
 6 | import traceback
 7 | 
 8 | 
 9 | class NLPService():
10 |     def __init__(self, settings):
11 |         self.settings = settings['paddlepaddle']
12 | 
13 |     def get_asr_result(self, wav_file):
14 |         with open(wav_file, 'rb') as f:
15 |             try:
16 |                 base64_bytes = base64.b64encode(f.read())
17 |                 base64_string = base64_bytes.decode('utf-8')
18 | 
19 |                 data = {
20 |                     "audio": base64_string,
21 |                     "audio_format": "wav",
22 |                     "sample_rate": 16000,
23 |                     "lang": "zh_cn",
24 |                     "punc": 0
25 |                 }
26 | 
27 |                 url = self.settings['asr_url']
28 | 
29 |                 payload = json.dumps(data)
30 |                 headers = {
31 |                     'Content-Type': 'application/json'
32 |                 }
33 | 
34 |                 response = requests.request(
35 |                     "POST", url, headers=headers, data=payload).json()
36 |                 return response['result']['transcription']
37 |             except Exception as e:
38 |                 traceback.print_exc()
39 |                 return False
40 | 
41 |     def get_tts_result(self, text, output_path):
42 |         try:
43 |             url = self.settings['tts_url']
44 |             payload = json.dumps({
45 |                 "text": text,
46 |                 "spk_id": 0,
47 |                 "speed": 1,
48 |                 "volume": 1,
49 |                 "sample_rate": 0
50 |             })
51 |             headers = {
52 |                 'Content-Type': 'application/json'
53 |             }
54 | 
55 |             res = requests.request(
56 |                 "POST", url, headers=headers, data=payload).json()
57 |             if not res['success']:
58 |                 return False
59 |             wav_base64 = res['result']['audio']
60 |             audio_data_byte = base64.b64decode(wav_base64)
61 |             samples, sample_rate = soundfile.read(
62 |                 io.BytesIO(audio_data_byte), dtype='float32')
63 |             soundfile.write(output_path, samples, sample_rate)
64 |             return True
65 |         except Exception as e:
66 |             traceback.print_exc()
67 |             return False
68 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csensor/GPT_Voice_Assistant/d6edb9283d542ae97356e7f0013048fe8f1c250e/requirements.txt


--------------------------------------------------------------------------------
/resource/ppn/hello-go_en_linux_v2_1_0.ppn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csensor/GPT_Voice_Assistant/d6edb9283d542ae97356e7f0013048fe8f1c250e/resource/ppn/hello-go_en_linux_v2_1_0.ppn


--------------------------------------------------------------------------------
/resource/ppn/hello-model_en_raspberry-pi_v2_1_0.ppn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csensor/GPT_Voice_Assistant/d6edb9283d542ae97356e7f0013048fe8f1c250e/resource/ppn/hello-model_en_raspberry-pi_v2_1_0.ppn


--------------------------------------------------------------------------------
/resource/ppn/hello-model_en_windows_v2_1_0.ppn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csensor/GPT_Voice_Assistant/d6edb9283d542ae97356e7f0013048fe8f1c250e/resource/ppn/hello-model_en_windows_v2_1_0.ppn


--------------------------------------------------------------------------------
/resource/ppn/hello-siri_en_windows_v2_1_0.ppn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csensor/GPT_Voice_Assistant/d6edb9283d542ae97356e7f0013048fe8f1c250e/resource/ppn/hello-siri_en_windows_v2_1_0.ppn


--------------------------------------------------------------------------------
/resource/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "porcupine": {
 3 |         "access_key": "xxxxxxxxxxxx",
 4 |         "keywords": [],
 5 |         "keyword_paths": ["ppn/hello-model_en_windows_v2_1_0.ppn"],
 6 |         "sensitivities": [0.5]
 7 |     },
 8 |     "nlp_service": {
 9 |         "paddlepaddle": {
10 |             "asr_url": "http://x.x.x.x:8090/paddlespeech/asr",
11 |             "tts_url": "http://x.x.x.x:8090/paddlespeech/tts"
12 |         }
13 |     },
14 |     "openai": {
15 |         "api_key": "xxxxxxxxxxxx"
16 |     } 
17 | }


--------------------------------------------------------------------------------
/resource/wav/wait/sikaoyixia.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csensor/GPT_Voice_Assistant/d6edb9283d542ae97356e7f0013048fe8f1c250e/resource/wav/wait/sikaoyixia.wav


--------------------------------------------------------------------------------
/resource/wav/wakeup_audio/gansha.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csensor/GPT_Voice_Assistant/d6edb9283d542ae97356e7f0013048fe8f1c250e/resource/wav/wakeup_audio/gansha.wav


--------------------------------------------------------------------------------
/resource/wav/wakeup_audio/zaine.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/csensor/GPT_Voice_Assistant/d6edb9283d542ae97356e7f0013048fe8f1c250e/resource/wav/wakeup_audio/zaine.wav


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import json
2 | 
def get_json(file_path):
    """Load and return the JSON document stored at ``file_path``.

    The file is always read as UTF-8 so the result does not depend on the
    platform's default locale encoding.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as fi:
        return json.load(fi)


--------------------------------------------------------------------------------
/wakeup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import struct
 3 | import pyaudio
 4 | import pvporcupine
 5 | 
 6 | 
 7 | class WakeupHandler():
 8 |     def __init__(self, settings):
 9 |         resource_path = os.path.join(os.path.dirname(__file__), 'resource')
10 |         access_key = settings['access_key']
11 |         self.pa = pyaudio.PyAudio()
12 |         self.porcupine = pvporcupine.create(
13 |             access_key=access_key,
14 |             keywords=settings['keywords'] if settings['keywords'] else False,
15 |             keyword_paths=[os.path.join(resource_path, model_name)
16 |                            for model_name in settings['keyword_paths']] if settings['keyword_paths'] else False,
17 |             sensitivities=settings['sensitivities'] if settings['sensitivities'] else [
18 |                 0.5]
19 |         )
20 |         self.keywords = settings['keywords'] if settings['keywords'] else [
21 |             name.split('_')[0] for name in settings['keyword_paths']]
22 |         self.audio_stream = self.pa.open(
23 |             rate=self.porcupine.sample_rate,
24 |             channels=1,
25 |             format=pyaudio.paInt16,
26 |             input=True,
27 |             frames_per_buffer=self.porcupine.frame_length
28 |         )
29 | 
30 |     def run_detect_wakeup_word(self):
31 |         pcm = self.audio_stream.read(self.porcupine.frame_length)
32 |         pcm = struct.unpack_from('h' * self.porcupine.frame_length, pcm)
33 |         keyword_index = self.porcupine.process(pcm)
34 |         if keyword_index >= 0:
35 |             return self.keywords[keyword_index]
36 |         else:
37 |             return False
38 | 
39 |     def release(self):
40 |         self.porcupine.delete()
41 | 


--------------------------------------------------------------------------------