├── README.md ├── asr.py ├── demo.py ├── getsound.py ├── gpt.py ├── resources ├── common.res ├── javs.pmdl ├── new.pmdl └── snowboy.umdl ├── snowboydecoder.py ├── sound ├── bye.wav ├── error.wav ├── hi.wav ├── result.wav ├── right.wav ├── start.wav └── wrong.wav └── tts.py /README.md: -------------------------------------------------------------------------------- 1 | # linux-gpt-assistant 2 | 欢迎学习交流 3 | -------------------------------------------------------------------------------- /asr.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import sys 4 | import json 5 | import base64 6 | import time 7 | import requests 8 | 9 | IS_PY3 = sys.version_info.major == 3 10 | 11 | 12 | from urllib.request import urlopen 13 | from urllib.request import Request 14 | from urllib.error import URLError 15 | from urllib.parse import urlencode 16 | timer = time.perf_counter 17 | 18 | #百度api 19 | API_KEY = '' 20 | SECRET_KEY = '' 21 | 22 | # 需要识别的文件 23 | AUDIO_FILE = '' # 只支持 pcm/wav/amr 格式,极速版额外支持m4a 格式 24 | # 文件格式 25 | FORMAT = "" # 文件后缀只支持 pcm/wav/amr 格式,极速版额外支持m4a 格式 26 | 27 | CUID = '123456PYTHON' 28 | # 采样率 29 | RATE = 16000 # 固定值 30 | 31 | # 普通版 32 | 33 | DEV_PID = 1537 # 1537 表示识别普通话,使用输入法模型。根据文档填写PID,选择语言及识别模型 34 | ASR_URL = 'http://vop.baidu.com/server_api' 35 | SCOPE = 'audio_voice_assistant_get' # 有此scope表示有asr能力,没有请在网页里勾选,非常旧的应用可能没有 36 | 37 | #测试自训练平台需要打开以下信息, 自训练平台模型上线后,您会看见 第二步:“”获取专属模型参数pid:8001,modelid:1234”,按照这个信息获取 dev_pid=8001,lm_id=1234 38 | # DEV_PID = 8001 ; 39 | # LM_ID = 1234 ; 40 | 41 | # 极速版 打开注释的话请填写自己申请的appkey appSecret ,并在网页中开通极速版(开通后可能会收费) 42 | 43 | # DEV_PID = 80001 44 | # ASR_URL = 'http://vop.baidu.com/pro_api' 45 | # SCOPE = 'brain_enhanced_asr' # 有此scope表示有极速版能力,没有请在网页里开通极速版 46 | 47 | # 忽略scope检查,非常旧的应用可能没有 48 | # SCOPE = False 49 | 50 | class DemoError(Exception): 51 | pass 52 | 53 | 54 | """ TOKEN start """ 55 | 56 | TOKEN_URL = 'http://aip.baidubce.com/oauth/2.0/token' 57 | 58 | 59 | 
def fetch_token():
    """Fetch a Baidu OAuth access token for the ASR API.

    Uses the module-level API_KEY / SECRET_KEY.

    :return: the access token string.
    :raises DemoError: when the credentials are wrong or the returned
        token does not carry the required SCOPE.
    """
    params = {'grant_type': 'client_credentials',
              'client_id': API_KEY,
              'client_secret': SECRET_KEY}
    post_data = urlencode(params)
    if IS_PY3:
        post_data = post_data.encode('utf-8')
    req = Request(TOKEN_URL, post_data)
    try:
        # timeout so a hung token endpoint cannot block the assistant forever
        f = urlopen(req, timeout=5)
        result_str = f.read()
    except URLError as err:
        # the error body is still JSON describing what went wrong
        result_str = err.read()
    if IS_PY3:
        result_str = result_str.decode()

    result = json.loads(result_str)
    if 'access_token' in result and 'scope' in result:
        # SCOPE = False skips the check (very old console apps have no scope)
        if SCOPE and SCOPE not in result['scope'].split(' '):
            raise DemoError('scope is not correct')
        return result['access_token']
    raise DemoError('MAYBE API_KEY or SECRET_KEY not correct: '
                    'access_token or scope not found in token response')

""" TOKEN end """


def getasr(filename):
    """Recognize the speech stored in *filename* with Baidu's ASR service.

    :param filename: path to a pcm/wav/amr (speed edition also m4a) file,
        16 kHz mono, matching the module-level RATE.
    :return: the first recognition candidate as a string.
    :raises DemoError: when the file is empty or the API reports an error.
    """
    audio_file = filename
    # Derive the format from the real extension instead of the last three
    # characters, so a 4-character extension like ".m4a" is not truncated.
    fmt = audio_file.rsplit('.', 1)[-1].lower()

    token = fetch_token()
    with open(audio_file, 'rb') as speech_file:
        speech_data = speech_file.read()

    length = len(speech_data)
    if length == 0:
        raise DemoError('file %s length read 0 bytes' % audio_file)
    speech = str(base64.b64encode(speech_data), 'utf-8')

    params = {'dev_pid': DEV_PID,
              # "lm_id": LM_ID,  # enable when testing a self-trained model
              'format': fmt,
              'rate': RATE,
              'token': token,
              'cuid': CUID,
              'channel': 1,  # the API only accepts mono audio
              'speech': speech,
              'len': length
              }
    headers = {
        "Content-Type": 'application/json'
    }
    r = requests.post(url=ASR_URL, headers=headers, data=json.dumps(params),
                      timeout=30)
    work = r.json()
    # A failed request has err_no/err_msg instead of 'result'; raising here
    # is clearer than the KeyError the old code produced.
    if 'result' not in work:
        raise DemoError('ASR failed: %s' % work.get('err_msg', work))
    return work['result'][0]
# ===================== demo.py =====================
# Voice assistant entry point: waits for one of two hotwords, records a
# question, runs it through Baidu ASR -> OpenAI GPT -> Baidu TTS and plays
# the spoken answer.
import snowboydecoder
import sys
import signal
import getsound
import os
import time

import tts
import asr
import gpt


interrupted = False
model = ['resources/javs.pmdl', 'resources/new.pmdl']

# snowboy/ALSA write a lot of warnings straight to stderr; silence them.
os.close(sys.stderr.fileno())

# Shared system prompt used to start every conversation.
SYSTEM_PROMPT = {"role": "system", "content": "你说话的语气萌萌哒,用词十分可爱"}


def _play(name):
    """Play sound/<name>.wav through the default ALSA device."""
    os.system('aplay -D default sound/%s.wav' % name)


def callbacks1():
    """Hotword 1: answer a single question, then end the conversation."""
    msg = [dict(SYSTEM_PROMPT)]
    detector.terminate()  # pause hotword detection while talking
    print("检测成功!")

    _play('hi')
    try:
        getsound.rec()
        qu = asr.getasr('sound/question.wav')
        print('问:' + qu)
        msg.append({"role": "user", "content": qu})
        if qu != '':
            an = gpt.getgpt(msg)
            print('答:' + an)
            tts.gettts(an)
            _play('result')
    except Exception:  # was a bare except: don't swallow KeyboardInterrupt
        _play('error')
        print("发生错误")

    _play('bye')
    print("结束对话!")
    # Detection is restarted by the main loop below. The old code called
    # detector.start() recursively here, growing the stack on every wake-up.


def callbacks2():
    """Hotword 2: password-protected conversation of up to five turns."""
    msg = [dict(SYSTEM_PROMPT)]
    detector.terminate()
    print("检测成功!")

    _play('start')
    try:
        getsound.rec()
        qu = asr.getasr('sound/question.wav')
        print('输入:' + qu)
        if '我爱你' in qu:  # spoken password
            _play('right')
            for _ in range(5):
                getsound.rec()
                qu = asr.getasr('sound/question.wav')
                print('问:' + qu)
                msg.append({"role": "user", "content": qu})
                if '再见' in qu:
                    break
                if qu == '':
                    break
                an = gpt.getgpt(msg)
                msg.append({"role": "assistant", "content": an})
                print('答:' + an)
                tts.gettts(an)
                _play('result')
        else:
            _play('wrong')
            print("密码错误")
    except Exception:
        _play('error')
        print("发生错误")

    _play('bye')
    print("结束对话!")


# One callback per hotword model, in the same order as `model`.
callbacks = [callbacks1, callbacks2]


def signal_handler(signum, frame):
    """SIGINT handler: ask the detection loop to stop.

    Parameter renamed from `signal` to `signum` so it no longer shadows
    the `signal` module.
    """
    global interrupted
    interrupted = True


def interrupt_callback():
    return interrupted


# capture SIGINT signal, e.g., Ctrl+C
signal.signal(signal.SIGINT, signal_handler)

detector = snowboydecoder.HotwordDetector(model, sensitivity=0.5)

# Main loop: each callback terminate()s the detector before handling the
# conversation, so start() returns here and we simply start it again with
# a bounded stack (the old code re-entered start() from inside callbacks).
while not interrupted:
    print('检测中...')
    detector.start(detected_callback=callbacks,
                   interrupt_check=interrupt_callback,
                   sleep_time=0.03)

detector.terminate()


# ===================== getsound.py =====================
import speech_recognition as sr


def rec():
    """Record speech from the default microphone (16 kHz) into
    sound/question.wav.

    Waits up to 3 s for speech to begin and records at most 13 s of it.
    """
    rate = 16000
    r = sr.Recognizer()
    with sr.Microphone(sample_rate=rate) as source:
        print('正在获取声音中...')
        audio = r.listen(source, timeout=3, phrase_time_limit=13)
        with open("sound/question.wav", "wb") as f:
            f.write(audio.get_wav_data())
        print('声音获取完成.')


# ===================== gpt.py =====================
import openai

openai.api_key = ""  # your OpenAI API key


def getgpt(msg):
    """Send the chat history *msg* to gpt-3.5-turbo and return the reply text."""
    completion = openai.ChatCompletion.create(model="gpt-3.5-turbo",
                                              messages=msg,
                                              max_tokens=2048,
                                              temperature=1.2)
    return completion.choices[0].message.content
#!/usr/bin/env python
# snowboydecoder.py -- hotword detection built on the snowboy engine.

import collections
import pyaudio
import snowboydetect
import time
import wave
import os
import logging
from ctypes import *
from contextlib import contextmanager

logging.basicConfig()
logger = logging.getLogger("snowboy")
logger.setLevel(logging.INFO)
TOP_DIR = os.path.dirname(os.path.abspath(__file__))

RESOURCE_FILE = os.path.join(TOP_DIR, "resources/common.res")
DETECT_DING = os.path.join(TOP_DIR, "resources/ding.wav")
DETECT_DONG = os.path.join(TOP_DIR, "resources/dong.wav")


def py_error_handler(filename, line, function, err, fmt):
    """ALSA error handler that discards every message."""
    pass


ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p)

c_error_handler = ERROR_HANDLER_FUNC(py_error_handler)


@contextmanager
def no_alsa_error():
    """Temporarily silence libasound's stderr chatter around PyAudio calls."""
    try:
        asound = cdll.LoadLibrary('libasound.so')
        asound.snd_lib_error_set_handler(c_error_handler)
        yield
        asound.snd_lib_error_set_handler(None)
    except Exception:
        # libasound not available (e.g. non-Linux): run without the handler.
        # Was a bare except:, which also swallowed KeyboardInterrupt.
        yield
        pass


class RingBuffer(object):
    """Ring buffer to hold audio from PortAudio."""

    def __init__(self, size=4096):
        self._buf = collections.deque(maxlen=size)

    def extend(self, data):
        """Adds data to the end of buffer."""
        self._buf.extend(data)

    def get(self):
        """Retrieves data from the beginning of buffer and clears it."""
        tmp = bytes(bytearray(self._buf))
        self._buf.clear()
        return tmp


def play_audio_file(fname=DETECT_DING):
    """Simple callback function to play a wave file. By default it plays
    a Ding sound.

    :param str fname: wave file name
    :return: None
    """
    ding_wav = wave.open(fname, 'rb')
    ding_data = ding_wav.readframes(ding_wav.getnframes())
    with no_alsa_error():
        audio = pyaudio.PyAudio()
    stream_out = audio.open(
        format=audio.get_format_from_width(ding_wav.getsampwidth()),
        channels=ding_wav.getnchannels(),
        rate=ding_wav.getframerate(), input=False, output=True)
    stream_out.start_stream()
    stream_out.write(ding_data)
    time.sleep(0.2)
    stream_out.stop_stream()
    stream_out.close()
    audio.terminate()


class HotwordDetector(object):
    """
    Snowboy decoder to detect whether a keyword specified by `decoder_model`
    exists in a microphone input stream.

    :param decoder_model: decoder model file path, a string or a list of
                          strings
    :param resource: resource file path.
    :param sensitivity: decoder sensitivity, a float or a list of floats.
                        The bigger the value, the more sensitive the
                        decoder. If an empty list is provided, then the
                        default sensitivity in the model will be used.
    :param audio_gain: multiply input volume by this factor.
    :param apply_frontend: applies the frontend processing algorithm if True.
    """

    def __init__(self, decoder_model,
                 resource=RESOURCE_FILE,
                 sensitivity=[],
                 audio_gain=1,
                 apply_frontend=False):

        # Normalize single model/sensitivity to lists.
        if not isinstance(decoder_model, list):
            decoder_model = [decoder_model]
        if not isinstance(sensitivity, list):
            sensitivity = [sensitivity]
        model_str = ",".join(decoder_model)

        self.detector = snowboydetect.SnowboyDetect(
            resource_filename=resource.encode(), model_str=model_str.encode())
        self.detector.SetAudioGain(audio_gain)
        self.detector.ApplyFrontend(apply_frontend)
        self.num_hotwords = self.detector.NumHotwords()

        # One sensitivity for several models means: apply it to all of them.
        if len(decoder_model) > 1 and len(sensitivity) == 1:
            sensitivity = sensitivity * self.num_hotwords
        if len(sensitivity) != 0:
            assert self.num_hotwords == len(sensitivity), \
                "number of hotwords in decoder_model (%d) and sensitivity " \
                "(%d) does not match" % (self.num_hotwords, len(sensitivity))
        sensitivity_str = ",".join([str(t) for t in sensitivity])
        if len(sensitivity) != 0:
            self.detector.SetSensitivity(sensitivity_str.encode())

        # Hold up to 5 seconds of audio.
        self.ring_buffer = RingBuffer(
            self.detector.NumChannels() * self.detector.SampleRate() * 5)

    def start(self, detected_callback=play_audio_file,
              interrupt_check=lambda: False,
              sleep_time=0.03,
              audio_recorder_callback=None,
              silent_count_threshold=15,
              recording_timeout=100):
        """
        Start the voice detector. For every `sleep_time` second it checks the
        audio buffer for triggering keywords. If detected, then call
        corresponding function in `detected_callback`, which can be a single
        function (single model) or a list of callback functions (multiple
        models). Every loop it also calls `interrupt_check` -- if it returns
        True, then breaks from the loop and return.

        :param detected_callback: a function or list of functions. The number
                                  of items must match the number of models in
                                  `decoder_model`.
        :param interrupt_check: a function that returns True if the main loop
                                needs to stop.
        :param float sleep_time: how much time in second every loop waits.
        :param audio_recorder_callback: if specified, this will be called
                                        after a keyword has been spoken and
                                        after the phrase immediately after the
                                        keyword has been recorded. The
                                        function will be passed the name of
                                        the file where the phrase was
                                        recorded.
        :param silent_count_threshold: indicates how long silence must be
                                       heard to mark the end of a phrase that
                                       is being recorded.
        :param recording_timeout: limits the maximum length of a recording.
        :return: None
        """
        self._running = True

        def audio_callback(in_data, frame_count, time_info, status):
            self.ring_buffer.extend(in_data)
            # Input-only stream: return silence as *bytes* (the old code
            # returned a str, which PyAudio does not accept on Python 3).
            play_data = b'\x00' * len(in_data)
            return play_data, pyaudio.paContinue

        with no_alsa_error():
            self.audio = pyaudio.PyAudio()
        self.stream_in = self.audio.open(
            input=True, output=False,
            # get_format_from_width() is documented to take an int width;
            # use integer division instead of the Py3 float from "/".
            format=self.audio.get_format_from_width(
                self.detector.BitsPerSample() // 8),
            channels=self.detector.NumChannels(),
            rate=self.detector.SampleRate(),
            frames_per_buffer=2048,
            stream_callback=audio_callback)

        if interrupt_check():
            logger.debug("detect voice return")
            return

        # Normalize a single callback to a per-hotword list.
        if not isinstance(detected_callback, list):
            detected_callback = [detected_callback]
        if len(detected_callback) == 1 and self.num_hotwords > 1:
            detected_callback *= self.num_hotwords

        assert self.num_hotwords == len(detected_callback), \
            "Error: hotwords in your models (%d) do not match the number of " \
            "callbacks (%d)" % (self.num_hotwords, len(detected_callback))

        logger.debug("detecting...")

        state = "PASSIVE"
        while self._running is True:
            if interrupt_check():
                logger.debug("detect voice break")
                break
            data = self.ring_buffer.get()
            if len(data) == 0:
                time.sleep(sleep_time)
                continue

            status = self.detector.RunDetection(data)
            if status == -1:
                logger.warning(
                    "Error initializing streams or reading audio data")

            # Small state machine to handle recording of phrase after keyword.
            if state == "PASSIVE":
                if status > 0:  # key word found
                    self.recordedData = []
                    self.recordedData.append(data)
                    silentCount = 0
                    recordingCount = 0
                    message = "Keyword " + str(status) + " detected at time: "
                    message += time.strftime("%Y-%m-%d %H:%M:%S",
                                             time.localtime(time.time()))
                    logger.info(message)
                    callback = detected_callback[status - 1]
                    if callback is not None:
                        callback()

                    if audio_recorder_callback is not None:
                        state = "ACTIVE"
                    continue

            elif state == "ACTIVE":
                stopRecording = False
                if recordingCount > recording_timeout:
                    stopRecording = True
                elif status == -2:  # silence found
                    if silentCount > silent_count_threshold:
                        stopRecording = True
                    else:
                        silentCount = silentCount + 1
                elif status == 0:  # voice found
                    silentCount = 0

                if stopRecording == True:
                    fname = self.saveMessage()
                    audio_recorder_callback(fname)
                    state = "PASSIVE"
                    continue

                recordingCount = recordingCount + 1
                self.recordedData.append(data)

        logger.debug("finished.")

    def saveMessage(self):
        """
        Save the message stored in self.recordedData to a timestamped file.

        :return: the generated file name.
        """
        filename = 'output' + str(int(time.time())) + '.wav'
        data = b''.join(self.recordedData)

        # use wave to save data
        wf = wave.open(filename, 'wb')
        wf.setnchannels(1)
        wf.setsampwidth(self.audio.get_sample_size(
            self.audio.get_format_from_width(
                # int width, same fix as in start()
                self.detector.BitsPerSample() // 8)))
        wf.setframerate(self.detector.SampleRate())
        wf.writeframes(data)
        wf.close()
        logger.debug("finished saving: " + filename)
        return filename

    def terminate(self):
        """
        Terminate audio stream. Users can call start() again to detect.

        :return: None
        """
        self.stream_in.stop_stream()
        self.stream_in.close()
        self.audio.terminate()
        self._running = False
# coding=utf-8
# tts.py -- synthesize speech with the Baidu text-to-speech REST API and
# save the result as sound/result.wav.
import sys
import json

IS_PY3 = sys.version_info.major == 3

from urllib.request import urlopen
from urllib.request import Request
from urllib.error import URLError
from urllib.parse import urlencode
from urllib.parse import quote_plus

# Baidu API credentials
API_KEY = ''
SECRET_KEY = ''

TEXT = ""

# Voice (per). Basic voices: 0 DuXiaomei, 1 DuXiaoyu, 3 DuXiaoyao, 4 DuYaya;
# premium voices: 5, 103, 106, 110, 111. Default is 0.
PER = 0
# Speed, 0-15; 5 is normal.
SPD = 5
# Pitch, 0-15; 5 is normal.
PIT = 5
# Volume, 0-9; 5 is normal.
VOL = 5
# Downloaded audio format: 3 mp3 (default), 4 pcm-16k, 5 pcm-8k, 6 wav.
AUE = 6

FORMATS = {3: "mp3", 4: "pcm", 5: "pcm", 6: "wav"}
FORMAT = FORMATS[AUE]

CUID = "123456PYTHON"

TTS_URL = 'http://tsn.baidu.com/text2audio'


class DemoError(Exception):
    """Raised for credential/scope problems or a failed TTS request."""
    pass


""" TOKEN start """

TOKEN_URL = 'http://aip.baidubce.com/oauth/2.0/token'
SCOPE = 'audio_tts_post'  # this scope grants TTS; enable it in the console


def fetch_token():
    """Fetch a Baidu OAuth access token for the TTS API.

    :return: the access token string.
    :raises DemoError: when the credentials are wrong or the token does
        not carry the required SCOPE.
    """
    params = {'grant_type': 'client_credentials',
              'client_id': API_KEY,
              'client_secret': SECRET_KEY}
    post_data = urlencode(params)
    if IS_PY3:
        post_data = post_data.encode('utf-8')
    req = Request(TOKEN_URL, post_data)
    try:
        f = urlopen(req, timeout=5)
        result_str = f.read()
    except URLError as err:
        # the error body is still JSON describing what went wrong
        result_str = err.read()
    if IS_PY3:
        result_str = result_str.decode()

    result = json.loads(result_str)
    if 'access_token' in result and 'scope' in result:
        if SCOPE not in result['scope'].split(' '):
            raise DemoError('scope is not correct')
        return result['access_token']
    raise DemoError('MAYBE API_KEY or SECRET_KEY not correct: '
                    'access_token or scope not found in token response')


""" TOKEN end """


def gettts(text):
    """Synthesize *text* and write the audio to sound/result.wav.

    :param text: the sentence to speak (UTF-8 str).
    :raises DemoError: when the API returns an error document instead of
        audio. (The old code silently wrote the JSON error body into
        sound/result.wav, which was then played back as garbage audio.)
    """
    token = fetch_token()
    tex = quote_plus(text)  # the text is url-encoded twice in total
    params = {'tok': token, 'tex': tex, 'per': PER, 'spd': SPD, 'pit': PIT,
              'vol': VOL, 'aue': AUE, 'cuid': CUID,
              'lan': 'zh', 'ctp': 1}  # lan and ctp are fixed parameters

    data = urlencode(params)

    req = Request(TTS_URL, data.encode('utf-8'))
    has_error = False
    try:
        f = urlopen(req, timeout=30)
        result_str = f.read()

        headers = dict((name.lower(), value)
                       for name, value in f.headers.items())

        # on success the body is audio; on failure it is a JSON document
        has_error = ('content-type' not in headers.keys()
                     or headers['content-type'].find('audio/') < 0)
    except URLError as err:
        result_str = err.read()
        has_error = True

    if has_error:
        detail = (result_str.decode('utf-8', 'replace')
                  if isinstance(result_str, bytes) else result_str)
        raise DemoError('TTS request failed: %s' % detail)

    with open('sound/result.wav', 'wb') as of:
        of.write(result_str)