├── .gitignore ├── sphinx-models ├── respeaker │ ├── pocketsphinx-data │ │ ├── hmm │ │ │ ├── mdef │ │ │ ├── means │ │ │ ├── sendump │ │ │ ├── variances │ │ │ ├── transition_matrices │ │ │ ├── noisedict │ │ │ └── feat.params │ │ ├── keywords.txt │ │ └── dictionary.txt │ ├── tdt_sc_8k │ │ ├── mdef │ │ ├── means │ │ ├── sendump │ │ ├── variances │ │ ├── transition_matrices │ │ ├── noisedict │ │ └── feat.params │ ├── TAR0287 │ │ ├── 0287.vocab │ │ ├── 0287.dic │ │ ├── 0287.sent │ │ ├── 0287.log_pronounce │ │ └── 0287.lm │ ├── __init__.py │ ├── assistant.py │ ├── usb_hid │ │ ├── interface.py │ │ ├── __init__.py │ │ ├── hidapi_backend.py │ │ ├── pywinusb_backend.py │ │ └── pyusb_backend.py │ ├── spectrum_analyzer.py │ ├── vad.py │ ├── pixel_ring.py │ ├── fft.py │ ├── spi.py │ ├── player.py │ ├── gpio.py │ ├── bing_speech_api.py │ └── microphone.py ├── TAR0287.tgz ├── basic.txt ├── output.txt ├── genearate_keywords.py ├── new.dic ├── create_symbol.py └── chinese_dicit.py ├── requirements.txt ├── models ├── xiaobai.pmdl ├── close-light.pmdl └── open-light.pmdl ├── screenshots └── generate-sphinx-knowledge.png ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.pyc 3 | -------------------------------------------------------------------------------- /sphinx-models/respeaker/pocketsphinx-data/hmm/mdef: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sphinx-models/respeaker/pocketsphinx-data/hmm/means: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sphinx-models/respeaker/pocketsphinx-data/hmm/sendump: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /sphinx-models/respeaker/pocketsphinx-data/hmm/variances: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sphinx-models/respeaker/pocketsphinx-data/hmm/transition_matrices: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jieba 2 | pocketsphinx 3 | pyaudio 4 | webrtcvad 5 | requests 6 | -------------------------------------------------------------------------------- /models/xiaobai.pmdl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phodal/diy-private-smart-speaker/master/models/xiaobai.pmdl -------------------------------------------------------------------------------- /sphinx-models/respeaker/pocketsphinx-data/hmm/noisedict: -------------------------------------------------------------------------------- 1 | ~~SIL 2 |~~ SIL 3 | SIL 4 | -------------------------------------------------------------------------------- /sphinx-models/respeaker/pocketsphinx-data/keywords.txt: -------------------------------------------------------------------------------- 1 | respeaker /1e-30/ 2 | alexa /1e-30/ 3 | play music /1e-40/ -------------------------------------------------------------------------------- /models/close-light.pmdl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phodal/diy-private-smart-speaker/master/models/close-light.pmdl -------------------------------------------------------------------------------- /models/open-light.pmdl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/phodal/diy-private-smart-speaker/master/models/open-light.pmdl -------------------------------------------------------------------------------- /sphinx-models/TAR0287.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phodal/diy-private-smart-speaker/master/sphinx-models/TAR0287.tgz -------------------------------------------------------------------------------- /sphinx-models/respeaker/pocketsphinx-data/dictionary.txt: -------------------------------------------------------------------------------- 1 | respeaker R IY S P IY K ER 2 | alexa AH L EH K S AH 3 | play P L EY 4 | music M Y UW Z IH K -------------------------------------------------------------------------------- /sphinx-models/respeaker/tdt_sc_8k/mdef: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phodal/diy-private-smart-speaker/master/sphinx-models/respeaker/tdt_sc_8k/mdef -------------------------------------------------------------------------------- /sphinx-models/respeaker/tdt_sc_8k/means: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phodal/diy-private-smart-speaker/master/sphinx-models/respeaker/tdt_sc_8k/means -------------------------------------------------------------------------------- /screenshots/generate-sphinx-knowledge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phodal/diy-private-smart-speaker/master/screenshots/generate-sphinx-knowledge.png -------------------------------------------------------------------------------- /sphinx-models/respeaker/tdt_sc_8k/sendump: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phodal/diy-private-smart-speaker/master/sphinx-models/respeaker/tdt_sc_8k/sendump 
-------------------------------------------------------------------------------- /sphinx-models/respeaker/tdt_sc_8k/variances: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phodal/diy-private-smart-speaker/master/sphinx-models/respeaker/tdt_sc_8k/variances -------------------------------------------------------------------------------- /sphinx-models/respeaker/tdt_sc_8k/transition_matrices: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phodal/diy-private-smart-speaker/master/sphinx-models/respeaker/tdt_sc_8k/transition_matrices -------------------------------------------------------------------------------- /sphinx-models/respeaker/pocketsphinx-data/hmm/feat.params: -------------------------------------------------------------------------------- 1 | -nfilt 40 2 | -lowerf 133.3334 3 | -upperf 6855.4976 4 | -feat s2_4x 5 | -agc none 6 | -varnorm no 7 | -cmninit 8,0,0 8 | -------------------------------------------------------------------------------- /sphinx-models/respeaker/tdt_sc_8k/noisedict: -------------------------------------------------------------------------------- 1 | ~~SIL 2 |~~ SIL 3 | SIL 4 | ++laugh++ +LAUGH+ 5 | ++lipsmack++ +LIPSMACK+ 6 | ++cough++ +COUGH+ 7 | ++breath++ +BREATHE+ 8 | -------------------------------------------------------------------------------- /sphinx-models/basic.txt: -------------------------------------------------------------------------------- 1 | 打开台灯 2 | 关闭台灯 3 | 打开电视 4 | 关闭电视 5 | 打开小米盒子 6 | 关闭小米盒子 7 | 打开客厅的空调 8 | 关闭客厅的空调 9 | 把客厅的空调设为25度 10 | 把客厅的空调调为25度 11 | 将客厅的空调设为25度 12 | 把卧室的空调设为25度 13 | 把空调设为25度 14 | 我要听音乐 15 | 今天的天气怎样？ 16 | 今天需要带伞吗？ 17 | -------------------------------------------------------------------------------- /sphinx-models/respeaker/TAR0287/0287.vocab: -------------------------------------------------------------------------------- 1 | 25 2 | 今天 3 | 关闭 4 | 卧室 5 | 台灯 6 | 吗 
import io

import jieba

# Segment every sentence of basic.txt into words and write the result to
# output.txt.  Each input line is handed to jieba.cut() unchanged and the
# tokens it yields are joined with single spaces.
#
# Both files are opened as UTF-8 explicitly because the sentences are Chinese
# and the platform default encoding is not guaranteed to be UTF-8.  The
# `with` statement closes both handles even when segmentation raises.
with io.open("basic.txt", encoding="utf-8") as source, \
        io.open("output.txt", "w", encoding="utf-8") as output:
    for line in source:
        output.write(" ".join(jieba.cut(line)))
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Fill in pronunciations for the words of an lmtool-generated dictionary.

Every word listed in ``TAR0287/0287.dic`` is looked up in
``zh_broadcastnews_utf8.dic``; each pronunciation found is written to
``new.dic`` as ``word<TAB>phones``.
"""

ORIGIN_DICT_PATH = 'TAR0287/0287.dic'
PRONUNCIATION_DICT_PATH = 'zh_broadcastnews_utf8.dic'
NEW_DICT_PATH = 'new.dic'


def index_pronunciations(lines):
    """Map each word to the pronunciations listed for it in *lines*.

    A line is ``word<SPACE>phones``; everything after the first space
    (including the line terminator) is kept verbatim.  Lines without a space
    are ignored.  A word that occurs several times keeps every pronunciation
    in file order.
    """
    table = {}
    for line in lines:
        word, separator, phones = line.partition(" ")
        if separator:
            table.setdefault(word, []).append(phones)
    return table


def build_entries(origin_lines, pronunciations):
    """Yield ``word<TAB>phones`` for every word of *origin_lines* that is known.

    The word is the text before the first tab.  Surrounding whitespace is
    stripped so that a line carrying no tab (and therefore still ending in a
    newline) can match its dictionary entry.  Blank lines are skipped.
    """
    for origin_line in origin_lines:
        word = origin_line.split("\t")[0].strip()
        if not word:
            continue
        for phones in pronunciations.get(word, ()):
            yield word + "\t" + phones


def main():
    """Read both dictionaries and write the merged ``new.dic``."""
    # Index the large pronunciation dictionary once instead of re-reading the
    # whole file for every word of the origin dictionary.
    with open(PRONUNCIATION_DICT_PATH, encoding='UTF-8') as pronunciation_file:
        pronunciations = index_pronunciations(pronunciation_file)

    with open(ORIGIN_DICT_PATH, encoding='UTF-8') as origin_file, \
            open(NEW_DICT_PATH, 'w', encoding='UTF-8') as new_dict:
        new_dict.writelines(build_entries(origin_file, pronunciations))


if __name__ == '__main__':
    main()
ReSpeaker Python Library 3 | Copyright (c) 2016 Seeed Technology Limited. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | """ 17 | 18 | from microphone import Microphone 19 | from spi import SPI, spi 20 | from player import Player 21 | from pixel_ring import PixelRing, pixel_ring 22 | -------------------------------------------------------------------------------- /sphinx-models/respeaker/TAR0287/0287.log_pronounce: -------------------------------------------------------------------------------- 1 | I think this is a non-word: 25 2 | pronounce: verbosity is 1 3 | TWO - Morpheme: TWO 4 | FIVE - Morpheme: FIVE 5 | I think this is a non-word: 今天 6 | I think this is a non-word: 关闭 7 | I think this is a non-word: 卧室 8 | I think this is a non-word: 台灯 9 | I think this is a non-word: 吗 10 | I think this is a non-word: 听 11 | I think this is a non-word: 天气 12 | I think this is a non-word: 客厅 13 | I think this is a non-word: 将 14 | I think this is a non-word: 小米 15 | I think this is a non-word: 带伞 16 | I think this is a non-word: 度 17 | I think this is a non-word: 怎样 18 | I think this is a non-word: 我要 19 | I think this is a non-word: 打开 20 | I think this is a non-word: 把 21 | I think this is a non-word: 电视 22 | I think this is a non-word: 的 23 | I think this is a non-word: 盒子 24 | I think this is a non-word: 空调 25 | I think this is a non-word: 设为 26 | I think this is a non-word: 调为 27 | I think this is a non-word: 需要 28 | I think this is a non-word: 音乐 
29 | I think this is a non-word: ？ 30 | 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Phodal Huang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # how-to-diy-smart-speaker 2 | 3 | How to DIY a Chinese Smart Speaker 4 | 5 | Sphinx 生成 6 | --- 7 | 8 | 打开 [http://www.speech.cs.cmu.edu/tools/lmtool-new.html](http://www.speech.cs.cmu.edu/tools/lmtool-new.html) 9 | 10 | 1. 点击 Choose File 上传文件 11 | 2. 点击 COMPILE KNOWLEDGE BASE 进行转换 12 | 3. 
打开 [http://www.speech.cs.cmu.edu/tools/product/1503889661_19829/](http://www.speech.cs.cmu.edu/tools/product/1503889661_19829/) 下载 tgz 文件 13 | 14 | 15 | ``` 16 | JackShmReadWritePtr::~JackShmReadWritePtr - Init not done for -1, skipping unlock 17 | JackShmReadWritePtr::~JackShmReadWritePtr - Init not done for -1, skipping unlock 18 | INFO:mic:Use ReSpeaker MicArray UAC2.0: USB Audio (hw:2,0) 19 | 20 | INFO:mic:Start detecting 21 | INFO:mic:Detected 空调 22 | INFO:mic:Detected 空调 23 | INFO:mic:Detected 度 24 | INFO:mic:Detected 今天 25 | INFO:mic:Detected 今天 26 | INFO:mic:Detected 空调 27 | ``` 28 | 29 | 下载的 txt 需要生成模型 30 | 31 | 执行: 32 | 33 | ``` 34 | cd sphinx-models 35 | python create_symbol.py 36 | ``` 37 | 38 | ### Mac OS 39 | 40 | ``` 41 | brew install portaudio hidapi 42 | ``` 43 | 44 | 45 | ``` 46 | $ python assistant.py 47 | ERROR:root:cython-hidapi is required on a Mac OS X Machine 48 | ``` 49 | 50 | 51 | ``` 52 | pip install cython 53 | git clone https://github.com/gbishop/cython-hidapi.git 54 | cd cython-hidapi && python setup.py install 55 | ``` 56 | 57 | Text Generate ? 58 | --- 59 | 60 | 61 | -------------------------------------------------------------------------------- /sphinx-models/respeaker/assistant.py: -------------------------------------------------------------------------------- 1 | """ 2 | ReSpeaker Python Library 3 | Copyright (c) 2016 Seeed Technology Limited. 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | Unless required by applicable law or agreed to in writing, software 9 | distributed under the License is distributed on an "AS IS" BASIS, 10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | See the License for the specific language governing permissions and 12 | limitations under the License. 
def task(quit_event):
    """Run the wake-word / recognition loop until *quit_event* is set.

    Args:
        quit_event: threading.Event shared with the main thread; it is also
            handed to the Microphone so the microphone can observe it.
    """
    mic = Microphone(quit_event=quit_event)

    while not quit_event.is_set():
        if not mic.wakeup('respeaker'):
            continue

        print('Wake up')
        data = mic.listen()
        text = mic.recognize(data)
        if text:
            print('Recognized %s' % text)


def main():
    """Start the recognition worker and wait for it or for Ctrl-C."""
    logging.basicConfig(level=logging.DEBUG)

    quit_event = Event()
    thread = Thread(target=task, args=(quit_event,))
    thread.start()

    # Poll with a timeout rather than sleeping unconditionally: if the worker
    # dies on its own (for example the microphone cannot be opened) the loop
    # ends instead of sleeping forever, and Ctrl-C is still noticed promptly.
    while thread.is_alive():
        try:
            thread.join(1)
        except KeyboardInterrupt:
            print('Quit')
            quit_event.set()
            break
    thread.join()


if __name__ == '__main__':
    main()
# Value of every supported numeral character: Chinese numerals, full-width
# digits, the formal ("banker's") numerals and the large units.
CHINESE_DIGITS = {
    u'零': 0, u'一': 1, u'二': 2, u'三': 3, u'四': 4, u'五': 5, u'六': 6,
    u'七': 7, u'八': 8, u'九': 9, u'十': 10, u'百': 100, u'千': 1000,
    u'万': 10000,
    u'０': 0, u'１': 1, u'２': 2, u'３': 3, u'４': 4, u'５': 5, u'６': 6,
    u'７': 7, u'８': 8, u'９': 9,
    u'壹': 1, u'贰': 2, u'叁': 3, u'肆': 4, u'伍': 5, u'陆': 6, u'柒': 7,
    u'捌': 8, u'玖': 9, u'拾': 10, u'佰': 100, u'仟': 1000, u'萬': 10000,
    u'亿': 100000000,
}

# Backward-compatible alias: the table used to be published under this name
# (which shadows the builtin inside this module).
dict = CHINESE_DIGITS

_TEN_THOUSAND = 10000
_HUNDRED_MILLION = 100000000


def getResultForDigit(a, encoding="utf-8"):
    """Convert a Chinese numeral string to an integer.

    Args:
        a: text to convert.  Byte strings are decoded with *encoding* first;
            text strings are used as they are.
        encoding: codec used to decode *a* when it is a byte string.

    Returns:
        The integer value.  Characters that are not in CHINESE_DIGITS are
        ignored, so an input without any numeral yields 0.
    """
    # `bytes` is `str` on Python 2, so this keeps the old behaviour there
    # while no longer calling the non-existent str.decode on Python 3.
    if isinstance(a, bytes):
        a = a.decode(encoding)

    result = 0   # value accumulated below the current 亿 group
    tmp = 0      # digits read since the last unit character
    billion = 0  # value already committed by 亿 groups

    for char in a:
        value = CHINESE_DIGITS.get(char)
        if value is None:
            # Not a numeral: skip it instead of comparing None with an int.
            continue

        if value == _HUNDRED_MILLION:
            # Close the group: everything read so far is a multiple of 亿.
            billion = billion * _HUNDRED_MILLION + (result + tmp) * value
            result = 0
            tmp = 0
        elif value == _TEN_THOUSAND:
            # 万 multiplies everything accumulated so far in this group.
            result = (result + tmp) * value
            tmp = 0
        elif value >= 10:
            # 十 / 百 / 千: a missing leading digit means one (十五 == 15).
            result += value * (tmp or 1)
            tmp = 0
        else:
            # Plain digit; consecutive digits form a positional number.
            tmp = tmp * 10 + value

    return result + tmp + billion
def get(index=0):
    """Return the connected interface at position *index*, or None.

    The module-level ``devices`` list is reused when it is non-empty.
    Otherwise, if the selected backend reports itself available, the list is
    refreshed from the backend's getAllConnectedInterface() before the lookup.
    """
    global devices

    if not devices:
        backend = INTERFACE[usb_backend]
        if backend.isAvailable:
            devices = backend.getAllConnectedInterface()

    # Nothing connected, or fewer interfaces than the requested position.
    if not devices or len(devices) <= index:
        return None
    return devices[index]
class SpectrumAnalyzer:
    """Sum FFT amplitudes into logarithmically spaced frequency bands."""

    def __init__(self, size, sample_rate=16000, band_number=12, window=(50, 8000)):
        """
        Args:
            size: number of samples per analysis; rounded up to a power of two.
            sample_rate: sample rate of the analysed audio.
            band_number: number of bands to report.
            window: (lowest, highest) frequency covered by the bands.
        """
        self.size = 1 << math.frexp(size - 1)[1]
        self.sample_rate = float(sample_rate)
        self.resolution = self.sample_rate / self.size  # frequency step per FFT bin

        self.set_band(band_number, window)

        self.fft = FFT(self.size)

    def set_band(self, n, window=(50, 8000)):
        """Recompute band edges for *n* bands spanning *window*.

        Sets ``frequencies`` (n + 1 geometrically spaced edges),
        ``breakpoints`` (the FFT bin index at which each band starts, plus a
        final end index), ``band_size`` (bins per band) and resets
        ``strength``.
        """
        self.band = n
        self.breakpoints = [0] * (n + 1)
        self.frequencies = [0.0] * (n + 1)
        self.strength = [0.0] * n

        delta = math.pow(float(window[1]) / window[0], 1.0 / n)
        for i in range(n + 1):
            self.frequencies[i] = math.pow(delta, i) * window[0]

        # Floor division keeps bin indices integral on Python 3 as well; true
        # division would produce floats that range() and slicing reject.
        half = self.size // 2

        band_index = 0
        for i in range(1, half):
            if self.resolution * i >= self.frequencies[band_index]:
                self.breakpoints[band_index] = i
                band_index += 1
                if band_index > n:
                    break

        self.breakpoints[n] = half + 1
        self.band_size = [self.breakpoints[i + 1] - self.breakpoints[i] for i in range(n)]

    def analyze(self, data):
        """Return the summed amplitude of every band for *data*.

        *data* is passed unchanged to the FFT object's dft(); the returned
        list is ``self.strength``, which is overwritten on every call.
        """
        amplitude = self.fft.dft(data)
        for i in range(self.band):
            self.strength[i] = sum(amplitude[self.breakpoints[i]:self.breakpoints[i + 1]])

        return self.strength


if __name__ == '__main__':
    N = 2048
    rate = 16000

    # 50 Hz sine wave as test input.
    data = array.array('h', [0] * N)
    w = 2 * math.pi * 50 / rate
    for t in range(N):
        data[t] = int(100 * math.sin(w * t))

    analyzer = SpectrumAnalyzer(N, rate)
    # The array is passed directly: array.tostring() no longer exists on
    # current Python 3 and FFT.dft() has a branch for array.array input.
    strength = analyzer.analyze(data)
    print([int(f) for f in analyzer.frequencies])
    print([int(s) for s in strength])
class WebRTCVAD:
    """Voice activity detector that smooths per-frame webrtcvad decisions."""

    def __init__(self, sample_rate=16000, level=0):
        """
        Args:
            sample_rate: audio sample rate
            level: between 0 and 3. 0 is the least aggressive about filtering
                out non-speech, 3 is the most aggressive.
        """
        self.sample_rate = sample_rate

        self.frame_ms = 30
        # S16_LE samples are 2 bytes wide.
        self.frame_bytes = int(2 * self.frame_ms * self.sample_rate / 1000)

        self.vad = webrtcvad.Vad(level)
        self.active = False
        self.data = b''
        self.history = collections.deque(maxlen=128)

    def _drop_oldest_half(self):
        """Discard the oldest half (by capacity) of the frame history."""
        for _ in range(self.history.maxlen // 2):
            self.history.popleft()

    def is_speech(self, data):
        """Feed *data* to the detector and return whether speech is active.

        Buffered bytes are consumed one frame at a time.  Each frame's 0/1
        decision is appended to the history and the state is updated from the
        number of voiced frames among the last eight.  Processing stops as
        soon as the state switches to active; unconsumed bytes stay buffered.
        Progress markers are written to stdout.
        """
        self.data += data
        frame_size = self.frame_bytes

        while len(self.data) >= frame_size:
            frame, self.data = self.data[:frame_size], self.data[frame_size:]

            voiced = 1 if self.vad.is_speech(frame, self.sample_rate) else 0
            sys.stdout.write('1' if voiced else '0')
            self.history.append(voiced)

            # Voiced frames among (at most) the eight most recent ones.
            num_voiced = sum(list(self.history)[-8:])

            if self.active:
                if num_voiced < 1:
                    sys.stdout.write('-')
                    self.active = False
                elif sum(self.history) > self.history.maxlen * 0.9:
                    sys.stdout.write('Todo: decrease capture volume')
                    self._drop_oldest_half()
                continue

            if num_voiced >= 4:
                sys.stdout.write('+')
                self.active = True
                break

            if len(self.history) == self.history.maxlen and sum(self.history) == 0:
                sys.stdout.write('Todo: increase capture volume')
                self._drop_oldest_half()

        return self.active

    def reset(self):
        """Drop buffered audio and history and return to the inactive state."""
        self.history.clear()
        self.active = False
        self.data = b''
class PixelRing:
    """Driver for the LED pixel ring; commands go out over USB HID and SPI."""

    mono_mode = 1
    listening_mode = 2
    waiting_mode = 3
    speaking_mode = 4

    def __init__(self):
        # usb_hid.get() may return None when no interface is connected;
        # write() and close() check for that.
        self.hid = usb_hid.get()

    def off(self):
        """Switch all pixels off (mono colour 0)."""
        self.set_color(rgb=0)

    def set_color(self, rgb=None, r=0, g=0, b=0):
        """Show a single colour.

        Args:
            rgb: colour packed as 0xRRGGBB.  When it is None or 0 the
                separate *r*, *g*, *b* components are used instead.
            r, g, b: colour components, used only when *rgb* is falsy.
        """
        if rgb:
            self.write(0, [self.mono_mode, rgb & 0xFF, (rgb >> 8) & 0xFF, (rgb >> 16) & 0xFF])
        else:
            self.write(0, [self.mono_mode, b, g, r])

    def listen(self, direction=None):
        """Show the listening pattern, pointing at *direction* when given."""
        if direction is None:
            # NOTE(review): command 7 has no named constant in this class;
            # its exact meaning is not visible here.
            self.write(0, [7, 0, 0, 0])
        else:
            self.write(0, [self.listening_mode, 0, direction & 0xFF, (direction >> 8) & 0xFF])

    def wait(self):
        """Show the waiting pattern."""
        self.write(0, [self.waiting_mode, 0, 0, 0])

    def speak(self, strength, direction):
        """Show the speaking pattern for *strength* and *direction*."""
        self.write(0, [self.speaking_mode, strength, direction & 0xFF, (direction >> 8) & 0xFF])

    def set_volume(self, volume):
        """Send *volume* with command 5 (presumably a volume display; confirm)."""
        self.write(0, [5, 0, 0, volume])

    @staticmethod
    def to_bytearray(data):
        """Convert *data* to a bytearray.

        Accepted inputs: a bytearray (returned as is), bytes, an int (its low
        8 bits become one byte), a text string (each character must have a
        code point below 256) and a list or tuple of byte values.

        Raises:
            TypeError: for any other type.
            ValueError: when a text character or sequence item does not fit
                into one byte.
        """
        if isinstance(data, bytearray):
            return data
        if isinstance(data, bytes):
            # Also covers the Python 2 `str` type.
            return bytearray(data)
        if isinstance(data, int):
            return bytearray([data & 0xFF])
        if isinstance(data, str):
            # Python 3 text: bytearray(str) needs an encoding; latin-1 maps
            # code points 0-255 one-to-one onto bytes.
            return bytearray(data.encode('latin-1'))
        if isinstance(data, (list, tuple)):
            return bytearray(data)

        raise TypeError('%s is not supported' % type(data))

    def write(self, address, data):
        """Send *data* to *address* over HID (when connected) and over SPI.

        The HID packet is a 4-byte header, address then payload length, both
        as 16-bit little-endian values, followed by the payload.
        """
        data = self.to_bytearray(data)
        length = len(data)
        if self.hid:
            packet = bytearray([address & 0xFF, (address >> 8) & 0xFF, length & 0xFF, (length >> 8) & 0xFF]) + data
            self.hid.write(packet)
            print(packet)
        spi.write(address=address, data=data)

    def close(self):
        """Close the HID interface if one was opened."""
        if self.hid:
            self.hid.close()
class FFT:
    """
    Real-input DFT computed by the single-precision FFTW3 shared library.

    The requested size is rounded up to a power of two. If the library
    cannot be loaded, a warning is logged and :meth:`dft` still runs but
    the output buffer is never filled in, so the amplitudes stay at their
    initial zeros.
    """

    def __init__(self, size):
        """
        Args:
            size: number of input samples; rounded up to the next power of two
        """
        self.size = 1 << math.frexp(size - 1)[1]

        # Floor division: a list repeat count must be an int, and `/` yields
        # a float on Python 3.
        bins = self.size // 2 + 1

        self.real_input = array.array('f', [0.0] * self.size)
        self.complex_output = array.array('f', [0.0] * (self.size * 2))
        self.amplitude = array.array('f', [0.0] * bins)
        self.phase = array.array('f', [0.0] * bins)

        try:
            if os.name == "nt":
                self.fftw3f = ctypes.CDLL('libfftw3f-3.dll')
            else:
                self.fftw3f = ctypes.CDLL('libfftw3f.so')

            # fftw_plan fftw_plan_dft_r2c_1d(int band_number, double *in, fftw_complex *out, unsigned flags);
            self.fftwf_plan_dft_r2c_1d = self.fftw3f.fftwf_plan_dft_r2c_1d
            self.fftwf_plan_dft_r2c_1d.argtypes = (ctypes.c_int, ctypes.c_void_p, ctypes.c_void_p, ctypes.c_uint)
            self.fftwf_plan_dft_r2c_1d.restype = ctypes.c_void_p

            # void fftwf_execute(const fftwf_plan plan)
            self.fftwf_execute = self.fftw3f.fftwf_execute
            self.fftwf_execute.argtypes = (ctypes.c_void_p,)
            self.fftwf_execute.restype = None

            # The plan is bound to the addresses of these two buffers, so
            # they must be filled in place and never reallocated.
            input_ptr, _ = self.real_input.buffer_info()
            output_ptr, _ = self.complex_output.buffer_info()
            self.fftwf_plan = self.fftwf_plan_dft_r2c_1d(self.size, input_ptr, output_ptr, 1)
        except Exception as e:
            # Best-effort fallback: keep the object usable without FFTW.
            # Exceptions have no `.message` attribute on Python 3, so format
            # the exception itself.
            logging.warning('Can not find libffw3f dynamic library, return error - {}'.format(e))
            self.fftwf_execute = lambda x: None
            self.fftwf_plan = None

    def dft(self, data, typecode='h'):
        """
        Load samples into the input buffer, run the transform and return
        the magnitude of each output bin.

        Args:
            data: raw sample bytes (decoded using typecode) or an
                array.array of samples; any other type leaves the input
                buffer unchanged
            typecode: array typecode used to decode raw bytes

        Returns:
            the internal amplitude array (size // 2 + 1 floats); it is
            reused and overwritten by the next call
        """
        if isinstance(data, (bytes, bytearray)):
            # `bytes` is `str` on Python 2, so byte strings work on both.
            samples = array.array(typecode, bytes(data))
        elif isinstance(data, array.array):
            samples = data
        else:
            samples = None

        if samples is not None:
            for index, value in enumerate(samples):
                self.real_input[index] = float(value)

        self.fftwf_execute(self.fftwf_plan)

        # complex_output holds interleaved (real, imaginary) pairs.
        for i in range(len(self.amplitude)):
            self.amplitude[i] = math.hypot(self.complex_output[i * 2], self.complex_output[i * 2 + 1])
            # self.phase[i] = math.atan2(self.complex_output[i * 2 + 1], self.complex_output[i * 2])

        return self.amplitude  # , self.phase
10 | This model based on a corpus of 16 sentences and 28 words 11 | 12 | \data\ 13 | ngram 1=28 14 | ngram 2=44 15 | ngram 3=48 16 | 17 | \1-grams: 18 | -1.6021 25 -0.2900 19 | -1.0969 -0.3010 20 | -1.0969 -0.2648 21 | -2.0000 今天 -0.2833 22 | -1.6990 关闭 -0.2765 23 | -2.3010 卧室 -0.2856 24 | -2.0000 台灯 -0.2648 25 | -2.3010 吗 -0.2967 26 | -2.3010 听 -0.2989 27 | -2.3010 天气 -0.2989 28 | -1.6021 客厅 -0.2856 29 | -2.3010 将 -0.2900 30 | -2.0000 小米 -0.2967 31 | -2.3010 带伞 -0.2989 32 | -1.6021 度 -0.2648 33 | -2.3010 怎样 -0.2967 34 | -2.3010 我要 -0.2989 35 | -1.6990 打开 -0.2765 36 | -1.6990 把 -0.2718 37 | -2.0000 电视 -0.2648 38 | -1.4559 的 -0.2833 39 | -2.0000 盒子 -0.2648 40 | -1.4559 空调 -0.2529 41 | -1.6990 设为 -0.2900 42 | -2.3010 调为 -0.2900 43 | -2.3010 需要 -0.2989 44 | -2.3010 音乐 -0.2648 45 | -2.0000 ？ -0.2648 46 | 47 | \2-grams: 48 | -0.3010 25 度 0.0000 49 | -1.2041 今天 0.0000 50 | -0.9031 关闭 0.0000 51 | -1.5051 将 0.0000 52 | -1.5051 我要 0.0000 53 | -0.9031 打开 0.0000 54 | -0.9031 把 0.0000 55 | -0.6021 今天的 -0.2688 56 | -0.6021 今天需要 0.0000 57 | -0.9031 关闭台灯 0.0000 58 | -0.9031 关闭客厅 0.0000 59 | -0.9031 关闭小米 0.0000 60 | -0.9031 关闭电视 0.0000 61 | -0.3010 卧室的 -0.0580 62 | -0.3010 台灯 -0.3010 63 | -0.3010 吗？ 0.0000 64 | -0.3010 听音乐 0.0000 65 | -0.3010 天气怎样 0.0000 66 | -0.3010 客厅的 -0.0580 67 | -0.3010 将客厅 0.0000 68 | -0.3010 小米盒子 0.0000 69 | -0.3010 带伞吗 0.0000 70 | -0.3010 度 -0.3010 71 | -0.3010 怎样？ 0.0000 72 | -0.3010 我要听 0.0000 73 | -0.9031 打开台灯 0.0000 74 | -0.9031 打开客厅 0.0000 75 | -0.9031 打开小米 0.0000 76 | -0.9031 打开电视 0.0000 77 | -0.9031 把卧室 0.0000 78 | -0.6021 把客厅 0.0000 79 | -0.9031 把空调 -0.1549 80 | -0.3010 电视 -0.3010 81 | -1.1461 的天气 0.0000 82 | -0.3680 的空调 0.0000 83 | -0.3010 盒子 -0.3010 84 | -0.8451 空调 -0.3010 85 | -0.5441 空调设为 0.0000 86 | -1.1461 空调调为 0.0000 87 | -0.3010 设为 25 0.0000 88 | -0.3010 调为 25 0.0000 89 | -0.3010 需要带伞 0.0000 90 | -0.3010 音乐 -0.3010 91 | -0.3010 ？ -0.3010 92 | 93 | \3-grams: 94 | -0.3010 25 度 95 | -0.6021 今天的 96 | -0.6021 今天需要 97 | -0.9031 关闭台灯 98 | -0.9031 
关闭客厅 99 | -0.9031 关闭小米 100 | -0.9031 关闭电视 101 | -0.3010 将客厅 102 | -0.3010 我要听 103 | -0.9031 打开台灯 104 | -0.9031 打开客厅 105 | -0.9031 打开小米 106 | -0.9031 打开电视 107 | -0.9031 把卧室 108 | -0.6021 把客厅 109 | -0.9031 ~~把空调 110 | -0.3010 今天的天气 111 | -0.3010 今天需要带伞 112 | -0.3010 关闭台灯~~ 113 | -0.3010 关闭客厅的 114 | -0.3010 关闭小米盒子 115 | -0.3010 关闭电视 116 | -0.3010 卧室的空调 117 | -0.3010 吗？ 118 | -0.3010 听音乐 119 | -0.3010 天气怎样？ 120 | -0.3010 客厅的空调 121 | -0.3010 将客厅的 122 | -0.3010 小米盒子 123 | -0.3010 带伞吗？ 124 | -0.3010 怎样？ 125 | -0.3010 我要听音乐 126 | -0.3010 打开台灯 127 | -0.3010 打开客厅的 128 | -0.3010 打开小米盒子 129 | -0.3010 打开电视 130 | -0.3010 把卧室的 131 | -0.3010 把客厅的 132 | -0.3010 把空调设为 133 | -0.3010 的天气怎样 134 | -0.7782 的空调 135 | -0.6021 的空调设为 136 | -1.0792 的空调调为 137 | -0.3010 空调设为 25 138 | -0.3010 空调调为 25 139 | -0.3010 设为 25 度 140 | -0.3010 调为 25 度 141 | -0.3010 需要带伞吗 142 | 143 | \end\ 144 | -------------------------------------------------------------------------------- /sphinx-models/respeaker/usb_hid/hidapi_backend.py: -------------------------------------------------------------------------------- 1 | """ 2 | USB HID API from pyOCD project 3 | Copyright (c) 2006-2013 ARM Limited 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 
class HidApiUSB(Interface):
    """
    This class provides basic functions to access
    a USB HID device using cython-hidapi:
        - write/read an endpoint
    """
    vid = 0
    pid = 0

    # Module-level flag set by the guarded `import hid` above the class.
    isAvailable = isAvailable

    def __init__(self):
        super(HidApiUSB, self).__init__()
        # Set by getAllConnectedInterface() once a device has been opened.
        self.device = None

    def open(self):
        """No-op: the device is already opened during enumeration."""
        pass

    @staticmethod
    def getAllConnectedInterface():
        """
        Return every connected HID device whose product string contains
        "MicArray".

        Returns:
            list of HidApiUSB (Interface) objects; empty when nothing matches
        """

        devices = hid.enumerate()

        if not devices:
            logging.debug("No Mbed device connected")
            return []

        boards = []

        for deviceInfo in devices:
            product_name = deviceInfo['product_string']
            # The product string may be missing; treat that as "no match"
            # instead of crashing on None.find().
            if not product_name or product_name.find("MicArray") < 0:
                # Skip devices that are not a MicArray
                continue

            try:
                dev = hid.device(vendor_id=deviceInfo['vendor_id'], product_id=deviceInfo['product_id'],
                                 path=deviceInfo['path'])
            except IOError:
                logging.debug("Failed to open Mbed device")
                continue

            # Create the USB interface object for this device.
            new_board = HidApiUSB()
            new_board.vendor_name = deviceInfo['manufacturer_string']
            new_board.product_name = deviceInfo['product_string']
            new_board.serial_number = deviceInfo['serial_number']
            new_board.vid = deviceInfo['vendor_id']
            new_board.pid = deviceInfo['product_id']
            new_board.device_info = deviceInfo
            new_board.device = dev
            try:
                dev.open_path(deviceInfo['path'])
            except AttributeError:
                # Device object without open_path(): nothing more to do.
                pass
            except IOError:
                # Ignore failure to open a device by skipping the device.
                continue

            boards.append(new_board)

        return boards

    def write(self, data):
        """
        write data on the OUT endpoint associated to the HID interface

        The payload is zero-padded to 64 bytes and prefixed with a 0 byte
        before being sent.

        Args:
            data: payload bytes (bytes, bytearray or list of ints)
        """
        # Pad a private copy: the caller's buffer is left untouched and
        # immutable inputs such as bytes are accepted.
        report = bytearray(data)
        if len(report) < 64:
            report.extend(bytearray(64 - len(report)))
        #logging.debug("send: %s", report)
        self.device.write(bytearray([0]) + report)
        return

    def read(self, timeout=-1):
        """
        read data on the IN endpoint associated to the HID interface

        Args:
            timeout: accepted for interface compatibility; not used by
                this backend

        Returns:
            whatever the underlying device returns for a 64-byte read
        """
        return self.device.read(64)

    def getSerialNumber(self):
        """Return the serial number recorded during enumeration."""
        return self.serial_number

    def close(self):
        """
        close the interface
        """
        logging.debug("closing interface")
        # A freshly constructed instance has no device attached yet.
        if self.device is not None:
            self.device.close()

    def setPacketCount(self, count):
        # No interface level restrictions on count
        self.packet_count = count
class PyWinUSB(Interface):
    """
    This class provides basic functions to access
    a USB HID device using pywinusb:
        - write/read an endpoint
    """
    vid = 0
    pid = 0

    # Module-level flag set by the guarded `import pywinusb.hid` above the class.
    isAvailable = isAvailable

    def __init__(self):
        super(PyWinUSB, self).__init__()
        # Replaced by the device's single output report during enumeration.
        self.report = []
        # A deque is used instead of a synchronized Queue because reads are
        # roughly 10-30% faster and comparable to a list-based implementation.
        self.rcv_data = collections.deque()
        self.device = None

    # handler called when a report is received
    def rx_handler(self, data):
        #logging.debug("rcv: %s", data[1:])
        # The first element is dropped before queueing, mirroring the extra
        # leading 0 byte that write() puts in front of every outgoing report.
        self.rcv_data.append(data[1:])

    def open(self):
        """Install the receive handler and open the device exclusively."""
        self.device.set_raw_data_handler(self.rx_handler)
        self.device.open(shared=False)

    @staticmethod
    def getAllConnectedInterface():
        """
        Return every connected HID device whose product name contains
        "MicArray" and that exposes exactly one output report.

        Returns:
            list of PyWinUSB (Interface) objects, each left open with its
            receive handler installed
        """
        all_devices = hid.find_all_hid_devices()

        # keep only the devices with a matching product name
        matching_devices = [d for d in all_devices if d.product_name.find("MicArray") >= 0]

        boards = []
        for dev in matching_devices:
            try:
                dev.open(shared=False)
                report = dev.find_output_reports()
                if len(report) == 1:
                    new_board = PyWinUSB()
                    new_board.report = report[0]
                    new_board.vendor_name = dev.vendor_name
                    new_board.product_name = dev.product_name
                    new_board.serial_number = dev.serial_number
                    new_board.vid = dev.vendor_id
                    new_board.pid = dev.product_id
                    new_board.device = dev
                    new_board.device.set_raw_data_handler(new_board.rx_handler)

                    boards.append(new_board)
                else:
                    # The device was opened with shared=False but is not
                    # handed to any board object, so release it here;
                    # otherwise nothing could ever close it.
                    dev.close()
            except Exception as e:
                logging.error("Receiving Exception: %s", e)
                dev.close()

        return boards

    def write(self, data):
        """
        write data on the OUT endpoint associated to the HID interface

        The payload is zero-padded to 64 bytes and prefixed with a 0 byte
        before being sent.

        Args:
            data: payload bytes (bytes, bytearray or list of ints)
        """
        # Pad a private copy: the caller's buffer is left untouched and
        # immutable inputs such as bytes are accepted.
        padded = bytearray(data)
        if len(padded) < 64:
            padded.extend(bytearray(64 - len(padded)))
        #logging.debug("send: %s", padded)
        self.report.send(bytearray([0]) + padded)
        return

    def read(self, timeout=1.0):
        """
        read data on the IN endpoint associated to the HID interface

        Args:
            timeout: seconds to wait for a report before giving up

        Returns:
            the oldest queued report

        Raises:
            Exception: if no report arrives within the timeout
        """
        start = time()
        # Polls without sleeping, as the original does.
        # NOTE(review): presumably deliberate to keep read latency low -
        # confirm before adding a sleep here.
        while len(self.rcv_data) == 0:
            if time() - start > timeout:
                # Read operations should typically take ~1-2ms.
                # If this exception occurs, then it could indicate
                # a problem in one of the following areas:
                # 1. Bad usb driver causing either a dropped read or write
                # 2. Device firmware problem causing a dropped read or write
                # 3. The device is performing a long operation or is being
                #    halted in a debugger
                raise Exception("Read timed out")
        return self.rcv_data.popleft()

    def setPacketCount(self, count):
        # No interface level restrictions on count
        self.packet_count = count

    def getSerialNumber(self):
        """Return the serial number recorded during enumeration."""
        return self.serial_number

    def close(self):
        """
        close the interface
        """
        logging.debug("closing interface")
        self.device.close()
# Lookup table for a byte-wise CRC-8: entry i is the CRC of the single byte i
# (entry 1 == 0x07, i.e. generator polynomial 0x07, initial value 0).
CRC8_TABLE = (
    0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15,
    0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d,
    0x70, 0x77, 0x7e, 0x79, 0x6c, 0x6b, 0x62, 0x65,
    0x48, 0x4f, 0x46, 0x41, 0x54, 0x53, 0x5a, 0x5d,
    0xe0, 0xe7, 0xee, 0xe9, 0xfc, 0xfb, 0xf2, 0xf5,
    0xd8, 0xdf, 0xd6, 0xd1, 0xc4, 0xc3, 0xca, 0xcd,
    0x90, 0x97, 0x9e, 0x99, 0x8c, 0x8b, 0x82, 0x85,
    0xa8, 0xaf, 0xa6, 0xa1, 0xb4, 0xb3, 0xba, 0xbd,
    0xc7, 0xc0, 0xc9, 0xce, 0xdb, 0xdc, 0xd5, 0xd2,
    0xff, 0xf8, 0xf1, 0xf6, 0xe3, 0xe4, 0xed, 0xea,
    0xb7, 0xb0, 0xb9, 0xbe, 0xab, 0xac, 0xa5, 0xa2,
    0x8f, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9d, 0x9a,
    0x27, 0x20, 0x29, 0x2e, 0x3b, 0x3c, 0x35, 0x32,
    0x1f, 0x18, 0x11, 0x16, 0x03, 0x04, 0x0d, 0x0a,
    0x57, 0x50, 0x59, 0x5e, 0x4b, 0x4c, 0x45, 0x42,
    0x6f, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7d, 0x7a,
    0x89, 0x8e, 0x87, 0x80, 0x95, 0x92, 0x9b, 0x9c,
    0xb1, 0xb6, 0xbf, 0xb8, 0xad, 0xaa, 0xa3, 0xa4,
    0xf9, 0xfe, 0xf7, 0xf0, 0xe5, 0xe2, 0xeb, 0xec,
    0xc1, 0xc6, 0xcf, 0xc8, 0xdd, 0xda, 0xd3, 0xd4,
    0x69, 0x6e, 0x67, 0x60, 0x75, 0x72, 0x7b, 0x7c,
    0x51, 0x56, 0x5f, 0x58, 0x4d, 0x4a, 0x43, 0x44,
    0x19, 0x1e, 0x17, 0x10, 0x05, 0x02, 0x0b, 0x0c,
    0x21, 0x26, 0x2f, 0x28, 0x3d, 0x3a, 0x33, 0x34,
    0x4e, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5c, 0x5b,
    0x76, 0x71, 0x78, 0x7f, 0x6a, 0x6d, 0x64, 0x63,
    0x3e, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2c, 0x2b,
    0x06, 0x01, 0x08, 0x0f, 0x1a, 0x1d, 0x14, 0x13,
    0xae, 0xa9, 0xa0, 0xa7, 0xb2, 0xb5, 0xbc, 0xbb,
    0x96, 0x91, 0x98, 0x9f, 0x8a, 0x8d, 0x84, 0x83,
    0xde, 0xd9, 0xd0, 0xd7, 0xc2, 0xc5, 0xcc, 0xcb,
    0xe6, 0xe1, 0xe8, 0xef, 0xfa, 0xfd, 0xf4, 0xf3
)


def crc8(data):
    """
    Return the table-driven CRC-8 of data.

    Args:
        data: bytes, bytearray or any iterable of ints in 0..255

    Returns:
        int checksum in 0..255 (0 for empty input)
    """
    if isinstance(data, bytes):
        # Iterating a Python 2 byte string yields 1-char strings, which
        # cannot be XOR-ed with an int; a bytearray yields ints everywhere.
        data = bytearray(data)
    result = 0
    for b in data:
        result = CRC8_TABLE[result ^ b]
    return result


if platform.machine() == 'mips':
    from gpio import *
    from threading import RLock
    import time


    class SPI:
        """Bit-banged SPI master driven through sysfs GPIO pins."""

        def __init__(self, sck=15, mosi=17, miso=16, cs=14):
            """
            Args:
                sck: GPIO number of the clock pin
                mosi: GPIO number of the master-out pin
                miso: GPIO number of the master-in pin
                cs: GPIO number of the chip-select pin
            """
            self.sck = Gpio(sck, OUTPUT)
            self.mosi = Gpio(mosi, OUTPUT)
            self.miso = Gpio(miso, INPUT)
            self.cs = Gpio(cs, OUTPUT)

            # Chip select idles high and is driven low during a transfer.
            self.cs.write(1)

            self.frequency(10000000)
            self.format(8, 0)
            self.lock = RLock()

        def frequency(self, hz=10000000):
            """Record the requested clock frequency; the delays that would
            use it are commented out in _exchange."""
            self.freq = hz

        def format(self, bits=8, mode=0):
            """
            Set the word size and SPI mode.

            Args:
                bits: number of bits per exchanged word
                mode: bit 1 is the clock polarity, bit 0 is the clock phase
            """
            self.bits = bits
            self.mode = mode
            self.polarity = (mode >> 1) & 1
            self.phase = mode & 1
            # Put the clock line at its idle level for this polarity.
            self.sck.write(self.polarity)

        def _exchange(self, data):
            """Clock out one word, most significant bit first, and return
            the word read back on MISO."""
            read = 0
            for bit in range(self.bits - 1, -1, -1):
                self.mosi.write((data >> bit) & 0x01)

                # phase 0 samples before the first clock edge
                if 0 == self.phase:
                    read |= self.miso.read() << bit

                self.sck.write(1 - self.polarity)
                # time.sleep(0.5 / self.freq)

                # phase 1 samples after the first clock edge
                if 1 == self.phase:
                    read |= self.miso.read() << bit

                self.sck.write(self.polarity)
                # time.sleep(0.5 / self.freq)

            return read

        def _write(self, data):
            """
            Exchange data with chip select asserted.

            Args:
                data: int, bytes, bytearray, str, or a list of those

            Returns:
                bytearray with one received word per word sent

            Raises:
                TypeError: if the type of data is not supported
            """
            response = bytearray()
            self.cs.write(0)
            try:
                if isinstance(data, int):
                    response.append(self._exchange(data))
                elif isinstance(data, (bytes, bytearray)):
                    # On Python 2 this branch also covers str.
                    for b in bytearray(data):
                        response.append(self._exchange(b))
                elif isinstance(data, str):
                    # Python 3 text: one byte per character.
                    for b in bytearray(data.encode('latin-1')):
                        response.append(self._exchange(b))
                elif isinstance(data, list):
                    # Each item is sent as its own transfer, as before, but
                    # the bytes read back are now kept instead of dropped.
                    for item in data:
                        response.extend(self._write(item))
                else:
                    raise TypeError('%s is not supported' % type(data))
            finally:
                # Always release chip select, even if an exchange fails.
                self.cs.write(1)

            return response

        def write(self, data=None, address=None):
            """
            Send data, optionally framed for a given address.

            With an address the frame is 0xA5, address, length, payload and
            a CRC-8 of the payload; the returned bytes exclude the three
            header positions and the CRC position.

            Args:
                data: payload (see _write for supported types; with an
                    address it must be convertible by bytearray())
                address: optional target address

            Returns:
                bytearray of bytes read back during the transfer
            """
            with self.lock:
                if address is not None:
                    # Normalize so bytes and lists of ints can be framed too.
                    payload = bytearray(data)
                    frame = bytearray([0xA5, address & 0xFF, len(payload) & 0xFF]) + payload + bytearray([crc8(payload)])
                    response = self._write(frame)[3:-1]
                else:
                    response = self._write(data)

                return response

        def close(self):
            """Nothing to release."""
            pass
else:
    class SPI:
        """Stand-in used on machines that are not 'mips': every call is a no-op."""

        def __init__(self):
            pass

        def write(self, data=None, address=None):
            pass

        def close(self):
            pass
class Player:
    """
    Play wav files, raw PCM and mp3 audio, optionally feeding the played
    audio to a spectrum analyzer whose band levels are written over SPI.
    """

    def __init__(self, pyaudio_instance=None):
        """
        Args:
            pyaudio_instance: existing PyAudio object to reuse; a new one
                is created when omitted
        """
        self.pyaudio_instance = pyaudio_instance if pyaudio_instance else pyaudio.PyAudio()
        self.stop_event = threading.Event()

        def ignite(queue):
            # Worker loop: analyze the most recent chunk and push the band
            # levels to SPI address 0xA0. The analyzer is sized from the
            # first chunk received.
            data = queue.get()
            analyzer = SpectrumAnalyzer(len(data), band_number=BAND_NUMBER)
            while True:
                # Drop any backlog so only the newest chunk is analyzed.
                while not queue.empty():
                    data = queue.get()

                amplitude = analyzer.analyze(data)
                level = bytearray(len(amplitude))
                for i, v in enumerate(amplitude):
                    # Scale down and clamp so the level fits in one byte.
                    l = int(v / 1024 / 128)
                    if l > 255:
                        l = 255
                    level[i] = l

                spi.write(address=0xA0, data=level)

                data = queue.get()

        self.queue = Queue.Queue()
        self.thread = threading.Thread(target=ignite, args=(self.queue,))
        self.thread.daemon = True
        self.thread.start()

    def _play(self, data, rate=16000, channels=1, width=2, spectrum=True):
        """
        Write audio to a freshly opened output stream.

        Args:
            data: a generator of audio chunks, or one buffer of audio
            rate: sample rate
            channels: channel count
            width: sample width in bytes
            spectrum: if true, queue each generator chunk for the analyzer
        """
        stream = self.pyaudio_instance.open(
            format=self.pyaudio_instance.get_format_from_width(width),
            channels=channels,
            rate=rate,
            output=True,
            # output_device_index=1,
            frames_per_buffer=CHUNK_SIZE,
        )

        try:
            if isinstance(data, types.GeneratorType):
                for d in data:
                    if self.stop_event.is_set():
                        break

                    stream.write(d)

                    if spectrum:
                        if channels == 2:
                            # Downmix using the real sample width rather
                            # than assuming 16-bit audio.
                            d = audioop.tomono(d, width, 0.5, 0.5)
                        self.queue.put(d)
            else:
                stream.write(data)
        finally:
            # Release the stream even if writing fails part-way through.
            stream.close()

    def play(self, wav=None, data=None, rate=16000, channels=1, width=2, block=True, spectrum=None):
        """
        play wav file or raw audio (string or generator)
        Args:
            wav: wav file path
            data: raw audio data, str or generator
            rate: sample rate, only for raw audio
            channels: channel number, only for raw data
            width: raw audio data width, 16 bit is 2, only for raw data
            block: if true, block until audio is played.
            spectrum: if true, use a spectrum analyzer thread to analyze data
        """
        if wav:
            f = wave.open(wav, 'rb')
            rate = f.getframerate()
            channels = f.getnchannels()
            width = f.getsampwidth()

            def gen(w):
                # try/finally closes the file when the generator is
                # exhausted and also when it is discarded early (for
                # example after stop()).
                try:
                    d = w.readframes(CHUNK_SIZE)
                    while d:
                        yield d
                        d = w.readframes(CHUNK_SIZE)
                finally:
                    w.close()

            data = gen(f)

        self.stop_event.clear()
        if block:
            self._play(data, rate, channels, width, spectrum)
        else:
            thread = threading.Thread(target=self._play, args=(data, rate, channels, width, spectrum))
            thread.start()

    def play_raw(self, data, rate=16000, channels=1, width=2):
        """Play raw audio data, blocking until it has been written."""
        self.play(data=data, rate=rate, channels=channels, width=width)

    def play_mp3(self, mp3=None, data=None, block=True):
        """
        It supports GeneratorType mp3 stream or mp3 data string
        Args:
            mp3: mp3 file
            data: mp3 generator or data
            block: if true, block until audio is played.
        """
        # The command is a fixed string (no caller input), so shell=True is
        # only used for the pipe in the mips variant.
        if platform.machine() == 'mips':
            command = 'madplay -o wave:- - | aplay -M'
        else:
            command = 'ffplay -autoexit -nodisp -'

        if mp3:
            def gen(m):
                with open(m, 'rb') as f:
                    d = f.read(1024)
                    while d:
                        yield d
                        d = f.read(1024)

            data = gen(mp3)

        if isinstance(data, types.GeneratorType):
            # Stream the chunks into the decoder's stdin.
            p = subprocess.Popen(command, stdin=subprocess.PIPE, shell=True)
            for d in data:
                p.stdin.write(d)

            p.stdin.close()
        else:
            # One buffer: spool it to a temporary file that the decoder
            # reads as its stdin.
            with tempfile.NamedTemporaryFile(mode='w+b') as f:
                f.write(data)
                f.flush()
                f.seek(0)
                p = subprocess.Popen(command, stdin=f, shell=True)

        if block:
            p.wait()

    def stop(self):
        """Ask an in-progress generator playback to stop after the current chunk."""
        self.stop_event.set()

    def close(self):
        """Nothing to release."""
        pass
class PyUSB(Interface):
    """
    This class provides basic functions to access
    a USB HID device using pyusb:
        - write/read an endpoint
    """

    vid = 0
    pid = 0
    intf_number = 0

    # Module-level flag set by the guarded `import usb.core` above the class.
    isAvailable = isAvailable

    def __init__(self):
        super(PyUSB, self).__init__()
        self.ep_out = None
        self.ep_in = None
        self.dev = None
        self.closed = False
        self.rcv_data = []
        # Released once per write() so the receive thread performs one read
        # for each packet sent.
        self.read_sem = threading.Semaphore(0)

    def start_rx(self):
        """Start the daemon thread that reads from the IN endpoint."""
        self.thread = threading.Thread(target=self.rx_task)
        self.thread.daemon = True
        self.thread.start()

    def rx_task(self):
        """Receive loop: wait for a write(), then read one packet."""
        while not self.closed:
            self.read_sem.acquire()
            if not self.closed:
                # Timeouts appear to corrupt data occasionally.  Because of this the
                # timeout is set to infinite.
                self.rcv_data.append(self.ep_in.read(self.ep_in.wMaxPacketSize, -1))

    @staticmethod
    def getAllConnectedInterface():
        """
        Return every connected USB device whose product string contains
        "MicArray" and that has a HID interface with an IN endpoint.

        Returns:
            list of PyUSB (Interface) objects with their receive thread
            started; empty when nothing matches
        """
        # enumerate every USB device; filtering happens below
        all_devices = usb.core.find(find_all=True)

        if not all_devices:
            logging.debug("No device connected")
            return []

        boards = []

        # iterate on all devices found
        for board in all_devices:
            interface_number = -1
            try:
                # The product string is read over USB when accessed.
                # This can cause an exception to be thrown if the device
                # is malfunctioning.
                product = board.product
            except usb.core.USBError as error:
                logging.warning("Exception getting product string: %s", error)
                continue
            if (product is None) or (product.find("MicArray") < 0):
                # Not a ReSpeaker MicArray device so close it
                usb.util.dispose_resources(board)
                continue

            # get active config
            config = board.get_active_configuration()

            # iterate on all interfaces:
            #    - if we found a HID interface
            for interface in config:
                if interface.bInterfaceClass == 0x03:
                    interface_number = interface.bInterfaceNumber
                    break

            if interface_number == -1:
                continue

            try:
                if board.is_kernel_driver_active(interface_number):
                    board.detach_kernel_driver(interface_number)
            except Exception as e:
                # Best effort: carry on even if the driver cannot be
                # detached, but report it through logging like the rest of
                # this module instead of printing to stdout.
                logging.warning("Failed to detach kernel driver: %s", e)

            ep_in, ep_out = None, None
            for ep in interface:
                # Bit 7 of the endpoint address marks an IN endpoint.
                if ep.bEndpointAddress & 0x80:
                    ep_in = ep
                else:
                    ep_out = ep

            # If there is no EP for OUT then we can use CTRL EP,
            # but an IN endpoint is required.
            if not ep_in:
                logging.error('Endpoints not found')
                # Skip only this device: returning None here would throw
                # away boards already found and break callers that expect
                # a list.
                continue

            new_board = PyUSB()
            new_board.ep_in = ep_in
            new_board.ep_out = ep_out
            new_board.dev = board
            new_board.vid = board.idVendor
            new_board.pid = board.idProduct
            new_board.intf_number = interface_number
            new_board.product_name = product
            new_board.vendor_name = board.manufacturer
            new_board.serial_number = board.serial_number
            new_board.start_rx()
            boards.append(new_board)

        return boards

    def write(self, data):
        """
        write data on the OUT endpoint associated to the HID interface

        When the device has no OUT endpoint the data is sent as a HID
        SET_REPORT control transfer instead.

        Args:
            data: payload to send
        """

        # report_size = 64
        # if self.ep_out:
        #     report_size = self.ep_out.wMaxPacketSize
        #
        # for _ in range(report_size - len(data)):
        #    data.append(0)

        # Allow the receive thread to perform one read for this write.
        self.read_sem.release()

        if not self.ep_out:
            bmRequestType = 0x21              #Host to device request of type Class of Recipient Interface
            bmRequest = 0x09              #Set_REPORT (HID class-specific request for transferring data over EP0)
            wValue = 0x200             #Issuing an OUT report
            wIndex = self.intf_number  #Board interface number for HID
            self.dev.ctrl_transfer(bmRequestType, bmRequest, wValue, wIndex, data)
            return
            #raise ValueError('EP_OUT endpoint is NULL')

        self.ep_out.write(data)
        #logging.debug('sent: %s', data)
        return

    def read(self):
        """
        read data on the IN endpoint associated to the HID interface

        Blocks (polling) until the receive thread has queued a packet.

        Returns:
            the oldest packet read by the receive thread
        """
        while len(self.rcv_data) == 0:
            pass
        return self.rcv_data.pop(0)

    def setPacketCount(self, count):
        # No interface level restrictions on count
        self.packet_count = count

    def getSerialNumber(self):
        """Return the serial number recorded during enumeration."""
        return self.serial_number

    def close(self):
        """
        close the interface
        """
        logging.debug("closing interface")
        self.closed = True
        # Wake the receive thread so it can observe `closed` and exit.
        self.read_sem.release()
        # The thread only exists once start_rx() has been called.
        thread = getattr(self, 'thread', None)
        if thread is not None:
            thread.join()
        usb.util.dispose_resources(self.dev)
Logger = logging.getLogger(__file__)

# Sysfs constants

SYSFS_BASE_PATH = '/sys/class/gpio'

SYSFS_EXPORT_PATH = SYSFS_BASE_PATH + '/export'
SYSFS_UNEXPORT_PATH = SYSFS_BASE_PATH + '/unexport'

SYSFS_GPIO_PATH = SYSFS_BASE_PATH + '/gpio%d'
SYSFS_GPIO_DIRECTION_PATH = SYSFS_GPIO_PATH + '/direction'
SYSFS_GPIO_EDGE_PATH = SYSFS_GPIO_PATH + '/edge'
SYSFS_GPIO_VALUE_PATH = SYSFS_GPIO_PATH + '/value'
SYSFS_GPIO_ACTIVE_LOW_PATH = SYSFS_GPIO_PATH + '/active_low'

SYSFS_GPIO_VALUE_LOW = '0'
SYSFS_GPIO_VALUE_HIGH = '1'

EPOLL_TIMEOUT = 1  # second

# Public interface

INPUT = 'in'
OUTPUT = 'out'

# Compatible with MRAA
DIR_IN = INPUT
DIR_OUT = OUTPUT

RISING = 'rising'
FALLING = 'falling'
BOTH = 'both'

ACTIVE_LOW_ON = 1
ACTIVE_LOW_OFF = 0

DIRECTIONS = (INPUT, OUTPUT)
EDGES = (RISING, FALLING, BOTH)
ACTIVE_LOW_MODES = (ACTIVE_LOW_ON, ACTIVE_LOW_OFF)


class Gpio(object):
    """
    Represent a pin in SysFS
    """

    def __init__(self, number, direction=INPUT, callback=None, edge=None, active_low=0):
        """
        Export the pin if needed, open its value file and configure it.

        @type  number: int
        @param number: The pin number
        @type  direction: str
        @param direction: Pin direction, one of C{DIRECTIONS}
        @type  callback: callable
        @param callback: Called as C{callback(number, state)} when the pin
                         changes state
        @type  edge: str
        @param edge: The edge transition that triggers callback, one of
                     C{EDGES}
        @type  active_low: int
        @param active_low: Indicator of whether this pin uses inverted
                           logic for HIGH-LOW transitions.
        @raise ValueError: a callback was given without an edge, or
                           active_low is neither 0 nor 1
        """
        # Validate first so that a bad call neither exports the pin nor
        # leaves an open file handle behind. ValueError is an Exception
        # subclass, so existing `except Exception` callers still work.
        if callback and not edge:
            raise ValueError('You must supply a edge to trigger callback on')
        if active_low and active_low not in ACTIVE_LOW_MODES:
            raise ValueError('You must supply a value for active_low which is either 0 or 1.')

        self._number = number
        self._direction = direction
        self._callback = callback
        self._active_low = active_low
        self._poll = None
        self.thread = None
        self._running = False

        # An exported pin shows up as a gpioN directory. The directory is
        # tested, not the value file inside it, because isdir() on a file is
        # always False and would trigger a second export.
        if not os.path.isdir(SYSFS_GPIO_PATH % number):
            with open(SYSFS_EXPORT_PATH, 'w') as export:
                export.write('%d' % number)
        else:
            Logger.debug("SysfsGPIO: Pin %d already exported" % number)

        self._fd = open(self._sysfs_gpio_value_path(), 'r+')

        try:
            with open(self._sysfs_gpio_direction_path(), 'w') as fsdir:
                fsdir.write(direction)

            if edge:
                with open(self._sysfs_gpio_edge_path(), 'w') as fsedge:
                    fsedge.write(edge)
                self._poll = select.epoll()
                # register() accepts this object because it implements fileno()
                self._poll.register(self, (select.EPOLLPRI | select.EPOLLET))
                self.thread = Thread(target=self._run)
                self.thread.daemon = True
                self._running = True
                self.thread.start()

            if active_low:
                with open(self._sysfs_gpio_active_low_path(), 'w') as fsactive_low:
                    fsactive_low.write(str(active_low))
        except Exception:
            # Do not leak the value file when configuration fails half way
            self._running = False
            self._fd.close()
            raise

    @property
    def callback(self):
        """
        Gets this pin callback
        """
        return self._callback

    @callback.setter
    def callback(self, value):
        """
        Sets this pin callback
        """
        self._callback = value

    @property
    def direction(self):
        """
        Pin direction
        """
        return self._direction

    @property
    def number(self):
        """
        Pin number
        """
        return self._number

    @property
    def active_low(self):
        """
        The active_low setting this pin was created with
        """
        return self._active_low

    def dir(self, direction):
        """
        Change the pin direction.

        @param direction: one of C{DIRECTIONS}
        """
        self._direction = direction
        with open(self._sysfs_gpio_direction_path(), 'w') as fsdir:
            fsdir.write(direction)

    def set(self):
        """
        Set pin to HIGH logic level
        """
        self._fd.write(SYSFS_GPIO_VALUE_HIGH)
        self._fd.seek(0)  # rewind so the next access starts at offset 0

    def reset(self):
        """
        Set pin to LOW logic level
        """
        self._fd.write(SYSFS_GPIO_VALUE_LOW)
        self._fd.seek(0)

    def read(self):
        """
        Read pin value

        @rtype: int
        @return: I{0} when LOW, I{1} when HIGH
        """
        val = self._fd.read()
        self._fd.seek(0)
        return int(val)

    def write(self, value):
        """
        Drive the pin HIGH when value is truthy, LOW otherwise.
        """
        if value:
            self.set()
        else:
            self.reset()

    def close(self):
        """
        Stop the polling loop (if any) and close the value file.
        """
        self._running = False
        self._fd.close()

    def fileno(self):
        """
        Get the file descriptor associated with this pin.

        @rtype: int
        @return: File descriptor
        """
        return self._fd.fileno()

    def changed(self, state):
        """
        Invoke the callback, if one is set, with the pin number and state.
        """
        if callable(self._callback):
            self._callback(self.number, state)

    def _run(self):
        """
        Thread body: wait for edge events and report the new pin value
        until close() clears the running flag.
        """
        try:
            while self._running:
                events = self._poll.poll(EPOLL_TIMEOUT)
                for fd, event in events:
                    if not (event & (select.EPOLLPRI | select.EPOLLET)):
                        continue
                    if not self._running:
                        # closed while we were waiting: the file is gone
                        break
                    self.changed(self.read())
        finally:
            self._poll.close()

    def _sysfs_gpio_value_path(self):
        """
        Get the file that represent the value of this pin.

        @rtype: str
        @return: the path to sysfs value file
        """
        return SYSFS_GPIO_VALUE_PATH % self.number

    def _sysfs_gpio_direction_path(self):
        """
        Get the file that represent the direction of this pin.

        @rtype: str
        @return: the path to sysfs direction file
        """
        return SYSFS_GPIO_DIRECTION_PATH % self.number

    def _sysfs_gpio_edge_path(self):
        """
        Get the file that represent the edge that will trigger an interrupt.

        @rtype: str
        @return: the path to sysfs edge file
        """
        return SYSFS_GPIO_EDGE_PATH % self.number

    def _sysfs_gpio_active_low_path(self):
        """
        Get the file that represents the active_low setting for this pin.

        @rtype: str
        @return: the path to sysfs active_low file
        """
        return SYSFS_GPIO_ACTIVE_LOW_PATH % self.number


__all__ = ('DIRECTIONS', 'INPUT', 'OUTPUT', 'DIR_IN', 'DIR_OUT', 'EDGES', 'RISING', 'FALLING', 'BOTH', 'Gpio')
class RequestError(Exception):
    """Raised when a Bing Speech HTTP request does not return status 200."""
    pass


class BingSpeechAPI:
    """
    Client for the Bing Speech To Text and Text To Speech HTTP endpoints.
    """

    # Voice names per locale and gender. Shared by all instances and
    # reachable as `self.locales`, as before.
    locales = {
        "ar-eg": {"Female": "Microsoft Server Speech Text to Speech Voice (ar-EG, Hoda)"},
        # same voice under the conventional spelling of the locale code
        "ar-EG": {"Female": "Microsoft Server Speech Text to Speech Voice (ar-EG, Hoda)"},
        "de-DE": {"Female": "Microsoft Server Speech Text to Speech Voice (de-DE, Hedda)",
                  "Male": "Microsoft Server Speech Text to Speech Voice (de-DE, Stefan, Apollo)"},
        "en-AU": {"Female": "Microsoft Server Speech Text to Speech Voice (en-AU, Catherine)"},
        "en-CA": {"Female": "Microsoft Server Speech Text to Speech Voice (en-CA, Linda)"},
        "en-GB": {"Female": "Microsoft Server Speech Text to Speech Voice (en-GB, Susan, Apollo)",
                  "Male": "Microsoft Server Speech Text to Speech Voice (en-GB, George, Apollo)"},
        "en-IN": {"Male": "Microsoft Server Speech Text to Speech Voice (en-IN, Ravi, Apollo)"},
        "en-US": {"Female": "Microsoft Server Speech Text to Speech Voice (en-US, ZiraRUS)",
                  "Male": "Microsoft Server Speech Text to Speech Voice (en-US, BenjaminRUS)"},
        "es-ES": {"Female": "Microsoft Server Speech Text to Speech Voice (es-ES, Laura, Apollo)",
                  "Male": "Microsoft Server Speech Text to Speech Voice (es-ES, Pablo, Apollo)"},
        "es-MX": {"Male": "Microsoft Server Speech Text to Speech Voice (es-MX, Raul, Apollo)"},
        "fr-CA": {"Female": "Microsoft Server Speech Text to Speech Voice (fr-CA, Caroline)"},
        "fr-FR": {"Female": "Microsoft Server Speech Text to Speech Voice (fr-FR, Julie, Apollo)",
                  "Male": "Microsoft Server Speech Text to Speech Voice (fr-FR, Paul, Apollo)"},
        "it-IT": {"Male": "Microsoft Server Speech Text to Speech Voice (it-IT, Cosimo, Apollo)"},
        "ja-JP": {"Female": "Microsoft Server Speech Text to Speech Voice (ja-JP, Ayumi, Apollo)",
                  "Male": "Microsoft Server Speech Text to Speech Voice (ja-JP, Ichiro, Apollo)"},
        "pt-BR": {"Male": "Microsoft Server Speech Text to Speech Voice (pt-BR, Daniel, Apollo)"},
        # the Female entry used to point at the pt-BR Daniel voice
        "ru-RU": {"Female": "Microsoft Server Speech Text to Speech Voice (ru-RU, Irina, Apollo)",
                  "Male": "Microsoft Server Speech Text to Speech Voice (ru-RU, Pavel, Apollo)"},
        "zh-CN": {"Female": "Microsoft Server Speech Text to Speech Voice (zh-CN, HuihuiRUS)",
                  "Female2": "Microsoft Server Speech Text to Speech Voice (zh-CN, Yaoyao, Apollo)",
                  "Male": "Microsoft Server Speech Text to Speech Voice (zh-CN, Kangkang, Apollo)"},
        "zh-HK": {"Female": "Microsoft Server Speech Text to Speech Voice (zh-HK, Tracy, Apollo)",
                  "Male": "Microsoft Server Speech Text to Speech Voice (zh-HK, Danny, Apollo)"},
        "zh-TW": {"Female": "Microsoft Server Speech Text to Speech Voice (zh-TW, Yating, Apollo)",
                  "Male": "Microsoft Server Speech Text to Speech Voice (zh-TW, Zhiwei, Apollo)"},
    }

    def __init__(self, key=os.getenv('BING_KEY', '')):
        """
        :param key: subscription key; defaults to the BING_KEY environment
                    variable as read when this module was imported
        """
        self.key = key
        self.access_token = None
        self.expire_time = None
        self.session = requests.Session()

    def authenticate(self):
        """
        Fetch an access token unless the current one is still valid.

        :raises RequestError: the token endpoint did not answer with 200
        """
        # first credential request, or the access token from the previous one expired
        if self.expire_time is None or monotonic() > self.expire_time:
            credential_url = "https://api.cognitive.microsoft.com/sts/v1.0/issueToken"
            headers = {"Ocp-Apim-Subscription-Key": self.key}

            start_time = monotonic()
            response = self.session.post(credential_url, headers=headers)

            if response.status_code != 200:
                raise RequestError("http request error with status code {}".format(response.status_code))

            # Keep the token as text: on Python 3 the raw body is bytes and
            # cannot be concatenated into the Authorization header.
            token = response.content
            if isinstance(token, bytes):
                token = token.decode('utf-8')
            self.access_token = token

            expiry_seconds = 590  # document mentions the access token is expired in 10 minutes
            self.expire_time = start_time + expiry_seconds

    def recognize(self, audio_data, language="en-US", show_all=False):
        """
        Send audio to the speech recognition endpoint.

        :param audio_data: raw audio bytes, or a generator yielding chunks of
                           them (the Content-Type sent declares 16 kHz WAV)
        :param language: locale passed to the service
        :param show_all: return the whole JSON response instead of the text
        :return: the recognized text, or the decoded JSON when show_all is set
        :raises RequestError: the service did not answer with 200
        :raises ValueError: the response has no header/lexical entry
        """
        self.authenticate()
        if isinstance(audio_data, types.GeneratorType):
            def generate(audio):
                # a WAV header first, then the chunks as they arrive
                yield self.get_wav_header()
                for a in audio:
                    yield a

            data = generate(audio_data)
        else:
            data = self.to_wav(audio_data)

        params = {
            "version": "3.0",
            "requestid": uuid.uuid4(),
            "appID": "D4D52672-91D7-4C74-8AD8-42B1D98141A5",
            "format": "json",
            "locale": language,
            "device.os": "wp7",
            "scenarios": "ulm",
            "instanceid": uuid.uuid4(),
            "result.profanitymarkup": "0",
        }

        headers = {
            "Authorization": "Bearer {0}".format(self.access_token),
            "Content-Type": "audio/wav; samplerate=16000; sourcerate=16000; trustsourcerate=true",
        }

        url = "https://speech.platform.bing.com/recognize/query"
        response = self.session.post(url, params=params, headers=headers, data=data)

        if response.status_code != 200:
            raise RequestError("http request error with status code {}".format(response.status_code))

        result = response.json()

        if show_all:
            return result
        if "header" not in result or "lexical" not in result["header"]:
            raise ValueError('Unexpected response: {}'.format(result))
        return result["header"]["lexical"]

    def synthesize(self, text, language="en-US", gender="Female", stream=None, chunk_size=4096):
        """
        Convert text to speech.

        :param text: text to speak; inserted into the SSML document as-is
        :param language: a key of `locales`
        :param gender: "Female", "Male" or "Female2"; when the locale has no
                       such voice, "Female" is used if present, otherwise the
                       only voice the locale offers
        :param stream: when truthy, return an iterator over audio chunks
        :param chunk_size: chunk size used for the streamed iterator
        :return: the audio bytes, or a chunk iterator when stream is truthy
        :raises ValueError: the language is not in `locales`
        :raises RequestError: the service did not answer with 200
        """
        self.authenticate()

        if language not in self.locales:
            raise ValueError("language is not supported.")

        voices = self.locales[language]
        if gender not in voices:
            gender = "Female" if "Female" in voices else next(iter(voices))

        service_name = voices[gender]

        body = ("<speak version='1.0' xml:lang='en-us'>"
                "<voice xml:lang='%s' xml:gender='%s' name='%s'>%s</voice>"
                "</speak>") % (language, gender, service_name, text)
        if not isinstance(body, bytes):
            # send UTF-8 explicitly so non-Latin text survives the HTTP layer
            body = body.encode('utf-8')

        headers = {
            "Content-type": "application/ssml+xml",
            "X-Microsoft-OutputFormat": "raw-16khz-16bit-mono-pcm",
            "Authorization": "Bearer " + self.access_token,
            "X-Search-AppId": "07D3234E49CE426DAA29772419F436CA",
            "X-Search-ClientID": str(uuid.uuid1()).replace('-', ''),
            "User-Agent": "TTSForPython"
        }

        url = "https://speech.platform.bing.com/synthesize"
        response = self.session.post(url, headers=headers, data=body, stream=stream)

        if response.status_code != 200:
            raise RequestError("http request error with status code {}".format(response.status_code))

        if stream:
            data = response.iter_content(chunk_size=chunk_size)
        else:
            data = response.content

        return data

    @staticmethod
    def to_wav(raw_data):
        """
        Wrap raw 16 kHz, 16-bit, mono PCM bytes into WAV file contents.

        :param raw_data: PCM frames as bytes
        :return: bytes of a complete WAV file
        """
        with io.BytesIO() as wav_file:
            wav_writer = wave.open(wav_file, "wb")
            try:  # note that we can't use context manager, since that was only added in Python 3.4
                wav_writer.setframerate(16000)
                wav_writer.setsampwidth(2)
                wav_writer.setnchannels(1)
                wav_writer.writeframes(raw_data)
                wav_data = wav_file.getvalue()
            finally:  # make sure resources are cleaned up
                wav_writer.close()
        return wav_data

    @staticmethod
    def get_wav_header():
        """
        :return: bytes of a WAV header for 16 kHz, 16-bit, mono audio with
                 no frames (writeframes needs bytes, not '')
        """
        return BingSpeechAPI.to_wav(b'')


def main():
    """Round-trip a few sentences through TTS and STT and time each one."""
    import timeit
    import logging

    logging.basicConfig(level=logging.DEBUG)

    bing = BingSpeechAPI()

    def test(text, stream=None):
        try:
            print('TTS:{}'.format(text))
            speech = bing.synthesize(text, stream=stream)
            text = bing.recognize(speech, language='en-US')
            print('STT:{}'.format(text.encode('utf-8')))
            print('Stream mode:{}'.format('yes' if stream else 'no'))
        except RequestError as e:
            print("Could not request results from Microsoft Bing Voice Recognition service; {0}".format(e))

    texts = [
        'Your beliefs become your thoughts',
        'Your thoughts become your words',
        'Your words become your actions',
        'Your actions become your habits',
        'Your habits become your values',
        'Your values become your destiny',
    ]

    for n, text in enumerate(texts):
        print('No.{} try'.format(n))
        # odd rounds exercise the streaming path
        print(timeit.timeit(lambda: test(text, n & 1), number=1))


if __name__ == '__main__':
    main()
logger = logging.getLogger('mic')
collecting_audio = os.getenv('COLLECTING_AUDIO', 'no')


def random_string(length):
    """Return a string of `length` random decimal digits."""
    return ''.join(random.choice(string.digits) for _ in range(length))


def save_as_wav(data, prefix):
    """
    Save raw 16 kHz, 16-bit, mono audio to a new WAV file.

    The file name is `prefix` (spaces replaced by underscores) followed by
    eight random digits and '.wav'; a fresh suffix is drawn until the name
    is unused.
    """
    prefix = prefix.replace(' ', '_')
    filename = prefix + random_string(8) + '.wav'
    while os.path.isfile(filename):
        filename = prefix + random_string(8) + '.wav'

    f = wave.open(filename, 'wb')
    try:
        f.setframerate(16000)
        f.setsampwidth(2)
        f.setnchannels(1)
        f.writeframes(data)
    finally:
        # close even when writing fails so the handle is not leaked
        f.close()

    logger.info('Save audio as %s' % filename)


class Microphone:
    """
    Audio input that feeds keyword detection, voice-activity gated
    listening and recording from one PyAudio callback stream.
    """

    sample_rate = 16000
    frames_per_buffer = 512
    # bits of self.status selecting what the stream callback does
    listening_mask = (1 << 0)
    detecting_mask = (1 << 1)
    recording_mask = (1 << 2)

    def __init__(self, pyaudio_instance=None, quit_event=None, decoder=None):
        """
        :param pyaudio_instance: PyAudio object to use; created when omitted
        :param quit_event: Event that stops the detect/listen loops once set
        :param decoder: pocketsphinx decoder; built by create_decoder()
                        when omitted
        """
        pixel_ring.set_color(rgb=0x400000)

        self.pyaudio_instance = pyaudio_instance if pyaudio_instance else pyaudio.PyAudio()

        self.device_index = None
        for i in range(self.pyaudio_instance.get_device_count()):
            dev = self.pyaudio_instance.get_device_info_by_index(i)
            name = dev['name'].encode('utf-8')
            if name.lower().find(b'respeaker') >= 0 and dev['maxInputChannels'] > 0:
                logger.info('Use {}'.format(name))
                self.device_index = i
                break

        # Compare with None: index 0 is a valid device and must not be
        # replaced by the default input device.
        if self.device_index is None:
            device = self.pyaudio_instance.get_default_input_device_info()
            self.device_index = device['index']

        self.stream = self.pyaudio_instance.open(
            input=True,
            start=False,
            format=pyaudio.paInt16,
            channels=1,
            rate=self.sample_rate,
            frames_per_buffer=self.frames_per_buffer,
            stream_callback=self._callback,
            input_device_index=self.device_index,
        )

        self.quit_event = quit_event if quit_event else Event()

        self.listen_queue = Queue.Queue()
        self.detect_queue = Queue.Queue()

        self.decoder = decoder if decoder else self.create_decoder()
        self.decoder.start_utt()

        self.status = 0
        self.active = False

        # recent buffers kept so speech onset / detected keyword audio is not lost
        self.listen_history = collections.deque(maxlen=8)
        self.detect_history = collections.deque(maxlen=48)

        self.wav = None
        self.record_countdown = None
        # [buffers of speech still allowed, buffers of silence still allowed]
        self.listen_countdown = [0, 0]

    @staticmethod
    def create_decoder():
        """
        Build a pocketsphinx keyword-spotting decoder.

        Model, dictionary and keyword list paths default to the
        'pocketsphinx-data' directory next to this file and can be
        overridden with the POCKETSPHINX_DATA / _HMM / _DIC / _KWS
        environment variables.
        """
        from pocketsphinx.pocketsphinx import Decoder

        path = os.path.dirname(os.path.realpath(__file__))
        pocketsphinx_data = os.getenv('POCKETSPHINX_DATA', os.path.join(path, 'pocketsphinx-data'))
        hmm = os.getenv('POCKETSPHINX_HMM', os.path.join(pocketsphinx_data, 'hmm'))
        dictionary = os.getenv('POCKETSPHINX_DIC', os.path.join(pocketsphinx_data, 'dictionary.txt'))
        kws = os.getenv('POCKETSPHINX_KWS', os.path.join(pocketsphinx_data, 'keywords.txt'))

        config = Decoder.default_config()
        config.set_string('-hmm', hmm)
        config.set_string('-dict', dictionary)
        config.set_string('-kws', kws)
        # config.set_int('-samprate', SAMPLE_RATE) # uncomment if rate is not 16000. use config.set_float() on ubuntu
        config.set_int('-nfft', 512)
        config.set_float('-vad_threshold', 2.7)
        config.set_string('-logfn', os.devnull)

        return Decoder(config)

    def _buffers_for(self, seconds):
        """Number of stream buffers needed to cover `seconds`, rounded up."""
        return int((seconds * self.sample_rate + self.frames_per_buffer - 1) // self.frames_per_buffer)

    def recognize(self, data):
        """
        Run the decoder over audio and return the hypothesis text.

        :param data: raw audio bytes or a generator of audio chunks
        :return: the recognized string, '' when there is none
        """
        self.decoder.end_utt()
        self.decoder.start_utt()

        if not data:
            return ''

        if isinstance(data, types.GeneratorType):
            for d in data:
                self.decoder.process_raw(d, False, False)
        else:
            self.decoder.process_raw(data, False, True)

        hypothesis = self.decoder.hyp()
        if hypothesis:
            logger.info('Recognized {}'.format(hypothesis.hypstr))
            return hypothesis.hypstr

        return ''

    def detect(self, keyword=None):
        """
        Block until a keyword is spotted or the quit event is set.

        :param keyword: when given, only a hypothesis containing this text
                        ends the wait; otherwise any hypothesis does
        :return: the hypothesis text, or None when stopped by the quit event
        """
        self.decoder.end_utt()
        self.decoder.start_utt()

        pixel_ring.off()

        self.detect_history.clear()

        self.detect_queue.queue.clear()
        self.status |= self.detecting_mask
        self.start()  # only starts the stream when it is stopped

        result = None
        logger.info('Start detecting')
        while not self.quit_event.is_set():
            size = self.detect_queue.qsize()
            if size > 4:
                logger.info('Too many delays, {} in queue'.format(size))

            try:
                # wake up regularly so a set quit event is noticed even
                # when no audio arrives
                data = self.detect_queue.get(timeout=1)
            except Queue.Empty:
                continue

            self.detect_history.append(data)
            self.decoder.process_raw(data, False, False)

            hypothesis = self.decoder.hyp()
            if hypothesis:
                logger.info('Detected {}'.format(hypothesis.hypstr))
                if collecting_audio != 'no':
                    logger.debug(collecting_audio)
                    save_as_wav(b''.join(self.detect_history), hypothesis.hypstr)
                self.detect_history.clear()
                if keyword:
                    if hypothesis.hypstr.find(keyword) >= 0:
                        result = hypothesis.hypstr
                        break
                    else:
                        # not the keyword we wait for: restart the utterance
                        self.decoder.end_utt()
                        self.decoder.start_utt()
                        self.detect_history.clear()
                else:
                    result = hypothesis.hypstr
                    break

        self.status &= ~self.detecting_mask
        self.stop()

        return result

    wakeup = detect

    def listen(self, duration=9, timeout=3):
        """
        Start listening and return a generator of audio chunks.

        :param duration: maximum seconds of speech to collect
        :param timeout: maximum seconds of silence, also used as the wait
                        limit for each chunk
        :return: generator yielding audio chunks until an empty chunk
                 arrives, the quit event is set or a chunk times out
        """
        vad.reset()

        # integer buffer counts on Python 2 and 3 alike
        self.listen_countdown[0] = self._buffers_for(duration)
        self.listen_countdown[1] = self._buffers_for(timeout)

        self.listen_queue.queue.clear()
        self.status |= self.listening_mask
        self.start()
        pixel_ring.listen()

        logger.info('Start listening')

        def _listen():
            try:
                data = self.listen_queue.get(timeout=timeout)
                while data and not self.quit_event.is_set():
                    yield data
                    data = self.listen_queue.get(timeout=timeout)
            except Queue.Empty:
                pass

            self.stop()

        return _listen()

    def record(self, file_name, seconds=1800):
        """
        Open `file_name` as a 16-bit mono WAV file and switch the stream
        into recording mode.

        NOTE(review): the stream callback does not write any frames for the
        recording mode yet, so the file stays empty.
        """
        self.wav = wave.open(file_name, 'wb')
        self.wav.setsampwidth(2)
        self.wav.setnchannels(1)
        self.wav.setframerate(self.sample_rate)
        self.record_countdown = self._buffers_for(seconds)
        self.status |= self.recording_mask
        self.start()

    def quit(self):
        """Clear all modes, set the quit event and release a waiting listener."""
        self.status = 0
        self.quit_event.set()
        self.listen_queue.put('')  # empty chunk ends the listen generator
        if self.wav:
            self.wav.close()
            self.wav = None

    def start(self):
        """Start the stream if it is stopped."""
        if self.stream.is_stopped():
            self.stream.start_stream()

    def stop(self):
        """Stop the stream once no mode is active any more."""
        if not self.status and self.stream.is_active():
            self.stream.stop_stream()

    def close(self):
        """Quit and close the audio stream."""
        self.quit()
        self.stream.close()

    def _callback(self, in_data, frame_count, time_info, status):
        """
        PyAudio stream callback: route each buffer according to self.status.
        """
        if self.status & self.recording_mask:
            # NOTE(review): recording is not implemented here; see record()
            pass

        if self.status & self.detecting_mask:
            self.detect_queue.put(in_data)

        if self.status & self.listening_mask:
            active = vad.is_speech(in_data)
            if active:
                if not self.active:
                    # speech just started: deliver the buffered lead-in first
                    for d in self.listen_history:
                        self.listen_queue.put(d)
                        self.listen_countdown[0] -= 1

                    self.listen_history.clear()

                self.listen_queue.put(in_data)
                self.listen_countdown[0] -= 1
            else:
                if self.active:
                    self.listen_queue.put(in_data)
                else:
                    self.listen_history.append(in_data)

                self.listen_countdown[1] -= 1

            if self.listen_countdown[0] <= 0 or self.listen_countdown[1] <= 0:
                self.listen_queue.put('')
                self.status &= ~self.listening_mask
                pixel_ring.wait()
                logger.info('Stop listening')

            self.active = active

        return None, pyaudio.paContinue


def task(quit_event):
    """Wake up on 'respeaker', then listen and print what was recognized."""
    import time

    mic = Microphone(quit_event=quit_event)

    while not quit_event.is_set():
        if mic.wakeup('respeaker'):
            print('Wake up')
            data = mic.listen()
            text = mic.recognize(data)
            if text:
                time.sleep(3)
                print('Recognized %s' % text)


def main():
    """Run task() in a thread until interrupted from the keyboard."""
    import time

    logging.basicConfig(level=logging.DEBUG)

    q = Event()
    t = Thread(target=task, args=(q,))
    t.start()
    while True:
        try:
            time.sleep(1)
        except KeyboardInterrupt:
            print('Quit')
            q.set()
            break
    t.join()


if __name__ == '__main__':
    main()