├── .gitignore ├── .gitmodules ├── .isort.cfg ├── LICENSE.md ├── README.md ├── bin ├── asr_adapter_raw2text.py ├── asr_adapter_wav2text.py ├── asr_transcribe.py ├── asr_transcribe_stream.py ├── asr_transcribe_wav.py ├── client_unix_socket.py ├── config_print.py ├── handle_adapter_json.py ├── handle_adapter_text.py ├── handle_intent.py ├── handle_text.py ├── intent_recognize.py ├── mic_adapter_raw.py ├── mic_record_sample.py ├── mic_test_energy.py ├── pipeline_run.py ├── program_download.py ├── program_install.py ├── satellite_run.py ├── server_run.py ├── snd_adapter_raw.py ├── snd_play.py ├── tts_adapter_http.py ├── tts_adapter_text2wav.py ├── tts_speak.py ├── tts_synthesize.py ├── vad_adapter_raw.py ├── vad_segment_wav.py ├── wake_adapter_raw.py └── wake_detect.py ├── config └── .gitignore ├── docs ├── README.md ├── adapters.md ├── domains.md ├── home_assistant.md ├── img │ ├── adapter.png │ ├── adapter.svg │ ├── ha_token.png │ ├── wyoming.png │ └── wyoming.svg ├── satellite.md ├── tutorial.md └── wyoming.md ├── etc ├── play_the_beatles.wav ├── porcupine.wav ├── set_timer.wav ├── sounds │ ├── beep_error.wav │ ├── beep_hi.wav │ └── beep_lo.wav ├── this_is_a_test.wav ├── turn_on_the_lamp.wav ├── what_time_is_it.wav └── whats_the_date_today.wav ├── examples └── satellite │ └── configuration.yaml ├── img ├── banner.png ├── pipeline.png ├── pipeline.svg ├── rhasspy-logo-notext.svg └── rhasspy-logo.svg ├── local └── data │ └── .gitkeep ├── mypy.ini ├── programs ├── asr │ ├── coqui-stt │ │ ├── README.md │ │ ├── bin │ │ │ ├── coqui_stt_raw2text.py │ │ │ ├── coqui_stt_server.py │ │ │ └── coqui_stt_wav2text.py │ │ ├── requirements.txt │ │ └── script │ │ │ ├── download.py │ │ │ ├── raw2text │ │ │ ├── server │ │ │ ├── setup │ │ │ └── wav2text │ ├── faster-whisper │ │ ├── README.md │ │ ├── bin │ │ │ ├── faster_whisper_server.py │ │ │ └── faster_whisper_wav2text.py │ │ ├── script │ │ │ ├── download.py │ │ │ ├── server │ │ │ ├── setup │ │ │ └── wav2text │ │ └── src │ │ │ ├── 
LICENSE │ │ │ ├── README.md │ │ │ ├── faster_whisper │ │ │ ├── __init__.py │ │ │ ├── audio.py │ │ │ ├── feature_extractor.py │ │ │ └── transcribe.py │ │ │ ├── requirements.conversion.txt │ │ │ ├── requirements.txt │ │ │ └── setup.py │ ├── pocketsphinx │ │ ├── README.md │ │ ├── bin │ │ │ ├── pocketsphinx_raw2text.py │ │ │ ├── pocketsphinx_server.py │ │ │ └── pocketsphinx_wav2text.py │ │ ├── requirements.txt │ │ └── script │ │ │ ├── download.py │ │ │ ├── raw2text │ │ │ ├── server │ │ │ ├── setup │ │ │ └── wav2text │ ├── vosk │ │ ├── README.md │ │ ├── bin │ │ │ ├── vosk_raw2text.py │ │ │ ├── vosk_server.py │ │ │ └── vosk_wav2text.py │ │ ├── requirements.txt │ │ └── script │ │ │ ├── download.py │ │ │ ├── raw2text │ │ │ ├── server │ │ │ ├── setup │ │ │ └── wav2text │ ├── whisper-cpp │ │ ├── .gitignore │ │ ├── Dockerfile.libwhisper │ │ ├── Dockerfile.libwhisper.dockerignore │ │ ├── README.md │ │ ├── bin │ │ │ ├── whisper_cpp_server.py │ │ │ └── whisper_cpp_wav2text.py │ │ ├── lib │ │ │ ├── Makefile │ │ │ └── whisper_cpp.py │ │ ├── requirements.txt │ │ └── script │ │ │ ├── build_libwhisper │ │ │ ├── download.py │ │ │ ├── server │ │ │ ├── setup │ │ │ ├── setup.py │ │ │ └── wav2text │ └── whisper │ │ ├── README.md │ │ ├── bin │ │ ├── whisper_server.py │ │ └── whisper_wav2text.py │ │ ├── requirements.txt │ │ └── script │ │ ├── server │ │ ├── setup │ │ └── wav2text ├── handle │ ├── date_time │ │ └── bin │ │ │ └── date_time.py │ └── home_assistant │ │ └── bin │ │ └── converse.py ├── intent │ └── regex │ │ └── bin │ │ └── regex.py ├── mic │ ├── pyaudio │ │ ├── README.md │ │ ├── bin │ │ │ ├── pyaudio_events.py │ │ │ ├── pyaudio_list_mics.py │ │ │ ├── pyaudio_raw.py │ │ │ └── pyaudio_shared.py │ │ ├── requirements.txt │ │ └── script │ │ │ ├── events │ │ │ ├── list_mics │ │ │ ├── raw │ │ │ └── setup │ ├── sounddevice │ │ ├── README.md │ │ ├── bin │ │ │ ├── sounddevice_events.py │ │ │ ├── sounddevice_list_mics.py │ │ │ ├── sounddevice_raw.py │ │ │ └── sounddevice_shared.py │ │ ├── 
requirements.txt │ │ └── script │ │ │ ├── events │ │ │ ├── list_mics │ │ │ ├── raw │ │ │ └── setup │ └── udp_raw │ │ └── bin │ │ └── udp_raw.py ├── remote │ └── websocket │ │ ├── bin │ │ └── stream2stream.py │ │ ├── requirements.txt │ │ └── script │ │ ├── run │ │ └── setup ├── snd │ └── udp_raw │ │ └── bin │ │ └── udp_raw.py ├── tts │ ├── coqui-tts │ │ ├── README.md │ │ ├── requirements.txt │ │ └── script │ │ │ ├── list_models │ │ │ ├── server │ │ │ └── setup │ ├── flite │ │ └── script │ │ │ ├── download.py │ │ │ └── setup │ ├── larynx │ │ ├── README.md │ │ ├── bin │ │ │ └── larynx_client.py │ │ ├── requirements.txt │ │ └── script │ │ │ ├── list_models │ │ │ ├── server │ │ │ └── setup │ ├── marytts │ │ └── bin │ │ │ └── marytts.py │ ├── mimic3 │ │ ├── README.md │ │ ├── bin │ │ │ └── mimic3_server.py │ │ ├── requirements.txt │ │ └── script │ │ │ ├── server │ │ │ └── setup │ └── piper │ │ ├── README.md │ │ ├── bin │ │ └── piper_server.py │ │ └── script │ │ ├── download.py │ │ ├── server │ │ └── setup.py ├── vad │ ├── energy │ │ └── bin │ │ │ └── energy_speech_prob.py │ ├── silero │ │ ├── README.md │ │ ├── bin │ │ │ └── silero_speech_prob.py │ │ ├── requirements.txt │ │ ├── script │ │ │ ├── setup │ │ │ └── speech_prob │ │ └── share │ │ │ └── silero_vad.onnx │ └── webrtcvad │ │ ├── README.md │ │ ├── bin │ │ └── webrtcvad_speech_prob.py │ │ ├── requirements.txt │ │ └── script │ │ ├── setup │ │ └── speech_prob └── wake │ ├── porcupine1 │ ├── bin │ │ ├── list_models.py │ │ ├── porcupine_raw_text.py │ │ ├── porcupine_shared.py │ │ └── porcupine_stream.py │ ├── requirements.txt │ └── script │ │ ├── download.py │ │ ├── list_models │ │ ├── raw2text │ │ └── setup │ ├── precise-lite │ ├── bin │ │ └── precise.py │ ├── requirements.txt │ ├── script │ │ └── setup │ └── share │ │ └── hey_mycroft.tflite │ └── snowboy │ ├── bin │ └── snowboy_raw_text.py │ ├── requirements.txt │ ├── script │ └── setup │ └── share │ ├── hey_extreme.umdl │ ├── jarvis.umdl │ ├── neoya.umdl │ ├── 
smart_mirror.umdl │ ├── snowboy.umdl │ ├── subex.umdl │ └── view_glass.umdl ├── pylintrc ├── requirements_dev.txt ├── requirements_http_api.txt ├── rhasspy3 ├── VERSION ├── __init__.py ├── asr.py ├── audio.py ├── config.py ├── configuration.yaml ├── core.py ├── event.py ├── handle.py ├── intent.py ├── mic.py ├── pipeline.py ├── program.py ├── py.typed ├── remote.py ├── snd.py ├── tts.py ├── util │ ├── __init__.py │ ├── dataclasses_json.py │ └── jaml.py ├── vad.py └── wake.py ├── rhasspy3_http_api ├── __init__.py ├── __main__.py ├── asr.py ├── css │ └── main.css ├── handle.py ├── img │ ├── banner.png │ └── favicon.png ├── intent.py ├── js │ ├── main.js │ └── recorder.worklet.js ├── pipeline.py ├── snd.py ├── templates │ ├── asr.html │ ├── index.html │ ├── layout.html │ ├── pipeline.html │ ├── satellite.html │ └── tts.html ├── tts.py └── wake.py ├── script ├── format ├── http_server ├── lint ├── run ├── setup ├── setup_http_server └── test ├── setup.cfg ├── setup.py ├── tests ├── test_dataclasses_json.py └── test_jaml.py └── tools └── websocket-client ├── bin └── websocket_client.py ├── requirements.txt └── script ├── run └── setup /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea 3 | *.log 4 | tmp/ 5 | 6 | *.py[cod] 7 | *.egg 8 | /build 9 | htmlcov 10 | 11 | .projectile 12 | .venv/ 13 | venv/ 14 | .mypy_cache/ 15 | *.egg-info/ 16 | 17 | /local/ 18 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "programs/asr/whisper.cpp/build/whisper.cpp"] 2 | path = programs/asr/whisper.cpp/build/whisper.cpp 3 | url = https://github.com/ggerganov/whisper.cpp 4 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | multi_line_output=3 
3 | include_trailing_comma=True 4 | force_grid_wrap=0 5 | use_parentheses=True 6 | line_length=88 7 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Michael Hansen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
def main() -> None:
    """Adapt a raw-audio speech-to-text command to the event protocol.

    Audio chunk events obtained from ``read_event`` are converted to the
    requested rate/width/channels and written to the command's stdin.  When
    an audio stop event arrives (or ``read_event`` returns ``None``), all of
    the command's stdout is decoded, stripped, and emitted as one
    ``Transcript`` event via ``write_event``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("command", help="Command to run")
    cli.add_argument("--shell", action="store_true")
    # Audio format handed to AudioChunkConverter
    cli.add_argument("--rate", type=int, help="Sample rate (hz)")
    cli.add_argument("--width", type=int, help="Sample width bytes")
    cli.add_argument("--channels", type=int, help="Sample channel count")
    cli.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = cli.parse_args()

    log_level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=log_level)

    # A shell receives the command string untouched; otherwise split to argv.
    command = args.command if args.shell else shlex.split(args.command)

    proc = subprocess.Popen(
        command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=args.shell
    )
    converter = AudioChunkConverter(args.rate, args.width, args.channels)

    with proc:
        assert proc.stdin is not None
        assert proc.stdout is not None

        event = read_event()
        while event is not None:
            if AudioChunk.is_type(event.type):
                converted = converter.convert(AudioChunk.from_event(event))
                proc.stdin.write(converted.audio)
                # Flush per chunk so the command receives audio as it arrives
                proc.stdin.flush()
            elif AudioStop.is_type(event.type):
                break

            event = read_event()

        # communicate() closes stdin and collects everything left on stdout
        raw_output, _unused_stderr = proc.communicate()
        text = raw_output.decode()

    transcript = Transcript(text=text.strip())
    write_event(transcript.event())


if __name__ == "__main__":
    main()
def main():
    """Relay events between this process and a Unix domain socket.

    Two threads share one file object wrapping the socket: ``read_proc``
    copies events coming from the socket to ``write_event`` and
    ``write_proc`` copies events from ``read_event`` to the socket.
    Only the write thread is joined; the read thread is a daemon, so the
    program ends once the local event stream is exhausted.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("socketfile", help="Path to Unix domain socket file")
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    _LOGGER.debug("Connecting to %s", args.socketfile)
    # Use the socket as a context manager so it is closed on every exit
    # path (normal completion, connect failure, Ctrl+C).
    with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
        sock.connect(args.socketfile)
        _LOGGER.debug("Connected")

        try:
            with sock.makefile(mode="rwb") as conn_file:
                read_thread = threading.Thread(
                    target=read_proc, args=(conn_file,), daemon=True
                )
                read_thread.start()

                write_thread = threading.Thread(
                    target=write_proc, args=(conn_file,), daemon=True
                )
                write_thread.start()
                write_thread.join()
        except KeyboardInterrupt:
            # Ctrl+C is the expected way to stop the relay
            pass


def read_proc(conn_file):
    """Forward every event read from ``conn_file`` to ``write_event``.

    Stops when ``read_event`` returns ``None``.  Errors are logged rather
    than raised because this runs as a thread target.
    """
    try:
        while True:
            event = read_event(conn_file)
            if event is None:
                break

            # No explicit stream is given; presumably this goes to stdout
            write_event(event)
    except Exception:
        _LOGGER.exception("Unexpected error in read thread")


def write_proc(conn_file):
    """Forward every event from ``read_event`` to ``conn_file``.

    Stops when ``read_event`` returns ``None``.  Errors are logged rather
    than raised because this runs as a thread target.
    """
    try:
        while True:
            # No explicit stream is given; presumably this reads stdin
            event = read_event()
            if event is None:
                break

            write_event(event, conn_file)
    except Exception:
        _LOGGER.exception("Unexpected error in write thread")


if __name__ == "__main__":
    main()
def main() -> None:
    """Adapt a text-in/text-out command to the handle event protocol.

    Events are read until the first ``Transcript``.  Its text is sent to the
    command's stdin, and the first non-blank line of the command's stdout is
    emitted as a ``Handled`` event.  If the command prints no non-blank line,
    a ``NotHandled`` event is emitted instead.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("command", help="Command to run")
    cli.add_argument("--shell", action="store_true")
    cli.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = cli.parse_args()

    log_level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=log_level)

    # A shell receives the command string untouched; otherwise split to argv.
    command = args.command if args.shell else shlex.split(args.command)

    proc = subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        shell=args.shell,
        universal_newlines=True,
    )
    with proc:
        assert proc.stdin is not None
        assert proc.stdout is not None

        event = read_event()
        while event is not None:
            if not Transcript.is_type(event.type):
                # Ignore everything until a transcript shows up
                event = read_event()
                continue

            transcript = Transcript.from_event(event)
            output, _unused_stderr = proc.communicate(input=transcript.text)

            # First non-blank output line (stripped) is the response
            stripped_lines = (raw.strip() for raw in output.splitlines())
            response = next((text for text in stripped_lines if text), None)

            if response is None:
                write_event(NotHandled().event())
            else:
                write_event(Handled(text=response).event())

            # Only a single transcript is handled per run
            break


if __name__ == "__main__":
    main()
async def main() -> None:
    """Handle intent JSON from the command line or stdin and print results.

    The handle program is taken from ``--handle-program`` or, failing that,
    from the selected pipeline's ``handle`` setting.  Each input is parsed
    as intent JSON, passed to ``handle``, and the resulting event is written
    to stdout as one JSON object per line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument(
        "-p", "--pipeline", default="default", help="Name of pipeline to use"
    )
    parser.add_argument(
        "--handle-program", help="Name of handle program to use (overrides pipeline)"
    )
    parser.add_argument("intent", nargs="*", help="Intent JSON event(s) to handle")
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    handle_program = args.handle_program
    pipeline = rhasspy.config.pipelines.get(args.pipeline)

    if not handle_program:
        # Fall back to the pipeline's configured handle program
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        handle_program = pipeline.handle

    assert handle_program, "No handle program"

    for line in get_input(args):
        # Intent JSON
        handle_input: Intent = Intent.from_dict(json.loads(line))
        handle_result = await handle(rhasspy, handle_program, handle_input)
        if handle_result is None:
            _LOGGER.warning("No result")
            continue

        _LOGGER.debug(handle_result)
        json.dump(handle_result.event().to_dict(), sys.stdout, ensure_ascii=False)
        # End each result with a newline and flush; without a separator,
        # several results run together on one line and cannot be parsed.
        print("", flush=True)


def get_input(args: argparse.Namespace) -> Iterable[str]:
    """Yield intent JSON strings from ``args.intent`` or, if empty, stdin.

    When reading stdin, lines are stripped and blank lines are skipped.
    A hint is printed to stderr if stdin is an interactive terminal.
    """
    if args.intent:
        yield from args.intent
        return

    if os.isatty(sys.stdin.fileno()):
        print("Reading input from stdin", file=sys.stderr)

    for line in sys.stdin:
        line = line.strip()
        if line:
            yield line


if __name__ == "__main__":
    asyncio.run(main())
async def main() -> None:
    """Handle text from the command line or stdin and print the results.

    The handle program is taken from ``--handle-program`` or, failing that,
    from the selected pipeline's ``handle`` setting.  Each input text is
    wrapped in a ``Transcript``, passed to ``handle``, and the resulting
    event is written to stdout as one JSON object per line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument(
        "-p", "--pipeline", default="default", help="Name of pipeline to use"
    )
    parser.add_argument(
        "--handle-program", help="Name of handle program to use (overrides pipeline)"
    )
    parser.add_argument("text", nargs="*", help="Text input to handle")
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    handle_program = args.handle_program
    pipeline = rhasspy.config.pipelines.get(args.pipeline)

    if not handle_program:
        # Fall back to the pipeline's configured handle program
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        handle_program = pipeline.handle

    assert handle_program, "No handle program"

    for line in get_input(args):
        # Text
        handle_input = Transcript(text=line)
        handle_result = await handle(rhasspy, handle_program, handle_input)
        if handle_result is None:
            _LOGGER.warning("No result")
            continue

        _LOGGER.debug(handle_result)
        json.dump(handle_result.event().to_dict(), sys.stdout, ensure_ascii=False)
        # End each result with a newline and flush; without a separator,
        # several results run together on one line and cannot be parsed.
        print("", flush=True)


def get_input(args: argparse.Namespace) -> Iterable[str]:
    """Yield input texts from ``args.text`` or, if empty, from stdin.

    When reading stdin, lines are stripped and blank lines are skipped.
    A hint is printed to stderr if stdin is an interactive terminal.
    """
    if args.text:
        yield from args.text
        return

    if os.isatty(sys.stdin.fileno()):
        print("Reading input from stdin", file=sys.stderr)

    for line in sys.stdin:
        line = line.strip()
        if line:
            yield line


if __name__ == "__main__":
    asyncio.run(main())
def main() -> None:
    """Run a recording command and emit its stdout as audio events.

    The command's stdout is read in blocks of
    ``samples_per_chunk * width * channels`` bytes.  An ``AudioStart`` event
    is written first, then one ``AudioChunk`` event per block until the
    command's stdout reaches end of file.  Every event carries a
    ``time.monotonic_ns()`` timestamp.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "command",
        help="Command to run",
    )
    parser.add_argument("--shell", action="store_true", help="Run command with shell")
    #
    parser.add_argument(
        "--samples-per-chunk",
        type=int,
        default=DEFAULT_SAMPLES_PER_CHUNK,
        help="Number of samples to read at a time from command",
    )
    parser.add_argument(
        "--rate",
        type=int,
        required=True,
        help="Sample rate (hz)",
    )
    parser.add_argument(
        "--width",
        type=int,
        required=True,
        help="Sample width bytes",
    )
    parser.add_argument(
        "--channels",
        type=int,
        required=True,
        help="Sample channel count",
    )
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    bytes_per_chunk = args.samples_per_chunk * args.width * args.channels

    if args.shell:
        command = args.command
    else:
        command = shlex.split(args.command)

    # shell= must follow --shell: with --shell the command stays a single
    # string, which Popen would otherwise treat as the program name.
    proc = subprocess.Popen(command, stdout=subprocess.PIPE, shell=args.shell)
    with proc:
        assert proc.stdout is not None

        write_event(
            AudioStart(
                args.rate, args.width, args.channels, timestamp=time.monotonic_ns()
            ).event()
        )
        while True:
            audio_bytes = proc.stdout.read(bytes_per_chunk)
            if not audio_bytes:
                # End of file: the command closed its stdout
                break

            write_event(
                AudioChunk(
                    args.rate,
                    args.width,
                    args.channels,
                    audio_bytes,
                    timestamp=time.monotonic_ns(),
                ).event()
            )


if __name__ == "__main__":
    main()
"config", 25 | help="Configuration directory", 26 | ) 27 | parser.add_argument( 28 | "--debug", action="store_true", help="Print DEBUG messages to console" 29 | ) 30 | args = parser.parse_args() 31 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 32 | 33 | rhasspy = Rhasspy.load(args.config) 34 | program_config = rhasspy.config.programs.get(args.domain, {}).get(args.program) 35 | assert program_config is not None, f"No config for {args.domain} {args.program}" 36 | 37 | install = program_config.install 38 | assert install is not None, f"No install config for {args.domain} {args.program}" 39 | 40 | downloads = install.downloads 41 | assert downloads is not None, f"No downloads for {args.domain} {args.program}" 42 | 43 | model = downloads.get(args.model) 44 | assert ( 45 | model is not None 46 | ), f"No download named {args.model} for {args.domain} {args.program}" 47 | 48 | program_dir = rhasspy.programs_dir / args.domain / args.program 49 | data_dir = rhasspy.data_dir / args.domain / args.program 50 | 51 | default_mapping = { 52 | "program_dir": str(program_dir.absolute()), 53 | "data_dir": str(data_dir.absolute()), 54 | "model": str(args.model), 55 | } 56 | 57 | # Check if already installed 58 | if model.check_file is not None: 59 | check_file = Path( 60 | string.Template(model.check_file).safe_substitute(default_mapping) 61 | ) 62 | if check_file.exists(): 63 | _LOGGER.info("Installed: %s", check_file) 64 | return 65 | 66 | download = install.download 67 | assert download is not None, f"No download config for {args.domain} {args.program}" 68 | 69 | download_command = string.Template(download.command).safe_substitute( 70 | default_mapping 71 | ) 72 | _LOGGER.info(download_command) 73 | 74 | cwd = program_dir if program_dir.exists() else rhasspy.config_dir 75 | 76 | if download.shell: 77 | subprocess.check_call(download_command, shell=True, cwd=cwd) 78 | else: 79 | subprocess.check_call(shlex.split(download_command), cwd=cwd) 80 | 81 | 82 | 
if __name__ == "__main__": 83 | main() 84 | -------------------------------------------------------------------------------- /bin/program_install.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import logging 4 | import shlex 5 | import string 6 | import subprocess 7 | from pathlib import Path 8 | 9 | from rhasspy3.core import Rhasspy 10 | 11 | _FILE = Path(__file__) 12 | _DIR = _FILE.parent 13 | _LOGGER = logging.getLogger(_FILE.stem) 14 | 15 | 16 | def main() -> None: 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument("domain") 19 | parser.add_argument("program") 20 | parser.add_argument( 21 | "-c", 22 | "--config", 23 | default=_DIR.parent / "config", 24 | help="Configuration directory", 25 | ) 26 | parser.add_argument( 27 | "--debug", action="store_true", help="Print DEBUG messages to console" 28 | ) 29 | args = parser.parse_args() 30 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 31 | 32 | rhasspy = Rhasspy.load(args.config) 33 | program_config = rhasspy.config.programs.get(args.domain, {}).get(args.program) 34 | assert program_config is not None, f"No config for {args.domain} {args.program}" 35 | 36 | install = program_config.install 37 | assert install is not None, f"No install config for {args.domain} {args.program}" 38 | 39 | program_dir = rhasspy.programs_dir / args.domain / args.program 40 | data_dir = rhasspy.data_dir / args.domain / args.program 41 | 42 | default_mapping = { 43 | "program_dir": str(program_dir.absolute()), 44 | "data_dir": str(data_dir.absolute()), 45 | } 46 | 47 | # Check if already installed 48 | if install.check_file is not None: 49 | check_file = Path( 50 | string.Template(install.check_file).safe_substitute(default_mapping) 51 | ) 52 | if check_file.exists(): 53 | _LOGGER.info("Installed: %s", check_file) 54 | return 55 | 56 | install_command = 
string.Template(install.command).safe_substitute(default_mapping) 57 | _LOGGER.debug(install_command) 58 | 59 | cwd = program_dir if program_dir.exists() else rhasspy.config_dir 60 | 61 | if install.shell: 62 | subprocess.check_call(install_command, shell=True, cwd=cwd) 63 | else: 64 | subprocess.check_call(shlex.split(install_command), cwd=cwd) 65 | 66 | 67 | if __name__ == "__main__": 68 | main() 69 | -------------------------------------------------------------------------------- /bin/snd_adapter_raw.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Play audio through a command that accepts raw PCM.""" 3 | import argparse 4 | import logging 5 | import shlex 6 | import subprocess 7 | from pathlib import Path 8 | 9 | from rhasspy3.audio import ( 10 | DEFAULT_OUT_CHANNELS, 11 | DEFAULT_OUT_RATE, 12 | DEFAULT_OUT_WIDTH, 13 | AudioChunk, 14 | AudioChunkConverter, 15 | AudioStop, 16 | ) 17 | from rhasspy3.event import read_event, write_event 18 | from rhasspy3.snd import Played 19 | 20 | _FILE = Path(__file__) 21 | _DIR = _FILE.parent 22 | _LOGGER = logging.getLogger(_FILE.stem) 23 | 24 | 25 | def main() -> None: 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument( 28 | "command", 29 | help="Command to run", 30 | ) 31 | parser.add_argument( 32 | "--rate", type=int, default=DEFAULT_OUT_RATE, help="Sample rate (hertz)" 33 | ) 34 | parser.add_argument( 35 | "--width", type=int, default=DEFAULT_OUT_WIDTH, help="Sample width (bytes)" 36 | ) 37 | parser.add_argument( 38 | "--channels", 39 | type=int, 40 | default=DEFAULT_OUT_CHANNELS, 41 | help="Sample channel count", 42 | ) 43 | parser.add_argument("--shell", action="store_true", help="Run command with shell") 44 | parser.add_argument( 45 | "--debug", action="store_true", help="Print DEBUG messages to console" 46 | ) 47 | args = parser.parse_args() 48 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 49 | 50 | if 
args.shell: 51 | command = args.command 52 | else: 53 | command = shlex.split(args.command) 54 | 55 | try: 56 | proc = subprocess.Popen( 57 | command, stdin=subprocess.PIPE, stdout=subprocess.DEVNULL 58 | ) 59 | assert proc.stdin is not None 60 | 61 | converter = AudioChunkConverter(args.rate, args.width, args.channels) 62 | with proc: 63 | while True: 64 | event = read_event() 65 | if event is None: 66 | break 67 | 68 | if AudioChunk.is_type(event.type): 69 | chunk = AudioChunk.from_event(event) 70 | chunk = converter.convert(chunk) 71 | proc.stdin.write(chunk.audio) 72 | proc.stdin.flush() 73 | elif AudioStop.is_type(event.type): 74 | break 75 | finally: 76 | write_event(Played().event()) 77 | 78 | 79 | if __name__ == "__main__": 80 | main() 81 | -------------------------------------------------------------------------------- /bin/snd_play.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import asyncio 4 | import logging 5 | import os 6 | import sys 7 | from pathlib import Path 8 | 9 | from rhasspy3.audio import DEFAULT_SAMPLES_PER_CHUNK 10 | from rhasspy3.core import Rhasspy 11 | from rhasspy3.snd import play 12 | 13 | _FILE = Path(__file__) 14 | _DIR = _FILE.parent 15 | _LOGGER = logging.getLogger(_FILE.stem) 16 | 17 | 18 | async def main() -> None: 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument("wav_file", nargs="*", help="Path to WAV file(s) to play") 21 | parser.add_argument( 22 | "-c", 23 | "--config", 24 | default=_DIR.parent / "config", 25 | help="Configuration directory", 26 | ) 27 | parser.add_argument("-p", "--pipeline", default="default", help="Name of pipeline") 28 | parser.add_argument("--snd-program", help="Audio output program name") 29 | parser.add_argument( 30 | "--samples-per-chunk", 31 | type=int, 32 | default=DEFAULT_SAMPLES_PER_CHUNK, 33 | help="Samples to send to snd program at a time", 34 | ) 35 | parser.add_argument( 36 | "--debug", 
action="store_true", help="Print DEBUG messages to console" 37 | ) 38 | args = parser.parse_args() 39 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 40 | 41 | rhasspy = Rhasspy.load(args.config) 42 | snd_program = args.snd_program 43 | pipeline = rhasspy.config.pipelines.get(args.pipeline) 44 | 45 | if not snd_program: 46 | assert pipeline is not None, f"No pipeline named {args.pipeline}" 47 | snd_program = pipeline.snd 48 | 49 | assert snd_program, "No snd program" 50 | 51 | if args.wav_file: 52 | for wav_path in args.wav_file: 53 | with open(wav_path, "rb") as wav_file: 54 | await play(rhasspy, snd_program, wav_file, args.samples_per_chunk) 55 | else: 56 | if os.isatty(sys.stdin.fileno()): 57 | print("Reading WAV data from stdin", file=sys.stderr) 58 | 59 | await play( 60 | rhasspy, 61 | snd_program, 62 | sys.stdin.buffer, 63 | args.samples_per_chunk, 64 | ) 65 | 66 | 67 | if __name__ == "__main__": 68 | asyncio.run(main()) 69 | -------------------------------------------------------------------------------- /bin/tts_speak.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Synthesize and speak audio.""" 3 | import argparse 4 | import asyncio 5 | import io 6 | import json 7 | import logging 8 | import os 9 | import sys 10 | from pathlib import Path 11 | 12 | from rhasspy3.core import Rhasspy 13 | from rhasspy3.snd import play 14 | from rhasspy3.tts import synthesize 15 | 16 | _FILE = Path(__file__) 17 | _DIR = _FILE.parent 18 | _LOGGER = logging.getLogger(_FILE.stem) 19 | 20 | 21 | async def main() -> None: 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument("text", nargs="*", help="Text to speak (default: stdin)") 24 | parser.add_argument( 25 | "-c", 26 | "--config", 27 | default=_DIR.parent / "config", 28 | help="Configuration directory", 29 | ) 30 | parser.add_argument("-p", "--pipeline", default="default", help="Name of pipeline") 31 | 
parser.add_argument("--tts-program", help="TTS program name") 32 | parser.add_argument("--snd-program", help="Audio output program name") 33 | parser.add_argument( 34 | "--samples-per-chunk", 35 | type=int, 36 | default=1024, 37 | help="Samples to send to snd program at a time", 38 | ) 39 | parser.add_argument( 40 | "--debug", action="store_true", help="Print DEBUG messages to console" 41 | ) 42 | args = parser.parse_args() 43 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 44 | 45 | rhasspy = Rhasspy.load(args.config) 46 | tts_program = args.tts_program 47 | snd_program = args.snd_program 48 | pipeline = rhasspy.config.pipelines.get(args.pipeline) 49 | 50 | if not tts_program: 51 | assert pipeline is not None, f"No pipeline named {args.pipeline}" 52 | tts_program = pipeline.tts 53 | 54 | if not snd_program: 55 | assert pipeline is not None, f"No pipeline named {args.pipeline}" 56 | snd_program = pipeline.snd 57 | 58 | assert tts_program, "No tts program" 59 | assert snd_program, "No snd program" 60 | 61 | if args.text: 62 | lines = args.text 63 | else: 64 | lines = sys.stdin 65 | if os.isatty(sys.stdin.fileno()): 66 | print("Reading text from stdin", file=sys.stderr) 67 | 68 | for line in lines: 69 | line = line.strip() 70 | if not line: 71 | continue 72 | 73 | with io.BytesIO() as wav_io: 74 | await synthesize(rhasspy, tts_program, line, wav_io) 75 | wav_io.seek(0) 76 | play_result = await play( 77 | rhasspy, snd_program, wav_io, args.samples_per_chunk 78 | ) 79 | if play_result is not None: 80 | json.dump(play_result.event().to_dict(), sys.stdout, ensure_ascii=False) 81 | print("", flush=True) 82 | 83 | 84 | if __name__ == "__main__": 85 | asyncio.run(main()) 86 | -------------------------------------------------------------------------------- /bin/tts_synthesize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Synthesize WAV audio from text.""" 3 | import argparse 4 
| import asyncio 5 | import io 6 | import logging 7 | import sys 8 | from pathlib import Path 9 | 10 | from rhasspy3.core import Rhasspy 11 | from rhasspy3.tts import synthesize 12 | 13 | _FILE = Path(__file__) 14 | _DIR = _FILE.parent 15 | _LOGGER = logging.getLogger(_FILE.stem) 16 | 17 | 18 | async def main() -> None: 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument("text", help="Text to speak") 21 | parser.add_argument( 22 | "-c", 23 | "--config", 24 | default=_DIR.parent / "config", 25 | help="Configuration directory", 26 | ) 27 | parser.add_argument("-p", "--pipeline", default="default", help="Name of pipeline") 28 | parser.add_argument("--tts-program", help="TTS program name") 29 | parser.add_argument("-f", "--file", help="Write to file instead of stdout") 30 | parser.add_argument( 31 | "--debug", action="store_true", help="Print DEBUG messages to console" 32 | ) 33 | args = parser.parse_args() 34 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 35 | 36 | rhasspy = Rhasspy.load(args.config) 37 | tts_program = args.tts_program 38 | pipeline = rhasspy.config.pipelines.get(args.pipeline) 39 | 40 | if not tts_program: 41 | assert pipeline is not None, f"No pipeline named {args.pipeline}" 42 | tts_program = pipeline.tts 43 | 44 | assert tts_program, "No tts program" 45 | 46 | with io.BytesIO() as wav_out: 47 | await synthesize(rhasspy, tts_program, args.text, wav_out) 48 | wav_bytes = wav_out.getvalue() 49 | 50 | if args.file: 51 | output_path = Path(args.file) 52 | output_path.parent.mkdir(parents=True, exist_ok=True) 53 | output_path.write_bytes(wav_bytes) 54 | else: 55 | sys.stdout.buffer.write(wav_bytes) 56 | 57 | 58 | if __name__ == "__main__": 59 | asyncio.run(main()) 60 | -------------------------------------------------------------------------------- /bin/wake_adapter_raw.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Wake word detection with a 
command that accepts raw PCM audio and prints a line for each detection.""" 3 | import argparse 4 | import logging 5 | import shlex 6 | import subprocess 7 | import threading 8 | import time 9 | from dataclasses import dataclass 10 | from pathlib import Path 11 | from typing import IO 12 | 13 | from rhasspy3.audio import AudioChunk, AudioStop 14 | from rhasspy3.event import read_event, write_event 15 | from rhasspy3.wake import Detection, NotDetected 16 | 17 | _FILE = Path(__file__) 18 | _DIR = _FILE.parent 19 | _LOGGER = logging.getLogger(_FILE.stem) 20 | 21 | 22 | @dataclass 23 | class State: 24 | timestamp: int = 0 25 | detected: bool = False 26 | 27 | 28 | def main() -> None: 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument( 31 | "command", 32 | help="Command to run", 33 | ) 34 | parser.add_argument( 35 | "--debug", action="store_true", help="Print DEBUG messages to console" 36 | ) 37 | args = parser.parse_args() 38 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 39 | 40 | command = shlex.split(args.command) 41 | with subprocess.Popen( 42 | command, stdin=subprocess.PIPE, stdout=subprocess.PIPE 43 | ) as proc: 44 | assert proc.stdin is not None 45 | assert proc.stdout is not None 46 | 47 | state = State() 48 | threading.Thread(target=write_proc, args=(proc.stdout, state)).start() 49 | 50 | while not state.detected: 51 | event = read_event() 52 | if event is None: 53 | break 54 | 55 | if AudioChunk.is_type(event.type): 56 | chunk = AudioChunk.from_event(event) 57 | state.timestamp = ( 58 | chunk.timestamp 59 | if chunk.timestamp is not None 60 | else time.monotonic_ns() 61 | ) 62 | proc.stdin.write(chunk.audio) 63 | proc.stdin.flush() 64 | elif AudioStop.is_type(event.type): 65 | proc.stdin.close() 66 | break 67 | 68 | if not state.detected: 69 | write_event(NotDetected().event()) 70 | 71 | 72 | def write_proc(reader: IO[bytes], state: State): 73 | try: 74 | for line in reader: 75 | line = line.strip() 76 | if line: 77 
| write_event( 78 | Detection(name=line.decode(), timestamp=state.timestamp).event() 79 | ) 80 | state.detected = True 81 | break 82 | except Exception: 83 | _LOGGER.exception("Unexpected error in write thread") 84 | 85 | 86 | if __name__ == "__main__": 87 | main() 88 | -------------------------------------------------------------------------------- /bin/wake_detect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Wait for wake word to be detected.""" 3 | import argparse 4 | import asyncio 5 | import json 6 | import logging 7 | import sys 8 | from pathlib import Path 9 | 10 | from rhasspy3.core import Rhasspy 11 | from rhasspy3.mic import DOMAIN as MIC_DOMAIN 12 | from rhasspy3.program import create_process 13 | from rhasspy3.wake import detect 14 | 15 | _FILE = Path(__file__) 16 | _DIR = _FILE.parent 17 | _LOGGER = logging.getLogger(_FILE.stem) 18 | 19 | 20 | async def main() -> None: 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument( 23 | "-c", 24 | "--config", 25 | default=_DIR.parent / "config", 26 | help="Configuration directory", 27 | ) 28 | parser.add_argument( 29 | "-p", "--pipeline", default="default", help="Name of pipeline to use" 30 | ) 31 | parser.add_argument( 32 | "--mic-program", help="Name of mic program to use (overrides pipeline)" 33 | ) 34 | parser.add_argument( 35 | "--wake-program", help="Name of wake program to use (overrides pipeline)" 36 | ) 37 | # 38 | parser.add_argument("--loop", action="store_true", help="Keep detecting wake words") 39 | parser.add_argument( 40 | "--debug", action="store_true", help="Print DEBUG messages to console" 41 | ) 42 | args = parser.parse_args() 43 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 44 | 45 | rhasspy = Rhasspy.load(args.config) 46 | mic_program = args.mic_program 47 | wake_program = args.wake_program 48 | pipeline = rhasspy.config.pipelines.get(args.pipeline) 49 | 50 | if not mic_program: 
51 | assert pipeline is not None, f"No pipeline named {args.pipeline}" 52 | mic_program = pipeline.mic 53 | 54 | assert mic_program, "No mic program" 55 | _LOGGER.debug("mic program: %s", mic_program) 56 | 57 | if not wake_program: 58 | assert pipeline is not None, f"No pipeline named {args.pipeline}" 59 | wake_program = pipeline.wake 60 | 61 | assert wake_program, "No wake program" 62 | _LOGGER.debug("wake program: %s", wake_program) 63 | 64 | # Detect wake word 65 | while True: 66 | async with (await create_process(rhasspy, MIC_DOMAIN, mic_program)) as mic_proc: 67 | assert mic_proc.stdout is not None 68 | _LOGGER.debug("Detecting wake word") 69 | detection = await detect(rhasspy, wake_program, mic_proc.stdout) 70 | if detection is not None: 71 | json.dump(detection.event().to_dict(), sys.stdout, ensure_ascii=False) 72 | print("", flush=True) 73 | 74 | if not args.loop: 75 | break 76 | 77 | 78 | if __name__ == "__main__": 79 | try: 80 | asyncio.run(main()) 81 | except KeyboardInterrupt: 82 | pass 83 | -------------------------------------------------------------------------------- /config/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Rhasspy 3 2 | 3 | * [Tutorial](tutorial.md) 4 | * [Domains](domains.md) 5 | * [Wyoming Protocol](wyoming.md) 6 | * [Adapters](adapters.md) 7 | -------------------------------------------------------------------------------- /docs/adapters.md: -------------------------------------------------------------------------------- 1 | # Adapters 2 | 3 | Scripts in `bin/`: 4 | 5 | * `asr_adapter_raw2text.py` 6 | * Raw audio stream in, text or JSON out 7 | * `asr_adapter_wav2text.py` 8 | * WAV file(s) in, text or JSON out (per file) 9 | * `handle_adapter_json.py` 10 | * Intent JSON in, text response out 11 | 
* `handle_adapter_text.py` 12 | * Transcription in, text response out 13 | * `mic_adapter_raw.py` 14 | * Raw audio stream in 15 | * `snd_adapter_raw.py` 16 | * Raw audio stream out 17 | * `tts_adapter_http.py` 18 | * HTTP POST to endpoint with text, WAV out 19 | * `tts_adapter_text2wav.py` 20 | * Text in, WAV out 21 | * `vad_adapter_raw.py` 22 | * Raw audio stream in, speech probability out (one line per chunk) 23 | * `wake_adapter_raw.py` 24 | * Raw audio stream in, name of detected model out (one line per detection) 25 | * `client_unix_socket.py` 26 | * Send/receive events over Unix domain socket 27 | 28 | 29 | ![Wyoming protocol adapter](img/adapter.png) 30 | -------------------------------------------------------------------------------- /docs/domains.md: -------------------------------------------------------------------------------- 1 | # Domains 2 | 3 | Programs belong to a specific domain. This defines the kinds of [events](wyoming.md) they are expected to receive and emit. 4 | 5 | ## mic 6 | 7 | Emits `audio-chunk` events, ideally with a `timestamp`. 8 | 9 | 10 | ## wake 11 | 12 | Receives `audio-chunk` events. 13 | Emits `detection` event(s) or a `not-detected` event if the program exits without a detection. 14 | 15 | 16 | ## asr 17 | 18 | Receives an `audio-start` event, followed by zero or more `audio-chunk` events. 19 | 20 | An `audio-stop` event must trigger a `transcript` event to be emitted. 21 | 22 | 23 | ## vad 24 | 25 | Receives `audio-chunk` events. 26 | 27 | Emits `voice-started` with the `timestamp` of the `audio-chunk` when the user started speaking. 28 | 29 | Emits `voice-stopped` with the `timestamp` of the `audio-chunk` when the user finished speaking. 30 | 31 | 32 | ## intent 33 | 34 | Optional. The `handle` domain can handle `transcript` events directly. 35 | 36 | Receives `recognize` events. 37 | 38 | Emits either an `intent` or a `not-recognized` event. 
39 | 40 | 41 | ## handle 42 | 43 | Receives one of the following event types: `transcript`, `intent`, or `not-recognized`. 44 | 45 | Emits either a `handled` or `not-handled` event. 46 | 47 | 48 | ## tts 49 | 50 | Receives a `synthesize` event. 51 | 52 | Emits an `audio-start` event followed by zero or more `audio-chunk` events, and then an `audio-stop` event. 53 | 54 | 55 | ## snd 56 | 57 | Receives `audio-chunk` events until an `audio-stop` event. 58 | 59 | Must emit `played` event when audio has finished playing. 60 | -------------------------------------------------------------------------------- /docs/home_assistant.md: -------------------------------------------------------------------------------- 1 | # Home Assistant 2 | 3 | This will connect Rhasspy to Home Assistant via [Assist](https://www.home-assistant.io/docs/assist). 4 | 5 | Install the Home Assistant intent handler: 6 | 7 | ```sh 8 | mkdir -p config/programs/handle/ 9 | cp -R programs/handle/home_assistant config/programs/handle/ 10 | ``` 11 | 12 | Create a long-lived access token in Home Assistant (inside your profile): 13 | 14 | ![Long-lived access token](img/ha_token.png) 15 | 16 | Copy the **entire** access token (with CTRL+A, not just selecting what you can see) and put it in the data directory: 17 | 18 | ```sh 19 | mkdir -p config/data/handle/home_assistant/ 20 | echo "MY-LONG-LIVED-ACCESS-TOKEN" > config/data/handle/home_assistant/token 21 | ``` 22 | 23 | Add to your `configuration.yaml`: 24 | 25 | 26 | ```yaml 27 | programs: 28 | handle: 29 | home_assistant: 30 | command: | 31 | bin/converse.py --language "${language}" "${url}" "${token_file}" 32 | adapter: | 33 | handle_adapter_text.py 34 | template_args: 35 | url: "http://localhost:8123/api/conversation/process" 36 | token_file: "${data_dir}/token" 37 | language: "en" 38 | 39 | pipelines: 40 | default: 41 | mic: ... 42 | vad: ... 43 | asr: ... 44 | wake: ... 45 | handle: 46 | name: home_assistant 47 | tts: ... 48 | snd: ... 
49 | ``` 50 | 51 | Make sure your Home Assistant server is running, and test out a command: 52 | 53 | ```sh 54 | script/run bin/handle_text.py "Turn on the bed light" 55 | ``` 56 | 57 | Replace "bed light" with the name of a device you have connected to Home Assistant. 58 | 59 | If successful, you should see JSON printed with the response text, like: 60 | 61 | ```sh 62 | {"type": "handled", "data": {"text": "Turned on light"}} 63 | ``` 64 | 65 | This also works over HTTP: 66 | 67 | ```sh 68 | curl -X POST --data 'Turn on the bed light' 'localhost:13331/handle/handle' 69 | ``` 70 | 71 | Now you can run your full pipeline and control Home Assistant! 72 | -------------------------------------------------------------------------------- /docs/img/adapter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/docs/img/adapter.png -------------------------------------------------------------------------------- /docs/img/ha_token.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/docs/img/ha_token.png -------------------------------------------------------------------------------- /docs/img/wyoming.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/docs/img/wyoming.png -------------------------------------------------------------------------------- /docs/satellite.md: -------------------------------------------------------------------------------- 1 | # Satellite 2 | 3 | Once you have a Rhasspy HTTP server running, you can use Rhasspy as a satellite on a separate device. 4 | 5 | **NOTE:** Rhasspy satellites do not need to run Python or any Rhasspy software. 
They can use the websocket API directly, or talk directly to a running pipeline. 6 | 7 | On your satellite, clone the repo: 8 | 9 | ```sh 10 | git clone https://github.com/rhasspy/rhasspy3 11 | cd rhasspy3 12 | ``` 13 | 14 | Install the websocket utility: 15 | 16 | ```sh 17 | mkdir -p config/programs/remote/ 18 | cp -R programs/remote/websocket config/programs/remote/ 19 | config/programs/remote/websocket/script/setup 20 | ``` 21 | 22 | Install [Porcupine](https://github.com/Picovoice/porcupine): 23 | 24 | ```sh 25 | mkdir -p config/programs/wake/ 26 | cp -R programs/wake/porcupine1 config/programs/wake/ 27 | config/programs/wake/porcupine1/script/setup 28 | ``` 29 | 30 | Check available wake word models by running 31 | 32 | ```sh 33 | config/programs/wake/porcupine1/script/list_models 34 | ``` 35 | 36 | and choose one. We'll use "porcupine_linux.ppn" as an example, but this will be **different on a Raspberry Pi**. 37 | 38 | Next, create `config/configuration.yaml` with: 39 | 40 | ```yaml 41 | programs: 42 | mic: 43 | arecord: 44 | command: | 45 | arecord -q -r 16000 -c 1 -f S16_LE -t raw - 46 | adapter: | 47 | mic_adapter_raw.py --samples-per-chunk 1024 --rate 16000 --width 2 --channels 1 48 | 49 | wake: 50 | porcupine1: 51 | command: | 52 | .venv/bin/python3 bin/porcupine_stream.py --model "${model}" 53 | template_args: 54 | model: "porcupine_linux.ppn" 55 | 56 | remote: 57 | websocket: 58 | command: | 59 | script/run "${uri}" 60 | template_args: 61 | uri: "ws://localhost:13331/pipeline/asr-tts" 62 | 63 | satellites: 64 | default: 65 | mic: 66 | name: arecord 67 | wake: 68 | name: porcupine1 69 | remote: 70 | name: websocket 71 | snd: 72 | name: aplay 73 | ``` 74 | 75 | Replace the model in `porcupine1` with your selection, and adjust the URI in `websocket` to point to your Rhasspy server. 
76 | 77 | Now you can run your satellite: 78 | 79 | ```sh 80 | script/run bin/satellite_run.py --debug --loop 81 | ``` 82 | 83 | (say "porcupine", *pause*, say voice command, *wait*) 84 | 85 | If everything is working, you should hear a response being spoken. Press CTRL+C to quit. 86 | -------------------------------------------------------------------------------- /etc/play_the_beatles.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/etc/play_the_beatles.wav -------------------------------------------------------------------------------- /etc/porcupine.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/etc/porcupine.wav -------------------------------------------------------------------------------- /etc/set_timer.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/etc/set_timer.wav -------------------------------------------------------------------------------- /etc/sounds/beep_error.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/etc/sounds/beep_error.wav -------------------------------------------------------------------------------- /etc/sounds/beep_hi.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/etc/sounds/beep_hi.wav -------------------------------------------------------------------------------- /etc/sounds/beep_lo.wav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/etc/sounds/beep_lo.wav -------------------------------------------------------------------------------- /etc/this_is_a_test.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/etc/this_is_a_test.wav -------------------------------------------------------------------------------- /etc/turn_on_the_lamp.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/etc/turn_on_the_lamp.wav -------------------------------------------------------------------------------- /etc/what_time_is_it.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/etc/what_time_is_it.wav -------------------------------------------------------------------------------- /etc/whats_the_date_today.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/etc/whats_the_date_today.wav -------------------------------------------------------------------------------- /examples/satellite/configuration.yaml: -------------------------------------------------------------------------------- 1 | satellites: 2 | default: 3 | mic: 4 | name: arecord 5 | template_args: 6 | device: "default" 7 | wake: 8 | name: porcupine1 9 | template_args: 10 | model: "porcupine_raspberry-pi.ppn" 11 | remote: 12 | name: websocket 13 | template_args: 14 | uri: "ws://homeassistant.local:13331/pipeline/asr-tts" 15 | snd: 16 | name: aplay 17 | template_args: 18 | device: "default" 19 | 
-------------------------------------------------------------------------------- /img/banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/img/banner.png -------------------------------------------------------------------------------- /img/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/img/pipeline.png -------------------------------------------------------------------------------- /local/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/local/data/.gitkeep -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | ignore_missing_imports = true 3 | 4 | [mypy-setuptools.*] 5 | ignore_missing_imports = True 6 | -------------------------------------------------------------------------------- /programs/asr/coqui-stt/README.md: -------------------------------------------------------------------------------- 1 | # Coqui STT 2 | 3 | Speech to text service for Rhasspy based on [Coqui STT](https://stt.readthedocs.io/en/latest/). 4 | 5 | Additional models can be downloaded here: https://coqui.ai/models/ 6 | 7 | 8 | ## Installation 9 | 10 | 1. Copy the contents of this directory to `config/programs/asr/coqui-stt/` 11 | 2. Run `script/setup` 12 | 3. Download a model with `script/download.py` 13 | * Example: `script/download.py en_large` 14 | * Models are downloaded to `config/data/asr/coqui-stt` directory 15 | 4. 
Test with `script/wav2text` 16 | * Example `script/wav2text /path/to/english_v1.0.0-large-vocab/ /path/to/test.wav` 17 | -------------------------------------------------------------------------------- /programs/asr/coqui-stt/bin/coqui_stt_raw2text.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import logging 4 | import sys 5 | from pathlib import Path 6 | 7 | import numpy as np 8 | from stt import Model 9 | 10 | _LOGGER = logging.getLogger("coqui_stt_raw2text") 11 | 12 | 13 | def main() -> None: 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument("model", help="Path to Coqui STT model directory") 16 | parser.add_argument( 17 | "--scorer", help="Path to scorer (default: .scorer file in model directory)" 18 | ) 19 | parser.add_argument( 20 | "--alpha-beta", 21 | type=float, 22 | nargs=2, 23 | metavar=("alpha", "beta"), 24 | help="Scorer alpha/beta", 25 | ) 26 | parser.add_argument( 27 | "--samples-per-chunk", 28 | type=int, 29 | default=1024, 30 | help="Number of samples to process at a time", 31 | ) 32 | parser.add_argument("--debug", action="store_true", help="Log DEBUG messages") 33 | args = parser.parse_args() 34 | 35 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 36 | 37 | model_dir = Path(args.model) 38 | model_path = next(model_dir.glob("*.tflite")) 39 | if args.scorer: 40 | scorer_path = Path(args.scorer) 41 | else: 42 | scorer_path = next(model_dir.glob("*.scorer")) 43 | 44 | _LOGGER.debug("Loading model: %s, scorer: %s", model_path, scorer_path) 45 | model = Model(str(model_path)) 46 | model.enableExternalScorer(str(scorer_path)) 47 | 48 | if args.alpha_beta is not None: 49 | model.setScorerAlphaBeta(*args.alpha_beta) 50 | 51 | model_stream = model.createStream() 52 | chunk = sys.stdin.buffer.read(args.samples_per_chunk) 53 | _LOGGER.debug("Processing audio") 54 | while chunk: 55 | chunk_array = np.frombuffer(chunk, 
dtype=np.int16) 56 | model_stream.feedAudioContent(chunk_array) 57 | chunk = sys.stdin.buffer.read(args.samples_per_chunk) 58 | 59 | text = model_stream.finishStream() 60 | _LOGGER.debug(text) 61 | 62 | print(text.strip()) 63 | 64 | 65 | # ----------------------------------------------------------------------------- 66 | 67 | if __name__ == "__main__": 68 | main() 69 | -------------------------------------------------------------------------------- /programs/asr/coqui-stt/bin/coqui_stt_wav2text.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import logging 4 | import wave 5 | from pathlib import Path 6 | 7 | import numpy as np 8 | from stt import Model 9 | 10 | _LOGGER = logging.getLogger("coqui_stt_wav2text") 11 | 12 | 13 | def main() -> None: 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument("model", help="Path to Coqui STT model directory") 16 | parser.add_argument("wav_file", nargs="+", help="Path to WAV file(s) to transcribe") 17 | parser.add_argument( 18 | "--scorer", help="Path to scorer (default: .scorer file in model directory)" 19 | ) 20 | parser.add_argument( 21 | "--alpha-beta", 22 | type=float, 23 | nargs=2, 24 | metavar=("alpha", "beta"), 25 | help="Scorer alpha/beta", 26 | ) 27 | parser.add_argument("--debug", action="store_true", help="Log DEBUG messages") 28 | args = parser.parse_args() 29 | 30 | logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) 31 | 32 | model_dir = Path(args.model) 33 | model_path = next(model_dir.glob("*.tflite")) 34 | if args.scorer: 35 | scorer_path = Path(args.scorer) 36 | else: 37 | scorer_path = next(model_dir.glob("*.scorer")) 38 | 39 | _LOGGER.debug("Loading model: %s, scorer: %s", model_path, scorer_path) 40 | model = Model(str(model_path)) 41 | model.enableExternalScorer(str(scorer_path)) 42 | 43 | if args.alpha_beta is not None: 44 | model.setScorerAlphaBeta(*args.alpha_beta) 45 | 46 | for 
def main() -> None:
    """Download Coqui STT model archive(s) and unpack them.

    Each requested model (a short alias from ``MODELS`` or a full model name)
    is substituted into ``--link-format``, fetched over HTTP, and extracted
    as a streamed tar archive into the destination directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "model",
        nargs="+",
        choices=list(itertools.chain(MODELS.keys(), MODELS.values())),
        help="Coqui STT model(s) to download",
    )
    parser.add_argument(
        "--destination",
        help="Path to destination directory (default: data/asr/coqui-stt)",
    )
    parser.add_argument(
        "--link-format",
        default="https://github.com/rhasspy/models/releases/download/v1.0/asr_coqui-stt-{model}.tar.gz",
        help="Format string for download URLs",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    if args.destination:
        args.destination = Path(args.destination)
    else:
        # Assume we're in programs/asr/coqui-stt/script
        data_dir = _DIR.parent.parent.parent.parent / "data"
        args.destination = data_dir / "asr" / "coqui-stt"

    # Create the destination itself, not just its parent.
    args.destination.mkdir(parents=True, exist_ok=True)

    # The archive comes from the network: use the "data" extraction filter
    # (blocks absolute paths / escapes from the destination) on Python
    # versions that provide it; older versions keep their default behavior.
    extraction_filter = getattr(tarfile, "data_filter", None)

    for requested in args.model:
        # Accept either an alias (e.g. "en_large") or the full model name.
        model_name = MODELS.get(requested, requested)
        url = args.link_format.format(model=model_name)
        _LOGGER.info("Downloading %s", url)
        with urlopen(url) as response:
            # "r|*" reads the tar as a forward-only stream straight from the
            # HTTP response, with transparent compression detection.
            with tarfile.open(mode="r|*", fileobj=response) as tar_gz:
                if extraction_filter is not None:
                    tar_gz.extraction_filter = extraction_filter
                _LOGGER.info("Extracting to %s", args.destination)
                tar_gz.extractall(args.destination)
"${this_dir}/..")" 9 | 10 | # Path to virtual environment 11 | : "${venv:=${base_dir}/.venv}" 12 | 13 | if [ -d "${venv}" ]; then 14 | source "${venv}/bin/activate" 15 | fi 16 | 17 | socket_dir="${base_dir}/var/run" 18 | mkdir -p "${socket_dir}" 19 | 20 | python3 "${base_dir}/bin/coqui_stt_server.py" --socketfile "${socket_dir}/coqui-stt.socket" "$@" 21 | -------------------------------------------------------------------------------- /programs/asr/coqui-stt/script/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | # Directory of *this* script 5 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | # Base directory of repo 8 | base_dir="$(realpath "${this_dir}/..")" 9 | 10 | # Path to virtual environment 11 | : "${venv:=${base_dir}/.venv}" 12 | 13 | # Python binary to use 14 | : "${PYTHON=python3}" 15 | 16 | python_version="$(${PYTHON} --version)" 17 | 18 | if [ ! -d "${venv}" ]; then 19 | # Create virtual environment 20 | echo "Creating virtual environment at ${venv} (${python_version})" 21 | rm -rf "${venv}" 22 | "${PYTHON}" -m venv "${venv}" 23 | source "${venv}/bin/activate" 24 | 25 | pip3 install --upgrade pip 26 | pip3 install --upgrade wheel setuptools 27 | else 28 | source "${venv}/bin/activate" 29 | fi 30 | 31 | # Install Python dependencies 32 | echo 'Installing Python dependencies' 33 | pip3 install -r "${base_dir}/requirements.txt" 34 | 35 | # ----------------------------------------------------------------------------- 36 | 37 | echo "OK" 38 | -------------------------------------------------------------------------------- /programs/asr/coqui-stt/script/wav2text: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | # Directory of *this* script 5 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | # Base directory of repo 8 | base_dir="$(realpath "${this_dir}/..")" 9 | 10 | # 
def main() -> None:
    """Transcribe one or more WAV files with faster-whisper.

    Loads the converted model once, then transcribes each file in the order
    given and prints one line of text per file (flushed immediately so a
    consumer reading the pipe sees each result as soon as it is ready).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to faster-whisper model directory")
    parser.add_argument("wav_file", nargs="+", help="Path to WAV file(s) to transcribe")
    parser.add_argument(
        "--device",
        default="cpu",
        help="Device to use for inference (default: cpu)",
    )
    parser.add_argument(
        "--language",
        help="Language to set for transcription",
    )
    parser.add_argument(
        "--compute-type",
        default="default",
        help="Compute type (float16, int8, etc.)",
    )
    parser.add_argument(
        "--beam-size",
        type=int,
        default=1,
        help="Beam size used during decoding (default: 1)",
    )
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    # Load converted faster-whisper model
    _LOGGER.debug("Loading model: %s", args.model)
    model = WhisperModel(args.model, device=args.device, compute_type=args.compute_type)
    _LOGGER.info("Model loaded")

    for wav_path in args.wav_file:
        _LOGGER.debug("Processing %s", wav_path)
        start_time = time.monotonic_ns()
        segments, _info = model.transcribe(
            wav_path,
            beam_size=args.beam_size,
            language=args.language,
        )

        # Segment texts may carry their own leading/trailing whitespace;
        # strip each piece so the joined line has no leading or doubled
        # spaces, matching the other wav2text programs which print
        # stripped text.
        # NOTE(review): the timer stops after the join, presumably because
        # segments are produced lazily -- confirm against transcribe().
        pieces = (segment.text.strip() for segment in segments)
        text = " ".join(piece for piece in pieces if piece)
        end_time = time.monotonic_ns()
        _LOGGER.debug(
            "Transcribed %s in %s second(s)", wav_path, (end_time - start_time) / 1e9
        )
        _LOGGER.debug(text)

        print(text, flush=True)
#!/usr/bin/env bash
# Start the faster-whisper server listening on a Unix socket in var/run.
# Any extra command-line arguments are passed through to the Python server.
set -eo pipefail

# Absolute path of the directory that holds this script
script_dir="$( cd "$( dirname "$0" )" && pwd )"

# Program directory (one level above script/)
program_dir="$(realpath "${script_dir}/..")"

# Virtual environment location; an existing $venv value takes precedence
: "${venv:=${program_dir}/.venv}"

# Activate the virtual environment only when it has been created
if [[ -d "${venv}" ]]; then
    . "${venv}/bin/activate"
fi

# The socket file lives in var/run; make sure that directory exists
run_dir="${program_dir}/var/run"
mkdir -p "${run_dir}"

python3 "${program_dir}/bin/faster_whisper_server.py" \
    --socketfile "${run_dir}/faster-whisper.socket" \
    "$@"
#!/usr/bin/env bash
# Create (or reuse) the program's virtual environment, then install the
# bundled faster-whisper sources and rhasspy3 into it in editable mode.
set -eo pipefail

# Absolute path of the directory that holds this script
script_dir="$( cd "$( dirname "$0" )" && pwd )"

# Program directory (one level above script/)
program_dir="$(realpath "${script_dir}/..")"

# Virtual environment location; an existing $venv value takes precedence
: "${venv:=${program_dir}/.venv}"

# Interpreter used to build the environment; defaults to python3 when unset
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

if [ -d "${venv}" ]; then
    # Environment already exists: just activate it
    source "${venv}/bin/activate"
else
    # Fresh environment: clear anything at that path, create, activate,
    # then bring the packaging tools up to date
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
fi

# Editable install of the bundled faster-whisper package
echo 'Installing Python dependencies'
pip3 install -e "${program_dir}/src"

# Editable install of rhasspy3 from four directories above the program
rhasspy3_root="${program_dir}/../../../.."
pip3 install -e "${rhasspy3_root}"

# -----------------------------------------------------------------------------

echo "OK"
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /programs/asr/faster-whisper/src/README.md: -------------------------------------------------------------------------------- 1 | # Faster Whisper transcription with CTranslate2 2 | 3 | This repository demonstrates how to implement the Whisper transcription using [CTranslate2](https://github.com/OpenNMT/CTranslate2/), which is a fast inference engine for Transformer models. 4 | 5 | This implementation is about 4 times faster than [openai/whisper](https://github.com/openai/whisper) for the same accuracy while using less memory. The efficiency can be further improved with 8-bit quantization on both CPU and GPU. 6 | 7 | ## Installation 8 | 9 | ```bash 10 | pip install -e .[conversion] 11 | ``` 12 | 13 | The model conversion requires the modules `transformers` and `torch` which are installed by the `[conversion]` requirement. Once a model is converted, these modules are no longer needed and the installation could be simplified to: 14 | 15 | ```bash 16 | pip install -e . 17 | ``` 18 | 19 | ## Usage 20 | 21 | ### Model conversion 22 | 23 | A Whisper model should be first converted into the CTranslate2 format. For example the command below converts the "medium" Whisper model and saves the weights in FP16: 24 | 25 | ```bash 26 | ct2-transformers-converter --model openai/whisper-medium --output_dir whisper-medium-ct2 --quantization float16 27 | ``` 28 | 29 | If needed, models can also be converted from the code. See the [conversion API](https://opennmt.net/CTranslate2/python/ctranslate2.converters.TransformersConverter.html). 
30 | 31 | ### Transcription 32 | 33 | ```python 34 | from faster_whisper import WhisperModel 35 | 36 | model_path = "whisper-medium-ct2/" 37 | 38 | # Run on GPU with FP16 39 | model = WhisperModel(model_path, device="cuda", compute_type="float16") 40 | 41 | # or run on GPU with INT8 42 | # model = WhisperModel(model_path, device="cuda", compute_type="int8_float16") 43 | # or run on CPU with INT8 44 | # model = WhisperModel(model_path, device="cpu", compute_type="int8") 45 | 46 | segments, info = model.transcribe("audio.mp3", beam_size=5) 47 | 48 | print("Detected language '%s' with probability %f" % (info.language, info.language_probability)) 49 | 50 | for segment in segments: 51 | print("[%ds -> %ds] %s" % (segment.start, segment.end, segment.text)) 52 | ``` 53 | 54 | ## Comparing performance against openai/whisper 55 | 56 | If you are comparing the performance against [openai/whisper](https://github.com/openai/whisper), you should make sure to use the same settings in both frameworks. In particular: 57 | 58 | * In openai/whisper, `model.transcribe` uses a beam size of 1 by default. A different beam size will have an important impact on performance so make sure to use the same. 59 | * When running on CPU, make sure to set the same number of threads. 
Both frameworks will read the environment variable `OMP_NUM_THREADS`, which can be set when running your script: 60 | 61 | ```bash 62 | OMP_NUM_THREADS=4 python3 my_script.py 63 | ``` 64 | -------------------------------------------------------------------------------- /programs/asr/faster-whisper/src/faster_whisper/__init__.py: -------------------------------------------------------------------------------- 1 | from faster_whisper.transcribe import WhisperModel 2 | -------------------------------------------------------------------------------- /programs/asr/faster-whisper/src/faster_whisper/audio.py: -------------------------------------------------------------------------------- 1 | import av 2 | import numpy as np 3 | 4 | 5 | def decode_audio(input_file, sampling_rate=16000): 6 | """Decodes the audio. 7 | 8 | Args: 9 | input_file: Path to the input file or a file-like object. 10 | sampling_rate: Resample the audio to this sample rate. 11 | 12 | Returns: 13 | A float32 Numpy array. 14 | """ 15 | fifo = av.audio.fifo.AudioFifo() 16 | resampler = av.audio.resampler.AudioResampler( 17 | format="s16", 18 | layout="mono", 19 | rate=sampling_rate, 20 | ) 21 | 22 | with av.open(input_file) as container: 23 | # Decode and resample each audio frame. 24 | for frame in container.decode(audio=0): 25 | frame.pts = None 26 | for new_frame in resampler.resample(frame): 27 | fifo.write(new_frame) 28 | 29 | # Flush the resampler. 30 | for new_frame in resampler.resample(None): 31 | fifo.write(new_frame) 32 | 33 | frame = fifo.read() 34 | 35 | # Convert s16 back to f32. 
def get_requirements(path):
    """Return the requirement specifiers listed in the file at *path*.

    Each line is stripped of surrounding whitespace. Blank lines and lines
    starting with ``#`` are skipped so they are never handed to setuptools
    as (invalid) requirement strings.

    Args:
        path: Path to a requirements file, read as UTF-8.

    Returns:
        A list of requirement strings in file order.
    """
    with open(path, encoding="utf-8") as requirements:
        stripped_lines = (line.strip() for line in requirements)
        return [
            line
            for line in stripped_lines
            if line and not line.startswith("#")
        ]
def main() -> None:
    """Transcribe raw audio from stdin with Pocketsphinx and print the text.

    The model directory must contain ``acoustic_model/``, ``dictionary.txt``
    and ``language_model.txt``, which are passed to the decoder as ``-hmm``,
    ``-dict`` and ``-lm``. Stdin is consumed in chunks of
    ``--samples-per-chunk`` samples until EOF, after which the hypothesis
    (or an empty line when there is none) is printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to Pocketsphinx model directory")
    parser.add_argument(
        "--samples-per-chunk",
        type=int,
        default=1024,
        help="Number of samples to process at a time",
    )
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    model_dir = Path(args.model)

    _LOGGER.debug("Loading model from %s", model_dir.absolute())
    decoder_config = pocketsphinx.Decoder.default_config()
    decoder_config.set_string("-hmm", str(model_dir / "acoustic_model"))
    decoder_config.set_string("-dict", str(model_dir / "dictionary.txt"))
    decoder_config.set_string("-lm", str(model_dir / "language_model.txt"))
    decoder = pocketsphinx.Decoder(decoder_config)

    # The sibling wav2text program requires 16-bit samples, i.e. two bytes
    # each. Convert the sample count to bytes so every chunk holds whole
    # samples even when --samples-per-chunk is odd.
    bytes_per_chunk = args.samples_per_chunk * 2

    decoder.start_utt()

    stdin = sys.stdin.buffer
    chunk = stdin.read(bytes_per_chunk)
    _LOGGER.debug("Processing audio")
    while chunk:
        # NOTE(review): the two flags are presumably (no_search, full_utt);
        # full_utt stays False here because audio arrives piecewise.
        decoder.process_raw(chunk, False, False)
        chunk = stdin.read(bytes_per_chunk)

    decoder.end_utt()

    # hyp() is falsy when nothing was recognized
    hyp = decoder.hyp()
    text = hyp.hypstr if hyp else ""

    _LOGGER.debug(text)

    print(text.strip())
def main() -> None:
    """Download Pocketsphinx model archive(s) and unpack them.

    Each requested model (a short alias from ``MODELS`` or a full model name)
    is substituted into ``--link-format``, fetched over HTTP, and extracted
    as a streamed tar archive into the destination directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "model",
        nargs="+",
        choices=list(itertools.chain(MODELS.keys(), MODELS.values())),
        help="Pocketsphinx model(s) to download",
    )
    parser.add_argument("--destination", help="Path to destination directory")
    parser.add_argument(
        "--link-format",
        default="https://github.com/rhasspy/models/releases/download/v1.0/asr_pocketsphinx-{model}.tar.gz",
        help="Format string for download URLs",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    if args.destination:
        args.destination = Path(args.destination)
    else:
        # Assume we're in programs/asr/pocketsphinx/script
        data_dir = _DIR.parent.parent.parent.parent / "data"
        args.destination = data_dir / "asr" / "pocketsphinx"

    # Create the destination itself, not just its parent.
    args.destination.mkdir(parents=True, exist_ok=True)

    # The archive comes from the network: use the "data" extraction filter
    # (blocks absolute paths / escapes from the destination) on Python
    # versions that provide it; older versions keep their default behavior.
    extraction_filter = getattr(tarfile, "data_filter", None)

    for requested in args.model:
        # Accept either an alias (e.g. "en_cmu") or the full model name.
        model_name = MODELS.get(requested, requested)
        url = args.link_format.format(model=model_name)
        _LOGGER.info("Downloading %s", url)
        with urlopen(url) as response:
            # "r|*" reads the tar as a forward-only stream straight from the
            # HTTP response, with transparent compression detection.
            with tarfile.open(mode="r|*", fileobj=response) as tar_gz:
                if extraction_filter is not None:
                    tar_gz.extraction_filter = extraction_filter
                _LOGGER.info("Extracting to %s", args.destination)
                tar_gz.extractall(args.destination)
"$( dirname "$0" )" && pwd )" 6 | 7 | # Base directory of repo 8 | base_dir="$(realpath "${this_dir}/..")" 9 | 10 | # Path to virtual environment 11 | : "${venv:=${base_dir}/.venv}" 12 | 13 | if [ -d "${venv}" ]; then 14 | source "${venv}/bin/activate" 15 | fi 16 | 17 | socket_dir="${base_dir}/var/run" 18 | mkdir -p "${socket_dir}" 19 | 20 | python3 "${base_dir}/bin/pocketsphinx_server.py" --socketfile "${socket_dir}/pocketsphinx.socket" "$@" 21 | -------------------------------------------------------------------------------- /programs/asr/pocketsphinx/script/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | # Directory of *this* script 5 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | # Base directory of repo 8 | base_dir="$(realpath "${this_dir}/..")" 9 | 10 | # Path to virtual environment 11 | : "${venv:=${base_dir}/.venv}" 12 | 13 | # Python binary to use 14 | : "${PYTHON=python3}" 15 | 16 | python_version="$(${PYTHON} --version)" 17 | 18 | if [ ! 
#!/usr/bin/env bash
# Run the Pocketsphinx WAV transcriber, inside the program's virtual
# environment when one exists. All arguments are passed through unchanged.
set -eo pipefail

# Absolute path of the directory that holds this script
script_dir="$( cd "$( dirname "$0" )" && pwd )"

# Program directory (one level above script/)
program_dir="$(realpath "${script_dir}/..")"

# Virtual environment location; an existing $venv value takes precedence
: "${venv:=${program_dir}/.venv}"

# Activate the virtual environment only when it has been created
if [[ -d "${venv}" ]]; then
    . "${venv}/bin/activate"
fi

python3 "${program_dir}/bin/pocketsphinx_wav2text.py" "$@"
def main() -> None:
    """Transcribe raw PCM audio read from stdin and print the final text.

    The audio is fed to a Vosk ``KaldiRecognizer`` chunk by chunk until stdin
    is exhausted; the recognizer's final result is then printed on one line.

    Command-line arguments:
        model: path to the Vosk model directory.
        -r/--rate: sample rate handed to the recognizer (default 16000).
        --samples-per-chunk: number of samples fed to the recognizer per call.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to Vosk model directory")
    parser.add_argument(
        "-r",
        "--rate",
        type=int,
        default=16000,
        help="Model sample rate (default: 16000)",
    )
    parser.add_argument(
        "--samples-per-chunk",
        type=int,
        default=1024,
        help="Number of samples to process at a time",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    SetLogLevel(0)

    model = Model(args.model)
    recognizer = KaldiRecognizer(
        model,
        args.rate,
    )

    # The option counts *samples* but read() counts *bytes*. The input is
    # treated as 16-bit PCM (2 bytes per sample, the same width the WAV
    # variant of this program requires), so scale the read size; this also
    # guarantees a chunk never ends in the middle of a sample.
    sample_width = 2
    bytes_per_chunk = args.samples_per_chunk * sample_width

    _LOGGER.debug("Processing audio")
    stdin = sys.stdin.buffer
    # read() returns b"" at end of input, which terminates the iterator.
    for chunk in iter(lambda: stdin.read(bytes_per_chunk), b""):
        recognizer.AcceptWaveform(chunk)

    result = json.loads(recognizer.FinalResult())
    print(result["text"].strip())
def main() -> None:
    """Transcribe one or more WAV files with Vosk, printing one line per file.

    Command-line arguments:
        model: path to the Vosk model directory.
        wav_file: one or more WAV files to transcribe.
        -r/--rate: sample rate handed to the recognizer (default 16000).
        --samples-per-chunk: accepted for interface compatibility; each file
            is passed to the recognizer in a single call.

    Raises:
        ValueError: if a WAV file is not mono, 16-bit, or does not match the
            sample rate the recognizer was created with.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to Vosk model directory")
    parser.add_argument("wav_file", nargs="+", help="Path to WAV file(s) to transcribe")
    parser.add_argument(
        "-r",
        "--rate",
        type=int,
        default=16000,
        help="Model sample rate (default: 16000)",
    )
    parser.add_argument(
        "--samples-per-chunk",
        type=int,
        default=1024,
        help="Number of samples to process at a time",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    SetLogLevel(0)

    model = Model(args.model)
    recognizer = KaldiRecognizer(
        model,
        args.rate,
    )

    for wav_path in args.wav_file:
        _LOGGER.debug("Processing %s", wav_path)
        wav_file: wave.Wave_read = wave.open(wav_path, "rb")
        with wav_file:
            # Validate with real exceptions (asserts vanish under ``python -O``)
            # and against the rate the recognizer was actually built with,
            # so that --rate and the check can never disagree.
            frame_rate = wav_file.getframerate()
            if frame_rate != args.rate:
                raise ValueError(
                    f"{wav_path}: {args.rate} Hz sample rate required (got {frame_rate})"
                )
            if wav_file.getsampwidth() != 2:
                raise ValueError(f"{wav_path}: 16-bit samples required")
            if wav_file.getnchannels() != 1:
                raise ValueError(f"{wav_path}: Mono audio required")

            audio_bytes = wav_file.readframes(wav_file.getnframes())
            recognizer.AcceptWaveform(audio_bytes)

        result = json.loads(recognizer.FinalResult())
        print(result["text"].strip())
def main() -> None:
    """Download Vosk model archives and unpack them into the data directory.

    Command-line arguments:
        model: one or more model names; either an alias (a key of ``MODELS``)
            or the full model name (a value of ``MODELS``).
        --destination: directory to extract into. Defaults to
            ``data/asr/vosk`` four directory levels above this script.
        --link-format: URL template; ``{model}`` is replaced by the model name.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "model",
        nargs="+",
        choices=list(itertools.chain(MODELS.keys(), MODELS.values())),
        help="Vosk model(s) to download",
    )
    parser.add_argument(
        "--destination",
        help="Path to destination directory (default: data/asr/vosk)",
    )
    parser.add_argument(
        "--link-format",
        default="https://github.com/rhasspy/models/releases/download/v1.0/asr_vosk-model-{model}.tar.gz",
        help="Format string for download URLs",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    if args.destination:
        args.destination = Path(args.destination)
    else:
        # Assume we're in programs/asr/vosk/script
        data_dir = _DIR.parent.parent.parent.parent / "data"
        args.destination = data_dir / "asr" / "vosk"

    # Create the extraction directory itself, not just its parent.
    args.destination.mkdir(parents=True, exist_ok=True)

    # The archive comes from the network (and the URL is user-overridable), so
    # use the "data" extraction filter where the running Python provides it:
    # it refuses absolute paths, "../" escapes and special files.
    extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

    for model in args.model:
        # Map an alias to the real model name; full names pass through.
        model = MODELS.get(model, model)
        url = args.link_format.format(model=model)
        _LOGGER.info("Downloading %s", url)
        with urlopen(url) as response:
            # "r|*" reads the response as a non-seekable stream.
            with tarfile.open(mode="r|*", fileobj=response) as tar_gz:
                _LOGGER.info("Extracting to %s", args.destination)
                tar_gz.extractall(args.destination, **extract_kwargs)
#!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | # Directory of *this* script 5 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | # Base directory of repo 8 | base_dir="$(realpath "${this_dir}/..")" 9 | 10 | # Path to virtual environment 11 | : "${venv:=${base_dir}/.venv}" 12 | 13 | if [ -d "${venv}" ]; then 14 | source "${venv}/bin/activate" 15 | fi 16 | 17 | python3 "${base_dir}/bin/vosk_raw2text.py" "$@" 18 | -------------------------------------------------------------------------------- /programs/asr/vosk/script/server: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | # Directory of *this* script 5 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | # Base directory of repo 8 | base_dir="$(realpath "${this_dir}/..")" 9 | 10 | # Path to virtual environment 11 | : "${venv:=${base_dir}/.venv}" 12 | 13 | if [ -d "${venv}" ]; then 14 | source "${venv}/bin/activate" 15 | fi 16 | 17 | socket_dir="${base_dir}/var/run" 18 | mkdir -p "${socket_dir}" 19 | 20 | python3 "${base_dir}/bin/vosk_server.py" --socketfile "${socket_dir}/vosk.socket" "$@" 21 | -------------------------------------------------------------------------------- /programs/asr/vosk/script/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | # Directory of *this* script 5 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | # Base directory of repo 8 | base_dir="$(realpath "${this_dir}/..")" 9 | 10 | # Path to virtual environment 11 | : "${venv:=${base_dir}/.venv}" 12 | 13 | # Python binary to use 14 | : "${PYTHON=python3}" 15 | 16 | python_version="$(${PYTHON} --version)" 17 | 18 | if [ ! 
-d "${venv}" ]; then 19 | # Create virtual environment 20 | echo "Creating virtual environment at ${venv} (${python_version})" 21 | rm -rf "${venv}" 22 | "${PYTHON}" -m venv "${venv}" 23 | source "${venv}/bin/activate" 24 | 25 | pip3 install --upgrade pip 26 | pip3 install --upgrade wheel setuptools 27 | else 28 | source "${venv}/bin/activate" 29 | fi 30 | 31 | # Install Python dependencies 32 | echo 'Installing Python dependencies' 33 | pip3 install -r "${base_dir}/requirements.txt" 34 | 35 | # ----------------------------------------------------------------------------- 36 | 37 | echo "OK" 38 | -------------------------------------------------------------------------------- /programs/asr/vosk/script/wav2text: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | # Directory of *this* script 5 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | # Base directory of repo 8 | base_dir="$(realpath "${this_dir}/..")" 9 | 10 | # Path to virtual environment 11 | : "${venv:=${base_dir}/.venv}" 12 | 13 | if [ -d "${venv}" ]; then 14 | source "${venv}/bin/activate" 15 | fi 16 | 17 | python3 "${base_dir}/bin/vosk_wav2text.py" "$@" 18 | -------------------------------------------------------------------------------- /programs/asr/whisper-cpp/.gitignore: -------------------------------------------------------------------------------- 1 | /build/ 2 | -------------------------------------------------------------------------------- /programs/asr/whisper-cpp/Dockerfile.libwhisper: -------------------------------------------------------------------------------- 1 | FROM debian:bullseye as build 2 | ARG TARGETARCH 3 | ARG TARGETVARIANT 4 | 5 | ENV LANG C.UTF-8 6 | ENV DEBIAN_FRONTEND=noninteractive 7 | 8 | RUN apt-get update && \ 9 | apt-get install --yes build-essential wget 10 | 11 | WORKDIR /build 12 | 13 | ARG VERSION=1.1.0 14 | RUN wget
"https://github.com/ggerganov/whisper.cpp/archive/refs/tags/v${VERSION}.tar.gz" && \ 15 | tar -xzf "v${VERSION}.tar.gz" 16 | 17 | RUN mv "whisper.cpp-${VERSION}/" 'whisper.cpp' 18 | COPY lib/Makefile ./ 19 | RUN cd "whisper.cpp" && make -j8 20 | RUN make 21 | 22 | # ----------------------------------------------------------------------------- 23 | 24 | FROM scratch 25 | 26 | COPY --from=build /build/libwhisper.so . 27 | -------------------------------------------------------------------------------- /programs/asr/whisper-cpp/Dockerfile.libwhisper.dockerignore: -------------------------------------------------------------------------------- 1 | * 2 | !lib/Makefile 3 | -------------------------------------------------------------------------------- /programs/asr/whisper-cpp/README.md: -------------------------------------------------------------------------------- 1 | # Whisper.cpp 2 | 3 | Speech to text service for Rhasspy based on [whisper.cpp](https://github.com/ggerganov/whisper.cpp/). 4 | 5 | Additional models can be downloaded here: https://huggingface.co/datasets/ggerganov/whisper.cpp 6 | 7 | ## Installation 8 | 9 | 1. Copy the contents of this directory to `config/programs/asr/whisper-cpp/` 10 | 2. Run `script/setup.py` 11 | 3. Download a model with `script/download.py` 12 | * Example: `script/download.py tiny.en` 13 | * Models are downloaded to `config/data/asr/whisper-cpp` directory 14 | 4.
def main() -> None:
    """Transcribe one or more WAV files with whisper.cpp, one line per file.

    Each file is converted to 16-bit, mono, 16 kHz PCM, scaled to float32 in
    [-1.0, 1.0), and passed to the model; the returned text segments are
    joined with spaces and printed.

    Command-line arguments:
        model: path to the whisper.cpp model file.
        wav_file: one or more WAV files to transcribe.
        --debug: log DEBUG messages.

    Raises:
        ValueError: if a WAV file has more than two channels.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to whisper.cpp model file")
    parser.add_argument("wav_file", nargs="+", help="Path to WAV file(s) to transcribe")
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    _LOGGER.debug("Loading model: %s", args.model)

    with Whisper(args.model) as whisper:
        for wav_path in args.wav_file:
            wav_file: wave.Wave_read = wave.open(wav_path, "rb")
            with wav_file:
                rate = wav_file.getframerate()
                width = wav_file.getsampwidth()
                channels = wav_file.getnchannels()
                audio_bytes = wav_file.readframes(wav_file.getnframes())

            if channels not in (1, 2):
                # audioop.tomono only understands interleaved stereo; anything
                # wider would be silently garbled.
                raise ValueError(
                    f"{wav_path}: only mono or stereo audio is supported "
                    f"(got {channels} channels)"
                )

            if width == 1:
                # 8-bit WAV samples are unsigned while audioop works on signed
                # samples; shifting by 128 (with wrap-around) re-centres them.
                audio_bytes = audioop.bias(audio_bytes, 1, 128)

            if width != 2:
                audio_bytes = audioop.lin2lin(audio_bytes, width, 2)

            if channels != 1:
                # Average the two channels; summing them at full weight clips
                # whenever both channels are loud at the same time.
                audio_bytes = audioop.tomono(audio_bytes, 2, 0.5, 0.5)

            if rate != 16000:
                audio_bytes, _state = audioop.ratecv(
                    audio_bytes, 2, 1, rate, 16000, None
                )

            # int16 -> float32 in [-1.0, 1.0)
            audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
            audio_array = audio_array.astype(np.float32) / 32768.0

            text = " ".join(whisper.transcribe(audio_array))
            print(text)
# Builds libwhisper.so from a whisper.cpp source tree located in ./whisper.cpp.
# The only rule defined here links whisper.cpp/ggml.o together with
# whisper.cpp/whisper.cpp; ggml.o has no rule in this file, so it must already
# exist or be produced by make's built-in rules.

# Machine hardware name as reported by `uname -m`
UNAME_M := $(shell uname -m)

CFLAGS   = -Iwhisper.cpp -O3 -std=c11 -fPIC
CXXFLAGS = -Iwhisper.cpp -O3 -std=c++11 -fPIC --shared -static-libstdc++
LDFLAGS  =

# Linux
CFLAGS   += -pthread
CXXFLAGS += -pthread

# Enable each SIMD extension that the *build* machine advertises in
# /proc/cpuinfo. Only CFLAGS is extended; CXXFLAGS is left as is.
AVX1_M := $(shell grep "avx " /proc/cpuinfo)
ifneq (,$(findstring avx,$(AVX1_M)))
    CFLAGS += -mavx
endif
AVX2_M := $(shell grep "avx2 " /proc/cpuinfo)
ifneq (,$(findstring avx2,$(AVX2_M)))
    CFLAGS += -mavx2
endif
FMA_M := $(shell grep "fma " /proc/cpuinfo)
ifneq (,$(findstring fma,$(FMA_M)))
    CFLAGS += -mfma
endif
F16C_M := $(shell grep "f16c " /proc/cpuinfo)
ifneq (,$(findstring f16c,$(F16C_M)))
    CFLAGS += -mf16c
endif
SSE3_M := $(shell grep "sse3 " /proc/cpuinfo)
ifneq (,$(findstring sse3,$(SSE3_M)))
    CFLAGS += -msse3
endif

# amd64 and arm64 only
# NOTE(review): `uname -m` usually prints x86_64 rather than amd64 on Linux,
# so this branch may never be taken -- confirm before relying on it.
ifeq ($(UNAME_M),amd64)
    CFLAGS += -mavx -mavx2 -mfma -mf16c
endif

ifneq ($(filter armv8%,$(UNAME_M)),)
    # Raspberry Pi 4
    CFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif

default: libwhisper.so

# $^ expands to both prerequisites, $@ to the target name.
libwhisper.so: whisper.cpp/ggml.o whisper.cpp/whisper.cpp
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
def main() -> None:
    """Download whisper.cpp model archives and unpack them.

    Command-line arguments:
        model: one or more model names, each one of ``MODELS``.
        --destination: directory to extract into. Defaults to
            ``data/asr/whisper-cpp`` four directory levels above this script.
        --link-format: URL template; ``{model}`` is replaced by the model name.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "model",
        nargs="+",
        choices=MODELS,
        help="whisper.cpp model(s) to download",
    )
    parser.add_argument(
        "--destination",
        help="Path to destination directory (default: data/asr/whisper-cpp)",
    )
    parser.add_argument(
        "--link-format",
        default="https://github.com/rhasspy/models/releases/download/v1.0/asr_whisper-cpp-ggml-{model}.tar.gz",
        help="Format string for download URLs",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    if args.destination:
        args.destination = Path(args.destination)
    else:
        # Assume we're in programs/asr/whisper-cpp/script
        data_dir = _DIR.parent.parent.parent.parent / "data"
        args.destination = data_dir / "asr" / "whisper-cpp"

    # Create the extraction directory itself, not just its parent.
    args.destination.mkdir(parents=True, exist_ok=True)

    # The archive comes from the network (and the URL is user-overridable), so
    # use the "data" extraction filter where the running Python provides it:
    # it refuses absolute paths, "../" escapes and special files.
    extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

    for model in args.model:
        url = args.link_format.format(model=model)
        _LOGGER.info("Downloading %s", url)
        with urlopen(url) as response:
            # "r|*" reads the response as a non-seekable stream.
            with tarfile.open(mode="r|*", fileobj=response) as tar_gz:
                _LOGGER.info("Extracting to %s", args.destination)
                tar_gz.extractall(args.destination, **extract_kwargs)
def main() -> None:
    """Download a prebuilt libwhisper archive and unpack it.

    Command-line arguments:
        --platform: CPU architecture to fetch. Defaults to
            ``platform.machine()``; names found in ``PLATFORMS`` are
            translated (e.g. ``x86_64`` -> ``amd64``), others are used as is.
        --destination: directory to extract into. Defaults to the ``lib``
            directory next to this script's directory.
        --link-format: URL template; ``{platform}`` is replaced by the
            resolved platform name.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--platform",
        help="CPU architecture to download (amd64, arm64)",
    )
    parser.add_argument(
        "--destination", help="Path to destination directory (default: lib)"
    )
    parser.add_argument(
        "--link-format",
        default="https://github.com/rhasspy/models/releases/download/v1.0/libwhisper_{platform}.tar.gz",
        help="Format string for download URLs",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    if not args.platform:
        args.platform = platform.machine()

    args.platform = PLATFORMS.get(args.platform, args.platform)

    # argparse delivers --destination as a plain string; convert it so the
    # Path operations below work for user-supplied values too.
    if args.destination:
        args.destination = Path(args.destination)
    else:
        args.destination = _DIR.parent / "lib"

    # Create the extraction directory itself, not just its parent.
    args.destination.mkdir(parents=True, exist_ok=True)

    # The archive comes from the network (and the URL is user-overridable), so
    # use the "data" extraction filter where the running Python provides it.
    extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

    url = args.link_format.format(platform=args.platform)
    _LOGGER.info("Downloading %s", url)
    with urlopen(url) as response:
        # "r|*" reads the response as a non-seekable stream.
        with tarfile.open(mode="r|*", fileobj=response) as tar_gz:
            _LOGGER.info("Extracting to %s", args.destination)
            tar_gz.extractall(args.destination, **extract_kwargs)
def main() -> None:
    """Transcribe WAV files with a Whisper model and print one line per file.

    Command-line arguments:
        model: name of the Whisper model to load.
        wav_file: one or more WAV files to transcribe.
        --language: language passed through to ``transcribe``.
        --device: ``cpu`` (default) or ``cuda``.
        --debug: log DEBUG messages.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("model", help="Name of Whisper model to use")
    cli.add_argument("wav_file", nargs="+", help="Path to WAV file(s) to transcribe")
    cli.add_argument("--language", help="Whisper language")
    cli.add_argument("--device", default="cpu", choices=("cpu", "cuda"))
    cli.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    options = cli.parse_args()

    if options.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(level=log_level)

    _LOGGER.debug("Loading model: %s", options.model)
    whisper_model = load_model(options.model, device=options.device)

    for path in options.wav_file:
        _LOGGER.debug("Processing %s", path)
        outcome = transcribe(whisper_model, path, language=options.language)
        _LOGGER.debug(outcome)
        print(outcome["text"].strip())
#!/usr/bin/env bash
# Start the Whisper server program listening on a Unix socket file under
# var/run/, forwarding every extra command-line argument to it.
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of this program (the parent of the script/ directory)
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment; a value already present in $venv is kept
: "${venv:=${base_dir}/.venv}"

# Activate the virtual environment only when it exists
if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

# Directory that holds the socket file; created on demand
socket_dir="${base_dir}/var/run"
mkdir -p "${socket_dir}"

python3 "${base_dir}/bin/whisper_server.py" --socketfile "${socket_dir}/whisper.socket" "$@"
def main() -> None:
    """Answer a spoken-style request for the current time or date.

    Reads all of stdin, lower-cases it and strips non-word characters from
    each whitespace-separated word. If the word "time" is present the current
    local time is printed; otherwise, if "date" is present, the current local
    date is printed. Nothing is printed when neither word occurs.
    """
    request = sys.stdin.read().strip().lower()
    tokens = {re.sub(r"\W", "", token) for token in request.split()}

    # "time" wins when both words are present.
    if "time" in tokens:
        layout = "%I:%M %p"
    elif "date" in tokens:
        layout = "%A, %B %d, %Y"
    else:
        return

    print(datetime.now().strftime(layout))
def main():
    """Send text from stdin to a conversation API and print the spoken reply.

    The request body is a JSON object with a ``text`` key (all of stdin) and,
    when ``--language`` is given, a ``language`` key. The reply is expected to
    be JSON; the value at ``response.speech.plain.speech`` is printed, or an
    empty line when that path is missing.

    Command-line arguments:
        url: URL of the API endpoint.
        token_file: file whose (stripped) contents are used as bearer token.
        --language: language code to include in the request.
        --debug: log DEBUG messages.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "url",
        help="URL of API endpoint",
    )
    parser.add_argument("token_file", help="Path to file with authorization token")
    parser.add_argument("--language", help="Language code to use")
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    token = Path(args.token_file).read_text(encoding="utf-8").strip()
    headers = {
        "Authorization": f"Bearer {token}",
        # Without this, urllib labels the body
        # application/x-www-form-urlencoded even though it is JSON.
        "Content-Type": "application/json",
    }

    data_dict = {"text": sys.stdin.read()}
    if args.language:
        data_dict["language"] = args.language

    data = json.dumps(data_dict, ensure_ascii=False).encode("utf-8")
    request = Request(args.url, data=data, headers=headers)

    # Keep the HTTP response object and the decoded payload under separate
    # names so the ``with`` target is not rebound while it is still open.
    with urlopen(request) as response:
        payload = json.loads(response.read())

    response_text = (
        payload.get("response", {})
        .get("speech", {})
        .get("plain", {})
        .get("speech", "")
    )
    print(response_text)
def main() -> None:
    """Answer recognize events with intents matched by regular expressions.

    Each ``-i/--intent NAME REGEX`` pair registers a case-insensitive pattern
    under an intent name (a name may be given several times). Events are read
    until ``read_event`` returns ``None`` or the user interrupts. For every
    recognize event, either the matched intent's event or a not-recognized
    event is written; other event types are ignored.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-i",
        "--intent",
        required=True,
        nargs=2,
        metavar=("name", "regex"),
        action="append",
        default=[],
        help="Intent name and regex",
    )
    options = cli.parse_args()

    # intent name -> compiled patterns, in command-line order
    compiled: Dict[str, List[re.Pattern]] = defaultdict(list)
    for name, expression in options.intent:
        compiled[name].append(re.compile(expression, re.IGNORECASE))

    try:
        while True:
            incoming = read_event()
            if incoming is None:
                break

            if not Recognize.is_type(incoming.type):
                continue

            request = Recognize.from_event(incoming)
            found = _recognize(_clean(request.text), compiled)
            reply = NotRecognized().event() if found is None else found.event()
            write_event(reply)
    except KeyboardInterrupt:
        pass


def _clean(text: str) -> str:
    """Collapse every run of whitespace to one space and trim both ends."""
    return " ".join(text.split())


def _recognize(text: str, patterns: Dict[str, List[re.Pattern]]) -> Optional[Intent]:
    """Return the intent of the first pattern matching the start of *text*.

    Patterns are tried in dictionary order, then list order. Every named
    group of the winning match becomes one entity. Returns ``None`` when no
    pattern matches.
    """
    for intent_name, candidates in patterns.items():
        for candidate in candidates:
            found = candidate.match(text)
            if found is not None:
                entities = [
                    Entity(name=group, value=captured)
                    for group, captured in found.groupdict().items()
                ]
                return Intent(name=intent_name, entities=entities)

    return None
def main() -> None:
    """Capture audio with PyAudio and write every chunk as an audio event.

    Command-line arguments:
        --rate, --width, --channels: capture format (hertz, bytes per sample,
            channel count); defaults come from ``rhasspy3.audio``.
        --samples-per-chunk: number of samples requested per read.
        --device: name or index of the input device.
        --debug: log DEBUG messages.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--rate", type=int, default=DEFAULT_IN_RATE, help="Sample rate (hertz)"
    )
    cli.add_argument(
        "--width", type=int, default=DEFAULT_IN_WIDTH, help="Sample width (bytes)"
    )
    cli.add_argument(
        "--channels", type=int, default=DEFAULT_IN_CHANNELS, help="Sample channel count"
    )
    cli.add_argument(
        "--samples-per-chunk",
        type=int,
        default=DEFAULT_SAMPLES_PER_CHUNK,
        help="Number of samples to process at a time",
    )
    cli.add_argument("--device", help="Name or index of device to use")
    cli.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    options = cli.parse_args()

    if options.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(level=log_level)

    audio_source = iter_chunks(
        options.device,
        options.rate,
        options.width,
        options.channels,
        options.samples_per_chunk,
    )
    for audio_bytes in audio_source:
        # Wrap the raw bytes together with their format before emitting.
        wrapped = AudioChunk(
            options.rate, options.width, options.channels, audio_bytes
        )
        write_event(wrapped.event())
def main() -> None:
    """Capture audio with PyAudio and write the raw sample bytes to stdout.

    Rate, width, channel count and chunk size are all mandatory options; the
    input device is optional.  Chunks from ``iter_chunks`` are written
    unmodified to the binary stdout stream.
    """
    cli = argparse.ArgumentParser()

    # All audio-format options are required integers.
    required_ints = (
        ("--rate", "Sample rate (hertz)"),
        ("--width", "Sample width (bytes)"),
        ("--channels", "Sample channel count"),
        ("--samples-per-chunk", "Number of samples to process at a time"),
    )
    for flag, description in required_ints:
        cli.add_argument(flag, type=int, required=True, help=description)

    cli.add_argument("--device", help="Name or index of device to use")
    #
    cli.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    opts = cli.parse_args()

    log_level = logging.DEBUG if opts.debug else logging.INFO
    logging.basicConfig(level=log_level)

    raw_out = sys.stdout.buffer
    audio_source = iter_chunks(
        opts.device, opts.rate, opts.width, opts.channels, opts.samples_per_chunk
    )
    for audio_bytes in audio_source:
        raw_out.write(audio_bytes)
def iter_chunks(
    device: Optional[Union[int, str]],
    rate: int,
    width: int,
    channels: int,
    samples_per_chunk: int,
) -> Iterable[bytes]:
    """Open a PyAudio input stream and yield audio chunks.

    Args:
        device: Input device index, a string holding an index, an exact
            device name, or ``None`` to pass no explicit device to PyAudio.
        rate: Sample rate in hertz.
        width: Sample width in bytes (converted with
            ``get_format_from_width``).
        channels: Number of channels.
        samples_per_chunk: Frames read per chunk.

    Yields:
        The bytes returned by each ``stream.read`` call, until a read returns
        an empty value.

    Raises:
        ValueError: If ``device`` is a non-numeric string that matches no
            device name.

    A ``KeyboardInterrupt`` ends the generator quietly.  The PyAudio instance
    is always terminated on exit.
    """
    audio_system = pyaudio.PyAudio()
    try:
        if isinstance(device, str):
            try:
                device = int(device)
            except ValueError:
                # Not an index: look the device up by its exact name.
                device_name = device
                for index in range(audio_system.get_device_count()):
                    info = audio_system.get_device_info_by_index(index)
                    if info["name"] == device_name:
                        device = index
                        break
                else:
                    # Previously an unmatched name slipped through as a str
                    # and failed later inside audio_system.open().
                    raise ValueError(f"No device named: {device_name}") from None

        _LOGGER.debug("Device: %s", device)
        stream = audio_system.open(
            input_device_index=device,
            format=audio_system.get_format_from_width(width),
            channels=channels,
            rate=rate,
            input=True,
            frames_per_buffer=samples_per_chunk,
        )

        chunk = stream.read(samples_per_chunk)
        while chunk:
            yield chunk
            chunk = stream.read(samples_per_chunk)
    except KeyboardInterrupt:
        pass
    finally:
        audio_system.terminate()
base_dir="$(realpath "${this_dir}/..")" 9 | 10 | # Path to virtual environment 11 | : "${venv:=${base_dir}/.venv}" 12 | 13 | if [ -d "${venv}" ]; then 14 | source "${venv}/bin/activate" 15 | fi 16 | 17 | python3 "${base_dir}/bin/pyaudio_events.py" "$@" 18 | -------------------------------------------------------------------------------- /programs/mic/pyaudio/script/list_mics: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | # Directory of *this* script 5 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | # Base directory of repo 8 | base_dir="$(realpath "${this_dir}/..")" 9 | 10 | # Path to virtual environment 11 | : "${venv:=${base_dir}/.venv}" 12 | 13 | if [ -d "${venv}" ]; then 14 | source "${venv}/bin/activate" 15 | fi 16 | 17 | python3 "${base_dir}/bin/pyaudio_list_mics.py" "$@" 18 | -------------------------------------------------------------------------------- /programs/mic/pyaudio/script/raw: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | # Directory of *this* script 5 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | # Base directory of repo 8 | base_dir="$(realpath "${this_dir}/..")" 9 | 10 | # Path to virtual environment 11 | : "${venv:=${base_dir}/.venv}" 12 | 13 | if [ -d "${venv}" ]; then 14 | source "${venv}/bin/activate" 15 | fi 16 | 17 | python3 "${base_dir}/bin/pyaudio_raw.py" "$@" 18 | -------------------------------------------------------------------------------- /programs/mic/pyaudio/script/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | # Directory of *this* script 5 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | # Base directory of repo 8 | base_dir="$(realpath "${this_dir}/..")" 9 | 10 | # Path to virtual environment 11 | : "${venv:=${base_dir}/.venv}" 12 | 13 
| # Python binary to use 14 | : "${PYTHON=python3}" 15 | 16 | python_version="$(${PYTHON} --version)" 17 | 18 | if [ ! -d "${venv}" ]; then 19 | # Create virtual environment 20 | echo "Creating virtual environment at ${venv} (${python_version})" 21 | rm -rf "${venv}" 22 | "${PYTHON}" -m venv "${venv}" 23 | source "${venv}/bin/activate" 24 | 25 | pip3 install --upgrade pip 26 | pip3 install --upgrade wheel setuptools 27 | else 28 | source "${venv}/bin/activate" 29 | fi 30 | 31 | # Install Python dependencies 32 | echo 'Installing Python dependencies' 33 | pip3 install -r "${base_dir}/requirements.txt" 34 | 35 | # Install rhasspy3 36 | rhasspy3_dir="${base_dir}/../../../.." 37 | pip3 install -e "${rhasspy3_dir}" 38 | 39 | # ----------------------------------------------------------------------------- 40 | 41 | echo "OK" 42 | -------------------------------------------------------------------------------- /programs/mic/sounddevice/README.md: -------------------------------------------------------------------------------- 1 | # sounddevice 2 | 3 | Audio input service for Rhasspy based on [sounddevice](https://python-sounddevice.readthedocs.io). 
def main() -> None:
    """Capture audio with sounddevice and write it out as AudioChunk events.

    Command-line options choose the sample rate, sample width, channel count,
    chunk size and input device.  Every chunk produced by ``iter_chunks`` is
    wrapped in an ``AudioChunk`` and handed to ``write_event``.
    """
    cli = argparse.ArgumentParser()

    # The integer audio-format options all have the same shape.
    int_options = (
        ("--rate", DEFAULT_IN_RATE, "Sample rate (hertz)"),
        ("--width", DEFAULT_IN_WIDTH, "Sample width (bytes)"),
        ("--channels", DEFAULT_IN_CHANNELS, "Sample channel count"),
        (
            "--samples-per-chunk",
            DEFAULT_SAMPLES_PER_CHUNK,
            "Number of samples to process at a time",
        ),
    )
    for flag, fallback, description in int_options:
        cli.add_argument(flag, type=int, default=fallback, help=description)

    cli.add_argument("--device", help="Name or index of device to use")
    #
    cli.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    opts = cli.parse_args()

    log_level = logging.DEBUG if opts.debug else logging.INFO
    logging.basicConfig(level=log_level)

    audio_source = iter_chunks(
        opts.device, opts.rate, opts.width, opts.channels, opts.samples_per_chunk
    )
    for audio_bytes in audio_source:
        chunk_event = AudioChunk(
            opts.rate, opts.width, opts.channels, audio_bytes
        ).event()
        write_event(chunk_event)
def main() -> None:
    """Capture audio with sounddevice and write the raw sample bytes to stdout.

    Rate, width, channel count and chunk size are all mandatory options; the
    input device is optional.  Chunks from ``iter_chunks`` are written
    unmodified to the binary stdout stream.
    """
    cli = argparse.ArgumentParser()

    # All audio-format options are required integers.
    required_ints = (
        ("--rate", "Sample rate (hertz)"),
        ("--width", "Sample width (bytes)"),
        ("--channels", "Sample channel count"),
        ("--samples-per-chunk", "Number of samples to process at a time"),
    )
    for flag, description in required_ints:
        cli.add_argument(flag, type=int, required=True, help=description)

    cli.add_argument("--device", help="Name or index of device to use")
    #
    cli.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    opts = cli.parse_args()

    log_level = logging.DEBUG if opts.debug else logging.INFO
    logging.basicConfig(level=log_level)

    raw_out = sys.stdout.buffer
    audio_source = iter_chunks(
        opts.device, opts.rate, opts.width, opts.channels, opts.samples_per_chunk
    )
    for audio_bytes in audio_source:
        raw_out.write(audio_bytes)
def iter_chunks(
    device: Optional[Union[int, str]],
    rate: int,
    width: int,
    channels: int,
    samples_per_chunk: int,
) -> Iterable[bytes]:
    """Open a sounddevice raw input stream and yield audio chunks.

    Args:
        device: Input device index, a string holding an index, a device name,
            or ``None`` to pass no explicit device to sounddevice.
        rate: Sample rate in hertz.
        width: Sample width in bytes; selects the stream's sample format
            (1 -> uint8, 2 -> int16, 3 -> int24, 4 -> int32).
        channels: Number of channels.
        samples_per_chunk: Frames per block and per read.

    Yields:
        One ``bytes`` object per ``stream.read`` call, until a read returns
        an empty buffer.

    Raises:
        ValueError: If ``width`` has no corresponding sample format.

    A ``KeyboardInterrupt`` ends the generator quietly.
    """
    # Previously the width argument was ignored and "int16" was hard-coded,
    # so callers asking for another width got mislabelled audio.
    dtype_by_width = {1: "uint8", 2: "int16", 3: "int24", 4: "int32"}
    try:
        dtype = dtype_by_width[width]
    except KeyError:
        raise ValueError(f"Unsupported sample width: {width}") from None

    try:
        if isinstance(device, str):
            try:
                device = int(device)
            except ValueError:
                # Prefer an exact name match, resolved to its index.
                for index, info in enumerate(sounddevice.query_devices()):
                    if device == info["name"]:
                        device = index
                        break
                # With no exact match the string is passed through unchanged;
                # sounddevice itself accepts a device query string.

        _LOGGER.debug("Device: %s", device)

        with sounddevice.RawInputStream(
            samplerate=rate,
            blocksize=samples_per_chunk,
            device=device,
            channels=channels,
            dtype=dtype,
        ) as stream:
            chunk, _overflowed = stream.read(samples_per_chunk)
            while chunk:
                # read() returns a buffer object; copy it so callers get the
                # bytes the signature promises.
                yield bytes(chunk)
                chunk, _overflowed = stream.read(samples_per_chunk)
    except KeyboardInterrupt:
        pass
| source "${venv}/bin/activate" 15 | fi 16 | 17 | python3 "${base_dir}/bin/sounddevice_events.py" "$@" 18 | -------------------------------------------------------------------------------- /programs/mic/sounddevice/script/list_mics: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | # Directory of *this* script 5 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | # Base directory of repo 8 | base_dir="$(realpath "${this_dir}/..")" 9 | 10 | # Path to virtual environment 11 | : "${venv:=${base_dir}/.venv}" 12 | 13 | if [ -d "${venv}" ]; then 14 | source "${venv}/bin/activate" 15 | fi 16 | 17 | python3 -m sounddevice 18 | -------------------------------------------------------------------------------- /programs/mic/sounddevice/script/raw: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | # Directory of *this* script 5 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | # Base directory of repo 8 | base_dir="$(realpath "${this_dir}/..")" 9 | 10 | # Path to virtual environment 11 | : "${venv:=${base_dir}/.venv}" 12 | 13 | if [ -d "${venv}" ]; then 14 | source "${venv}/bin/activate" 15 | fi 16 | 17 | python3 "${base_dir}/bin/sounddevice_raw.py" "$@" 18 | -------------------------------------------------------------------------------- /programs/mic/sounddevice/script/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | # Directory of *this* script 5 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 6 | 7 | # Base directory of repo 8 | base_dir="$(realpath "${this_dir}/..")" 9 | 10 | # Path to virtual environment 11 | : "${venv:=${base_dir}/.venv}" 12 | 13 | # Python binary to use 14 | : "${PYTHON=python3}" 15 | 16 | python_version="$(${PYTHON} --version)" 17 | 18 | if [ ! 
def main() -> None:
    """Serve UDP on the given host/port, emitting an AudioChunk event per datagram.

    The audio format options (rate, width, channels) are not negotiated; they
    are simply attached to every chunk built by ``MicUDPHandler``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--port", type=int, required=True)
    cli.add_argument("--host", default="0.0.0.0")
    #
    int_options = (
        ("--rate", DEFAULT_IN_RATE, "Sample rate (hertz)"),
        ("--width", DEFAULT_IN_WIDTH, "Sample width (bytes)"),
        ("--channels", DEFAULT_IN_CHANNELS, "Sample channel count"),
    )
    for flag, fallback, description in int_options:
        cli.add_argument(flag, type=int, default=fallback, help=description)
    opts = cli.parse_args()

    bind_address = (opts.host, opts.port)
    # Pre-bind the audio format so the server can build handlers with its
    # usual positional arguments.
    handler_factory = partial(MicUDPHandler, opts.rate, opts.width, opts.channels)

    with socketserver.UDPServer(bind_address, handler_factory) as udp_server:
        udp_server.serve_forever()


class MicUDPHandler(socketserver.BaseRequestHandler):
    """Request handler that forwards each datagram as an AudioChunk event."""

    def __init__(self, rate: int, width: int, channels: int, *args, **kwargs):
        # Set attributes first: the base-class constructor runs handle().
        self.rate, self.width, self.channels = rate, width, channels
        self.state = None
        super().__init__(*args, **kwargs)

    def handle(self):
        """Wrap the received datagram in an AudioChunk and write it as an event."""
        # For datagram servers self.request is a (data, socket) pair.
        datagram = self.request[0]
        chunk = AudioChunk(
            rate=self.rate,
            width=self.width,
            channels=self.channels,
            audio=datagram,
        )
        write_event(chunk.event())
def main() -> None:
    """Forward AudioChunk events to a UDP endpoint as raw audio.

    Events are read until ``read_event`` returns ``None`` or an AudioStop
    event arrives.  Each AudioChunk is converted to the requested rate, width
    and channel count, then sent as one datagram to ``--host``/``--port``.
    A Played event is written once the loop ends.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--port", type=int, required=True)
    parser.add_argument("--host", required=True)
    #
    parser.add_argument(
        "--rate", type=int, default=DEFAULT_OUT_RATE, help="Sample rate (hertz)"
    )
    parser.add_argument(
        "--width", type=int, default=DEFAULT_OUT_WIDTH, help="Sample width (bytes)"
    )
    parser.add_argument(
        "--channels",
        type=int,
        default=DEFAULT_OUT_CHANNELS,
        help="Sample channel count",
    )
    #
    args = parser.parse_args()

    converter = AudioChunkConverter(args.rate, args.width, args.channels)
    destination = (args.host, args.port)

    # The context manager closes the socket even if reading or sending raises;
    # previously it was never closed.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
        while True:
            event = read_event()
            if event is None:
                break

            if AudioChunk.is_type(event.type):
                chunk = AudioChunk.from_event(event)
                chunk = converter.convert(chunk)
                sock.sendto(chunk.audio, destination)
            elif AudioStop.is_type(event.type):
                break

    write_event(Played().event())
def main() -> None:
    """Download and extract flite voice archives for the requested languages.

    For each language the URL is built from ``--link-format`` and the archive
    is streamed straight from the HTTP response into the destination
    directory (``--destination``, or ``share`` beside this script's
    directory when omitted).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "language",
        nargs="+",
        choices=("en", "indic"),
        help="Voice language(s) to download",
    )
    parser.add_argument(
        "--destination", help="Path to destination directory (default: share)"
    )
    parser.add_argument(
        "--link-format",
        default="https://github.com/rhasspy/models/releases/download/v1.0/tts_flite-{language}.tar.gz",
        help="Format string for download URLs",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    # argparse yields a plain str for --destination; the code below needs a
    # Path (".parent" on a str raised AttributeError before).
    if args.destination:
        args.destination = Path(args.destination)
    else:
        args.destination = _DIR.parent / "share"

    args.destination.parent.mkdir(parents=True, exist_ok=True)

    # The archive comes from the network: use tarfile's "data" extraction
    # filter where the running Python provides it, so members cannot escape
    # the destination directory.
    extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

    for language in args.language:
        url = args.link_format.format(language=language)
        _LOGGER.info("Downloading %s", url)
        with urlopen(url) as response:
            # "r|*" reads the archive as a forward-only stream.
            with tarfile.open(mode="r|*", fileobj=response) as tar_gz:
                _LOGGER.info("Extracting to %s", args.destination)
                tar_gz.extractall(args.destination, **extract_kwargs)
def main():
    """Send stdin text to a TTS HTTP endpoint and copy the response to stdout.

    The text and the voice name are URL-encoded as the ``INPUT_TEXT`` and
    ``VOICE`` query parameters of a GET request to the given URL.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("url", help="URL of API endpoint")
    cli.add_argument("voice", help="VOICE parameter")
    cli.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    opts = cli.parse_args()

    log_level = logging.DEBUG if opts.debug else logging.INFO
    logging.basicConfig(level=log_level)

    query = urlencode({"INPUT_TEXT": sys.stdin.read(), "VOICE": opts.voice})
    request_url = opts.url + "?" + query

    _LOGGER.debug(request_url)

    # Stream the response body straight through to binary stdout.
    with urlopen(request_url) as http_response:
        shutil.copyfileobj(http_response, sys.stdout.buffer)
def main():
    """Send stdin text to a MaryTTS-style HTTP endpoint and copy the audio to stdout.

    The text and the voice name are URL-encoded as the ``INPUT_TEXT`` and
    ``VOICE`` query parameters of a GET request to the given URL.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("url", help="URL of API endpoint")
    cli.add_argument("voice", help="VOICE parameter")
    cli.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    opts = cli.parse_args()

    log_level = logging.DEBUG if opts.debug else logging.INFO
    logging.basicConfig(level=log_level)

    query = urlencode({"INPUT_TEXT": sys.stdin.read(), "VOICE": opts.voice})
    request_url = opts.url + "?" + query

    _LOGGER.debug(request_url)

    # Stream the response body straight through to binary stdout.
    with urlopen(request_url) as http_response:
        shutil.copyfileobj(http_response, sys.stdout.buffer)
to use 14 | : "${PYTHON=python3}" 15 | 16 | python_version="$(${PYTHON} --version)" 17 | 18 | if [ ! -d "${venv}" ]; then 19 | # Create virtual environment 20 | echo "Creating virtual environment at ${venv} (${python_version})" 21 | rm -rf "${venv}" 22 | "${PYTHON}" -m venv "${venv}" 23 | source "${venv}/bin/activate" 24 | 25 | pip3 install --upgrade pip 26 | pip3 install --upgrade wheel setuptools 27 | else 28 | source "${venv}/bin/activate" 29 | fi 30 | 31 | # Install Python dependencies 32 | echo 'Installing Python dependencies' 33 | pip3 install -r "${base_dir}/requirements.txt" 34 | 35 | mimic3-download --output-dir "${base_dir}/share" 'apope' 36 | 37 | # ----------------------------------------------------------------------------- 38 | 39 | echo "OK" 40 | -------------------------------------------------------------------------------- /programs/tts/piper/README.md: -------------------------------------------------------------------------------- 1 | # Piper 2 | 3 | Text to speech service for Rhasspy based on [Piper](https://github.com/rhasspy/piper). 4 | 5 | 6 | ## Installation 7 | 8 | 1. Copy the contents of this directory to `config/programs/tts/piper/` 9 | 2. Run `script/setup` 10 | 3. Download a model with `script/download.py` 11 | * Example: `script/download.py english` 12 | * Models are downloaded to `config/data/tts/piper` directory 13 | 4. Test with `bin/piper` 14 | * Example `echo 'Welcome to the world of speech synthesis.' 
def main() -> None:
    """Download a Piper release archive and install its contents.

    Command-line options:
      --platform     CPU architecture; defaults to ``platform.machine()``,
                     translated through ``PLATFORMS`` when a mapping exists.
      --destination  Directory to install into (default: ``bin`` next to
                     this script's directory).
      --link-format  Download URL template containing ``{platform}``.

    The archive is extracted to a temporary directory and every entry of its
    top-level ``piper/`` directory is copied into the destination. Entries
    that already exist in the destination are replaced, so the script can be
    re-run (for example to repair or upgrade an installation).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--platform",
        help="CPU architecture to download (amd64, arm64)",
    )
    parser.add_argument(
        "--destination", help="Path to destination directory (default: bin)"
    )
    parser.add_argument(
        "--link-format",
        default="https://github.com/rhasspy/piper/releases/download/v0.0.2/piper_{platform}.tar.gz",
        help="Format string for download URLs",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    if not args.platform:
        args.platform = platform.machine()

    # Translate machine names (e.g. x86_64) to release names (e.g. amd64);
    # unknown values are passed through unchanged.
    args.platform = PLATFORMS.get(args.platform, args.platform)

    if not args.destination:
        args.destination = _DIR.parent / "bin"
    else:
        args.destination = Path(args.destination)

    args.destination.mkdir(parents=True, exist_ok=True)

    url = args.link_format.format(platform=args.platform)
    _LOGGER.info("Downloading %s", url)
    with urlopen(url) as response, tempfile.TemporaryDirectory() as temp_dir_str:
        temp_dir = Path(temp_dir_str)
        # Stream mode ("r|*"): the HTTP response is read sequentially
        with tarfile.open(mode="r|*", fileobj=response) as tar_gz:
            _LOGGER.info("Extracting to %s", temp_dir)
            tar_gz.extractall(temp_dir)

        # Copy piper/ contents
        piper_dir = temp_dir / "piper"
        for path in piper_dir.iterdir():
            rel_path = path.relative_to(piper_dir)
            target = args.destination / rel_path

            # Remove leftovers from a previous run. Without this, copytree()
            # raises FileExistsError for existing directories and copying a
            # symlink over an existing path fails as well.
            if target.is_symlink() or target.is_file():
                target.unlink()
            elif target.is_dir():
                shutil.rmtree(target)

            if path.is_dir():
                shutil.copytree(path, target, symlinks=True)
            else:
                shutil.copy(path, target, follow_symlinks=False)
def get_debiased_energy(audio_data: bytes, width: int) -> float:
    """Compute RMS of debiased audio.

    The RMS of the audio is subtracted from every sample and the RMS of the
    shifted signal is returned.

    Args:
        audio_data: Raw PCM samples; length must be a multiple of ``width``.
        width: Sample width in bytes.

    Returns:
        RMS of the shifted audio, as produced by ``audioop.rms``.
    """
    # Thanks to the speech_recognition library!
    # https://github.com/Uberi/speech_recognition/blob/master/speech_recognition/__init__.py
    energy = -audioop.rms(audio_data, width)

    # Encode the offset as ONE sample of the requested width. The previous
    # hard-coded two-byte encoding only matched 16-bit audio, so any other
    # width made audioop.add() fail on mismatched fragment lengths.
    energy_bytes = energy.to_bytes(width, sys.byteorder, signed=True)

    debiased_energy = audioop.rms(
        audioop.add(audio_data, energy_bytes * (len(audio_data) // width), width),
        width,
    )

    return debiased_energy
#!/usr/bin/env bash
# Creates (or reuses) this program's virtual environment, installs its Python
# requirements, and installs rhasspy3 itself in editable mode.
#
# Environment overrides:
#   venv    path to the virtual environment (default: <base_dir>/.venv)
#   PYTHON  Python interpreter used to create the venv (default: python3)
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use (an empty value falls back to the default too)
: "${PYTHON:=python3}"

# Quoted so an interpreter path containing spaces works, matching the
# quoted use when the venv is created below.
python_version="$("${PYTHON}" --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

# Install rhasspy3
rhasspy3_dir="${base_dir}/../../../.."
pip3 install -e "${rhasspy3_dir}"

# -----------------------------------------------------------------------------

echo "OK"
def main() -> None:
    """Print a 0/1 speech decision for each audio chunk read from stdin.

    Raw PCM is read from stdin in chunks of ``--samples-per-chunk`` samples of
    ``--width`` bytes each. Every complete chunk is classified with
    ``webrtcvad`` at the given aggressiveness ``mode`` and ``1`` (speech) or
    ``0`` (no speech) is printed on its own line.

    Exits through ``parser.error`` when the sample rate and chunk size do not
    make up a 10, 20, or 30 ms buffer.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "mode",
        choices=(0, 1, 2, 3),
        type=int,
        help="Aggressiveness in filtering out non-speech",
    )
    parser.add_argument(
        "--rate",
        type=int,
        default=16000,
        help="Sample rate (hz)",
    )
    parser.add_argument(
        "--width",
        type=int,
        default=2,
        help="Sample width bytes",
    )
    parser.add_argument("--samples-per-chunk", type=int, default=480)
    #
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    if (args.rate <= 0) or (args.samples_per_chunk <= 0):
        parser.error("--rate and --samples-per-chunk must be positive")

    # Validate with integer arithmetic and a real error (an assert would be
    # stripped under "python -O", and float equality is fragile).
    valid_duration = any(
        args.samples_per_chunk * 1000 == chunk_ms * args.rate
        for chunk_ms in (10, 20, 30)
    )
    if not valid_duration:
        chunk_ms = 1000 * (args.samples_per_chunk / args.rate)
        parser.error(
            "Sample rate and chunk size must make for 10, 20, or 30 ms buffer sizes,"
            + f" assuming mono audio (got {chunk_ms} ms)"
        )

    bytes_per_chunk = args.samples_per_chunk * args.width
    vad = webrtcvad.Vad()
    vad.set_mode(args.mode)

    try:
        chunk = sys.stdin.buffer.read(bytes_per_chunk)
        # Only complete chunks are classified; a short read can only be the
        # tail of the stream, which no longer has a valid frame duration.
        while chunk and (len(chunk) == bytes_per_chunk):
            speech_probability = 1 if vad.is_speech(chunk, args.rate) else 0
            print(speech_probability, flush=True)
            chunk = sys.stdin.buffer.read(bytes_per_chunk)

        if chunk:
            _LOGGER.debug("Dropping %s trailing byte(s) of audio", len(chunk))
    except KeyboardInterrupt:
        pass
#!/usr/bin/env bash
# Creates (or reuses) this program's virtual environment, installs its Python
# requirements, and installs rhasspy3 itself in editable mode.
#
# Environment overrides:
#   venv    path to the virtual environment (default: <base_dir>/.venv)
#   PYTHON  Python interpreter used to create the venv (default: python3)
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use (an empty value falls back to the default too)
: "${PYTHON:=python3}"

# Quoted so an interpreter path containing spaces works, matching the
# quoted use when the venv is created below.
python_version="$("${PYTHON}" --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

# Install rhasspy3
rhasspy3_dir="${base_dir}/../../../.."
pip3 install -e "${rhasspy3_dir}"

# -----------------------------------------------------------------------------

echo "OK"
def main() -> None:
    """Print the name of each keyword detected in raw audio from stdin.

    Audio is read from stdin as 16Khz, 16-bit mono PCM, split into frames of
    ``porcupine.frame_length`` samples, and passed to Porcupine. Whenever a
    keyword is detected its name is printed on its own line.

    ``--samples-per-chunk`` is accepted on the command line but not used here:
    the frame size is dictated by ``porcupine.frame_length``.
    """
    parser = get_arg_parser()
    parser.add_argument("--samples-per-chunk", type=int, default=512)
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    porcupine, names = load_porcupine(args)

    chunk_format = "h" * porcupine.frame_length
    bytes_per_chunk = porcupine.frame_length * 2  # 16-bit width

    # Read 16Khz, 16-bit mono PCM from stdin
    try:
        chunk = bytes()
        next_chunk = sys.stdin.buffer.read(bytes_per_chunk)
        while next_chunk:
            # Buffer the data BEFORE reading again. Previously the first read
            # was overwritten without ever being appended, silently dropping
            # the first frame of audio.
            chunk += next_chunk

            while len(chunk) >= bytes_per_chunk:
                unpacked_chunk = struct.unpack_from(
                    chunk_format, chunk[:bytes_per_chunk]
                )
                keyword_index = porcupine.process(unpacked_chunk)
                if keyword_index >= 0:
                    print(names[keyword_index], flush=True)

                chunk = chunk[bytes_per_chunk:]

            next_chunk = sys.stdin.buffer.read(bytes_per_chunk)
    except KeyboardInterrupt:
        pass
def load_porcupine(args: argparse.Namespace) -> Tuple[pvporcupine.Porcupine, List[str]]:
    """Create a Porcupine detector from parsed command-line arguments.

    Each entry of ``args.model`` is a list whose first item is a keyword file
    (an existing path, or a file name looked up among the keyword files that
    ship with pvporcupine) and whose optional second item is a sensitivity
    (default 0.5). ``args.lang_model`` optionally points to a language model.

    Returns the Porcupine object and the keyword names (file stems) in the
    same order as the models were given.
    """
    # Directory holding the keyword files embedded in pvporcupine
    bundled_paths = pvporcupine.pv_keyword_paths("")
    embedded_dir = Path(next(iter(bundled_paths.values()))).parent

    lang_model_path = None
    if args.lang_model is not None:
        lang_model_path = str(Path(args.lang_model).absolute())

    names: List[str] = []
    resolved_paths: List[Path] = []
    sensitivities: List[float] = []

    for model_settings in args.model:
        keyword_path_str, *extra_settings = model_settings

        resolved = Path(keyword_path_str)
        if not resolved.exists():
            # Fall back to a keyword file bundled with pvporcupine
            resolved = embedded_dir / keyword_path_str
            assert resolved.exists(), f"Cannot find {keyword_path_str}"

        resolved_paths.append(resolved)
        names.append(resolved.stem)
        sensitivities.append(float(extra_settings[0]) if extra_settings else 0.5)

    absolute_paths = [str(resolved.absolute()) for resolved in resolved_paths]
    porcupine = pvporcupine.create(
        keyword_paths=absolute_paths,
        sensitivities=sensitivities,
        model_path=lang_model_path,
    )

    return porcupine, names
def main() -> None:
    """Detect keywords in a stream of audio events.

    Events are read with ``read_event`` until the stream ends or an audio-stop
    event arrives. Audio chunks are buffered and fed to Porcupine one frame
    (``porcupine.frame_length`` 16-bit samples) at a time; every detection is
    reported as a ``Detection`` event carrying the keyword name and the
    timestamp of the chunk being processed. If the audio ends without any
    detection, a single ``NotDetected`` event is written instead.
    """
    parser = get_arg_parser()
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    porcupine, names = load_porcupine(args)

    chunk_format = "h" * porcupine.frame_length
    bytes_per_chunk = porcupine.frame_length * 2  # 16-bit width
    audio_bytes = bytes()
    is_detected = False

    try:
        while True:
            event = read_event()
            if event is None:
                break

            if AudioStop.is_type(event.type):
                break

            if not AudioChunk.is_type(event.type):
                # Ignore events that carry no audio
                continue

            chunk = AudioChunk.from_event(event)
            audio_bytes += chunk.audio

            while len(audio_bytes) >= bytes_per_chunk:
                unpacked_chunk = struct.unpack_from(
                    chunk_format, audio_bytes[:bytes_per_chunk]
                )
                keyword_index = porcupine.process(unpacked_chunk)
                if keyword_index >= 0:
                    write_event(
                        Detection(
                            name=names[keyword_index], timestamp=chunk.timestamp
                        ).event()
                    )
                    is_detected = True

                audio_bytes = audio_bytes[bytes_per_chunk:]

        # Report "not detected" only when nothing was detected. The condition
        # used to be inverted, which announced NotDetected right after real
        # detections and stayed silent when there were none.
        if not is_detected:
            write_event(NotDetected().event())
    except KeyboardInterrupt:
        pass
def main() -> None:
    """Download the porcupine1 data archive and unpack it.

    Command-line options:
      --destination  Directory to extract into. When omitted, the directory
                     ``data/wake/porcupine1`` four levels above this script's
                     directory is used.
      --url          Location of the archive to download.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--destination", help="Path to destination directory (default: data)"
    )
    arg_parser.add_argument(
        "--url",
        default="https://github.com/rhasspy/models/releases/download/v1.0/wake_porcupine1-data.tar.gz",
        help="URL of porcupine1 data",
    )
    cli = arg_parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    if not cli.destination:
        # Assume we're in programs/wake/porcupine1/script
        root_dir = _DIR
        for _ in range(4):
            root_dir = root_dir.parent
        target_dir = root_dir / "data" / "wake" / "porcupine1"
    else:
        target_dir = Path(cli.destination)

    # Only the parent is created here; extraction creates the rest
    target_dir.parent.mkdir(parents=True, exist_ok=True)

    _LOGGER.info("Downloading %s", cli.url)
    # Stream mode ("r|*"): the response is unpacked while it downloads
    with urlopen(cli.url) as response, tarfile.open(
        mode="r|*", fileobj=response
    ) as archive:
        _LOGGER.info("Extracting to %s", target_dir)
        archive.extractall(target_dir)
#!/usr/bin/env bash
# Creates (or reuses) this program's virtual environment and installs its
# Python requirements.
#
# Environment overrides:
#   venv    path to the virtual environment (default: <base_dir>/.venv)
#   PYTHON  Python interpreter used to create the venv (default: python3)
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use (an empty value falls back to the default too)
: "${PYTHON:=python3}"

# Quoted so an interpreter path containing spaces works, matching the
# quoted use when the venv is created below.
python_version="$("${PYTHON}" --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

# -----------------------------------------------------------------------------

echo "OK"
#!/usr/bin/env bash
# Creates (or reuses) this program's virtual environment and installs its
# Python requirements.
#
# Environment overrides:
#   venv    path to the virtual environment (default: <base_dir>/.venv)
#   PYTHON  Python interpreter used to create the venv (default: python3)
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use (an empty value falls back to the default too)
: "${PYTHON:=python3}"

# Quoted so an interpreter path containing spaces works, matching the
# quoted use when the venv is created below.
python_version="$("${PYTHON}" --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

# -----------------------------------------------------------------------------

echo "OK"
-------------------------------------------------------------------------------- /programs/wake/snowboy/share/snowboy.umdl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/programs/wake/snowboy/share/snowboy.umdl -------------------------------------------------------------------------------- /programs/wake/snowboy/share/subex.umdl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/programs/wake/snowboy/share/subex.umdl -------------------------------------------------------------------------------- /programs/wake/snowboy/share/view_glass.umdl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhasspy/rhasspy3/11e8d3016d323a2ab1756dc68b4ba8a9f75f22a6/programs/wake/snowboy/share/view_glass.umdl -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | [MESSAGES CONTROL] 2 | disable= 3 | format, 4 | abstract-method, 5 | cyclic-import, 6 | duplicate-code, 7 | global-statement, 8 | import-outside-toplevel, 9 | inconsistent-return-statements, 10 | locally-disabled, 11 | not-context-manager, 12 | too-few-public-methods, 13 | too-many-arguments, 14 | too-many-branches, 15 | too-many-instance-attributes, 16 | too-many-lines, 17 | too-many-locals, 18 | too-many-public-methods, 19 | too-many-return-statements, 20 | too-many-statements, 21 | too-many-boolean-expressions, 22 | unnecessary-pass, 23 | unused-argument, 24 | broad-except, 25 | too-many-nested-blocks, 26 | invalid-name, 27 | unused-import, 28 | fixme, 29 | useless-super-delegation, 30 | missing-module-docstring, 31 | missing-class-docstring, 32 | missing-function-docstring, 33 | 
@dataclass
class Rhasspy:
    """Loaded configuration plus the directories it was loaded from."""

    # Typed configuration built from the merged dictionary
    config: Config
    # Directory passed to load(); programs/ and data/ live underneath it
    config_dir: Path
    # Parent of the directory containing this module
    base_dir: Path
    # Merged configuration as a plain dictionary
    config_dict: Dict[str, Any]

    @property
    def programs_dir(self) -> Path:
        """Directory where programs are installed."""
        return self.config_dir / "programs"

    @property
    def data_dir(self) -> Path:
        """Directory where models are downloaded."""
        return self.config_dir / "data"

    @staticmethod
    def load(config_dir: Union[str, Path]) -> "Rhasspy":
        """Load and merge configuration.yaml files from rhasspy3 and config dir.

        The default configuration next to this module is merged first, then
        the one in ``config_dir``; files that do not exist are skipped.
        """
        config_dir = Path(config_dir)
        merged: Dict[str, Any] = {}

        for candidate in (_DEFAULT_CONFIG, config_dir / "configuration.yaml"):
            if not candidate.exists():
                _LOGGER.debug("Skipping %s", candidate)
                continue

            _LOGGER.debug("Loading config from %s", candidate)
            with candidate.open(encoding="utf-8") as config_file:
                merge_dict(merged, safe_load(config_file))

        return Rhasspy(
            config=Config.from_dict(merged),
            config_dir=config_dir,
            base_dir=_DIR.parent,
            config_dict=merged,
        )
@dataclass
class NotHandled(Eventable):
    """Event reporting that the handle program did not handle its input."""

    # Optional text carried in the event data under "text"
    text: Optional[str] = None

    @staticmethod
    def is_type(event_type: str) -> bool:
        """Return True if event_type is the "not-handled" event type."""
        return event_type == _NOT_HANDLED_TYPE

    def event(self) -> Event:
        """Convert to an Event; "text" is only included when it is set."""
        payload: Dict[str, Any] = {} if self.text is None else {"text": self.text}
        return Event(type=_NOT_HANDLED_TYPE, data=payload)

    @staticmethod
    def from_event(event: Event) -> "NotHandled":
        """Create from an Event, which must carry a data dictionary."""
        payload = event.data
        assert payload is not None
        return NotHandled(text=payload.get("text"))
def merge_dict(base_dict, new_dict):
    """Recursively merge new_dict into base_dict, modifying base_dict in place.

    For each key in new_dict:

    * key missing from base_dict: the new value is stored as-is
    * existing value is a mutable mapping: the new value must be a mapping
      and is merged into it recursively
    * existing value is a mutable sequence: the new value must be a
      (non-string) sequence and its items are appended
    * anything else: the existing value is overwritten

    Raises AssertionError if the new value does not match the kind of
    container already present in base_dict.
    """
    # Import the submodule explicitly: "import collections" alone does not
    # guarantee that collections.abc has been loaded.
    from collections.abc import Mapping, MutableMapping, MutableSequence, Sequence

    for key, value in new_dict.items():
        if key not in base_dict:
            base_dict[key] = value
            continue

        old_value = base_dict[key]
        if isinstance(old_value, MutableMapping):
            # Combine dictionary
            assert isinstance(value, Mapping), f"Not a dict: {value}"
            merge_dict(old_value, value)
        elif isinstance(old_value, MutableSequence):
            # Combine list. str/bytes are sequences too, but extending a list
            # with one would append its individual characters.
            assert isinstance(value, Sequence) and not isinstance(
                value, (str, bytes)
            ), f"Not a list: {value}"
            old_value.extend(value)
        else:
            # Overwrite
            base_dict[key] = value
def _decode(value: Any, target_type: Type) -> Any:
    """Decode value using a (possibly generic) type annotation.

    * dataclass type: value is parsed with the type's from_dict (None stays
      None)
    * Optional[T] / Union containing None: value is decoded with the first
      member that is not None
    * other generic type and a list-like value: each item is decoded with the
      first type argument
    * other generic type and a mapping value: each mapping value is decoded
      with the second type argument
    * anything else: value is returned unchanged

    Strings and bytes are always treated as scalars, never as sequences of
    characters.
    """
    if is_dataclass(target_type):
        assert issubclass(target_type, DataClassJsonMixin), target_type
        return target_type.from_dict(value) if value is not None else None

    type_args = getattr(target_type, "__args__", None)
    if type_args:
        none_type = type(None)

        # Optional[T]
        if none_type in type_args:
            # Skip None itself so that Union[None, T] decodes like Optional[T]
            inner_type = next(
                (arg for arg in type_args if arg is not none_type), none_type
            )
            return _decode(value, inner_type)

        # List[T]
        # str/bytes are Sequences as well; without this check a plain string
        # would be turned into a list of single characters.
        if isinstance(value, Sequence) and not isinstance(value, (str, bytes)):
            item_type = type_args[0]
            return [_decode(item, item_type) for item in value]

        # Dict[str, T]
        if isinstance(value, Mapping):
            value_type = type_args[1]
            return {
                map_key: _decode(map_value, value_type)
                for map_key, map_value in value.items()
            }

    return value
def add_handle(
    app: Quart, rhasspy: Rhasspy, pipeline: PipelineConfig, args: argparse.Namespace
) -> None:
    """Register the /handle/handle endpoint on the Quart app.

    pipeline is used unless the request names another one with the
    "pipeline" query argument. args is accepted but not used here.
    """

    @app.route("/handle/handle", methods=["GET", "POST"])
    async def http_handle_handle() -> Response:
        """Handle text or intent JSON.

        GET requests take the input from the "input" query argument, other
        requests from the request body. The response is the resulting event
        as JSON, or an empty object if the handle program produced no result.
        """
        if request.method == "GET":
            data = request.args["input"]
        else:
            data = (await request.data).decode()

        handle_pipeline = (
            rhasspy.config.pipelines[request.args["pipeline"]]
            if "pipeline" in request.args
            else pipeline
        )

        # Input can be plain text or a JSON intent
        handle_input: Optional[Union[Intent, NotRecognized, Transcript]] = None
        if request.content_type == "application/json":
            # Try to parse either an "intent" or "not-recognized" event.
            # The event must be built from the decoded object's fields: passing
            # the whole dict positionally would store it as the event type, so
            # no input class would ever match.
            event_dict = json.loads(data)
            event = Event(type=event_dict["type"], data=event_dict.get("data"))
            for event_class in _HANDLE_INPUT_TYPES:
                assert issubclass(event_class, _HANDLE_INPUT_TYPES)
                if event_class.is_type(event.type):
                    handle_input = event_class.from_event(event)
                    break
        else:
            # Assume plain text
            handle_input = Transcript(data)

        assert handle_input is not None, "Invalid input"

        # An explicit "handle_program" query argument overrides the pipeline
        handle_program = request.args.get("handle_program") or handle_pipeline.handle
        assert handle_program is not None, "Missing program for handle"
        _LOGGER.debug("handle: handle=%s, input='%s'", handle_program, handle_input)

        result = await handle(rhasspy, handle_program, handle_input)
        _LOGGER.debug("handle: result=%s", result)

        return jsonify(result.event().to_dict() if result is not None else {})
// https://stackoverflow.com/questions/62093473/how-to-play-raw-audio-files
/**
 * Build the 44-byte RIFF/WAVE header for a block of raw audio.
 *
 * @param {Object} opts
 * @param {number} opts.numFrames - number of audio frames that follow the header
 * @param {number} [opts.numChannels=2]
 * @param {number} [opts.sampleRate=44100]
 * @param {number} [opts.bytesPerSample=2]
 * @param {number} [opts.format=1] - WAVE AudioFormat code (1 = PCM)
 * @returns {ArrayBuffer} header bytes; all numeric fields are little-endian
 */
function buildWaveHeader(opts) {
  const numFrames = opts.numFrames;
  const numChannels = opts.numChannels || 2;
  const sampleRate = opts.sampleRate || 44100;
  const bytesPerSample = opts.bytesPerSample || 2;
  // Default to PCM like the other options default to common values;
  // an omitted format would otherwise be written as 0.
  const format = opts.format || 1;

  const blockAlign = numChannels * bytesPerSample;
  const byteRate = sampleRate * blockAlign;
  const dataSize = numFrames * blockAlign;

  const buffer = new ArrayBuffer(44);
  const dv = new DataView(buffer);

  // Write position inside the header, advanced by each writer below
  let p = 0;

  function writeString(s) {
    for (let i = 0; i < s.length; i++) {
      dv.setUint8(p + i, s.charCodeAt(i));
    }
    p += s.length;
  }

  function writeUint32(d) {
    dv.setUint32(p, d, true);
    p += 4;
  }

  function writeUint16(d) {
    dv.setUint16(p, d, true);
    p += 2;
  }

  writeString('RIFF');              // ChunkID
  writeUint32(dataSize + 36);       // ChunkSize
  writeString('WAVE');              // Format
  writeString('fmt ');              // Subchunk1ID
  writeUint32(16);                  // Subchunk1Size
  writeUint16(format);              // AudioFormat
  writeUint16(numChannels);         // NumChannels
  writeUint32(sampleRate);          // SampleRate
  writeUint32(byteRate);            // ByteRate
  writeUint16(blockAlign);          // BlockAlign
  writeUint16(bytesPerSample * 8);  // BitsPerSample
  writeString('data');              // Subchunk2ID
  writeUint32(dataSize);            // Subchunk2Size

  return buffer;
}
#!/usr/bin/env bash
# Reformat the project's Python sources with black and isort.
set -eo pipefail

# Repository root, resolved from the location of this script
script_dir="$(cd "$(dirname "$0")" && pwd)"
repo_root="$(realpath "${script_dir}/..")"

# Virtual environment; may be overridden by setting "venv" beforehand
: "${venv:=${repo_root}/.venv}"

if [ -d "${venv}" ]; then
    # Use the tools installed in the virtual environment when it exists
    source "${venv}/bin/activate"
fi

# Directories containing Python code
targets=(
    "${repo_root}/bin"
    "${repo_root}/rhasspy3"
    "${repo_root}/rhasspy3_http_api"
    "${repo_root}/programs"
)

# Format code
for formatter in black isort; do
    "${formatter}" "${targets[@]}"
done
#!/usr/bin/env bash
# Run formatting checks, linters and the type checker over the Python sources.
set -eo pipefail

# Repository root, resolved from the location of this script
script_dir="$(cd "$(dirname "$0")" && pwd)"
repo_root="$(realpath "${script_dir}/..")"

# Virtual environment; may be overridden by setting "venv" beforehand
: "${venv:=${repo_root}/.venv}"

if [ -d "${venv}" ]; then
    # Use the tools installed in the virtual environment when it exists
    source "${venv}/bin/activate"
fi

# Directories containing Python code
targets=(
    "${repo_root}/bin"
    "${repo_root}/rhasspy3"
    "${repo_root}/rhasspy3_http_api"
    "${repo_root}/programs"
)

# Formatting is only verified here, never modified
for checker in black isort; do
    "${checker}" "${targets[@]}" --check
done

# Linters
for linter in flake8 pylint; do
    "${linter}" "${targets[@]}"
done

# Type checking covers only bin and the rhasspy3 package
mypy "${repo_root}/bin" "${repo_root}/rhasspy3"
#!/usr/bin/env bash
# Prepare the repository for use by creating the configuration directory.
set -eo pipefail

# Repository root, resolved from the location of this script
script_dir="$(cd "$(dirname "$0")" && pwd)"
repo_root="$(realpath "${script_dir}/..")"

# Make sure the configuration directory exists (nothing is copied into it)
mkdir -p "${repo_root}/config"

# -----------------------------------------------------------------------------

echo "OK"
#!/usr/bin/env bash
# Run the test suite with pytest.
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    # Activate virtual environment if available
    source "${venv}/bin/activate"
fi

# Put the repo first on the module search path. The existing PYTHONPATH is
# only appended when it is set and non-empty, so no empty entry (trailing ":")
# is left behind.
export PYTHONPATH="${base_dir}${PYTHONPATH:+:${PYTHONPATH}}"
pytest -vv "${base_dir}/tests"
#!/usr/bin/env python3
"""Packaging script for rhasspy3."""
from pathlib import Path

import setuptools
from setuptools import setup

this_dir = Path(__file__).parent
module_dir = this_dir / "rhasspy3"

# -----------------------------------------------------------------------------

# Load README in as long description
long_description: str = ""
readme_path = this_dir / "README.md"
if readme_path.is_file():
    long_description = readme_path.read_text(encoding="utf-8")

# requirements.txt is optional; without it there are no install requirements
requirements = []
requirements_path = this_dir / "requirements.txt"
if requirements_path.is_file():
    with open(requirements_path, "r", encoding="utf-8") as requirements_file:
        requirements = requirements_file.read().splitlines()

# The package version is kept in rhasspy3/VERSION
version_path = module_dir / "VERSION"
with open(version_path, "r", encoding="utf-8") as version_file:
    version = version_file.read().strip()

# -----------------------------------------------------------------------------

setup(
    name="rhasspy3",
    version=version,
    description="Rhasspy Voice Assistant Toolkit",
    long_description=long_description,
    # README.md is Markdown; declare it so it is not rendered as plain text
    long_description_content_type="text/markdown",
    url="http://github.com/rhasspy/rhasspy3",
    author="Michael Hansen",
    author_email="mike@rhasspy.org",
    license="MIT",
    packages=setuptools.find_packages(),
    package_data={
        "rhasspy3": ["VERSION", "py.typed"],
    },
    install_requires=requirements,
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Topic :: Text Processing :: Linguistic",
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
    ],
    keywords="voice assistant rhasspy",
)
async def main() -> None:
    """Stream WAV files to a websocket URL and print each response.

    For every WAV file a new connection is opened. The "rate", "width" and
    "channels" query parameters are added to the URL from the WAV header
    unless the URL already contains them. Audio is sent in chunks of
    --samples-per-chunk frames, followed by an empty message, after which one
    message is received and printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("url")
    parser.add_argument("wav_file", nargs="+", help="Path(s) to WAV file(s)")
    parser.add_argument("--samples-per-chunk", type=int, default=1024)
    args = parser.parse_args()

    for wav_path in args.wav_file:
        with wave.open(wav_path, "rb") as reader:
            # Add audio parameters if missing
            url_parts = urlparse(args.url)
            params = dict(parse_qsl(url_parts.query))
            audio_params = (
                ("rate", reader.getframerate),
                ("width", reader.getsampwidth),
                ("channels", reader.getnchannels),
            )
            for param_name, read_param in audio_params:
                params.setdefault(param_name, str(read_param()))

            target_url = urlunparse(url_parts._replace(query=urlencode(params)))
            async with connect(target_url) as websocket:
                while True:
                    frames = reader.readframes(args.samples_per_chunk)
                    if not frames:
                        # End of audio
                        break

                    await websocket.send(frames)

                # Signal stop with empty message
                await websocket.send(b"")
                print(await websocket.recv())
-m venv "${venv}" 22 | source "${venv}/bin/activate" 23 | 24 | # Install Python dependencies 25 | echo 'Installing Python dependencies' 26 | pip3 install --upgrade pip 27 | pip3 install --upgrade wheel setuptools 28 | 29 | pip3 install -r "${base_dir}/requirements.txt" 30 | 31 | # ----------------------------------------------------------------------------- 32 | 33 | echo "OK" 34 | --------------------------------------------------------------------------------