├── .dockerignore ├── .gitignore ├── .isort.cfg ├── .projectile ├── .python-version ├── AUTHORS ├── CHANGELOG ├── Dockerfile ├── Dockerfile.debian ├── Dockerfile.test.debian ├── LICENSE ├── Makefile.in ├── PKG-INFO ├── README.md ├── TODO.md ├── VERSION ├── __main__.py ├── aclocal.m4 ├── architecture.sh ├── bin ├── download-files.sh ├── verify-profile.py └── voice2json ├── bootstrap.sh ├── config.guess ├── config.sub ├── configure ├── configure.ac ├── debian ├── control.in ├── sox └── voice2json ├── docker ├── Dockerfile.pyinstaller ├── multiarch_build │ ├── Dockerfile.build │ ├── Dockerfile.build.amd64 │ ├── Dockerfile.build.arm64 │ ├── Dockerfile.build.armv6 │ ├── Dockerfile.build.armv7 │ ├── Dockerfile.debian │ ├── Dockerfile.debian.amd64 │ ├── Dockerfile.debian.arm64 │ ├── Dockerfile.debian.armv6 │ ├── Dockerfile.debian.armv7 │ ├── Dockerfile.pyinstaller │ ├── Dockerfile.run │ ├── Dockerfile.run.amd64 │ ├── Dockerfile.run.arm64 │ ├── Dockerfile.run.armv6 │ ├── Dockerfile.run.armv7 │ ├── Makefile │ ├── bin │ │ └── sudo │ └── requirements.txt ├── preprocess.sh └── voice2json ├── docs ├── CNAME ├── _config.yml ├── about.md ├── commands.md ├── data │ ├── LightState.fst │ ├── LightState.gram │ └── intent.fst ├── formats.md ├── img │ ├── microphone.png │ ├── mike-head.png │ ├── output_21_0.svg │ ├── output_24_0.svg │ ├── output_27_0.svg │ ├── overview-1.svg │ ├── overview-2.svg │ ├── overview-3.svg │ ├── overview-4.svg │ ├── overview-5.svg │ ├── overview-6.svg │ ├── overview-7.svg │ ├── overview-8.svg │ ├── overview-9.svg │ ├── rhasspy.svg │ ├── terminal.svg │ ├── training.svg │ ├── v2_architecture.svg │ ├── v2_sentences.svg │ ├── voice2json-inverted.png │ ├── voice2json-inverted.svg │ ├── voice2json.svg │ └── whitepaper │ │ ├── acoustic-model.svg │ │ ├── audio-to-json.svg │ │ ├── core-components.svg │ │ ├── fsticuffs-recognize.svg │ │ ├── grapheme-to-phoneme.svg │ │ ├── intent-graph.svg │ │ ├── language-model-mixing.svg │ │ ├── language-model.svg │ │ ├── 
pronunciation-dictionary.svg │ │ ├── sentences-and-training.svg │ │ ├── sentences-to-graph.svg │ │ └── speech-recognizer.svg ├── index.md ├── install.md ├── profiles.md ├── recipes.md ├── requirements.txt ├── sentences.md └── whitepaper.md ├── etc ├── bin │ └── voice2json ├── kaldi-src-configure.patch ├── linux_atlas_aarch64.mk ├── precise │ ├── athena.pb │ ├── athena.pb.params │ ├── athena.pbtxt │ ├── christopher-precise.pb │ ├── christopher-precise.pb.params │ ├── computer-en.pb │ ├── hey-mycroft-2.pb │ ├── hey-mycroft-2.pb.params │ ├── marvin.pb │ ├── marvin.pb.params │ ├── sheila-en.params │ └── sheila-en.pb ├── profile.defaults.yml ├── profiles │ ├── ca-es_pocketsphinx-cmu.yml │ ├── cs-cz_kaldi-rhasspy.yml │ ├── de_deepspeech-aashishag.yml │ ├── de_deepspeech-jaco.yml │ ├── de_kaldi-zamia.yml │ ├── de_pocketsphinx-cmu.yml │ ├── el-gr_pocketsphinx-cmu.yml │ ├── en-in_pocketsphinx-cmu.yml │ ├── en-us_deepspeech-mozilla.yml │ ├── en-us_kaldi-rhasspy.yml │ ├── en-us_kaldi-zamia.yml │ ├── en-us_pocketsphinx-cmu.yml │ ├── es-mexican_pocketsphinx-cmu.yml │ ├── es_deepspeech-jaco.yml │ ├── es_kaldi-rhasspy.yml │ ├── es_pocketsphinx-cmu.yml │ ├── fr_deepspeech-jaco.yml │ ├── fr_kaldi-guyot.yml │ ├── fr_kaldi-rhasspy.yml │ ├── fr_pocketsphinx-cmu.yml │ ├── hi_pocketsphinx-cmu.yml │ ├── it_deepspeech-jaco.yml │ ├── it_deepspeech-mozillaitalia.yml │ ├── it_kaldi-rhasspy.yml │ ├── it_pocketsphinx-cmu.yml │ ├── ko-kr_kaldi-montreal.yml │ ├── kz_pocketsphinx-cmu.yml │ ├── nl_kaldi-cgn.yml │ ├── nl_kaldi-rhasspy.yml │ ├── nl_pocketsphinx-cmu.yml │ ├── pl_deepspeech-jaco.yml │ ├── pl_julius-github.yml │ ├── pt-br_pocketsphinx-cmu.yml │ ├── ru_kaldi-rhasspy.yml │ ├── ru_pocketsphinx-cmu.yml │ ├── sv_kaldi-montreal.yml │ ├── sv_kaldi-rhasspy.yml │ ├── vi_kaldi-montreal.yml │ └── zh-cn_pocketsphinx-cmu.yml ├── shflags └── test │ ├── hey_mycroft.wav │ ├── turn_on_living_room_lamp.wav │ ├── what_time_is_it.wav │ └── would_you_please_turn_on_living_room_lamp.wav ├── install-sh ├── 
m4 └── python.m4 ├── missing ├── mkdocs.yml ├── mypy.ini ├── pylintrc ├── recipes ├── fluent_dataset │ ├── Fluent Speech Commands Public License.pdf │ ├── Makefile │ ├── README.md │ ├── results │ │ ├── report.json.gz │ │ ├── test_truth.jsonl │ │ └── test_truth.txt │ ├── sentences.ini │ └── test_files.txt ├── launch_program │ ├── README.md │ ├── beep_hi.wav │ ├── beep_lo.wav │ ├── custom_words.kaldi.txt │ ├── custom_words.pocketsphinx.txt │ ├── listen_and_launch.sh │ ├── sentences.ini │ ├── slots │ │ └── program │ └── test │ │ └── launch_firefox.wav ├── parallel_recognition │ ├── recognize_parallel.sh │ └── wav-file-names.txt ├── timers │ ├── README.md │ ├── alarm.wav │ ├── beep_hi.wav │ ├── beep_lo.wav │ ├── do_timer.py │ ├── listen_timer.sh │ └── sentences.ini └── train_rasa │ ├── README.md │ ├── config.yml │ ├── examples_to_rasa.py │ ├── rasa │ ├── recognize.sh │ ├── sentences.ini │ └── train.sh ├── requirements.txt ├── requirements_dev.txt ├── scripts ├── build-debian.sh ├── build-docker.sh ├── build-docs.sh ├── build │ ├── build-julius.sh │ ├── build-kaldi.sh │ ├── build-kenlm.sh │ ├── build-opengrm.sh │ └── build-phonetisaurus.sh ├── check-code.sh ├── create-venv.sh ├── format-code.sh ├── install │ ├── install-deepspeech.sh │ ├── install-julius.sh │ ├── install-kaldi.sh │ ├── install-kenlm.sh │ ├── install-opengrm.sh │ ├── install-phonetisaurus.sh │ └── install-precise.sh ├── test.sh └── test │ ├── test-all.sh │ ├── test-debian.sh │ ├── test-docker.sh │ ├── test-open-transcription.sh │ ├── test-print-profile.sh │ ├── test-print-version.sh │ ├── test-pronounce-word.sh │ ├── test-recognize-intent.sh │ ├── test-transcribe-wav.sh │ └── test-wait-wake.sh ├── setup.cfg ├── setup.py.in ├── tests └── test.py ├── voice2json.sh.in ├── voice2json.spec.in └── voice2json ├── __init__.py ├── __main__.py ├── core.py ├── generate.py ├── julius.py ├── pronounce.py ├── py.typed ├── recognize.py ├── record.py ├── sounds_like.py ├── speak.py ├── test.py ├── train.py ├── 
transcribe.py ├── utils.py └── wake.py /.dockerignore: -------------------------------------------------------------------------------- 1 | * 2 | !aclocal.m4 3 | !configure 4 | !config.sub 5 | !config.guess 6 | !install-sh 7 | !missing 8 | !m4/ 9 | !Makefile.in 10 | !setup.py.in 11 | !voice2json.sh.in 12 | 13 | !bin/ 14 | !etc/ 15 | !download/ 16 | !dist/ 17 | !requirements.txt 18 | !scripts/ 19 | !voice2json/*.py 20 | !VERSION 21 | !README.md 22 | !LICENSE 23 | !site/ 24 | 25 | !debian/sox 26 | !debian/control.in 27 | !voice2json.spec.in 28 | !debian/voice2json 29 | !__main__.py -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.venv/ 2 | __pycache__/ 3 | .mypy_cache/ 4 | /*.egg-info/ 5 | .doit.db 6 | 7 | /download/ 8 | 9 | /recipes/train_rasa/data/ 10 | /recipes/train_rasa/models/ 11 | /recipes/fluent_dataset/wavs/ 12 | /recipes/fluent_dataset/results/intents.jsonl 13 | /recipes/fluent_dataset/results/transcriptions.jsonl 14 | /site/ 15 | /build/ 16 | /dist/ 17 | /log/ 18 | .ipynb_checkpoints/ 19 | 20 | /setup.py 21 | /Makefile 22 | /voice2json.sh 23 | /voice2json.spec 24 | 25 | /autom4te.cache/ 26 | /config.log 27 | /config.status 28 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | multi_line_output=3 3 | include_trailing_comma=True 4 | force_grid_wrap=0 5 | use_parentheses=True 6 | line_length=88 7 | -------------------------------------------------------------------------------- /.projectile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/synesthesiam/voice2json/03996c94113a615182adbbb1c83834124833579a/.projectile -------------------------------------------------------------------------------- /.python-version: 
-------------------------------------------------------------------------------- 1 | 3.7.7 2 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Michael Hansen 2 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | ## [2.1] - 3 Jun 2021 2 | 3 | ### Added 4 | 5 | * download-profile command and auto-download of relevant language files from Github 6 | * New Kaldi profiles for Czech, English, Spanish, French, Dutch, Russian, and Swedish 7 | * New DeepSpeech profiles for German, French, and Italian 8 | 9 | ### Changed 10 | 11 | * Upgrade DeepSpeech support to v0.9 12 | * --profile argument can now be a language name, profile name, or directory 13 | 14 | ## [2.0] - 7 May 2020 15 | 16 | ### Added 17 | 18 | * Support for Mozilla's DeepSpeech speech to text engine (version 0.6.1). 19 | * transcribe-stream for live transcription 20 | * Number ranges, slot programs, and converters to template language. 21 | * --intent-filter for recognize-intent command. 22 | * --certfile and --keyfile arguments for SSL support. 23 | * print-downloads command (download only necessary files). 24 | * print-files command (for backups). 25 | * print-version command (or --version). 26 | 27 | ### Changed 28 | 29 | * wait-wake command uses Mycroft Precise instead of porcupine. 30 | * speak-sentence command takes --marytts flag instead of --espeak. Defaults to eSpeak. 31 | * Using autoconf for source install. 32 | * Minimum required Python version is 3.7 33 | 34 | ### Removed 35 | 36 | * MaryTTS server is no longer embedded in Docker image. Must be hosted externally. 
37 | 38 | ## [1.0] - 20 November 2019 39 | 40 | * First version -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:buster as build 2 | 3 | ENV LANG C.UTF-8 4 | ENV DEBIAN_FRONTEND=noninteractive 5 | 6 | RUN --mount=type=cache,id=apt-build,target=/var/apt/cache \ 7 | apt-get update && \ 8 | apt-get install --no-install-recommends --yes \ 9 | python3 python3-dev python3-setuptools python3-pip python3-venv \ 10 | build-essential swig libatlas-base-dev portaudio19-dev \ 11 | curl ca-certificates 12 | 13 | ENV APP_DIR=/usr/lib/voice2json 14 | ENV BUILD_DIR=/build 15 | 16 | # Directory of prebuilt tools 17 | ARG TARGETARCH 18 | ARG TARGETVARIANT 19 | COPY download/shared ${BUILD_DIR}/download/ 20 | COPY download/${TARGETARCH}${TARGETVARIANT}/ ${BUILD_DIR}/download/ 21 | 22 | COPY m4/ ${BUILD_DIR}/m4/ 23 | COPY configure config.sub config.guess \ 24 | install-sh missing aclocal.m4 \ 25 | Makefile.in setup.py.in voice2json.sh.in voice2json.spec.in \ 26 | requirements.txt \ 27 | ${BUILD_DIR}/ 28 | 29 | RUN cd ${BUILD_DIR} && \ 30 | ./configure --enable-in-place --prefix=${APP_DIR}/.venv 31 | 32 | COPY scripts/install/ ${BUILD_DIR}/scripts/install/ 33 | 34 | RUN --mount=type=cache,id=pip-build,target=/root/.cache/pip \ 35 | cd ${BUILD_DIR} && \ 36 | make && \ 37 | make install 38 | 39 | # ----------------------------------------------------------------------------- 40 | 41 | FROM debian:buster as run 42 | 43 | ENV LANG C.UTF-8 44 | 45 | RUN --mount=type=cache,id=apt-run,target=/var/apt/cache \ 46 | apt-get update && \ 47 | apt-get install --yes --no-install-recommends \ 48 | python3 \ 49 | libportaudio2 libatlas3-base libgfortran4 \ 50 | ca-certificates \ 51 | perl sox alsa-utils espeak-ng jq 52 | 53 | ENV APP_DIR=/usr/lib/voice2json 54 | COPY --from=build ${APP_DIR}/ ${APP_DIR}/ 55 | COPY --from=build /build/voice2json.sh ${APP_DIR}/ 56 
| 57 | COPY etc/profile.defaults.yml ${APP_DIR}/etc/ 58 | COPY etc/profiles/ ${APP_DIR}/etc/profiles/ 59 | COPY etc/precise/ ${APP_DIR}/etc/precise/ 60 | COPY site/ ${APP_DIR}/site/ 61 | COPY bin/voice2json ${APP_DIR}/bin/ 62 | 63 | COPY VERSION ${APP_DIR}/ 64 | COPY voice2json/ ${APP_DIR}/voice2json/ 65 | 66 | ENTRYPOINT ["bash", "/usr/lib/voice2json/voice2json.sh"] 67 | -------------------------------------------------------------------------------- /Dockerfile.debian: -------------------------------------------------------------------------------- 1 | FROM debian:buster as base 2 | 3 | ENV LANG C.UTF-8 4 | ENV DEBIAN_FRONTEND=noninteractive 5 | 6 | RUN --mount=type=cache,id=apt-base,target=/var/cache/apt \ 7 | apt-get update && \ 8 | apt-get install --no-install-recommends --yes \ 9 | build-essential \ 10 | python3 python3-dev python3-pip python3-setuptools python3-venv \ 11 | swig portaudio19-dev libatlas-base-dev \ 12 | fakeroot curl 13 | 14 | ENV NUM_JOBS=8 15 | 16 | ENV BUILD_DIR=/build 17 | ENV APP_DIR=${BUILD_DIR}/voice2json/usr/lib/voice2json 18 | 19 | # ----------------------------------------------------------------------------- 20 | # Python 3.7 21 | # ----------------------------------------------------------------------------- 22 | 23 | FROM base as python 24 | 25 | RUN --mount=type=cache,id=apt-python,target=/var/cache/apt \ 26 | apt-get update && \ 27 | apt-get install --yes --no-install-recommends \ 28 | git zlib1g-dev patchelf rsync \ 29 | libncursesw5-dev libreadline-gplv2-dev libssl-dev \ 30 | libgdbm-dev libc6-dev libsqlite3-dev libbz2-dev libffi-dev 31 | 32 | ADD download/source/Python-3.7.10.tar.xz /build 33 | 34 | RUN cd /build/Python-3.7.10 && \ 35 | ./configure && \ 36 | make -j ${NUM_JOBS} && \ 37 | make install DESTDIR=${APP_DIR} 38 | 39 | # ----------------------------------------------------------------------------- 40 | 41 | FROM base as build 42 | 43 | COPY --from=python ${APP_DIR}/ ${APP_DIR}/ 44 | COPY --from=python 
${APP_DIR}/usr/local/include/python3.7m/ /usr/include/ 45 | ENV PYTHON=${APP_DIR}/usr/local/bin/python3 46 | 47 | # Directory of prebuilt tools 48 | ARG TARGETARCH 49 | ARG TARGETVARIANT 50 | COPY download/shared/ ${BUILD_DIR}/download/ 51 | COPY download/${TARGETARCH}${TARGETVARIANT}/ ${BUILD_DIR}/download/ 52 | 53 | COPY m4/ ${BUILD_DIR}/m4/ 54 | COPY configure config.sub config.guess \ 55 | install-sh missing aclocal.m4 \ 56 | VERSION Makefile.in setup.py.in voice2json.sh.in voice2json.spec.in \ 57 | requirements.txt \ 58 | ${BUILD_DIR}/ 59 | 60 | RUN cd ${BUILD_DIR} && \ 61 | ./configure --prefix=${APP_DIR} --disable-virtualenv 62 | 63 | COPY scripts/install/ ${BUILD_DIR}/scripts/install/ 64 | 65 | COPY etc/profile.defaults.yml ${BUILD_DIR}/etc/ 66 | COPY etc/profiles/ ${BUILD_DIR}/etc/profiles/ 67 | COPY etc/precise/ ${BUILD_DIR}/etc/precise/ 68 | COPY site/ ${BUILD_DIR}/site/ 69 | 70 | COPY README.md LICENSE ${BUILD_DIR}/ 71 | COPY voice2json/ ${BUILD_DIR}/voice2json/ 72 | 73 | RUN --mount=type=cache,id=pip-build,target=/root/.cache/pip \ 74 | cd ${BUILD_DIR} && \ 75 | make && \ 76 | make install 77 | 78 | COPY __main__.py ${BUILD_DIR}/ 79 | 80 | COPY debian/control.in / 81 | 82 | # Write shared variables to /.vars 83 | RUN export DEBIAN_ARCH="$(dpkg-architecture | grep DEB_BUILD_ARCH= | sed -e 's/[^=]\\+=//')" && \ 84 | export VERSION="$(cat ${BUILD_DIR}/VERSION)" && \ 85 | mkdir -p ${BUILD_DIR}/voice2json/DEBIAN && \ 86 | sed -e s"/@VERSION@/${VERSION}/" -e "s/@DEBIAN_ARCH@/${DEBIAN_ARCH}/" < /control.in > ${BUILD_DIR}/voice2json/DEBIAN/control 87 | 88 | COPY VERSION ${BUILD_DIR}/voice2json/usr/lib/voice2json/ 89 | COPY debian/voice2json ${BUILD_DIR}/voice2json/usr/bin/ 90 | 91 | # Fix precise-engine link 92 | RUN cd ${APP_DIR}/bin && \ 93 | ln -sf ../lib/precise/precise-engine 94 | 95 | RUN cd ${BUILD_DIR} && \ 96 | dpkg --build voice2json 97 | 98 | RUN cd ${BUILD_DIR} && \ 99 | dpkg-name *.deb 100 | 101 | # 
----------------------------------------------------------------------------- 102 | 103 | FROM scratch 104 | 105 | COPY --from=build /build/*.deb / 106 | -------------------------------------------------------------------------------- /Dockerfile.test.debian: -------------------------------------------------------------------------------- 1 | FROM ubuntu:eoan as base 2 | 3 | ENV LANG C.UTF-8 4 | 5 | RUN apt-get update && \ 6 | apt-get install --yes --no-install-recommends \ 7 | python3 dpkg-dev \ 8 | libportaudio2 libatlas3-base libgfortran4 \ 9 | ca-certificates \ 10 | perl sox alsa-utils espeak jq 11 | 12 | COPY dist/ /dist/ 13 | COPY VERSION / 14 | 15 | RUN export DEBIAN_ARCH="$(dpkg-architecture | grep DEB_BUILD_ARCH= | sed -e 's/[^=]\+=//')" && \ 16 | export VERSION="$(cat ${BUILD_DIR}/VERSION)" && \ 17 | cd /dist && \ 18 | apt install ./voice2json_${VERSION}_${DEBIAN_ARCH}.deb 19 | 20 | ENTRYPOINT ["voice2json"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019-2021 Michael Hansen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
#!/usr/bin/env bash

# Guess the system architecture and print it on stdout.
#
# Usage: architecture.sh [CPU_ARCH]
#   CPU_ARCH  optional machine name to translate; defaults to `uname -m`.
#
# Recognized machine names are translated to one of:
# - amd64 (Intel/AMD)
# - armv7 (Raspberry Pi 2-3 B)
# - arm64 (Raspberry Pi 3B+, 4)
# - armv6 (Raspberry Pi 1, Zero, Zero W)
#
# For an unrecognized machine name the Debian build architecture is used when
# dpkg-architecture is available; otherwise the machine name itself is printed.

if [[ -n "$1" ]]; then
    cpu_arch="$1"
else
    cpu_arch="$(uname -m)"
fi

# Try lookup table first
declare -A known_archs
known_archs=(
    ['x86_64']='amd64'
    ['arm32v7']='armv7'
    ['armv7l']='armv7'
    ['arm64v8']='arm64'
    ['aarch64']='arm64'
    ['armv6l']='armv6'
    ['arm32v6']='armv6'
)

guess_arch="${known_archs[${cpu_arch}]}"
if [[ -z "${guess_arch}" ]]; then
    # Try using Debian command
    if [[ -n "$(command -v dpkg-architecture)" ]]; then
        guess_arch="$(dpkg-architecture | grep DEB_BUILD_ARCH= | sed 's/[^=]\+=//')"
    fi

    if [[ -z "${guess_arch}" ]]; then
        # Fall back to CPU architecture.
        # (The assignment used to be reversed, which wiped cpu_arch and left
        # guess_arch empty, so the script printed a blank line here.)
        guess_arch="${cpu_arch}"
    fi
fi

echo "${guess_arch}"
def _content_length_from_head(header_lines, url):
    """Return the Content-Length of the final response in ``curl --head`` output.

    Args:
        header_lines: Lines printed by ``curl --silent --location --head``.
            With ``--location`` there is one header section per response
            (each redirect hop, then the final response).
        url: URL that was requested; only used in the error message.

    Returns:
        The ``Content-Length`` value of the last response as an int, or
        ``None`` when that response did not send one.

    Raises:
        AssertionError: If any response status is not 200 or 302.
        ValueError: If a ``Content-Length`` value is not an integer.
    """
    content_length = None

    for line in header_lines:
        line = line.strip()
        if not line:
            continue

        if line.startswith("HTTP"):
            # Status line, e.g. "HTTP/1.1 200 OK" or "HTTP/2 200".  The status
            # code is the second token; the last token is the reason phrase
            # ("OK") for HTTP/1.x responses.
            tokens = line.split()
            status = tokens[1] if len(tokens) > 1 else ""
            if status not in ("200", "302"):
                # Raised explicitly (not via ``assert``) so the check is not
                # stripped when Python runs with -O.
                raise AssertionError(f"{url} {line}")

            # A new response starts here: forget any size reported by an
            # earlier redirect hop.
            content_length = None
            continue

        name, separator, value = line.partition(":")
        if not separator:
            # Not a "name: value" header line; nothing to read from it.
            continue

        if name.strip().lower() == "content-length":
            content_length = int(value)

    return content_length


def main():
    """Check that every file listed in a profile YAML has the expected size online.

    The YAML file is expected to contain a ``url_format`` string and one or
    more mappings of ``file path -> info`` where ``info["bytes"]`` is the
    expected size.  For each file the download URL is built from the URL
    format (``profile``, ``file`` and ``machine`` placeholders), a HEAD
    request is issued with ``curl``, and the reported ``Content-Length`` is
    compared to the expected size.  Missing or mismatching sizes are logged
    as errors; any exception while checking a file is logged and re-raised.
    """
    # Imported here so the ABC lookup does not rely on ``collections.abc``
    # having been loaded as a side effect of another import.
    from collections.abc import Mapping

    parser = argparse.ArgumentParser(prog="verify-profile.py")
    parser.add_argument(
        "profile_yml", help="Path to profile YAML file with file details"
    )
    parser.add_argument("--profile-name", help="Override profile name from file")
    parser.add_argument("--url-format", help="Change url download format")
    parser.add_argument(
        "--machine", default=platform.machine(), help="Override platform.machine"
    )
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
    _LOGGER.debug(args)

    args.profile_yml = Path(args.profile_yml)

    # Profile name defaults to the YAML file name without its extension
    if args.profile_name is None:
        args.profile_name = args.profile_yml.stem

    with open(args.profile_yml, "r", encoding="utf-8") as profile_file:
        files_dict = yaml.safe_load(profile_file)

    # The command-line override wins; the YAML key is only required without it
    url_format = args.url_format or files_dict["url_format"]

    for files in files_dict.values():
        # Skip scalar entries such as url_format itself
        if not isinstance(files, Mapping):
            continue

        for file_path, file_info in files.items():
            try:
                url = url_format.format(
                    profile=args.profile_name, file=file_path, machine=args.machine
                )
                expected_size = int(file_info["bytes"])
                headers = (
                    subprocess.check_output(
                        ["curl", "--silent", "--location", "--head", url]
                    )
                    .decode()
                    .splitlines()
                )
                actual_size = _content_length_from_head(headers, url)

                if actual_size is None:
                    _LOGGER.error("%s (no size)", url)
                    continue

                if expected_size != actual_size:
                    _LOGGER.error(
                        "%s (wrong size, expected %s, got %s)",
                        file_path,
                        expected_size,
                        actual_size,
                    )
                    continue

                _LOGGER.debug("%s %s %s %s", file_path, url, expected_size, actual_size)
            except Exception:
                # Log which file failed, then let the original traceback propagate
                _LOGGER.exception("%s %s", file_path, file_info)
                raise
bash 2 | 3 | # Avoid use of voice2json libs when calling external sox 4 | LD_LIBRARY_PATH="${OLD_LD_LIBRARY_PATH}" \ 5 | PATH="${OLD_PATH}" \ 6 | sox "$@" 7 | -------------------------------------------------------------------------------- /debian/voice2json: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | APPDIR='/usr/lib/voice2json' 3 | 4 | export PATH="${APPDIR}/bin:${APPDIR}/usr/local/bin:${PATH}" 5 | export LD_LIBRARY_PATH="${APPDIR}/lib:${APPDIR}/usr/local/lib:${LD_LIBRARY_PATH}" 6 | 7 | export voice2json_dir="${APPDIR}/share/voice2json" 8 | export KALDI_DIR="${APPDIR}/lib/kaldi" 9 | 10 | export PYTHONPATH="${voice2json_dir}:${PYTHONPATH}" 11 | 12 | "${APPDIR}/usr/local/bin/python3" -m voice2json "$@" 13 | -------------------------------------------------------------------------------- /docker/Dockerfile.pyinstaller: -------------------------------------------------------------------------------- 1 | FROM ubuntu:eoan as build 2 | ARG TARGETPLATFORM 3 | ARG TARGETARCH 4 | ARG TARGETVARIANT 5 | 6 | ENV LANG C.UTF-8 7 | 8 | ENV LANG C.UTF-8 9 | 10 | RUN apt-get update && \ 11 | apt-get install --yes --no-install-recommends \ 12 | python3 python3-dev python3-pip \ 13 | build-essential \ 14 | autoconf automake libtool \ 15 | wget 16 | 17 | # ----------------------------------------------------------------------------- 18 | 19 | RUN cd / && \ 20 | wget 'https://github.com/pyinstaller/pyinstaller/releases/download/v3.6/PyInstaller-3.6.tar.gz' && \ 21 | tar -xf /PyInstaller-3.6.tar.gz 22 | 23 | RUN cd /PyInstaller-3.6/bootloader && \ 24 | python3 ./waf all 25 | 26 | RUN cd /PyInstaller-3.6 && \ 27 | python3 -m pip install -e . 
-------------------------------------------------------------------------------- /docker/multiarch_build/Dockerfile.build: -------------------------------------------------------------------------------- 1 | FROM ubuntu:eoan as ubuntu-base 2 | 3 | ENV LANG C.UTF-8 4 | 5 | RUN apt-get update && \ 6 | apt-get install --yes --no-install-recommends \ 7 | build-essential \ 8 | python3 python3-dev python3-pip python3-setuptools python3-venv \ 9 | swig portaudio19-dev libatlas-base-dev \ 10 | fakeroot 11 | 12 | FROM ubuntu-base as base-amd64 13 | 14 | FROM ubuntu-base as base-armv7 15 | 16 | FROM ubuntu-base as base-arm64 17 | 18 | ARG TARGETARCH 19 | ARG TARGETVARIANT 20 | FROM base-$TARGETARCH$TARGETVARIANT -------------------------------------------------------------------------------- /docker/multiarch_build/Dockerfile.build.amd64: -------------------------------------------------------------------------------- 1 | FROM ubuntu:eoan 2 | 3 | ENV LANG C.UTF-8 4 | 5 | RUN apt-get update && \ 6 | apt-get install --yes --no-install-recommends \ 7 | build-essential \ 8 | python3 python3-dev python3-pip python3-setuptools python3-venv \ 9 | swig portaudio19-dev libatlas-base-dev \ 10 | fakeroot -------------------------------------------------------------------------------- /docker/multiarch_build/Dockerfile.build.arm64: -------------------------------------------------------------------------------- 1 | FROM balenalib/aarch64-debian-python:3.7.6-buster-build 2 | 3 | ENV LANG C.UTF-8 4 | 5 | RUN install_packages swig portaudio19-dev libatlas-base-dev \ 6 | fakeroot -------------------------------------------------------------------------------- /docker/multiarch_build/Dockerfile.build.armv6: -------------------------------------------------------------------------------- 1 | FROM balenalib/armv6-debian-python:3.7.6-buster-build 2 | 3 | ENV LANG C.UTF-8 4 | 5 | RUN install_packages swig portaudio19-dev libatlas-base-dev \ 6 | fakeroot 
-------------------------------------------------------------------------------- /docker/multiarch_build/Dockerfile.build.armv7: -------------------------------------------------------------------------------- 1 | FROM balenalib/armv7hf-debian-python:3.7.6-buster-build 2 | 3 | ENV LANG C.UTF-8 4 | 5 | RUN install_packages swig portaudio19-dev libatlas-base-dev \ 6 | fakeroot -------------------------------------------------------------------------------- /docker/multiarch_build/Dockerfile.debian: -------------------------------------------------------------------------------- 1 | FROM ubuntu:eoan as base 2 | 3 | ENV LANG C.UTF-8 4 | 5 | RUN apt-get update && \ 6 | apt-get install --yes --no-install-recommends \ 7 | build-essential \ 8 | python3 python3-dev python3-pip python3-setuptools python3-venv \ 9 | swig portaudio19-dev libatlas-base-dev \ 10 | fakeroot 11 | 12 | RUN pip3 install pyinstaller 13 | 14 | FROM base as base-amd64 15 | 16 | FROM base as base-armv7 17 | 18 | FROM base as base-arm64 19 | 20 | ARG TARGETARCH 21 | ARG TARGETVARIANT 22 | FROM base-$TARGETARCH$TARGETVARIANT 23 | 24 | ENV APP_DIR=/usr/lib/voice2json 25 | ENV BUILD_DIR=/build 26 | 27 | COPY VERSION ${BUILD_DIR}/ 28 | 29 | # Each RUN starts a fresh shell, so variables exported in one RUN are gone in the next. 30 | # Save the package settings to a file that every later packaging step sources. 31 | # Plain assignments (no export) keep them out of the environment seen by make. 32 | RUN DEBIAN_ARCH="$(dpkg-architecture | grep DEB_BUILD_ARCH= | sed -e 's/[^=]\+=//')" && \ 33 | VERSION="$(cat ${BUILD_DIR}/VERSION)" && \ 34 | PACKAGE_NAME="voice2json_${VERSION}_${DEBIAN_ARCH}" && \ 35 | printf "DEBIAN_ARCH='%s'\nPACKAGE_NAME='%s'\nDEBIAN_DIR='%s'\n" \ 36 | "${DEBIAN_ARCH}" "${PACKAGE_NAME}" "${BUILD_DIR}/${PACKAGE_NAME}" \ 37 | > ${BUILD_DIR}/debian.env 38 | 39 | RUN . ${BUILD_DIR}/debian.env && \ 40 | mkdir -p ${DEBIAN_DIR}/DEBIAN 41 | COPY debian/control.in / 42 | 43 | RUN . ${BUILD_DIR}/debian.env && \ 44 | sed -e "s/@DEBIAN_ARCH@/${DEBIAN_ARCH}/" < /control.in > ${DEBIAN_DIR}/DEBIAN/control 45 | 46 | # Directory of prebuilt tools 47 | COPY download/ ${BUILD_DIR}/download/ 48 | 49 | # Cache pip downloads 50 | COPY requirements.txt ${BUILD_DIR}/ 51 | RUN pip3 download --dest /pipcache pip wheel setuptools 52 | RUN pip3 download --dest /pipcache -r ${BUILD_DIR}/requirements.txt 53 | 54 | COPY m4/ ${BUILD_DIR}/m4/ 55 | COPY
configure config.sub config.guess \ 56 | install-sh missing aclocal.m4 \ 57 | Makefile.in setup.py.in voice2json.sh.in ${BUILD_DIR}/ 58 | 59 | RUN cd ${BUILD_DIR} && \ 60 | ./configure --prefix=${APP_DIR} 61 | 62 | COPY scripts/install/ ${BUILD_DIR}/scripts/install/ 63 | 64 | COPY etc/profile.defaults.yml ${BUILD_DIR}/etc/ 65 | COPY etc/precise/ ${BUILD_DIR}/etc/precise/ 66 | COPY site/ ${BUILD_DIR}/site/ 67 | 68 | COPY README.md LICENSE ${BUILD_DIR}/ 69 | COPY voice2json/ ${BUILD_DIR}/voice2json/ 70 | 71 | # Build and install into APP_DIR, then move the result into the staged package tree 72 | RUN . ${BUILD_DIR}/debian.env && \ 73 | export VIRTUALENV_FLAGS='--copies' && \ 74 | export PIP_INSTALL_ARGS='-f /pipcache --no-index' && \ 75 | cd ${BUILD_DIR} && \ 76 | make && \ 77 | make install && \ 78 | mkdir -p ${DEBIAN_DIR}${APP_DIR} && \ 79 | mv ${APP_DIR}/* ${DEBIAN_DIR}${APP_DIR}/ 80 | 81 | RUN . ${BUILD_DIR}/debian.env && \ 82 | mkdir -p ${DEBIAN_DIR}/usr/bin/ && \ 83 | cp ${DEBIAN_DIR}${APP_DIR}/bin/voice2json ${DEBIAN_DIR}/usr/bin/ 84 | 85 | # Copy libpython to virtual env 86 | # NOTE(review): /usr/local/lib is where images that compile their own Python put libpython; confirm the path exists on the ubuntu:eoan base used here. 87 | RUN . ${BUILD_DIR}/debian.env && \ 88 | cp -a /usr/local/lib/libpython3.7*.so* ${DEBIAN_DIR}${APP_DIR}/lib/ 89 | 90 | # Strip binaries and shared libraries, restricted to the staged package tree (best effort: failures are ignored) 91 | RUN . ${BUILD_DIR}/debian.env && \ 92 | { find "${DEBIAN_DIR}${APP_DIR}" -type f \( -name '*.so*' -or -executable \) -print0 | xargs -0 strip --strip-unneeded -- 2>/dev/null || true; } 93 | 94 | RUN . ${BUILD_DIR}/debian.env && \ 95 | cd ${BUILD_DIR} && \ 96 | fakeroot dpkg --build ${PACKAGE_NAME} -------------------------------------------------------------------------------- /docker/multiarch_build/Dockerfile.debian.amd64: -------------------------------------------------------------------------------- 1 | FROM python:3.7.7-stretch 2 | 3 | ENV LANG C.UTF-8 4 | 5 | RUN apt-get update && \ 6 | apt-get install --yes --no-install-recommends \ 7 | build-essential \ 8 | python3 python3-dev python3-pip python3-setuptools python3-venv \ 9 | swig portaudio19-dev libatlas-base-dev \ 10 | fakeroot -------------------------------------------------------------------------------- /docker/multiarch_build/Dockerfile.debian.arm64: -------------------------------------------------------------------------------- 1 | FROM
balenalib/aarch64-debian-python:3.7.6-stretch-build 2 | 3 | ENV LANG C.UTF-8 4 | 5 | RUN install_packages swig portaudio19-dev libatlas-base-dev \ 6 | fakeroot -------------------------------------------------------------------------------- /docker/multiarch_build/Dockerfile.debian.armv6: -------------------------------------------------------------------------------- 1 | FROM balenalib/armv6-debian-python:3.7.6-stretch-build 2 | 3 | ENV LANG C.UTF-8 4 | 5 | RUN install_packages swig portaudio19-dev libatlas-base-dev \ 6 | fakeroot -------------------------------------------------------------------------------- /docker/multiarch_build/Dockerfile.debian.armv7: -------------------------------------------------------------------------------- 1 | FROM balenalib/armv7hf-debian-python:3.7.6-stretch-build 2 | 3 | ENV LANG C.UTF-8 4 | 5 | RUN install_packages swig portaudio19-dev libatlas-base-dev \ 6 | fakeroot -------------------------------------------------------------------------------- /docker/multiarch_build/Dockerfile.pyinstaller: -------------------------------------------------------------------------------- 1 | ARG BUILD_FROM 2 | FROM $BUILD_FROM 3 | 4 | ARG MAKE_THREADS=8 5 | ARG DEBIAN_ARCH 6 | ARG CPU_ARCH 7 | 8 | ENV LANG C.UTF-8 9 | 10 | COPY docker/multiarch_build/bin/qemu-* /usr/bin/ 11 | 12 | RUN apt-get update && \ 13 | apt-get install -y \ 14 | python3 python3-dev python3-pip python3-venv \ 15 | build-essential \ 16 | autoconf automake libtool \ 17 | bison swig \ 18 | libssl-dev libacl1-dev liblz4-dev libfuse-dev fuse pkg-config \ 19 | fakeroot build-essential wget \ 20 | zlib1g-dev libbz2-dev libncurses5-dev \ 21 | libreadline-dev liblzma-dev libsqlite3-dev \ 22 | curl subversion rsync \ 23 | libatlas-base-dev libatlas3-base gfortran \ 24 | sox git unzip python2.7 25 | 26 | # ----------------------------------------------------------------------------- 27 | 28 | RUN cd / && \ 29 | wget 
https://github.com/pyinstaller/pyinstaller/releases/download/v3.5/PyInstaller-3.5.tar.gz && \ 30 | tar -xf /PyInstaller-3.5.tar.gz 31 | 32 | RUN cd /PyInstaller-3.5/bootloader && \ 33 | python3 ./waf all --no-lsb 34 | 35 | RUN cd /PyInstaller-3.5 && \ 36 | python3 -m pip install -e . 37 | 38 | # ----------------------------------------------------------------------------- 39 | 40 | # Fake sudo 41 | COPY docker/multiarch_build/bin/sudo /usr/bin/ 42 | 43 | ENTRYPOINT ["bash"] 44 | -------------------------------------------------------------------------------- /docker/multiarch_build/Dockerfile.run: -------------------------------------------------------------------------------- 1 | FROM ubuntu:eoan as ubuntu-base 2 | 3 | ENV LANG C.UTF-8 4 | 5 | RUN apt-get update && \ 6 | apt-get install --yes --no-install-recommends \ 7 | python3 \ 8 | libportaudio2 libatlas3-base libgfortran4 \ 9 | ca-certificates \ 10 | perl sox alsa-utils espeak jq 11 | 12 | FROM ubuntu-base as base-amd64 13 | 14 | FROM ubuntu-base as base-armv7 15 | 16 | FROM ubuntu-base as base-arm64 17 | 18 | ARG TARGETARCH 19 | ARG TARGETVARIANT 20 | FROM base-$TARGETARCH$TARGETVARIANT 21 | -------------------------------------------------------------------------------- /docker/multiarch_build/Dockerfile.run.amd64: -------------------------------------------------------------------------------- 1 | FROM ubuntu:eoan 2 | 3 | ENV LANG C.UTF-8 4 | 5 | RUN apt-get update && \ 6 | apt-get install --yes --no-install-recommends \ 7 | python3 \ 8 | libportaudio2 libatlas3-base libgfortran4 \ 9 | ca-certificates \ 10 | perl sox alsa-utils espeak -------------------------------------------------------------------------------- /docker/multiarch_build/Dockerfile.run.arm64: -------------------------------------------------------------------------------- 1 | FROM balenalib/aarch64-debian-python:3.7.6-buster-run 2 | 3 | ENV LANG C.UTF-8 4 | 5 | RUN install_packages \ 6 | libportaudio2 libatlas3-base libgfortran4 \ 7 | 
ca-certificates \ 8 | perl sox alsa-utils espeak -------------------------------------------------------------------------------- /docker/multiarch_build/Dockerfile.run.armv6: -------------------------------------------------------------------------------- 1 | FROM balenalib/armv6-debian-python:3.7.6-buster-run 2 | 3 | ENV LANG C.UTF-8 4 | 5 | RUN install_packages \ 6 | libportaudio2 libatlas3-base libgfortran4 \ 7 | ca-certificates \ 8 | perl sox alsa-utils espeak -------------------------------------------------------------------------------- /docker/multiarch_build/Dockerfile.run.armv7: -------------------------------------------------------------------------------- 1 | FROM balenalib/armv7hf-debian-python:3.7.6-buster-run 2 | 3 | ENV LANG C.UTF-8 4 | 5 | RUN install_packages \ 6 | libportaudio2 libatlas3-base libgfortran4 \ 7 | ca-certificates \ 8 | perl sox alsa-utils espeak -------------------------------------------------------------------------------- /docker/multiarch_build/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: run debian 2 | DOCKER_REGISTRY ?= docker.io 3 | DOCKER_PLATFORMS ?= linux/amd64,linux/arm/v7,linux/arm64 4 | 5 | all: run debian 6 | 7 | run: 8 | docker buildx build . -f Dockerfile.run "--platform=$(DOCKER_PLATFORMS)" --tag "$(DOCKER_REGISTRY)/voice2json-run" --push 9 | 10 | debian: 11 | docker buildx build . 
-f Dockerfile.debian "--platform=$(DOCKER_PLATFORMS)" --tag "$(DOCKER_REGISTRY)/voice2json-debian" --push 12 | -------------------------------------------------------------------------------- /docker/multiarch_build/bin/sudo: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | "$@" -------------------------------------------------------------------------------- /docker/multiarch_build/requirements.txt: -------------------------------------------------------------------------------- 1 | pyinstaller 2 | -------------------------------------------------------------------------------- /docker/preprocess.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Very basic preprocessor that reads from stdin and prints to stdout. 4 | # 5 | # Lines starting with '# IFDEF FOO' ignore the lines until '# ENDIF' if $FOO is 6 | # empty in the environment. 7 | # 8 | # Within IFDEF/ENDIF blocks, lines starting with #! will have environment 9 | # variables expanded using envsubst and the #! prefix stripped. 10 | 11 | drop_line='' 12 | ifdef_regex='^# IFDEF (.+)$' 13 | reveal_regex='^#!' 14 | 15 | # read -r keeps backslashes literal, so escapes in the input (e.g. \( or \+) 16 | # and trailing line continuations reach the output unchanged. 17 | while read -r line || [ -n "${line}" ]; 18 | do 19 | if [[ "${line}" =~ ${ifdef_regex} ]]; then 20 | name="${BASH_REMATCH[1]}" 21 | if [[ -z "${!name}" ]]; then 22 | drop_line='1' 23 | fi 24 | 25 | # Don't output preprocessor directive 26 | continue 27 | elif [[ "${line}" == '# ENDIF' ]]; then 28 | drop_line='' 29 | 30 | # Don't output preprocessor directive 31 | continue 32 | fi 33 | 34 | if [[ -z "${drop_line}" ]]; then 35 | if [[ "${line}" =~ ${reveal_regex} ]]; then 36 | # Strip #!
prefix and expand environment variables 37 | line="$(printf '%s\n' "${line:2}" | envsubst)" 38 | fi 39 | 40 | # Output line (printf, because echo would treat a line such as "-n" as an option) 41 | printf '%s\n' "${line}" 42 | fi 43 | done 44 | -------------------------------------------------------------------------------- /docker/voice2json: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | tag='latest' 3 | update='' 4 | args=() 5 | 6 | # Loop on the argument count, not on "$1" being non-empty: an empty-string argument must be forwarded rather than ending the loop and dropping the arguments after it. 7 | while [[ $# -gt 0 ]]; do 8 | if [[ "$1" == '--update' ]]; then 9 | # Update Docker image 10 | update='1' 11 | else 12 | args+=("$1") 13 | fi 14 | 15 | shift 1 16 | done 17 | 18 | if [[ -n "${update}" ]]; then 19 | docker pull "synesthesiam/voice2json:${tag}" 20 | fi 21 | 22 | # Run as the calling user with the home directory mounted so profile files are reachable and keep their ownership 23 | docker run -i \ 24 | --init \ 25 | -v "${HOME}:${HOME}" \ 26 | -v "/dev/shm/:/dev/shm/" \ 27 | -w "$(pwd)" \ 28 | -e "HOME=${HOME}" \ 29 | --user "$(id -u):$(id -g)" \ 30 | "synesthesiam/voice2json:${tag}" "${args[@]}" 31 | -------------------------------------------------------------------------------- /docs/CNAME: -------------------------------------------------------------------------------- 1 | voice2json.org -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /docs/data/LightState.fst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/synesthesiam/voice2json/03996c94113a615182adbbb1c83834124833579a/docs/data/LightState.fst -------------------------------------------------------------------------------- /docs/data/LightState.gram: -------------------------------------------------------------------------------- 1 | #JSGF V1.0; 2 | grammar LightState; 3 | 4 | public = ((turn (){state} [the] light)); 5 | = ((on | off)); 6 |
-------------------------------------------------------------------------------- /docs/data/intent.fst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/synesthesiam/voice2json/03996c94113a615182adbbb1c83834124833579a/docs/data/intent.fst -------------------------------------------------------------------------------- /docs/img/microphone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/synesthesiam/voice2json/03996c94113a615182adbbb1c83834124833579a/docs/img/microphone.png -------------------------------------------------------------------------------- /docs/img/mike-head.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/synesthesiam/voice2json/03996c94113a615182adbbb1c83834124833579a/docs/img/mike-head.png -------------------------------------------------------------------------------- /docs/img/output_21_0.svg: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 9 | 10 | FST 11 | 12 | 13 | 14 | 0 15 | 16 | 0 17 | 18 | 19 | 20 | 1 21 | 22 | 1 23 | 24 | 25 | 26 | 0->1 27 | 28 | 29 | hello:hello 30 | 31 | 32 | 33 | 2 34 | 35 | 36 | 2 37 | 38 | 39 | 40 | 1->2 41 | 42 | 43 | world:world 44 | 45 | 46 | 47 | 2->0 48 | 49 | 50 | <eps>:nope 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /docs/img/v2_architecture.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | blockdiag 10 | blockdiag { 11 | "Web Interface" [shape="cloud"] 12 | "Dialogue Manager" [shape="roundedbox", color="yellow"] 13 | "Sub-System Actors" [shape="roundedbox", stacked] 14 | 15 | "Web Interface" <-> "Dialogue Manager" <-> "Sub-System Actors"; 16 | } 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | Web Interface 25 | 26 | Dialogue Manager 
27 | 28 | 29 | 30 | Sub-System Actors 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /docs/img/v2_sentences.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | blockdiag 10 | blockdiag { 11 | "sentences.ini" [color="yellow"] 12 | "JSGF Grammars" [stacked] 13 | "Speech Recognizer" [color="#CCCCFF"] 14 | "Intent Recognizer" [color="#CCCCFF"] 15 | 16 | "sentences.ini" -> "JSGF Grammars"; 17 | "JSGF Grammars" -> "Language Model"; 18 | "JSGF Grammars" -> "Dictionary"; 19 | "JSGF Grammars" -> "Intent Recognizer"; 20 | 21 | "Language Model" -> "Speech Recognizer"; 22 | "Dictionary" -> "Speech Recognizer"; 23 | 24 | "Speech Recognizer" -> "Intent Recognizer"; 25 | } 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | sentences.ini 37 | 38 | 39 | 40 | JSGF Grammars 41 | 42 | Speech Recognizer 43 | 44 | Language Model 45 | 46 | Dictionary 47 | 48 | Intent Recognizer 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /docs/img/voice2json-inverted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/synesthesiam/voice2json/03996c94113a615182adbbb1c83834124833579a/docs/img/voice2json-inverted.png -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs>=1.1 2 | mkdocs-material 3 | -------------------------------------------------------------------------------- /etc/bin/voice2json: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 3 | bash 
"${this_dir}/../../voice2json.sh" "$@" 4 | -------------------------------------------------------------------------------- /etc/kaldi-src-configure.patch: -------------------------------------------------------------------------------- 1 | 1188a1189 2 | > aarch64) cat makefiles/linux_atlas_aarch64.mk ;; 3 | -------------------------------------------------------------------------------- /etc/linux_atlas_aarch64.mk: -------------------------------------------------------------------------------- 1 | # ATLAS specific Linux ARM configuration 2 | 3 | ifndef DOUBLE_PRECISION 4 | $(error DOUBLE_PRECISION not defined.) 5 | endif 6 | ifndef OPENFSTINC 7 | $(error OPENFSTINC not defined.) 8 | endif 9 | ifndef OPENFSTLIBS 10 | $(error OPENFSTLIBS not defined.) 11 | endif 12 | ifndef ATLASINC 13 | $(error ATLASINC not defined.) 14 | endif 15 | ifndef ATLASLIBS 16 | $(error ATLASLIBS not defined.) 17 | endif 18 | 19 | CXXFLAGS = -std=c++11 -I.. -isystem $(OPENFSTINC) -O1 $(EXTRA_CXXFLAGS) \ 20 | -Wall -Wno-sign-compare -Wno-unused-local-typedefs \ 21 | -Wno-deprecated-declarations -Winit-self \ 22 | -DKALDI_DOUBLEPRECISION=$(DOUBLE_PRECISION) \ 23 | -DHAVE_EXECINFO_H=1 -DHAVE_CXXABI_H -DHAVE_ATLAS -I$(ATLASINC) \ 24 | -ftree-vectorize -pthread \ 25 | -g # -O0 -DKALDI_PARANOID 26 | 27 | ifeq ($(KALDI_FLAVOR), dynamic) 28 | CXXFLAGS += -fPIC 29 | endif 30 | 31 | # Compiler specific flags 32 | COMPILER = $(shell $(CXX) -v 2>&1) 33 | ifeq ($(findstring clang,$(COMPILER)),clang) 34 | # Suppress annoying clang warnings that are perfectly valid per spec. 
35 | CXXFLAGS += -Wno-mismatched-tags 36 | endif 37 | 38 | LDFLAGS = $(EXTRA_LDFLAGS) $(OPENFSTLDFLAGS) -rdynamic 39 | LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl 40 | -------------------------------------------------------------------------------- /etc/precise/athena.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/synesthesiam/voice2json/03996c94113a615182adbbb1c83834124833579a/etc/precise/athena.pb -------------------------------------------------------------------------------- /etc/precise/athena.pb.params: -------------------------------------------------------------------------------- 1 | {"window_t": 0.1, "hop_t": 0.05, "buffer_t": 1.5, "sample_rate": 16000, "sample_depth": 2, "n_mfcc": 13, "n_filt": 20, "n_fft": 512, "use_delta": false, "vectorizer": 2} -------------------------------------------------------------------------------- /etc/precise/christopher-precise.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/synesthesiam/voice2json/03996c94113a615182adbbb1c83834124833579a/etc/precise/christopher-precise.pb -------------------------------------------------------------------------------- /etc/precise/christopher-precise.pb.params: -------------------------------------------------------------------------------- 1 | {"window_t": 0.1, "hop_t": 0.05, "buffer_t": 1.5, "sample_rate": 16000, "sample_depth": 2, "n_mfcc": 13, "n_filt": 20, "n_fft": 512} 2 | -------------------------------------------------------------------------------- /etc/precise/computer-en.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/synesthesiam/voice2json/03996c94113a615182adbbb1c83834124833579a/etc/precise/computer-en.pb -------------------------------------------------------------------------------- /etc/precise/hey-mycroft-2.pb: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/synesthesiam/voice2json/03996c94113a615182adbbb1c83834124833579a/etc/precise/hey-mycroft-2.pb -------------------------------------------------------------------------------- /etc/precise/hey-mycroft-2.pb.params: -------------------------------------------------------------------------------- 1 | {"window_t": 0.1, "hop_t": 0.05, "buffer_t": 1.5, "sample_rate": 16000, "sample_depth": 2, "n_mfcc": 13, "n_filt": 20, "n_fft": 512, "use_delta": false} -------------------------------------------------------------------------------- /etc/precise/marvin.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/synesthesiam/voice2json/03996c94113a615182adbbb1c83834124833579a/etc/precise/marvin.pb -------------------------------------------------------------------------------- /etc/precise/marvin.pb.params: -------------------------------------------------------------------------------- 1 | {"window_t": 0.1, "hop_t": 0.05, "buffer_t": 1.5, "sample_rate": 16000, "sample_depth": 2, "n_mfcc": 13, "n_filt": 20, "n_fft": 512, "use_delta": false, "vectorizer": 2, "threshold_config": [[6, 4]], "threshold_center": 0.2} -------------------------------------------------------------------------------- /etc/precise/sheila-en.params: -------------------------------------------------------------------------------- 1 | {"window_t": 0.1, "hop_t": 0.05, "buffer_t": 1.5, "sample_rate": 16000, "sample_depth": 2, "n_mfcc": 13, "n_filt": 20, "n_fft": 512, "use_delta": false, "vectorizer": 2, "threshold_config": [[6, 4]], "threshold_center": 0.2} -------------------------------------------------------------------------------- /etc/precise/sheila-en.pb: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/synesthesiam/voice2json/03996c94113a615182adbbb1c83834124833579a/etc/precise/sheila-en.pb -------------------------------------------------------------------------------- /etc/profiles/ca-es_pocketsphinx-cmu.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Catalan Pocketsphinx model" 3 | default: 4 | LICENSE: 5 | bytes: 537 6 | sha256: da543899f829027ffb9ded48093a47083586c1e821ccc0c3900c63092c9226bc 7 | acoustic_model/feat.params: 8 | bytes: 110 9 | sha256: 6ad725f620b3f0c1d82bba878498630e8be4fa1f0b0bf035a7fe9ffb36f5a735 10 | acoustic_model/mdef: 11 | bytes: 3211695 12 | sha256: 6b56249aebb104641ea43013e3e7492bf9d250a586c2bcf76ba1658a63389eb9 13 | acoustic_model/means: 14 | bytes: 40520132 15 | sha256: c89f45c7509e41351d2500014f082ac130f41d3f7022bea45316e9b5d4719897 16 | acoustic_model/mixture_weights: 17 | bytes: 1039040 18 | sha256: e272692a298d544b009f0460cd15f253d7f9225654ace188fb4f2c661c3a5b57 19 | acoustic_model/noisedict: 20 | bytes: 28 21 | sha256: 03f12722d48d01c5055a51c280fcd0e8db854a489ce4fb2b7f74eb0a3f7cca2f 22 | acoustic_model/transition_matrices: 23 | bytes: 1936 24 | sha256: 548f1d51b386a01df0d4cd48a0a7e88aa3baebd2a29a153ad0f708177df65d76 25 | acoustic_model/variances: 26 | bytes: 40520132 27 | sha256: ca1ce9a64f87a2102df6275bce8cfc655d46a7ec1fe8eb588359f076d7f09de9 28 | base_dictionary.txt.gz: 29 | bytes: 923136 30 | sha256: f20a16a27cce5d453664604451b78f209789ced3fd140c106513b56097a8e411 31 | file-name: base_dictionary.txt 32 | profile.yml: 33 | bytes: 445 34 | sha256: 6e4105307c07152262136732a8891002c393a728f759b6f7f794aa1c19f7f72f 35 | slot_programs/rhasspy/number: 36 | bytes: 651 37 | sha256: d1f10ccea552c2c9ba2a59dd69705f8e82545592c7db5ca7078b462ed9d08387 38 | stop_words.txt: 39 | bytes: 4532 40 | sha256: 69c43ce97fdc651aa01d3a5bb8aa98d7bfcdea9cfa9823b624dcc4360bff2a3a 41 | 42 | # Files needed to guess word pronunciations 43 | grapheme-to-phoneme: 
44 | g2p.fst.gz: 45 | bytes: 10361119 46 | sha256: ee45c4d557835bef666b6bed5cb3a21503a1c0ee1f7ebfb515d857f4cf88ea8a 47 | file-name: g2p.fst 48 | 49 | # Files needed for open transcription (pre-built model) 50 | open-transcription: 51 | base_language_model.txt.gz: 52 | bytes: 1746859 53 | sha256: 63334c9b435380f7d18951b3cfed47a3564ecddba3ac38a5ebe1daf71b21b52a 54 | file-name: base_language_model.txt 55 | 56 | # Files needed for mixed language modeling (pre-built + custom) 57 | mixed-language-model: 58 | base_language_model.fst.gz: 59 | bytes: 2181120 60 | sha256: 22810ea1bd810d8f9af52f25a2964ad2bccc95e0e375fb7a27548aa0fe07bbdd 61 | file-name: base_language_model.fst 62 | 63 | # Files needed for text to speech 64 | text-to-speech: 65 | espeak_phonemes.txt: 66 | bytes: 172 67 | sha256: 0d8070b738a56a3c86cb2e12423ff42bed7062308d62f68ac8d837adacf13eca 68 | 69 | # Sample sentences, etc. 70 | examples: 71 | sentences.ini: 72 | bytes: 330 73 | sha256: b669b6416e9f9e1d0b718c55720907e6b5231766c6af5448dfa12895aba3aac4 74 | -------------------------------------------------------------------------------- /etc/profiles/de_deepspeech-aashishag.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "German DeepSpeech v0.9 model from https://github.com/AASHISHAG/deepspeech-german" 3 | url_format: "https://raw.githubusercontent.com/synesthesiam/{profile}/3f7b39e7f7a12a7af6dbb9ccf986506495c67b99/{file}" 4 | 5 | default: 6 | LICENSE: 7 | bytes: 11359 8 | sha256: 600cc67cc4cb2f5ea317dcfc687ad1c74dc4bec8782bbe9db0afd83513b935b7 9 | model/alphabet.txt: 10 | bytes: 339 11 | sha256: 572929130cecccf7d56789805178c07593356b83d18d2424a967e945e935c634 12 | model/output_graph.pbmm.gz.part-00: 13 | bytes: 26214400 14 | file-name: output_graph.pbmm 15 | platform: 16 | - machine: x86_64 17 | sha256: eadafd73dfa06b26a4420d6902224fb461b8b1c4c89c3580cca56556e34f788e 18 | model/output_graph.pbmm.gz.part-01: 19 | bytes: 26214400 20 | file-name: 
output_graph.pbmm 21 | platform: 22 | - machine: x86_64 23 | sha256: c4faf7dbc4dee7e8609033bcc65adf1ea842aab7c373717477b45f5de8194e67 24 | model/output_graph.pbmm.gz.part-02: 25 | bytes: 26214400 26 | file-name: output_graph.pbmm 27 | platform: 28 | - machine: x86_64 29 | sha256: 057935d17fc216b936d35bdf6914cf2c33a94839ee67588b037c49ca8e6e471c 30 | model/output_graph.pbmm.gz.part-03: 31 | bytes: 26214400 32 | file-name: output_graph.pbmm 33 | platform: 34 | - machine: x86_64 35 | sha256: 1f668d4000544e494800587e9017ed26d8d2732c7b926134f2d6a5d043cf6d1e 36 | model/output_graph.pbmm.gz.part-04: 37 | bytes: 26214400 38 | file-name: output_graph.pbmm 39 | platform: 40 | - machine: x86_64 41 | sha256: 1e7bea81a20cf938c011de5b3453c176e430ae34d21d0813c9169322f8fee9f7 42 | model/output_graph.pbmm.gz.part-05: 43 | bytes: 26214400 44 | file-name: output_graph.pbmm 45 | platform: 46 | - machine: x86_64 47 | sha256: a20a282d42fb830c853d38c272edd78ebb90fc3af0fb563d1025a3c528d573f1 48 | model/output_graph.pbmm.gz.part-06: 49 | bytes: 18082753 50 | file-name: output_graph.pbmm 51 | platform: 52 | - machine: x86_64 53 | sha256: b0df632b983b7ae4e26240c0b1264b40c4a1b8b8457c1dbec627b37524ece282 54 | model/output_graph.tflite.gz: 55 | bytes: 20516562 56 | file-name: output_graph.tflite 57 | platform: 58 | - machine: armv6l 59 | - machine: armv7l 60 | - machine: arm64 61 | - machine: aarch64 62 | sha256: 13e4db8fb511d050b95a1411415ec9cefe06bd62c7ef0257cbf2de5520b96d2f 63 | profile.yml: 64 | bytes: 927 65 | sha256: ee9dec98c29027abcfa6f391f7baffca91ded9fc8e3de3387ba4fb4ae1411743 66 | stop_words.txt: 67 | bytes: 1357 68 | sha256: c5cd6233fe22514361b6bd2ba174195ed17c7a0626ca58fec662ac8914afde2b 69 | 70 | # Files needed for open transcription (pre-built model) 71 | open-transcription: 72 | model/base.scorer.gz.part-00: 73 | bytes: 26214400 74 | file-name: base.scorer 75 | sha256: c566666a1c6cf46771cb95a74a90c21b3ad21a1b3f38b6976dfbc34c7a33ac58 76 | model/base.scorer.gz.part-01: 77 | bytes: 
26214400 78 | file-name: base.scorer 79 | sha256: 031f97808c3e26c0b9f883f0d9295f930eb77e4c2fee4565c69fcf46d8277995 80 | model/base.scorer.gz.part-02: 81 | bytes: 26214400 82 | file-name: base.scorer 83 | sha256: a8f9db01745564effbe8aee3d072891c38b1e2dd0a2cb666c4b1aa4b568d9aba 84 | model/base.scorer.gz.part-03: 85 | bytes: 26214400 86 | file-name: base.scorer 87 | sha256: c609ef28c4598604c7ffcf0561a4c90a1245b821e30eff2b059c9b24f8096c69 88 | model/base.scorer.gz.part-04: 89 | bytes: 26214400 90 | file-name: base.scorer 91 | sha256: 34732151c1db99de8adc50f70017881efffec422ba9147e6e941fad95d37b8c0 92 | model/base.scorer.gz.part-05: 93 | bytes: 26214400 94 | file-name: base.scorer 95 | sha256: 6fb326b8e41aa14edc9ff4f9163e58f78b4cd49108990f15aeeacb39964fe087 96 | model/base.scorer.gz.part-06: 97 | bytes: 26214400 98 | file-name: base.scorer 99 | sha256: 6dd5e77247a7693f31fb33cfa31bc6fb91dcedd93b122226d0a2b26212878070 100 | model/base.scorer.gz.part-07: 101 | bytes: 26214400 102 | file-name: base.scorer 103 | sha256: cf9dee9ef23c0089b534574bad7c724492e7c3864e0e40e497522a162a38a19b 104 | model/base.scorer.gz.part-08: 105 | bytes: 8657715 106 | file-name: base.scorer 107 | sha256: 37afd4a38325dd6ae7ef79ecef534eea478cb044dd45e18d9cc6990b08cc4316 108 | 109 | # Files needed for mixed language modeling (pre-built + custom) 110 | mixed-language-model: 111 | base_language_model.fst.gz: 112 | bytes: 46294251 113 | file-name: base_language_model.fst 114 | sha256: 665aac1f57aaa1d7a4c4a95a8ce12e027f0ac70f134d04b0557690c210011ffe 115 | 116 | # Files needed for text to speech 117 | text-to-speech: 118 | marytts/marytts-lang-de-5.2.jar: 119 | bytes: 5335101 120 | sha256: 8d6c632844faa00ef4e0694a25cc37a5458f2337e09131db977cd91031d4e10b 121 | marytts/voice-bits1-hsmm-5.2.jar: 122 | bytes: 1361946 123 | sha256: 4507c1e3bf429fb0fa985841680f71a571020bbb519281c07cee7f38dfa825f7 124 | 125 | # Sample sentences, etc. 
126 | examples: 127 | sentences.ini: 128 | bytes: 326 129 | sha256: be7ec036ee001e026d455966db34868edb3559acfb6a64c9a18f19784d560d48 130 | -------------------------------------------------------------------------------- /etc/profiles/de_deepspeech-jaco.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "German DeepSpeech v0.9 model from https://gitlab.com/Jaco-Assistant" 3 | url_format: "https://raw.githubusercontent.com/rhasspy/{profile}/e34746469e69b62faa12268b4ceca678ac7803f9/{file}" 4 | 5 | default: 6 | LICENSE: 7 | bytes: 7652 8 | sha256: e3a994d82e644b03a792a930f574002658412f62407f5fee083f2555c5f23118 9 | model/alphabet.txt: 10 | bytes: 329 11 | sha256: a29eab570b46a5aa60e2bfeaa40fb15ff0b4ce995bc16ba957ddf1a5e1f35304 12 | model/output_graph.pbmm.gz.part-00: 13 | bytes: 26214400 14 | file-name: output_graph.pbmm 15 | platform: 16 | - machine: x86_64 17 | sha256: bded311b7c36057bd1a464ca01e6b4b585757de3702814b60b7e32bc4b8f66a1 18 | model/output_graph.pbmm.gz.part-01: 19 | bytes: 26214400 20 | file-name: output_graph.pbmm 21 | platform: 22 | - machine: x86_64 23 | sha256: 9c428733d6635866ce448416a4422a669315afb06801d91b8234252b0521648d 24 | model/output_graph.pbmm.gz.part-02: 25 | bytes: 26214400 26 | file-name: output_graph.pbmm 27 | platform: 28 | - machine: x86_64 29 | sha256: 1b7ff77cb53835cc3176eac6338bfd75eccea5465c03f8e927e6c5870c89d10f 30 | model/output_graph.pbmm.gz.part-03: 31 | bytes: 26214400 32 | file-name: output_graph.pbmm 33 | platform: 34 | - machine: x86_64 35 | sha256: 8760375f397dc0704eaae4d7c3b3ec858835cc8fa48b625cfe7a02009902ede8 36 | model/output_graph.pbmm.gz.part-04: 37 | bytes: 26214400 38 | file-name: output_graph.pbmm 39 | platform: 40 | - machine: x86_64 41 | sha256: 83ff4a76c09b2ed5be4efa7ed40256db7926afb07ba668831cd9790d97dc9669 42 | model/output_graph.pbmm.gz.part-05: 43 | bytes: 26214400 44 | file-name: output_graph.pbmm 45 | platform: 46 | - machine: x86_64 47 | sha256: 
b3991aae67b24402f685996b1b92f7b431ee2e6c58ebed2272f7631415dffc9e 48 | model/output_graph.pbmm.gz.part-06: 49 | bytes: 18096075 50 | file-name: output_graph.pbmm 51 | platform: 52 | - machine: x86_64 53 | sha256: 10be806ae4fac1618ce8703f54a03549ffafc367e35fc2ceb19e1dffc32af61c 54 | model/output_graph.tflite.gz: 55 | bytes: 20296313 56 | file-name: output_graph.tflite 57 | platform: 58 | - machine: armv6l 59 | - machine: armv7l 60 | - machine: arm64 61 | - machine: aarch64 62 | sha256: f5fed575e8f1a6244ad824a753e0803290e268dc19303140ac82e67528bdd8df 63 | profile.yml: 64 | bytes: 972 65 | sha256: fab1a8730a14ec1a743bc27506b841e785c761d01f24b1162f200a33912e295c 66 | stop_words.txt: 67 | bytes: 1357 68 | sha256: c5cd6233fe22514361b6bd2ba174195ed17c7a0626ca58fec662ac8914afde2b 69 | 70 | # Files needed for open transcription (pre-built model) 71 | open-transcription: 72 | model/base.scorer.gz.part-00: 73 | bytes: 26214400 74 | file-name: base.scorer 75 | sha256: f74cf842a4212dde12105ded3a68b4b5b9a03f356373010d99b09cd0985e2ebd 76 | model/base.scorer.gz.part-01: 77 | bytes: 26214400 78 | file-name: base.scorer 79 | sha256: 2ac5f3445a7e45247d7a931e07f3bbf9d2915a6a72a5bab9353ad87876962997 80 | model/base.scorer.gz.part-02: 81 | bytes: 26214400 82 | file-name: base.scorer 83 | sha256: 5a522d9b7863e89ad5f700dc749f02c04870aa0e2a01a11e93ad5e5be8d23c83 84 | model/base.scorer.gz.part-03: 85 | bytes: 26214400 86 | file-name: base.scorer 87 | sha256: 19099457087f642d88fcc75fc62ab07559ec5371c4307a06e908a776b6a294e3 88 | model/base.scorer.gz.part-04: 89 | bytes: 26214400 90 | file-name: base.scorer 91 | sha256: 066a76d1d0c47b5cc4c25b3f3d783d8b0bdb4ebdd01bb6464f8749d61582c8ac 92 | model/base.scorer.gz.part-05: 93 | bytes: 26214400 94 | file-name: base.scorer 95 | sha256: 30b289d22c54d69b6b76c4891f076abe7358d3edc97044e4cc143b40e950907b 96 | model/base.scorer.gz.part-06: 97 | bytes: 26214400 98 | file-name: base.scorer 99 | sha256: 
3f3755c1073d64e988d5cb7f474059a86ecff2a53aef018a9acb6f6f2c4bf02a 100 | model/base.scorer.gz.part-07: 101 | bytes: 26214400 102 | file-name: base.scorer 103 | sha256: 02da8a28577ce6977841690d304577f68179148cc40473a413a9fc88d364e44f 104 | model/base.scorer.gz.part-08: 105 | bytes: 20189647 106 | file-name: base.scorer 107 | sha256: a96fe347935cea6eb16b0359d50845c81283afe4bcf4c820287c7599a60079b5 108 | 109 | # Files needed for mixed language modeling (pre-built + custom) 110 | mixed-language-model: 111 | base_language_model.fst.gz: 112 | bytes: 46294251 113 | file-name: base_language_model.fst 114 | sha256: 665aac1f57aaa1d7a4c4a95a8ce12e027f0ac70f134d04b0557690c210011ffe 115 | 116 | # Files needed for text to speech 117 | text-to-speech: 118 | marytts/marytts-lang-de-5.2.jar: 119 | bytes: 5335101 120 | sha256: 8d6c632844faa00ef4e0694a25cc37a5458f2337e09131db977cd91031d4e10b 121 | marytts/voice-bits1-hsmm-5.2.jar: 122 | bytes: 1361946 123 | sha256: 4507c1e3bf429fb0fa985841680f71a571020bbb519281c07cee7f38dfa825f7 124 | 125 | # Sample sentences, etc. 
126 | examples: 127 | sentences.ini: 128 | bytes: 326 129 | sha256: be7ec036ee001e026d455966db34868edb3559acfb6a64c9a18f19784d560d48 130 | -------------------------------------------------------------------------------- /etc/profiles/de_pocketsphinx-cmu.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "German Pocketsphinx model" 3 | 4 | default: 5 | acoustic_model/feat.params: 6 | bytes: 112 7 | sha256: 44ec61d22acd9e9ba8f49fee48800a06da5139d92d3491a35d0660d8a4349838 8 | acoustic_model/feature_transform: 9 | bytes: 4568 10 | sha256: 9f41f154de64b02bd5d4025ec0bc259e5992025da944e15a87cf58d6097bd542 11 | acoustic_model/mdef: 12 | bytes: 5125554 13 | sha256: 0a0bbe49c8b769f35c48e82b2e147c1d331f2fe73847bc473b264784fd98b993 14 | acoustic_model/means: 15 | bytes: 11509124 16 | sha256: e05bfc3190ceb9186da4416a1a213b65199823f806ce04df1d0a03ecb23a087e 17 | acoustic_model/mixture_weights: 18 | bytes: 396928 19 | sha256: 9da2c2c56a4963f2b6082fb219a64c84f1cdac562c35b1b7e223157bff8f3104 20 | acoustic_model/noisedict: 21 | bytes: 27 22 | sha256: e5ba928c9f8950b4afce4ea84e6be0d219e878b1f926e5edcc5ca329d06eecc9 23 | acoustic_model/transition_matrices: 24 | bytes: 3280 25 | sha256: b89561e58dc1cda96c5c4ccf3cbdde060193503a33e116565a666ff7df0e3ce6 26 | acoustic_model/variances: 27 | bytes: 11509124 28 | sha256: da3c7a3310368da78eebd7e4f565ead76ca81756771a6bc75de39d41388db156 29 | base_dictionary.txt.gz: 30 | bytes: 264226 31 | sha256: 9b1156c13db366ac5c79b3aed0943c36d21c690d71032146677952f0d5d41ba4 32 | file-name: base_dictionary.txt 33 | profile.yml: 34 | bytes: 473 35 | sha256: 09e7cc09ce267dc35b6fb71a03d797a4a4234d6177cb4608f3e3de98739692a9 36 | slot_programs/rhasspy/number: 37 | bytes: 651 38 | sha256: d1f10ccea552c2c9ba2a59dd69705f8e82545592c7db5ca7078b462ed9d08387 39 | stop_words.txt: 40 | bytes: 1357 41 | sha256: c5cd6233fe22514361b6bd2ba174195ed17c7a0626ca58fec662ac8914afde2b 42 | 43 | # Files needed to guess word 
pronunciations 44 | grapheme-to-phoneme: 45 | g2p.fst.gz: 46 | bytes: 4248814 47 | sha256: c3e13d35bc01ebc3b3be05a3b0383b86295b9e6b04f1ebf19f32969642b763cf 48 | file-name: g2p.fst 49 | 50 | # Files needed for open transcription (pre-built model) 51 | open-transcription: 52 | base_language_model.txt.gz: 53 | bytes: 35798869 54 | sha256: 0a6cba7fcadc1481dc266a6f4e921e79886389b6745dd1ff5c99586f0e843241 55 | file-name: base_language_model.txt 56 | 57 | # Files needed for mixed language modeling (pre-built + custom) 58 | mixed-language-model: 59 | base_language_model.fst.gz: 60 | bytes: 46294275 61 | sha256: ed84ae50db690c057b9f1e2024d94fbc004edc0ac819cf3ec64dcd9fef6bdcd1 62 | file-name: base_language_model.fst 63 | 64 | # Files needed for text to speech 65 | text-to-speech: 66 | espeak_phonemes.txt: 67 | bytes: 346 68 | sha256: 85341ad1a3d876e2232eecd2b8beaa276458e58deb931a31d5dab72f8666e643 69 | marytts/marytts-lang-de-5.2.jar: 70 | bytes: 5335101 71 | sha256: 8d6c632844faa00ef4e0694a25cc37a5458f2337e09131db977cd91031d4e10b 72 | marytts/voice-bits1-hsmm-5.2.jar: 73 | bytes: 1361946 74 | sha256: 4507c1e3bf429fb0fa985841680f71a571020bbb519281c07cee7f38dfa825f7 75 | 76 | # Sample sentences, etc. 
77 | examples: 78 | custom_words.txt: 79 | bytes: 90 80 | sha256: 8fef96cabf05265d849254909df60cea424e7dcdcd3230f1229e15d22d240816 81 | sentences.ini: 82 | bytes: 326 83 | sha256: be7ec036ee001e026d455966db34868edb3559acfb6a64c9a18f19784d560d48 84 | -------------------------------------------------------------------------------- /etc/profiles/el-gr_pocketsphinx-cmu.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Greek Pocketsphinx model from Alpha Cephei" 3 | 4 | default: 5 | LICENSE: 6 | bytes: 1525 7 | sha256: 5afb508bf4008c195c99084fc12b89abbc92271d0586ec5310ebaccda99bb071 8 | acoustic_model/feat.params: 9 | bytes: 127 10 | sha256: 187e5ed913fa86af61d155e27d3e7c6b928b4e4fa15621d68b009272bc8ef1da 11 | acoustic_model/feature_transform: 12 | bytes: 5660 13 | sha256: 25afe9fafd0cb0cad8931595395c59a1166447f9d9d14bff9aac1a121c0706db 14 | acoustic_model/mdef: 15 | bytes: 3223425 16 | sha256: 9938784a4e48c5c0afd208b47a8a0e7ff38d87a84b4c0e708b28891dc38e99fc 17 | acoustic_model/means: 18 | bytes: 23510084 19 | sha256: 02daa7ce237f83d546c7665e227fbddd9eb635b661771a0d133c4b3b54d3de2f 20 | acoustic_model/mixture_weights: 21 | bytes: 653120 22 | sha256: 6a47d659203c947658b61644836a9f613d5f3dcaa76bea52ffa7eb54b8a08a4f 23 | acoustic_model/noisedict: 24 | bytes: 60 25 | sha256: f9f30e70167dfb43605a1d68f86e15c0874655055e0488e34251789b668cf819 26 | acoustic_model/transition_matrices: 27 | bytes: 1696 28 | sha256: e23af3586cbd494b37b7e63c6ee07b95850c39d4fb2fb1bd590e17307a1c658c 29 | acoustic_model/variances: 30 | bytes: 23510084 31 | sha256: 859b08b7c08ece2ec769c22418b8d7278b37a6632b412581e2d1b1ccbe143640 32 | base_dictionary.txt.gz: 33 | bytes: 5106813 34 | file-name: base_dictionary.txt 35 | sha256: 77f8771a70acba30739d5cf2eb6a9146f9284bc00b5c456a5049252986a5de85 36 | profile.yml: 37 | bytes: 443 38 | sha256: f64712079b350a0ec01bd6543a80ebd08af5a130c3989a488bb231a75b279ddd 39 | slot_programs/rhasspy/number: 40 | 
bytes: 651 41 | sha256: d1f10ccea552c2c9ba2a59dd69705f8e82545592c7db5ca7078b462ed9d08387 42 | stop_words.txt: 43 | bytes: 1262 44 | sha256: 26ba491a836684b576d07d5e5cf7e1ca75ab718aff27aaf11f34645d55b36379 45 | 46 | # Files needed to guess word pronunciations 47 | grapheme-to-phoneme: 48 | g2p.fst.gz: 49 | bytes: 33826556 50 | file-name: g2p.fst 51 | sha256: 14f35ef10c7c4db468717985f81c66eeb5ab65567971fa38cad04ae34b3d1c6c 52 | 53 | # Files needed for open transcription (pre-built model) 54 | open-transcription: 55 | base_language_model.txt.gz: 56 | bytes: 30766031 57 | file-name: base_language_model.txt 58 | sha256: 80deef672ba20ff6a734b8f60e55d1f45c981edbed8315f97d2545a205545397 59 | 60 | # Files needed for mixed language modeling (pre-built + custom) 61 | mixed-language-model: 62 | base_language_model.fst.gz: 63 | bytes: 33975780 64 | file-name: base_language_model.fst 65 | sha256: 6e24b8fc6912b74f02bdbfc183ff9b0344770eeab0f2629d3912697e99c47b8e 66 | 67 | # Files needed for text to speech 68 | text-to-speech: 69 | espeak_phonemes.txt: 70 | bytes: 150 71 | sha256: fb3dae2157c41b47817f91b1286400168a6e5574d73c8f6c15aa966bfec52662 72 | 73 | # Sample sentences, etc. 
74 | examples: 75 | sentences.ini: 76 | bytes: 288 77 | sha256: 0bfac551fa9922384ef666d17fd83f4a4ee5dad7fc46eed2fbe2d1af8e127d20 78 | -------------------------------------------------------------------------------- /etc/profiles/en-in_pocketsphinx-cmu.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Indian English Pocketsphinx model from Alpha Cephei" 3 | 4 | default: 5 | LICENSE: 6 | bytes: 1621 7 | sha256: dbc4656536419530472f36052573252fcfc6c81f69225b991333ab399cfe6ca9 8 | acoustic_model/feat.params: 9 | bytes: 110 10 | sha256: f66875cab13a64323b8288cab70025ad77cb24d0ef4d584669cf0b3e8b71b453 11 | acoustic_model/feature_transform: 12 | bytes: 5660 13 | sha256: 0a37abbe8dafa9f99a2da5355ce52e1a591bfed0c45b49cf2de20cbb335b4cdb 14 | acoustic_model/mdef: 15 | bytes: 9025339 16 | sha256: 219d8589f96c92687f37ba239fd1da50a42161cd4afb80cc712d7348eb8970fd 17 | acoustic_model/means: 18 | bytes: 23675972 19 | sha256: 9e859fd5f9ff805bba58034b27f770d80fc0ad5255d8d201c00a40833b5c3de7 20 | acoustic_model/mixture_weights: 21 | bytes: 657728 22 | sha256: 2f4aeb2b876d1a8085d2024067845e1b21869a72cc297b7322e525c2715c4495 23 | acoustic_model/noisedict: 24 | bytes: 28 25 | sha256: 062078d7f77ce73f1c5afdb9f70d1fc9dc20f64f9675447ea873966e5514b737 26 | acoustic_model/transition_matrices: 27 | bytes: 2272 28 | sha256: 917bb37a32f86a004c5a9c89524bffa3d010d43300f2a07e102109678b411cde 29 | acoustic_model/variances: 30 | bytes: 23675972 31 | sha256: 39563ec27626c813526bdbf4fe376c70a1d035be8d470c4dae01bc872bc666cb 32 | base_dictionary.txt.gz: 33 | bytes: 1585994 34 | file-name: base_dictionary.txt 35 | sha256: a52f36f8dae70ca281d05a361676db33f3470a306a1804306766b70f41715b7d 36 | profile.yml: 37 | bytes: 448 38 | sha256: 2a24fdd51df6f5bac21756048c921f43a6189c813916daf9c125fb23a74aa804 39 | slot_programs/rhasspy/number: 40 | bytes: 651 41 | sha256: d1f10ccea552c2c9ba2a59dd69705f8e82545592c7db5ca7078b462ed9d08387 42 | stop_words.txt: 
43 | bytes: 530 44 | sha256: c70d3a9220951c0d80c3bc83e751618a7384308162e52544f786b3406547d00b 45 | 46 | # Files needed to guess word pronunciations 47 | grapheme-to-phoneme: 48 | g2p.fst.gz: 49 | bytes: 18830007 50 | file-name: g2p.fst 51 | sha256: de57d66731ba94201d3ec3ed64bc6ade8dd340d77d718cb637f1f83a21726c81 52 | 53 | # Files needed for open transcription (pre-built model) 54 | open-transcription: 55 | base_language_model.txt.gz: 56 | bytes: 33456672 57 | file-name: base_language_model.txt 58 | sha256: c354a19b53b2e5bdef1736fb713c0ce632bcca675da967604fc8b140dcb616ee 59 | 60 | # Files needed for mixed language modeling (pre-built + custom) 61 | mixed-language-model: 62 | base_language_model.fst.gz: 63 | bytes: 37267042 64 | file-name: base_language_model.fst 65 | sha256: 49053b0d02d3edcc84974d9a6fcf10b773a52a7cdb815a450b61733ac452d37d 66 | 67 | # Files needed for text to speech 68 | text-to-speech: 69 | espeak_phonemes.txt: 70 | bytes: 219 71 | sha256: bd994fa117d0fe5880da761d36ad5ef3ec67115dcc9a6d6bb8d00399c9cfe24c 72 | 73 | # Sample sentences, etc. 74 | examples: 75 | sentences.ini: 76 | bytes: 538 77 | sha256: 839b4f949ab5248401920bf4290e8f13a479f17f37626cb5306d4db5d76faf6c 78 | -------------------------------------------------------------------------------- /etc/profiles/en-us_pocketsphinx-cmu.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "U.S. 
English Pocketsphinx model from Carnegie Mellon University" 3 | default: 4 | "LICENSE": 5 | bytes: 1537 6 | sha256: 49181202f2b991d25f6cac8cd1705994494b9600d4311794ecbb9fcf8b188aef 7 | "acoustic_model/feat.params": 8 | bytes: 129 9 | sha256: 9f4883ca1680270667565cec30a0b827081799617bb825ff6adace06f50c178f 10 | "acoustic_model/feature_transform": 11 | bytes: 5660 12 | sha256: 05cd0ef213623137b6ac76d72922776c8a14f252e032c4a3f331d41760ef30cc 13 | "acoustic_model/mdef": 14 | bytes: 6992233 15 | sha256: a100d7401e8d59ed597ea6083e0acca77e41637b7db6db66f43a0799a2eba840 16 | "acoustic_model/means": 17 | bytes: 23675972 18 | sha256: 10c8a3c1b0718bc786f4c82ba1824b22cd9cdc7ffb0589559c08c0189884f0f1 19 | "acoustic_model/mixture_weights": 20 | bytes: 657728 21 | sha256: a756459b78bfaf85ad59215c77caaf03f9e58e91956975e33de7b8179d551c1f 22 | "acoustic_model/noisedict": 23 | bytes: 113 24 | sha256: 394633701a17db97a512838a79de395020fc571e0537b0498b2179f727642a48 25 | "acoustic_model/transition_matrices": 26 | bytes: 2272 27 | sha256: 020e3a8998d12db0d02b620aed95ee1534676fd8e50afaad29d5b432f1e6f893 28 | "acoustic_model/variances": 29 | bytes: 23675972 30 | sha256: 66bc86ddf763cf27d6194247cbf4f2e912d78789ced246811e7f323f1e8280da 31 | "base_dictionary.txt.gz": 32 | bytes: 850548 33 | sha256: 82d0b1b38e791fcdff67d2dd1c3f57dbf15d5e6338ba3d4fc13a5d90cbe9f10d 34 | file-name: "base_dictionary.txt" 35 | "profile.yml": 36 | bytes: 594 37 | sha256: 4d03b85c245668ae0cc96c3f786661b6f80d1c968b399902064e66d403f0d0eb 38 | "slot_programs/rhasspy/number": 39 | bytes: 651 40 | sha256: d1f10ccea552c2c9ba2a59dd69705f8e82545592c7db5ca7078b462ed9d08387 41 | "stop_words.txt": 42 | bytes: 530 43 | sha256: c70d3a9220951c0d80c3bc83e751618a7384308162e52544f786b3406547d00b 44 | 45 | # Files needed to guess word pronunciations 46 | grapheme-to-phoneme: 47 | "g2p.fst.gz": 48 | bytes: 12796914 49 | sha256: c2e5c173e22931d5bee2c3a42816a175a3af77d133ba0bd35f6e16e7bbd29768 50 | file-name: "g2p.fst" 51 | 52 | # Files 
needed for open transcription (pre-built model) 53 | open-transcription: 54 | "base_language_model.txt.gz": 55 | bytes: 33456672 56 | sha256: c354a19b53b2e5bdef1736fb713c0ce632bcca675da967604fc8b140dcb616ee 57 | file-name: "base_language_model.txt" 58 | 59 | # Files needed for mixed language modeling (pre-built + custom) 60 | mixed-language-model: 61 | "base_language_model.fst.gz": 62 | bytes: 37267066 63 | sha256: 8e016f27d7c55a4759a0b0e7031b96ee2cbc74a6b6f33138a15950281937e886 64 | file-name: "base_language_model.fst" 65 | 66 | # Files needed for text to speech 67 | text-to-speech: 68 | "espeak_phonemes.txt": 69 | bytes: 189 70 | sha256: 43b1a6cc93f038ba92a5b40da10041350551bccf160eb900552756cd5990f2e4 71 | "marytts_phonemes.txt": 72 | bytes: 187 73 | sha256: ddf040f81c9048ae7e2c193ef19bf38c4562ff3bc4d9612fc6ed47d0bf47dedf 74 | "marytts/marytts-lang-en-5.2.jar": 75 | bytes: 6908619 76 | sha256: f46cc60abbf1b7444ac37a75e72f60f1aa566bf2448c458463f2f50369fcb3d1 77 | "marytts/voice-cmu-slt-hsmm-5.2.jar": 78 | bytes: 1242746 79 | sha256: d012c43f109c60dba106741ce1c18a6caf4bb1476d7af8861ceaab4c655294cb 80 | 81 | # Sample sentences, etc. 
82 | examples: 83 | "sentences.ini": 84 | bytes: 538 85 | sha256: 839b4f949ab5248401920bf4290e8f13a479f17f37626cb5306d4db5d76faf6c 86 | -------------------------------------------------------------------------------- /etc/profiles/es-mexican_pocketsphinx-cmu.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Mexican Spanish model" 3 | 4 | default: 5 | acoustic_model/feat.params: 6 | bytes: 110 7 | sha256: 6ad725f620b3f0c1d82bba878498630e8be4fa1f0b0bf035a7fe9ffb36f5a735 8 | acoustic_model/mdef: 9 | bytes: 3946962 10 | sha256: d85f99257959f313d98aa978be35bd53658b6498490fb444897c2cbfbc81d594 11 | acoustic_model/means: 12 | bytes: 50369348 13 | sha256: 110f88685a46f912357cd675c3565ad01383d8e81eddf4a29974d380875138df 14 | acoustic_model/mixture_weights: 15 | bytes: 1291584 16 | sha256: c49eb7b1cdeded79a9db3dbf6aaaf9aff60f6e8e29669b9b5b03714d1d3a04a3 17 | acoustic_model/noisedict: 18 | bytes: 60 19 | sha256: f9f30e70167dfb43605a1d68f86e15c0874655055e0488e34251789b668cf819 20 | acoustic_model/transition_matrices: 21 | bytes: 1504 22 | sha256: c42587630dde772ef349fea9c050cdd8adcb360b919cfc460c3a18749b08ec24 23 | acoustic_model/variances: 24 | bytes: 50369348 25 | sha256: 1df4e4583458cf744fdf0787dfd52afb4270637787010fce2438a2bbc3a29f01 26 | base_dictionary.txt.gz: 27 | bytes: 1824883 28 | sha256: 08241ba28cb89eed45575e897754918ab70f62834701439002d9729e58720350 29 | profile.yml: 30 | bytes: 455 31 | sha256: 83bc4255a0e939f1d5fd641a7aaf588d0454bdb5ab8b3d04834f32e7bb260a7d 32 | slot_programs/rhasspy/number: 33 | bytes: 651 34 | sha256: d1f10ccea552c2c9ba2a59dd69705f8e82545592c7db5ca7078b462ed9d08387 35 | stop_words.txt: 36 | bytes: 2178 37 | sha256: 7450c1f7196c161d9a4e537de43f88bd55cdb64028c808e14587bd7a6ac7be3d 38 | 39 | # Files needed to guess word pronunciations 40 | grapheme-to-phoneme: 41 | g2p.fst.gz: 42 | bytes: 2010314 43 | sha256: 8ca3d13386ec214288125543142ad5a87d9312ee8ae61de4f276823413b9f543 44 | 45 | # 
Files needed for open transcription (pre-built model) 46 | open-transcription: 47 | base_language_model.txt.gz: 48 | bytes: 23391433 49 | sha256: 3a5426601c24eaaf6658734d078f6fed4bb1a38b6521f2feecf2bfa755e350b4 50 | 51 | # Files needed for mixed language modeling (pre-built + custom) 52 | mixed-language-model: 53 | base_language_model.fst.gz: 54 | bytes: 25525538 55 | sha256: 74a0a00e4f1cceac02a058aaa0fc98e0c307c1a47296b392e769899695591fc2 56 | 57 | # Files needed for text to speech 58 | text-to-speech: 59 | espeak_phonemes.txt: 60 | bytes: 142 61 | sha256: 67bc9f2f8797a99f9023d8368993ad30b6543eec6afbcfcbd744bf460bdb7718 62 | 63 | # Sample sentences, etc. 64 | examples: 65 | custom_words.txt: 66 | bytes: 82 67 | sha256: 6294114c9169771da6bc98b85cbe48291ca907bc789c5d3bda6ec0f9d47a3f70 68 | sentences.ini: 69 | bytes: 317 70 | sha256: 5942f84939e5a7ccc4f062726defae82628d5e6d899d485eeb8e77f9c44a2a20 71 | -------------------------------------------------------------------------------- /etc/profiles/es_deepspeech-jaco.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Spanish Deepspeech v0.9 model from https://gitlab.com/Jaco-Assistant" 3 | url_format: "https://raw.githubusercontent.com/rhasspy/{profile}/bbfe4adbb6db2d229d4060510319cc91e90e8a1f/{file}" 4 | 5 | default: 6 | LICENSE: 7 | bytes: 7652 8 | sha256: e3a994d82e644b03a792a930f574002658412f62407f5fee083f2555c5f23118 9 | model/alphabet.txt: 10 | bytes: 351 11 | sha256: fec6f979d13e5ff48dc9848bda9b4246de478074f5ae52a6770e647168748050 12 | model/output_graph.pbmm.gz.part-00: 13 | bytes: 26214400 14 | file-name: output_graph.pbmm 15 | platform: 16 | - machine: x86_64 17 | sha256: f26d12c840718e35e4c56926e9ef904c4a0a0a78d11a06d85f3f5c9e7efc1f4a 18 | model/output_graph.pbmm.gz.part-01: 19 | bytes: 26214400 20 | file-name: output_graph.pbmm 21 | platform: 22 | - machine: x86_64 23 | sha256: b3d8e7d437a783ff9d3442b16f94ddca3a0ceb825b11297403d03655503d6b76 24 | 
model/output_graph.pbmm.gz.part-02: 25 | bytes: 26214400 26 | file-name: output_graph.pbmm 27 | platform: 28 | - machine: x86_64 29 | sha256: 3eef3b513fcac7efe31cf68f77b67d2d70c8f9d86de1c363b3a5975e54a421a6 30 | model/output_graph.pbmm.gz.part-03: 31 | bytes: 26214400 32 | file-name: output_graph.pbmm 33 | platform: 34 | - machine: x86_64 35 | sha256: 89f9460617f8173ae4dac325ada901b77be8b88b00687edaced392fd576a23f3 36 | model/output_graph.pbmm.gz.part-04: 37 | bytes: 26214400 38 | file-name: output_graph.pbmm 39 | platform: 40 | - machine: x86_64 41 | sha256: 48c912e44b2e71ea64d048f63676fe09ec7bc109a970e0c916b58cecd9002994 42 | model/output_graph.pbmm.gz.part-05: 43 | bytes: 26214400 44 | file-name: output_graph.pbmm 45 | platform: 46 | - machine: x86_64 47 | sha256: a69c7c810aeb293c509c70d2aa1f0c948811350daefb0d3c40ca4e53f7f078e4 48 | model/output_graph.pbmm.gz.part-06: 49 | bytes: 18094880 50 | file-name: output_graph.pbmm 51 | platform: 52 | - machine: x86_64 53 | sha256: 1b8d5dbf2debbeda50893a1ea3153c0831b5341614966de28bf3f63604189850 54 | model/output_graph.tflite.gz: 55 | bytes: 20430228 56 | file-name: output_graph.tflite 57 | platform: 58 | - machine: armv6l 59 | - machine: armv7l 60 | - machine: arm64 61 | - machine: aarch64 62 | sha256: 8806ba80b8666b41ad5c0748bc36579be452ccb55198f92f1939ff166d88bd8c 63 | profile.yml: 64 | bytes: 938 65 | sha256: b36a0518505b154de2f5528b03ccb054b731206ee077b2f7c65d8989b7837708 66 | stop_words.txt: 67 | bytes: 2178 68 | sha256: 7450c1f7196c161d9a4e537de43f88bd55cdb64028c808e14587bd7a6ac7be3d 69 | 70 | # Files needed for open transcription (pre-built model) 71 | open-transcription: 72 | model/base.scorer.gz.part-00: 73 | bytes: 26214400 74 | file-name: base.scorer 75 | sha256: 41252b05b46d1c0f4026b6cac34b5a95c2ca4251eab2a53e38a4fddb087c00f4 76 | model/base.scorer.gz.part-01: 77 | bytes: 26214400 78 | file-name: base.scorer 79 | sha256: cf7b45c325e3337b2a3cf3224896d88d71ba9615d8108ba61fa8122342f6f4e7 80 | 
model/base.scorer.gz.part-02: 81 | bytes: 26214400 82 | file-name: base.scorer 83 | sha256: 4ce013c8c6a15c66bc6644231647c29d3f2e6166bd2c37d855c681cd9e613b20 84 | model/base.scorer.gz.part-03: 85 | bytes: 26214400 86 | file-name: base.scorer 87 | sha256: 84c3bb258ce8b6c8f5774df6038f1c1b24c3232f90b9b499b9facc91a12faf32 88 | model/base.scorer.gz.part-04: 89 | bytes: 26214400 90 | file-name: base.scorer 91 | sha256: 1b96687e5b2a873e6e1291f7ba0bd7ccf64eae246dc12ea7a1f434ced5a29c54 92 | model/base.scorer.gz.part-05: 93 | bytes: 26214400 94 | file-name: base.scorer 95 | sha256: be87e5d5d82d7167739c81259982594188fad86a000596956810f3026a4f2dcb 96 | model/base.scorer.gz.part-06: 97 | bytes: 26214400 98 | file-name: base.scorer 99 | sha256: a8c275e4c2df960fe05be4d0891d72535512cfe949e88dccc62783ae81fb7351 100 | model/base.scorer.gz.part-07: 101 | bytes: 26214400 102 | file-name: base.scorer 103 | sha256: 1af1cca7ae1c8687911a76b26648406855ebe110e4bc49d531d737a002a61822 104 | model/base.scorer.gz.part-08: 105 | bytes: 26214400 106 | file-name: base.scorer 107 | sha256: 3be0ba33212e45411f4d912f0f560f6328dddca34e363a70afa63c8bf0dd2e6d 108 | model/base.scorer.gz.part-09: 109 | bytes: 11758932 110 | file-name: base.scorer 111 | sha256: 0f723a25d69c8ea759b4c9bc34e7bbfd9a24479856f9216d9e48fe2580540b3e 112 | 113 | # Files needed for mixed language modeling (pre-built + custom) 114 | mixed-language-model: 115 | base_language_model.fst.gz.part-00: 116 | bytes: 26214400 117 | file-name: base_language_model.fst 118 | sha256: 29b2a945f08c2a89927ee881b70e5c9b03328f23b210550873f30d9cc7e254cf 119 | base_language_model.fst.gz.part-01: 120 | bytes: 26214400 121 | file-name: base_language_model.fst 122 | sha256: 704461bb5e505044c5f4f64e7258ff381e1b51284cb1c8c112efcf76c061f709 123 | 124 | # Sample sentences, etc. 
125 | examples: 126 | sentences.ini: 127 | bytes: 317 128 | sha256: 5942f84939e5a7ccc4f062726defae82628d5e6d899d485eeb8e77f9c44a2a20 129 | -------------------------------------------------------------------------------- /etc/profiles/es_pocketsphinx-cmu.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Spanish Pocketsphinx model" 3 | 4 | default: 5 | acoustic_model/feat.params: 6 | bytes: 148 7 | sha256: 70c003b0bdfaab851d7253ccb1b3df5d7a0ac7c34e9c6fa8c419ea8bbedf2330 8 | acoustic_model/mdef: 9 | bytes: 1351979 10 | sha256: b60ee4dce093ff3f501d4a66ba23ae3419c117827f0f42b9aa3f31bbeaea21c2 11 | acoustic_model/means: 12 | bytes: 519244 13 | sha256: 4e924f28348fa2baa791942c79a1dc0725b6b9113b5a7cb0d3064933c0d526ad 14 | acoustic_model/mixture_weights: 15 | bytes: 6263872 16 | sha256: 16e2043ee1d99acab306c75634d68de6134afb2e6bbadba84c200ec4d6772f28 17 | acoustic_model/noisedict: 18 | bytes: 27 19 | sha256: af281d8c53f0a24ace7193e014dcb876e4e517f96b34791d5cd3e71fc69fc83b 20 | acoustic_model/transition_matrices: 21 | bytes: 1312 22 | sha256: 65001e41c829cbe79c161a15eb07e29c2b63c5aac00d4e97a55db75bc35e3d6b 23 | acoustic_model/variances: 24 | bytes: 519244 25 | sha256: 6b31010a78b7c09c710a1e5e2dd6d1158c5c5c9637e1420bd6b76fe3859cc117 26 | base_dictionary.txt.gz: 27 | bytes: 151621 28 | sha256: 807f2813dfdfa07852148951d2b58681eab9d7e97128b41b78dc44d936fc8eab 29 | profile.yml: 30 | bytes: 387 31 | sha256: 98f0f06bc5988aecdb4f43047f33d987c844a38894cf170bd1b2cbd1ca2812a6 32 | slot_programs/rhasspy/number: 33 | bytes: 651 34 | sha256: d1f10ccea552c2c9ba2a59dd69705f8e82545592c7db5ca7078b462ed9d08387 35 | stop_words.txt: 36 | bytes: 2178 37 | sha256: 7450c1f7196c161d9a4e537de43f88bd55cdb64028c808e14587bd7a6ac7be3d 38 | 39 | # Files needed to guess word pronunciations 40 | grapheme-to-phoneme: 41 | g2p.fst.gz: 42 | bytes: 2034897 43 | sha256: 870c3ac620565059ba3a91709e4df674593254328c10af4e8def9815d025fe07 44 | 45 | # Files 
needed for open transcription (pre-built model) 46 | open-transcription: 47 | base_language_model.txt.gz: 48 | bytes: 23391433 49 | sha256: 3a5426601c24eaaf6658734d078f6fed4bb1a38b6521f2feecf2bfa755e350b4 50 | 51 | # Files needed for mixed language modeling (pre-built + custom) 52 | mixed-language-model: 53 | base_language_model.fst.gz: 54 | bytes: 25525538 55 | sha256: 6eabe8b20ae5366bca5ef238e1d7d7b678961375fe1182efc7e1550d2a1070c3 56 | 57 | # Files needed for text to speech 58 | text-to-speech: 59 | espeak_phonemes.txt: 60 | bytes: 108 61 | sha256: c854dd0c98362203bc45c63a3ed2fb4677d35b95e5f893ce7e9539b2547c4d40 62 | 63 | # Sample sentences, etc. 64 | examples: 65 | custom_words.txt: 66 | bytes: 82 67 | sha256: 6294114c9169771da6bc98b85cbe48291ca907bc789c5d3bda6ec0f9d47a3f70 68 | sentences.ini: 69 | bytes: 317 70 | sha256: 5942f84939e5a7ccc4f062726defae82628d5e6d899d485eeb8e77f9c44a2a20 71 | -------------------------------------------------------------------------------- /etc/profiles/fr_deepspeech-jaco.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "French DeepSpeech v0.9 model from https://gitlab.com/Jaco-Assistant" 3 | url_format: "https://raw.githubusercontent.com/rhasspy/{profile}/45288df39e5e620e9cb282d4653d341b920c09e0/{file}" 4 | 5 | default: 6 | LICENSE: 7 | bytes: 7652 8 | sha256: e3a994d82e644b03a792a930f574002658412f62407f5fee083f2555c5f23118 9 | model/alphabet.txt: 10 | bytes: 70 11 | sha256: 2cdb37365c02fefd480c98329de8652bd4f129cff4befdc2aabbf8b1a3d76a5a 12 | model/output_graph.pbmm.gz.part-00: 13 | bytes: 26214400 14 | file-name: output_graph.pbmm 15 | platform: 16 | - machine: x86_64 17 | sha256: 26f0592776f3c78a85bd1bc0315b2f1f2aa6b3d549a3cfd83380da010d01c3b7 18 | model/output_graph.pbmm.gz.part-01: 19 | bytes: 26214400 20 | file-name: output_graph.pbmm 21 | platform: 22 | - machine: x86_64 23 | sha256: d4c7f5c232d2743986aff4ddbb4b0304816cdda8b3515e5333e7156e7a471ab0 24 | 
model/output_graph.pbmm.gz.part-02: 25 | bytes: 26214400 26 | file-name: output_graph.pbmm 27 | platform: 28 | - machine: x86_64 29 | sha256: fba12d8c267f92a55ae22daf5cb773de8d0d346d7877480603c3479b46bb19e6 30 | model/output_graph.pbmm.gz.part-03: 31 | bytes: 26214400 32 | file-name: output_graph.pbmm 33 | platform: 34 | - machine: x86_64 35 | sha256: 9744112f7068bce616ddfd8ecd6613510e7603a65086d3f81f99f6610968ee1d 36 | model/output_graph.pbmm.gz.part-04: 37 | bytes: 26214400 38 | file-name: output_graph.pbmm 39 | platform: 40 | - machine: x86_64 41 | sha256: 0bab06790d504c39f4053734f4023159c6f0d8763f8dc84ddc1f52c766fc9270 42 | model/output_graph.pbmm.gz.part-05: 43 | bytes: 26214400 44 | file-name: output_graph.pbmm 45 | platform: 46 | - machine: x86_64 47 | sha256: 53aa6215041bd19c609133e332f61b917ff6e10a94c43b033188c7b437156fb3 48 | model/output_graph.pbmm.gz.part-06: 49 | bytes: 18208359 50 | file-name: output_graph.pbmm 51 | platform: 52 | - machine: x86_64 53 | sha256: f3594fb8abcba51f7dff808170ef8ad0035dea32bdb1a787ad808e3e29295abc 54 | model/output_graph.tflite.gz: 55 | bytes: 20519594 56 | file-name: output_graph.tflite 57 | platform: 58 | - machine: armv6l 59 | - machine: armv7l 60 | - machine: arm64 61 | - machine: aarch64 62 | sha256: 02a0485ca619805ca9a4a88b03453f3eb053dd7936903915564390daa171dbb8 63 | profile.yml: 64 | bytes: 884 65 | sha256: 5c2c20309548de8b8934719a5df89e13ace8ffcb9e46c2b6a8fdf7640044eaad 66 | stop_words.txt: 67 | bytes: 805 68 | sha256: 6ca60ffd4257c35cc3981d0e923881c3d8b85c8c0f6ef7a6697bd4ae97200fd8 69 | 70 | # Files needed for open transcription (pre-built model) 71 | open-transcription: 72 | model/base.scorer.gz.part-00: 73 | bytes: 26214400 74 | file-name: base.scorer 75 | sha256: 7e96ce13d660b236c39c7ea022202a25368537f0d9e17fbb92b2640fea073515 76 | model/base.scorer.gz.part-01: 77 | bytes: 26214400 78 | file-name: base.scorer 79 | sha256: f21ff900e778439bc5c80e2ff984a2f7fc62610a2db5fe613d9e144a1b33ab09 80 | 
model/base.scorer.gz.part-02: 81 | bytes: 26214400 82 | file-name: base.scorer 83 | sha256: 81037cf4c2e0db6492877b0d7bce0fff1816c861331aacaf5896cfa83efb6fca 84 | model/base.scorer.gz.part-03: 85 | bytes: 26214400 86 | file-name: base.scorer 87 | sha256: acef09cbe489208cb4f97e6247aa800c203d8a035d51e669273fbe31c0e40431 88 | model/base.scorer.gz.part-04: 89 | bytes: 26214400 90 | file-name: base.scorer 91 | sha256: 642a2ecb4f27ee509dcef510f4230988942f29333a40541184f4b2fd7803e015 92 | model/base.scorer.gz.part-05: 93 | bytes: 26214400 94 | file-name: base.scorer 95 | sha256: 9f028daf8f4e310eefd6bd70f13e1762d80ccbad6796a637948d028f5a78940d 96 | model/base.scorer.gz.part-06: 97 | bytes: 26214400 98 | file-name: base.scorer 99 | sha256: dcc7ec71fb70659ff568ae1879c7256de6c531388a8ae47d43539093b404074d 100 | model/base.scorer.gz.part-07: 101 | bytes: 26214400 102 | file-name: base.scorer 103 | sha256: 0a87b5b001bbdcf80902498c8d8356264f009b4c8632e54b8dbd85f4d73abc22 104 | model/base.scorer.gz.part-08: 105 | bytes: 16230543 106 | file-name: base.scorer 107 | sha256: 8152f00b626dd7e772690117f399f2f849bc7d791b854c2926989467691e95ae 108 | 109 | # Files needed for mixed language modeling (pre-built + custom) 110 | mixed-language-model: 111 | base_language_model.fst.gz: 112 | bytes: 27944625 113 | file-name: base_language_model.fst 114 | sha256: 2ec34a484eb4fbb17175f66e52bcab2e0933a3b797938471c0826ff1253f3ea2 115 | 116 | # Sample sentences, etc. 
117 | examples: 118 | sentences.ini: 119 | bytes: 459 120 | sha256: bbab9a0c7f0a87aefdc6ef5ea72f81d69d1da52312479bf51f0a4fa4f32b4201 121 | -------------------------------------------------------------------------------- /etc/profiles/fr_pocketsphinx-cmu.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "French Pocketsphinx model from Alpha Cephei" 3 | 4 | default: 5 | LICENSE: 6 | bytes: 1525 7 | sha256: 9325b9a7e4605f5752d17c36ac2753dc226d0c9f75cfddd4aa0d0417b0af52f2 8 | acoustic_model/feat.params: 9 | bytes: 112 10 | sha256: 44ec61d22acd9e9ba8f49fee48800a06da5139d92d3491a35d0660d8a4349838 11 | acoustic_model/feature_transform: 12 | bytes: 5036 13 | sha256: c20b2a4b36c6e232ea90d3c7788f1c076de2e2fb6c8c13a1e345a59b71fa3033 14 | acoustic_model/mdef: 15 | bytes: 4950081 16 | sha256: 93bbae9f2c3ea724d7b200ee9dcbc404c0d6375cdf3b2950cdc6f5f81c4c0759 17 | acoustic_model/means: 18 | bytes: 2158660 19 | sha256: a4999995c08218de512fc0580b567bc819ece0f859b15537e66386655ab1ccb8 20 | acoustic_model/mixture_weights: 21 | bytes: 67520 22 | sha256: c59e1d4c1d4ccebacfd9d0e265b666ac0c796229a9fb8de6802be2bc0f1e6eda 23 | acoustic_model/noisedict: 24 | bytes: 27 25 | sha256: af281d8c53f0a24ace7193e014dcb876e4e517f96b34791d5cd3e71fc69fc83b 26 | acoustic_model/transition_matrices: 27 | bytes: 1792 28 | sha256: 23db4d08509bd0d13836933869b58a010772cc1bf753b2b22ad686ad6e9e9175 29 | acoustic_model/variances: 30 | bytes: 2158660 31 | sha256: ca4ffa8f0f9f94fb9137bd74a660b5ab94c71aa33465759953b774a4a02f15ff 32 | base_dictionary.txt.gz: 33 | bytes: 623346 34 | file-name: base_dictionary.txt 35 | sha256: 73b2ec95b796e8dfb180b06a48b43b5b00b987c3cc0406d0c04c7a1686a9b8a0 36 | profile.yml: 37 | bytes: 479 38 | sha256: 29e45e9f6668a99ef99b05bc6cdcfb1fc51a83ee98b802afc50acb3c55de1d85 39 | slot_programs/rhasspy/number: 40 | bytes: 651 41 | sha256: d1f10ccea552c2c9ba2a59dd69705f8e82545592c7db5ca7078b462ed9d08387 42 | stop_words.txt: 43 | bytes: 
805 44 | sha256: 6ca60ffd4257c35cc3981d0e923881c3d8b85c8c0f6ef7a6697bd4ae97200fd8 45 | 46 | # Files needed to guess word pronunciations 47 | grapheme-to-phoneme: 48 | g2p.fst.gz: 49 | bytes: 7520942 50 | file-name: g2p.fst 51 | sha256: 5f0669a9bb084c59356779960a22d643b532cc04690f8e458718fc1dfd7a0c00 52 | 53 | # Files needed for open transcription (pre-built model) 54 | open-transcription: 55 | base_language_model.txt.gz: 56 | bytes: 21453362 57 | file-name: base_language_model.txt 58 | sha256: 5bad59d04c983afeea8a384851257c4f9fcf14c1b4a9a9d1949d882b64cf7fe5 59 | 60 | # Files needed for mixed language modeling (pre-built + custom) 61 | mixed-language-model: 62 | base_language_model.fst.gz: 63 | bytes: 27944649 64 | file-name: base_language_model.fst 65 | sha256: 955e4e8973f054c1e857a3ae5ba528f739428a0b167682eac41cd1469cf31598 66 | 67 | # Files needed for text to speech 68 | text-to-speech: 69 | espeak_phonemes.txt: 70 | bytes: 180 71 | sha256: 62e9d039be7bf60b1632ab33881cc565b87b8cb711274465a216db1a4417281b 72 | marytts/marytts-lang-fr-5.2.jar: 73 | bytes: 301765 74 | sha256: 317fb669755278f93adf24e58ede0d39d041df58f0aac8296697dc883063bfc8 75 | marytts/voice-upmc-pierre-hsmm-5.2.jar: 76 | bytes: 1558945 77 | sha256: 14b320fd7c8808eace209471982ecd24e8db223e85bfa87d5ebfad45131b14a8 78 | 79 | # Sample sentences, etc. 
80 | examples: 81 | custom_words.txt: 82 | bytes: 37 83 | sha256: 0a177acaf089ceb8c3a2cbef36b73e507679e2bf8a07a054b3544ad954c14e97 84 | sentences.ini: 85 | bytes: 462 86 | sha256: 0647a3ade5a0c246fc4d51028871440e82a0292db51c199fba6018066757e7d2 87 | -------------------------------------------------------------------------------- /etc/profiles/hi_pocketsphinx-cmu.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Hindi Pocketsphinx model" 3 | default: 4 | LICENSE: 5 | bytes: 17993 6 | sha256: e1aa57b16350b3f421e49bd8b9401252d2b667616def1718c72576286249ae4f 7 | acoustic_model/feat.params: 8 | bytes: 86 9 | sha256: 0e10ccb900bff1f17991481a7085febb7185c2d19c6e139fb19a5bf610475e59 10 | acoustic_model/mdef: 11 | bytes: 4821459 12 | sha256: 4c482fc5a413d656277a5a928ce22741a6fecc89679b56f3079afb00aaa5c184 13 | acoustic_model/means: 14 | bytes: 1468958 15 | sha256: 01ecb2595b8d6a3e0d12c5220c57fa382a5174dd206a0fffe1c40d52c861f5f6 16 | acoustic_model/mixture_weights: 17 | bytes: 37722 18 | sha256: dc862b62007a5cc1c0bd56f041a536fa9c6ab70b695628f661f66493c834afd1 19 | acoustic_model/noisedict: 20 | bytes: 27 21 | sha256: e5ba928c9f8950b4afce4ea84e6be0d219e878b1f926e5edcc5ca329d06eecc9 22 | acoustic_model/transition_matrices: 23 | bytes: 2890 24 | sha256: 6f654dc9778515cb4ae80007fba3381a339b82d46d6e8b9dbbd6e7781a47ed0c 25 | acoustic_model/variances: 26 | bytes: 1468958 27 | sha256: 84b4af7d01f88aef0d535d47c992146258ea6de64d55c843482f43a2fc156ef5 28 | base_dictionary.txt.gz: 29 | bytes: 55657 30 | file-name: base_dictionary.txt 31 | sha256: f55b7e4d087f555260e22b80fb4127fe730b5ef3cd59f2c4ab8aaf08a7b8ac00 32 | profile.yml: 33 | bytes: 385 34 | sha256: 0f2e1a43c9cf52696078681da8e8de3d70df9143a8b9fd4813bf46b8ca6472da 35 | slot_programs/rhasspy/number: 36 | bytes: 651 37 | sha256: d1f10ccea552c2c9ba2a59dd69705f8e82545592c7db5ca7078b462ed9d08387 38 | stop_words.txt: 39 | bytes: 0 40 | sha256: 
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 41 | 42 | # Files needed to guess word pronunciations 43 | grapheme-to-phoneme: 44 | g2p.fst.gz: 45 | bytes: 969336 46 | file-name: g2p.fst 47 | sha256: bef52289248bd358893dadb64b9a957e88754c524cb3875043b1e876ca57c335 48 | 49 | # Files needed for open transcription (pre-built model) 50 | open-transcription: 51 | base_language_model.txt.gz: 52 | bytes: 340008 53 | file-name: base_language_model.txt 54 | sha256: 4dd30cd547671f1bba0dbb0ed5231beaf95209b845f8e784400d0f01bbaf3178 55 | 56 | # Files needed for text to speech 57 | text-to-speech: 58 | espeak_phonemes.txt: 59 | bytes: 0 60 | sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 61 | 62 | # Sample sentences, etc. 63 | examples: 64 | sentences.ini: 65 | bytes: 174 66 | sha256: b4dde8267f67ad66be058e1cba48c4ece0cf7255ad40b5425cc043b400a27889 67 | -------------------------------------------------------------------------------- /etc/profiles/it_deepspeech-jaco.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Italian DeepSpeech model from https://gitlab.com/Jaco-Assistant/deepspeech-polyglot" 3 | url_format: "https://raw.githubusercontent.com/rhasspy/{profile}/4c85f1412f36462888a47d47a0df294d5193a1c1/{file}" 4 | 5 | default: 6 | LICENSE: 7 | bytes: 7652 8 | sha256: e3a994d82e644b03a792a930f574002658412f62407f5fee083f2555c5f23118 9 | model/alphabet.txt: 10 | bytes: 329 11 | sha256: a29eab570b46a5aa60e2bfeaa40fb15ff0b4ce995bc16ba957ddf1a5e1f35304 12 | model/output_graph.pbmm.gz.part-00: 13 | bytes: 26214400 14 | file-name: output_graph.pbmm 15 | platform: 16 | - machine: x86_64 17 | sha256: 969c8ae46b11fbbfb4cff273af2988efb65a79e0fb0e503e3face20d856e686e 18 | model/output_graph.pbmm.gz.part-01: 19 | bytes: 26214400 20 | file-name: output_graph.pbmm 21 | platform: 22 | - machine: x86_64 23 | sha256: 74869078de7790b7a5589a0f36b21a74e23976bcea1eb612f9a9054f7ad2d458 24 | 
model/output_graph.pbmm.gz.part-02: 25 | bytes: 26214400 26 | file-name: output_graph.pbmm 27 | platform: 28 | - machine: x86_64 29 | sha256: aadb06bf918be45732499f65e3227d91bd6d3c2e11560d39fd2a8524d4eaaff7 30 | model/output_graph.pbmm.gz.part-03: 31 | bytes: 26214400 32 | file-name: output_graph.pbmm 33 | platform: 34 | - machine: x86_64 35 | sha256: 97a67949842106514d163abe18c43abed9b5915e3a9536a3bbd95d9f6e328121 36 | model/output_graph.pbmm.gz.part-04: 37 | bytes: 26214400 38 | file-name: output_graph.pbmm 39 | platform: 40 | - machine: x86_64 41 | sha256: 80f21a07cea74ec62325c1f9ce83e2c66a48a28822bbec775e4526ea9ab3c57f 42 | model/output_graph.pbmm.gz.part-05: 43 | bytes: 26214400 44 | file-name: output_graph.pbmm 45 | platform: 46 | - machine: x86_64 47 | sha256: f5a4eb93ac281855a61cf1e6a60e9fe009bd192aa45c91bb7096d2b6f8c48b09 48 | model/output_graph.pbmm.gz.part-06: 49 | bytes: 18071736 50 | file-name: output_graph.pbmm 51 | platform: 52 | - machine: x86_64 53 | sha256: 6e446aeeccbbc0b38bed4bcabdd0df9215c5df67369671d058ce5784ce21646d 54 | model/output_graph.tflite.gz: 55 | bytes: 20542809 56 | file-name: output_graph.tflite 57 | platform: 58 | - machine: armv6l 59 | - machine: armv7l 60 | - machine: arm64 61 | - machine: aarch64 62 | sha256: caa6336dd638e6e43ef03ecfbc16bd0619a9cabe1fa7171bd223180f6cee0575 63 | profile.yml: 64 | bytes: 845 65 | sha256: 71362d11d6c8112c2bd19e7665c9528c33dff7a1cd46c1ff01f8d224f69d184c 66 | slot_programs/rhasspy/number: 67 | bytes: 651 68 | sha256: d1f10ccea552c2c9ba2a59dd69705f8e82545592c7db5ca7078b462ed9d08387 69 | stop_words.txt: 70 | bytes: 1654 71 | sha256: 293d7841f198e4012f49e8e5653c3bfd073a58cea1259fa2c0fcec894167b628 72 | 73 | # Files needed for open transcription (pre-built model) 74 | open-transcription: 75 | model/base.scorer.gz: 76 | bytes: 5350776 77 | sha256: 297341e8c43f668906fb9d191a085812b7ac03bbc1758aac14f3025a918544a2 78 | 79 | # Files needed for mixed language modeling (pre-built + custom) 80 | 
mixed-language-model: 81 | base_language_model.fst.gz.part-00: 82 | bytes: 26214400 83 | file-name: base-language-model.fst 84 | sha256: 3c4247c91ea26ae9a51510cff99ffef62eaefd922887adaea2d73f1119056b8f 85 | base_language_model.fst.gz.part-01: 86 | bytes: 12756015 87 | file-name: base-language-model.fst 88 | sha256: fb9c017385b822b1246811f2108dc41445aeb9651435dd85a7dd9f1da25501fa 89 | 90 | # Sample sentences, etc. 91 | examples: 92 | sentences.ini: 93 | bytes: 307 94 | sha256: 4c6263e3a5a2a24031873933168a9e11b38b6072b7280d7e858b2d1448088cfa 95 | -------------------------------------------------------------------------------- /etc/profiles/it_deepspeech-mozillaitalia.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Italian DeepSpeech model from https://github.com/MozillaItalia/DeepSpeech-Italian-Model" 3 | url_format: "https://raw.githubusercontent.com/rhasspy/{profile}/b09e25a482b0901c3db32a937694663ca631ae45/{file}" 4 | 5 | default: 6 | LICENSE: 7 | bytes: 35150 8 | sha256: e57f1c320b8cf8798a7d2ff83a6f9e06a33a03585f6e065fea97f1d86db84052 9 | model/alphabet.txt: 10 | bytes: 86 11 | sha256: 7dc989af1ae3fd10b4d47af104eef04bbb522fab1ad6b7e843339e3e9d3f9e3b 12 | model/output_graph.pbmm.gz.part-00: 13 | bytes: 26214400 14 | file-name: output_graph.pbmm 15 | platform: 16 | - machine: x86_64 17 | sha256: 33c94690f2f807ca9d1f30f54513c8ce330b0bcb26595c35d7edb757c3377e71 18 | model/output_graph.pbmm.gz.part-01: 19 | bytes: 26214400 20 | file-name: output_graph.pbmm 21 | platform: 22 | - machine: x86_64 23 | sha256: 5d743fb74b00454ed4866b85f52f8ccbdac93c16c5be3462d5eaa74a9988f5c0 24 | model/output_graph.pbmm.gz.part-02: 25 | bytes: 26214400 26 | file-name: output_graph.pbmm 27 | platform: 28 | - machine: x86_64 29 | sha256: 5f0d265fa695781f7bc98ba8733826519660322973367fa1368f17038557d95c 30 | model/output_graph.pbmm.gz.part-03: 31 | bytes: 26214400 32 | file-name: output_graph.pbmm 33 | platform: 34 | - machine: x86_64
35 | sha256: 6e0e7b114c64daa8de7f4d00ee76169eebc8610882de4e9f484d74f8d2afe714 36 | model/output_graph.pbmm.gz.part-04: 37 | bytes: 26214400 38 | file-name: output_graph.pbmm 39 | platform: 40 | - machine: x86_64 41 | sha256: b34f675ab85d641f50b522e5710a95418c9c4d43607045a0388350c9816cd725 42 | model/output_graph.pbmm.gz.part-05: 43 | bytes: 26214400 44 | file-name: output_graph.pbmm 45 | platform: 46 | - machine: x86_64 47 | sha256: 104738a47a74aba958dae7a88e09d30caf01172cbafc39925b7d9dd490923a6e 48 | model/output_graph.pbmm.gz.part-06: 49 | bytes: 18136891 50 | file-name: output_graph.pbmm 51 | platform: 52 | - machine: x86_64 53 | sha256: 5a76a31c6ba48eddc8f5feae8a32cd335671a09747610165177e315f9eeb0974 54 | model/output_graph.tflite.gz: 55 | bytes: 20690678 56 | file-name: output_graph.tflite 57 | platform: 58 | - machine: armv6l 59 | - machine: armv7l 60 | - machine: arm64 61 | - machine: aarch64 62 | sha256: d4bae454289cd7baf8e0984ee530ece7198dfd4247169d1cb096f99d7269737e 63 | profile.yml: 64 | bytes: 848 65 | sha256: 31c87154de9c74d20d26bbd4218027c415ecb375968fc831cfa37273e7264f06 66 | slot_programs/rhasspy/number: 67 | bytes: 651 68 | sha256: d1f10ccea552c2c9ba2a59dd69705f8e82545592c7db5ca7078b462ed9d08387 69 | stop_words.txt: 70 | bytes: 1654 71 | sha256: 293d7841f198e4012f49e8e5653c3bfd073a58cea1259fa2c0fcec894167b628 72 | 73 | # Files needed for open transcription (pre-built model) 74 | open-transcription: 75 | model/base.scorer.gz.part-00: 76 | bytes: 26214400 77 | file-name: base.scorer 78 | sha256: e197a4cc1ed47e33dddb7d8a660174d250a5d172e677fe1ac637f9ebdbf8fabc 79 | model/base.scorer.gz.part-01: 80 | bytes: 26214400 81 | file-name: base.scorer 82 | sha256: 7329e9f599a87f3aed2aa2f63dd80187edd6bfcb796f4f6b4e98e233a9942b3b 83 | model/base.scorer.gz.part-02: 84 | bytes: 26214400 85 | file-name: base.scorer 86 | sha256: 7dadfdb1b66b68635f5965b005b2fe6e5b750f47da1d72ceb8de76b22105eb33 87 | model/base.scorer.gz.part-03: 88 | bytes: 26214400 89 | file-name: 
base.scorer 90 | sha256: 1cc639afeae4a7ce92e2070f9ecbadd33568df1a8600a2ed0164d658dc98b74c 91 | model/base.scorer.gz.part-04: 92 | bytes: 26214400 93 | file-name: base.scorer 94 | sha256: 5be220713c30419a9aa0acf3a4d999a225fd444a893b1af7f780296ca55a8136 95 | model/base.scorer.gz.part-05: 96 | bytes: 26214400 97 | file-name: base.scorer 98 | sha256: c921d3040b69ce5afe0787226b5f42237c699a734ef3852a216680d7dd4991f6 99 | model/base.scorer.gz.part-06: 100 | bytes: 3428548 101 | file-name: base.scorer 102 | sha256: 7746113e53597507fb34feb8da91d3ff29875098f1d96d80fddf4f617159488c 103 | 104 | # Files needed for mixed language modeling (pre-built + custom) 105 | mixed-language-model: 106 | base_language_model.fst.gz.part-00: 107 | bytes: 26214400 108 | file-name: base-language-model.fst 109 | sha256: 3c4247c91ea26ae9a51510cff99ffef62eaefd922887adaea2d73f1119056b8f 110 | base_language_model.fst.gz.part-01: 111 | bytes: 12756015 112 | file-name: base-language-model.fst 113 | sha256: fb9c017385b822b1246811f2108dc41445aeb9651435dd85a7dd9f1da25501fa 114 | 115 | # Sample sentences, etc. 
116 | examples: 117 | sentences.ini: 118 | bytes: 307 119 | sha256: 4c6263e3a5a2a24031873933168a9e11b38b6072b7280d7e858b2d1448088cfa 120 | -------------------------------------------------------------------------------- /etc/profiles/it_pocketsphinx-cmu.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Italian Pocketsphinx model" 3 | default: 4 | LICENSE: 5 | bytes: 8 6 | sha256: c80c3db2b2cb606bacf75bac6c2e4b9221e58958b5c800a6b0031b726c3f4bc1 7 | acoustic_model/feat.params: 8 | bytes: 127 9 | sha256: 187e5ed913fa86af61d155e27d3e7c6b928b4e4fa15621d68b009272bc8ef1da 10 | acoustic_model/feature_transform: 11 | bytes: 5036 12 | sha256: 2979d79ba3a09eb353aa88624015e8d46defbedab688d8f68b9f66236529aaef 13 | acoustic_model/mdef: 14 | bytes: 2257434 15 | sha256: 4372886afc37c4b1616dc928abb4a6549bdd55cd62255ff74b584446cb67b762 16 | acoustic_model/means: 17 | bytes: 4329540 18 | sha256: baa7dc7d3dd85a894dae008b88ab920b5f581ecbd88457c2e1f953745fc7ca36 19 | acoustic_model/mixture_weights: 20 | bytes: 135360 21 | sha256: cef59a22e3253ee4166862fec20011dfeff857ab2aa90bfb877a37afda5f9b95 22 | acoustic_model/noisedict: 23 | bytes: 27 24 | sha256: af281d8c53f0a24ace7193e014dcb876e4e517f96b34791d5cd3e71fc69fc83b 25 | acoustic_model/transition_matrices: 26 | bytes: 1888 27 | sha256: ee453c4b89ba5478581649f6ed33ab066ab6db9cce1da7ec6b99ae48c210ed3a 28 | acoustic_model/variances: 29 | bytes: 4329540 30 | sha256: ee8360d621478b7bfd5d6b6635209a310ebade3148ca33d553d863d121a899e0 31 | base_dictionary.txt.gz: 32 | bytes: 62841 33 | file-name: base_dictionary.txt 34 | sha256: 291706733ad0418b81241e2da09b9d40280c7bff8e846e1023e097cf59393d3d 35 | profile.yml: 36 | bytes: 479 37 | sha256: 2ec5df50a25f25746e317060a435dff2c1a0ab9ed71c842bf4b0af6a0539ec44 38 | slot_programs/rhasspy/number: 39 | bytes: 651 40 | sha256: d1f10ccea552c2c9ba2a59dd69705f8e82545592c7db5ca7078b462ed9d08387 41 | stop_words.txt: 42 | bytes: 1654 43 | sha256: 
293d7841f198e4012f49e8e5653c3bfd073a58cea1259fa2c0fcec894167b628 44 | 45 | # Files needed to guess word pronunciations 46 | grapheme-to-phoneme: 47 | g2p.fst.gz: 48 | bytes: 768941 49 | file-name: g2p.fst 50 | sha256: 2ae6272a7456687fdaaead9c2d927bb723bd7395abd3dc6dd05f8f245e130b1d 51 | 52 | # Files needed for open transcription (pre-built model) 53 | open-transcription: 54 | base_language_model.txt.gz: 55 | bytes: 242973 56 | file-name: base_language_model.txt 57 | sha256: 1968e91aa9ec7311c3a4416e202c69eb79c5f4dac5714d03529e5d64fdc467c5 58 | 59 | # Files needed for mixed language modeling (pre-built + custom) 60 | mixed-language-model: 61 | base_language_model.fst.gz: 62 | bytes: 290458 63 | file-name: base_language_model.fst 64 | sha256: fcb21a75976fd6d12d91b3ec4fb6a19716cf50c8ecc618678b627a401f0a358a 65 | 66 | # Files needed for text to speech 67 | text-to-speech: 68 | espeak_phonemes.txt: 69 | bytes: 0 70 | sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 71 | marytts/marytts-lang-it-5.2.jar: 72 | bytes: 2597058 73 | sha256: 852c0e3b14fcd2461169ea8f38397f3d878cd0af06e0701857031692dad66311 74 | marytts/voice-istc-lucia-hsmm-5.2.jar: 75 | bytes: 1468104 76 | sha256: 9e7cdb8281334434fc4d9725c6182af3093f15eb9072b53d6fc19b761decf47b 77 | 78 | # Sample sentences, etc. 
79 | examples: 80 | custom_words.txt: 81 | bytes: 153 82 | sha256: 3aa283ce4f8947c498a1faa853f474e25330895b07b8b1f51eb928e2c61cec30 83 | sentences.ini: 84 | bytes: 307 85 | sha256: 4c6263e3a5a2a24031873933168a9e11b38b6072b7280d7e858b2d1448088cfa 86 | -------------------------------------------------------------------------------- /etc/profiles/ko-kr_kaldi-montreal.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Korean Kaldi model from https://montreal-forced-aligner.readthedocs.io/en/latest/pretrained_models.html" 3 | 4 | default: 5 | LICENSE: 6 | bytes: 1067 7 | sha256: 1f77beb37e2d62386c87a3b619e18a56d6f8cfc1dccf80016803612c6631384d 8 | acoustic_model/conf/mfcc.conf: 9 | bytes: 92 10 | sha256: 9837b298604f7c0a04e341cbfa6125a23dd96d672c7cab6db67a7660469f7ece 11 | acoustic_model/model/final.mdl.gz: 12 | bytes: 14629239 13 | file-name: final.mdl 14 | sha256: 472ec3cf3adc26738e729338f6b23ef153da754f753a382a5bc0d4efc6b06a3e 15 | acoustic_model/model/tree: 16 | bytes: 339960 17 | sha256: 77cf83696a7161652d2be086bbe45071f58ee80db9b36ece096affebebdd6679 18 | acoustic_model/path.sh: 19 | bytes: 34 20 | sha256: 43d1297df8223a71449a2f92ca420919edf61ca8ab1d58f94c31fe5397ba9d97 21 | acoustic_model/phones/nonsilence_phones.txt: 22 | bytes: 109 23 | sha256: 751be34169af51d77ccb936e1694b3d89aa1dc65b2463b69e5ec34fa4af02e12 24 | acoustic_model/phones/optional_silence.txt: 25 | bytes: 4 26 | sha256: b2897f4f7491c02d663ee433d54dadc3b1b920ce91ce47064e2d031c7625dde7 27 | acoustic_model/phones/silence_phones.txt: 28 | bytes: 11 29 | sha256: 63beafd4cd218ead279dce08a3e93514d657254d54435e3c2d0ba62de358bc48 30 | base_dictionary.txt.gz: 31 | bytes: 309248 32 | file-name: base_dictionary.txt 33 | sha256: 84e4febbb9e2da44c71620ac7a0eaa7a30174e5177fd45f43218e2ffb76bdb72 34 | profile.yml: 35 | bytes: 613 36 | sha256: fc7b83b8d83c26385d6cb2bb2e3e561734e5c0e956d426531f73160e33e315b0 37 | slot_programs/rhasspy/number: 38 | bytes: 651 
39 | sha256: d1f10ccea552c2c9ba2a59dd69705f8e82545592c7db5ca7078b462ed9d08387 40 | stop_words.txt: 41 | bytes: 6264 42 | sha256: 35d32449d8863aa8ca8146cd7248ffbce122d8c3f74f8a307f496bd923a5a0e2 43 | 44 | # Files needed to guess word pronunciations 45 | grapheme-to-phoneme: 46 | g2p.fst.gz: 47 | bytes: 7731489 48 | file-name: g2p.fst 49 | sha256: baeddd7a4bb38d40235aa97654b034e481709d671c8ce7de1bd89722627dbd54 50 | 51 | # Files needed for open transcription (pre-built model) 52 | open-transcription: 53 | base_language_model.txt.gz: 54 | bytes: 829493 55 | file-name: base_language_model.txt 56 | sha256: b0094ac8a061e7e202b444ff3b55d0d884f423559c93664c73e6643d57f14204 57 | acoustic_model/model/graph/HCLG.fst.gz: 58 | bytes: 23797111 59 | file-name: HCLG.fst 60 | sha256: 661a5e69142cf5538e324860d4a045bcad134bc0e38be6516f9dea103db50c7e 61 | acoustic_model/model/graph/num_pdfs: 62 | bytes: 5 63 | sha256: 1cf348eb10f33501dde7d5217d7fedd29e4ac5fc5926badb9e152e68b2080d39 64 | acoustic_model/model/graph/phones.txt: 65 | bytes: 1761 66 | sha256: 692f77583a16d1c794f34ea4c0f8cf7306a8248f5ef6356f50a3dc430b6d36cb 67 | acoustic_model/model/graph/phones/align_lexicon.int: 68 | bytes: 1313071 69 | sha256: e703f183469570273463121e495fb769ca556d442a3afda2557edd43ca55923b 70 | acoustic_model/model/graph/phones/align_lexicon.txt: 71 | bytes: 2026908 72 | sha256: 322b7b7e75ec15b77ba41bc8ef2a2a81fdae49ebe86206d556cb2d387a3dbc47 73 | acoustic_model/model/graph/phones/disambig.int: 74 | bytes: 156 75 | sha256: e3fafeadda6ae1336c546a10ea7ec07fc88b43c67b6c2033900c40cd0ea8db5a 76 | acoustic_model/model/graph/phones/disambig.txt: 77 | bytes: 146 78 | sha256: 3162b994687ed9f2a0b1938c4d86f0028f1a79072e7d26e217a216e2cdcc181e 79 | acoustic_model/model/graph/phones/optional_silence.csl: 80 | bytes: 2 81 | sha256: 4355a46b19d348dc2f57c046f8ef63d4538ebb936000f3c9ee954a27460dd865 82 | acoustic_model/model/graph/phones/optional_silence.int: 83 | bytes: 2 84 | sha256: 
4355a46b19d348dc2f57c046f8ef63d4538ebb936000f3c9ee954a27460dd865 85 | acoustic_model/model/graph/phones/optional_silence.txt: 86 | bytes: 4 87 | sha256: b2897f4f7491c02d663ee433d54dadc3b1b920ce91ce47064e2d031c7625dde7 88 | acoustic_model/model/graph/phones/silence.csl: 89 | bytes: 36 90 | sha256: 1d406213cfb0a47a154d2c517d8b7a03128cc22892d217b0ead039795c32fad4 91 | acoustic_model/model/graph/phones/word_boundary.int: 92 | bytes: 1908 93 | sha256: f289ed53cc26038ec4286ef03246ddd9999ada06c3f27636ff9f679339e5d0cb 94 | acoustic_model/model/graph/phones/word_boundary.txt: 95 | bytes: 2143 96 | sha256: c4950aa44b5b55503b156af8c3819d97c660280b6fb53e668ba8f2e9afa32128 97 | acoustic_model/model/graph/words.txt: 98 | bytes: 607790 99 | sha256: c465a3ecfdcc02bc4381699238243664fffc0d5eab4c24df6b923470c5a79cd4 100 | 101 | # Files needed for mixed language modeling (pre-built + custom) 102 | mixed-language-model: 103 | base_language_model.fst.gz: 104 | bytes: 1124895 105 | file-name: base_language_model.fst 106 | sha256: 21327e6f2ef6d27b11e6bb890d0735bae9b24aebdaf4e63e208508a7dc34bce1 107 | 108 | # Files needed for text to speech 109 | text-to-speech: 110 | espeak_phonemes.txt: 111 | bytes: 240 112 | sha256: 47d2a8a41e4cb850174052f3213791f5937d128fa3d4cb9bda64a70cfba2daa4 113 | 114 | # Sample sentences, etc. 
115 | examples: 116 | custom_words.txt: 117 | bytes: 250 118 | sha256: d2a2cb0c5cebcf5f5acb54df0509643b3adb9e0767b588619b055ff68c1a69f9 119 | sentences.ini: 120 | bytes: 300 121 | sha256: 077473fa66af6799564a2d664dbda2425ce669a382d0bf427f2d0f2d7b58b91f 122 | -------------------------------------------------------------------------------- /etc/profiles/kz_pocketsphinx-cmu.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Kazakh Pocketsphinx model" 3 | 4 | default: 5 | LICENSE: 6 | bytes: 8 7 | sha256: c80c3db2b2cb606bacf75bac6c2e4b9221e58958b5c800a6b0031b726c3f4bc1 8 | acoustic_model/feat.params: 9 | bytes: 112 10 | sha256: 44ec61d22acd9e9ba8f49fee48800a06da5139d92d3491a35d0660d8a4349838 11 | acoustic_model/mdef: 12 | bytes: 4033763 13 | sha256: 134ef8a84c2ce623ae217897b183c3f90fe3f41b557b8399a0911727db9f89c5 14 | acoustic_model/means: 15 | bytes: 433124 16 | sha256: a1a1419e2bd119eb1f45ac168cbf6edd76bc594422635574c11cad4121925515 17 | acoustic_model/mixture_weights: 18 | bytes: 11168 19 | sha256: 56dcdfdf78da7b4d03c1c96276a040ede26d76ba411c35a2d53f9ec7174fb8b6 20 | acoustic_model/noisedict: 21 | bytes: 60 22 | sha256: f9f30e70167dfb43605a1d68f86e15c0874655055e0488e34251789b668cf819 23 | acoustic_model/transition_matrices: 24 | bytes: 2416 25 | sha256: ad2b37c7e201e81a7cc4ee0b117cff4daa12fc698e405faf6a65d7175c6ea7b6 26 | acoustic_model/variances: 27 | bytes: 433124 28 | sha256: c92d76a8482b91bd4157262ba2c003771bf4a66713f938b258ad7e7d9721c53f 29 | base_dictionary.txt.gz: 30 | bytes: 79500 31 | file-name: base_dictionary.txt 32 | sha256: 5e12eb3853d97d4251462558810e0838229db7c5a15d38786c34708c37d9e878 33 | profile.yml: 34 | bytes: 395 35 | sha256: c0f0fc42aae335d1a88b6b725b4fc8d02697175a03c0897cb0adca61600cb71d 36 | slot_programs/rhasspy/number: 37 | bytes: 651 38 | sha256: d1f10ccea552c2c9ba2a59dd69705f8e82545592c7db5ca7078b462ed9d08387 39 | stop_words.txt: 40 | bytes: 3745 41 | sha256: 
ecd3b7f85444a1f59a1fc75da203f5922408f47495439e5c3b1c794c77c9b340 42 | 43 | # Files needed to guess word pronunciations 44 | grapheme-to-phoneme: 45 | g2p.fst.gz: 46 | bytes: 1045970 47 | file-name: g2p.fst 48 | sha256: 80c9f71e34dae0331842f8a314aacd89bb0b5c93988b865dbcbc3ec69c0bb2e3 49 | 50 | # Files needed for open transcription (pre-built model) 51 | open-transcription: 52 | base_language_model.txt.gz: 53 | bytes: 5296 54 | file-name: base_language_model.txt 55 | sha256: 873a589648aaa7b2979a95f0a75334ff4ec5852c81328e9bc4643ac82c728eda 56 | 57 | # Files needed for mixed language modeling (pre-built + custom) 58 | mixed-language-model: 59 | base_language_model.fst.gz: 60 | bytes: 12819 61 | file-name: base_language_model.fst 62 | sha256: a6492af354a5e508de17de3c29101cef5bce49a9ae34d200240d7eb7af6d0c39 63 | 64 | # Files needed for text to speech 65 | text-to-speech: 66 | espeak_phonemes.txt: 67 | bytes: 224 68 | sha256: 7619b5d51e081635373529a22a10ed26424bbebe22111588659c53075eb6e57f 69 | 70 | # Sample sentences, etc. 
71 | examples: 72 | custom_words.txt: 73 | bytes: 283 74 | sha256: d3e9ecc7c7b0b8733afe7e483e7717860a78e3ccbaa561967b4d4673ec4ccc56 75 | sentences.ini: 76 | bytes: 343 77 | sha256: 944b097915dedb37be75a0e56542502e2ed1e3f98b1ab3e2ef0049eed8bb69a3 78 | -------------------------------------------------------------------------------- /etc/profiles/nl_pocketsphinx-cmu.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Dutch Pocketsphinx model" 3 | 4 | default: 5 | LICENSE: 6 | bytes: 475 7 | sha256: 06b65d41d760018d20e81777cfdf2078ec64ed4cd10b85c289add974d63d2693 8 | acoustic_model/feat.params: 9 | bytes: 110 10 | sha256: 6ad725f620b3f0c1d82bba878498630e8be4fa1f0b0bf035a7fe9ffb36f5a735 11 | acoustic_model/feature_transform: 12 | bytes: 5660 13 | sha256: 494a79e7e6c490d0c5ed09d7eb4c48e9c5be3eba4b2c19526291afb8e83c9aa5 14 | acoustic_model/mdef: 15 | bytes: 8570776 16 | sha256: 3190d883beccd5aa235fb484ad1bb0a0657ccf18cfb3c9dcec2305faf56cb526 17 | acoustic_model/means: 18 | bytes: 4877636 19 | sha256: 5c44c96726cb50b85661adf298e825bcddc5a56d834d60640e20d9134a25c1fe 20 | acoustic_model/mixture_weights: 21 | bytes: 135552 22 | sha256: 98cea4bc6e5da58e318be857c810a67fb042da4594f650b2e366848a327e284d 23 | acoustic_model/noisedict: 24 | bytes: 27 25 | sha256: e5ba928c9f8950b4afce4ea84e6be0d219e878b1f926e5edcc5ca329d06eecc9 26 | acoustic_model/transition_matrices: 27 | bytes: 1936 28 | sha256: e39f7eae367ec6ab50b21339ed8a593958ad774c3cda7762a3f8af1fcc7f8b2a 29 | acoustic_model/variances: 30 | bytes: 4877636 31 | sha256: f6896f1ba10dca2a685eeb4c256d8ea35aa9168ea801438b16a22109a3bbbe99 32 | base_dictionary.txt.gz: 33 | bytes: 11462365 34 | sha256: eff8d5860a302650cb9e61175bbcc5945196b8648f6c6fbfa3fad1f9570b785e 35 | file-name: base_dictionary.txt 36 | profile.yml: 37 | bytes: 437 38 | sha256: 4fc7d9e6adc4ce05bb221bed20416de90935f2d481edae56acdbe9d76802077b 39 | slot_programs/rhasspy/number: 40 | bytes: 651 41 | sha256: 
d1f10ccea552c2c9ba2a59dd69705f8e82545592c7db5ca7078b462ed9d08387 42 | stop_words.txt: 43 | bytes: 453 44 | sha256: e5a2a7c390fe3ad0c0a132586ed11492b635d66a1f426cd89677f80b07bc76a6 45 | 46 | # Files needed to guess word pronunciations 47 | grapheme-to-phoneme: 48 | g2p.fst.gz.part-00: 49 | bytes: 26214400 50 | sha256: b696573b66cb30b2f8ec3572c07ceb645146158b29b4fd8ab6e080d332a667d5 51 | file-name: g2p.fst 52 | g2p.fst.gz.part-01: 53 | bytes: 26214400 54 | sha256: 8576bc37176b376d276a997486dba8126c06325baba4d223659d4c7b7e3dc0f4 55 | file-name: g2p.fst 56 | g2p.fst.gz.part-02: 57 | bytes: 26214400 58 | sha256: b0efb4a3738aa5a17dbed11cdd26addac5cd86a5be5e3c62ddb708c912114e23 59 | file-name: g2p.fst 60 | g2p.fst.gz.part-03: 61 | bytes: 26214400 62 | sha256: 51d191b4f930812931166bc0fac93502a05b143dee5e81f647eae11b87a66a76 63 | file-name: g2p.fst 64 | g2p.fst.gz.part-04: 65 | bytes: 13859764 66 | sha256: 3bdd797ebf4ec37372d48a59c1f06607ecbd7c3eb567e61d2a895a99925f9d53 67 | file-name: g2p.fst 68 | 69 | # Files needed for open transcription (pre-built model) 70 | open-transcription: 71 | base_language_model.txt.gz: 72 | bytes: 8318068 73 | sha256: 9120608aea8aa8fc12ca873617175474bbade00fe3df9bcb0cdd667b7beedb3b 74 | file-name: base_language_model.txt 75 | 76 | # Files needed for mixed language modeling (pre-built + custom) 77 | mixed-language-model: 78 | base_language_model.fst.gz: 79 | bytes: 9413696 80 | sha256: 3dbda0d59aa68e9bace783b46011bba8466fa42b8bed0e64be2215a19a4fd1b1 81 | file-name: base_language_model.fst 82 | 83 | # Files needed for text to speech 84 | text-to-speech: 85 | espeak_phonemes.txt: 86 | bytes: 171 87 | sha256: 2b56f471a09bf9c6ebc5f1e97c6da1131686a6e76dd41af6a92e266c1f14cbe9 88 | 89 | # Sample sentences, etc. 
90 | examples: 91 | custom_words.txt: 92 | bytes: 143 93 | sha256: 32877a68e542067944155d2b615805ae0546b9a9a909060c037f4597287b6bcc 94 | sentences.ini: 95 | bytes: 255 96 | sha256: 8d1a8aa1fe5720b4129091da3548b2b4334ab67c391570c1a938f01df45ecd91 97 | -------------------------------------------------------------------------------- /etc/profiles/pl_deepspeech-jaco.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Polish DeepSpeech v0.9 model from https://gitlab.com/Jaco-Assistant" 3 | url_format: "https://raw.githubusercontent.com/rhasspy/{profile}/76669a0a0750940e5c3d7b77c031006eb447813e/{file}" 4 | 5 | default: 6 | LICENSE: 7 | bytes: 7652 8 | sha256: e3a994d82e644b03a792a930f574002658412f62407f5fee083f2555c5f23118 9 | model/alphabet.txt: 10 | bytes: 354 11 | sha256: dce267a83ebbb71b48e75def4672af5d4d7286bcd04821e38c5caee7b19cef16 12 | model/output_graph.pbmm.gz.part-00: 13 | bytes: 26214400 14 | file-name: output_graph.pbmm 15 | platform: 16 | - machine: x86_64 17 | sha256: 9456b6c9022ff89209575b377a36c2a9004c9b3059eea2ee9701192cc0675f0a 18 | model/output_graph.pbmm.gz.part-01: 19 | bytes: 26214400 20 | file-name: output_graph.pbmm 21 | platform: 22 | - machine: x86_64 23 | sha256: 25f21b57c21d731a9e3675588c299b2538d41deabcabad21065f3ac525727934 24 | model/output_graph.pbmm.gz.part-02: 25 | bytes: 26214400 26 | file-name: output_graph.pbmm 27 | platform: 28 | - machine: x86_64 29 | sha256: f7e07e4a538033e7f62408dd28edcbc52a0251dab4ec2c4d847fff43729a42ea 30 | model/output_graph.pbmm.gz.part-03: 31 | bytes: 26214400 32 | file-name: output_graph.pbmm 33 | platform: 34 | - machine: x86_64 35 | sha256: 0c2925932c20a614d579ac2155e040c38512c0d1db4cffbe77c02be095de1adc 36 | model/output_graph.pbmm.gz.part-04: 37 | bytes: 26214400 38 | file-name: output_graph.pbmm 39 | platform: 40 | - machine: x86_64 41 | sha256: 4e03552b7f0ad2e8656a74d7ab187492b4f783f187a97a8c1de1f2e767a5863a 42 | 
model/output_graph.pbmm.gz.part-05: 43 | bytes: 26214400 44 | file-name: output_graph.pbmm 45 | platform: 46 | - machine: x86_64 47 | sha256: 6f9bde9624d972f7f93659de0c56ecc4cad00dbc726e10878c1131af6e7c1b59 48 | model/output_graph.pbmm.gz.part-06: 49 | bytes: 18128454 50 | file-name: output_graph.pbmm 51 | platform: 52 | - machine: x86_64 53 | sha256: 838a1fb6ca160bc33077cf4806f5dd0cd910833c050bba715c36cd1c2a67bd84 54 | model/output_graph.tflite.gz: 55 | bytes: 20752162 56 | file-name: output_graph.tflite 57 | platform: 58 | - machine: armv6l 59 | - machine: armv7l 60 | - machine: arm64 61 | - machine: aarch64 62 | sha256: e50504822cb058f906955cfdcecae509997e631a6aeab20245d280e3acfb750e 63 | profile.yml: 64 | bytes: 884 65 | sha256: 6a654f5c1089e5174313ace1a1dd7c8b0d000ab4efed97225c37aff4f298e036 66 | stop_words.txt: 67 | bytes: 2016 68 | sha256: cf503834e5f967116b5b351c04c29df76fc40785e09849d613bf1baf61b29215 69 | 70 | # Files needed for open transcription (pre-built model) 71 | open-transcription: 72 | model/base.scorer.gz: 73 | bytes: 3000583 74 | sha256: 26fcf7c07024a58c8fee255b94f70b9b2552e97eeb16690c25633bcfbdae70bd 75 | 76 | # Sample sentences, etc. 
77 | examples: 78 | sentences.ini: 79 | bytes: 306 80 | sha256: eb84528c4c9cfa53bd5e694b3afc591d5904d7c35e3cfcdc69e5d87b8412b11e 81 | -------------------------------------------------------------------------------- /etc/profiles/pt-br_pocketsphinx-cmu.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Portuguese model for Pocketsphinx" 3 | 4 | default: 5 | acoustic_model/feat.params: 6 | bytes: 112 7 | sha256: 44ec61d22acd9e9ba8f49fee48800a06da5139d92d3491a35d0660d8a4349838 8 | acoustic_model/mdef: 9 | bytes: 4927793 10 | sha256: 8d394659b7db658600d25f2d2be812eaf66c14314483d1d733010c923d4c60bc 11 | acoustic_model/means: 12 | bytes: 395684 13 | sha256: bb2b94f2f512c374405032e5ed1fd2ff84ad1231d0ca16d142187daab812aacf 14 | acoustic_model/mixture_weights: 15 | bytes: 10208 16 | sha256: 09fe80fe6ef7648589aa35d9f25b9b69af1a4d8dc03e9b4c8afba6d21dbb46f5 17 | acoustic_model/noisedict: 18 | bytes: 17 19 | sha256: d007e078a0e1ad3e0ae50bbe5cc066105bb2eb598b2ac6376f9adbd0f40105c8 20 | acoustic_model/transition_matrices: 21 | bytes: 1936 22 | sha256: 90983fe382076eff86fac1df6663b30e7679f7a53df9b436ac8bbcc79517da88 23 | acoustic_model/variances: 24 | bytes: 395684 25 | sha256: b51d277e8211918db6569f32841c780c763c7ea9c8b68f244753114026575966 26 | base_dictionary.txt.gz: 27 | bytes: 409151 28 | sha256: 30005834fa6bcc55c90c292aac64757debc45c72d8db5a2d4aa5e805cf927af9 29 | profile.yml: 30 | bytes: 448 31 | sha256: c64bc7c0e3aa65d671ef9558430bfa26eef3bd6ccf1d959099ca61cce9cdf862 32 | slot_programs/rhasspy/number: 33 | bytes: 651 34 | sha256: d1f10ccea552c2c9ba2a59dd69705f8e82545592c7db5ca7078b462ed9d08387 35 | stop_words.txt: 36 | bytes: 1267 37 | sha256: 5f305fac1d830620eeb9b2a68a1b96bf26f65c2e9257d56c8413d6fb36ed6cac 38 | 39 | # Files needed to guess word pronunciations 40 | grapheme-to-phoneme: 41 | g2p.fst.gz: 42 | bytes: 5537250 43 | sha256: 535c106132d9d1e00f17668be93002408d9c69a77d40ec32342d20abf156d90f 44 | 45 | # 
Files needed for open transcription (pre-built model) 46 | open-transcription: 47 | base_language_model.txt.gz: 48 | bytes: 1206006 49 | sha256: 6d6aa2b38e075b01edf8160beea2d7c3d68add73c468362b18a25e58e6853a4a 50 | 51 | # Files needed for mixed language modeling (pre-built + custom) 52 | mixed-language-model: 53 | base_language_model.fst.gz: 54 | bytes: 1576373 55 | sha256: 0d5060914b9ca3b535a6cc2623616d4756995558fa73912e7c6f22fe84aed74a 56 | 57 | # Sample sentences, etc. 58 | examples: 59 | custom_words.txt: 60 | bytes: 67 61 | sha256: 1f7bf0f944451bcf036158ea2124f9732fd3a85399157c3dc3058c93c03852a2 62 | sentences.ini: 63 | bytes: 449 64 | sha256: 76a7145fb7dfbc97425e939b62576e1c103391b64d1d49b1af7308ba9cc1d364 65 | -------------------------------------------------------------------------------- /etc/profiles/ru_pocketsphinx-cmu.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Russian Pocketsphinx model" 3 | 4 | default: 5 | LICENSE: 6 | bytes: 475 7 | sha256: 06b65d41d760018d20e81777cfdf2078ec64ed4cd10b85c289add974d63d2693 8 | acoustic_model/feat.params: 9 | bytes: 128 10 | sha256: 1f2f06f7a7400bc3a890d1944b9c3b5d768c8925a5040f43ea550ad40daf9752 11 | acoustic_model/feature_transform: 12 | bytes: 5660 13 | sha256: 46fbfd07e2cc07c852021a86b4008afe42d5acd1f0b62ef06345f21130a5d9b9 14 | acoustic_model/mdef: 15 | bytes: 14135693 16 | sha256: d86194aba8a4956efcb36d879f4dca24fc8e1aefb9f2735182b7b40453d441a4 17 | acoustic_model/means: 18 | bytes: 23717444 19 | sha256: f747d6027430d59c185be21e6ef658da6e05d186c22221682e26e69a38d9ad65 20 | acoustic_model/mixture_weights: 21 | bytes: 658880 22 | sha256: 9b73e3e6ba4ead3243b0970fea4ab4a947a9cc73365382c775c9fdcbfad25e5f 23 | acoustic_model/noisedict: 24 | bytes: 27 25 | sha256: af281d8c53f0a24ace7193e014dcb876e4e517f96b34791d5cd3e71fc69fc83b 26 | acoustic_model/transition_matrices: 27 | bytes: 2416 28 | sha256: 
b8d5dd0aa55e765f715ecf218060ab4c8f921220d0ba0f6ab851bc0d06725348 29 | acoustic_model/variances: 30 | bytes: 23717444 31 | sha256: 294fa8bf88f6371d490cbcea3f6eef49e18d6f6cbb18db69e125e54054596926 32 | base_dictionary.txt.gz: 33 | bytes: 3901055 34 | file-name: base_dictionary.txt 35 | sha256: 4843b95d233d89565c4539390e60b4f12c9a45c4b6ad84829f63ba5933236375 36 | profile.yml: 37 | bytes: 477 38 | sha256: 401ff004f28171d69bee474ada1b78d27fe20f20a398a21f5683344b7ca3d30d 39 | stop_words.txt: 40 | bytes: 1235 41 | sha256: 1743191192b4a4f77fcc216499455dc00c1b8626fdd407076a8deefff80e3d59 42 | 43 | # Files needed to guess word pronunciations 44 | grapheme-to-phoneme: 45 | g2p.fst.gz: 46 | bytes: 33185095 47 | file-name: g2p.fst 48 | sha256: 230b436f1fb7d47f2fa0339df36fa2b2eac0173f69b64d8ff3c091c044f9784b 49 | 50 | # Files needed for open transcription (pre-built model) 51 | open-transcription: 52 | base_language_model.txt.gz.part-00: 53 | bytes: 26214400 54 | file-name: base_language_model.txt 55 | sha256: 46833f7bbf708a7570ba8e43c3f736a3db2790eb829f0521280c68a531c029c2 56 | base_language_model.txt.gz.part-01: 57 | bytes: 26214400 58 | file-name: base_language_model.txt 59 | sha256: 6a29698de220e9bf667921ae424c5494a8f2e5031f22e93973fa9588db93f5ca 60 | base_language_model.txt.gz.part-02: 61 | bytes: 26214400 62 | file-name: base_language_model.txt 63 | sha256: 379bbd7fd52ff00ee4e1e96648a290e15e7bc548fcb1bf0ede427ce405dc5d1d 64 | base_language_model.txt.gz.part-03: 65 | bytes: 26214400 66 | file-name: base_language_model.txt 67 | sha256: a2efdde9df7396cc12c1fa5270eb0188c86e5b42ac253e6ab86316cbde9691d9 68 | base_language_model.txt.gz.part-04: 69 | bytes: 4015297 70 | file-name: base_language_model.txt 71 | sha256: ee05a8993f53558840f0fcb85d052bbf653a46d1b31bcd1bddda5085c8278668 72 | 73 | # Files needed for mixed language modeling (pre-built + custom) 74 | mixed-language-model: 75 | base_language_model.fst.gz.part-00: 76 | bytes: 26214400 77 | file-name: base_language_model.fst 
78 | sha256: 9c292ace6ff6de20ab09b925bd3b4d53d99a28960ae63b74d68c49085208e1bc 79 | base_language_model.fst.gz.part-01: 80 | bytes: 26214400 81 | file-name: base_language_model.fst 82 | sha256: 9ce5890127c3e5f07046c24cdc09ea8b7faad1292cbcfed99c5484774ee2327f 83 | base_language_model.fst.gz.part-02: 84 | bytes: 26214400 85 | file-name: base_language_model.fst 86 | sha256: c26f3726c89df35ea7a62461fd8aa33953bf848bc076b49c475d1255e532f677 87 | base_language_model.fst.gz.part-03: 88 | bytes: 26214400 89 | file-name: base_language_model.fst 90 | sha256: c8c8c03508d0c274a26a7d39af961b36ca623499662f13ba71c675f140202dab 91 | base_language_model.fst.gz.part-04: 92 | bytes: 26214400 93 | file-name: base_language_model.fst 94 | sha256: 858b29be5b3ce276cb3e65d85260e8b52fd261f7b7b977d0cde3007f88ab188c 95 | base_language_model.fst.gz.part-05: 96 | bytes: 4324833 97 | file-name: base_language_model.fst 98 | sha256: a950d39843d3ceaa13d95d9c0bfdd64569d73cde5bc5c08aac9dec3c07ed1d76 99 | 100 | # Files needed for text to speech 101 | text-to-speech: 102 | espeak_phonemes.txt: 103 | bytes: 233 104 | sha256: 80f5efc5c6b827fbe849dd418263b4b7f48c581dcd91d8922b687ccf09e76186 105 | marytts/marytts-lang-ru-5.2.jar: 106 | bytes: 67343 107 | sha256: 9d1befc9d1ed0074f8e03caac97c020a1dc379c4bf5d98273e75e95f35f30a89 108 | marytts/voice-ac-irina-hsmm-5.2.jar: 109 | bytes: 2454758 110 | sha256: 4da847b83b42ffbd013505845b9ad401f232f820fd64a42e781915467f94a867 111 | 112 | # Sample sentences, etc. 
113 | examples: 114 | sentences.ini: 115 | bytes: 399 116 | sha256: c1349e71641bebe1f52b81e209f1cd6a32a5fa9d3c38da47fde6e4bbc12d1520 117 | -------------------------------------------------------------------------------- /etc/profiles/zh-cn_pocketsphinx-cmu.yml: -------------------------------------------------------------------------------- 1 | --- 2 | description: "Mandarin Pocketsphinx model" 3 | 4 | default: 5 | LICENSE: 6 | bytes: 1526 7 | sha256: c7deb7e0ec6886e813b4688d7a54fc998fbb3107ed2f0fc4234ecaf04eee5d59 8 | acoustic_model/feat.params: 9 | bytes: 110 10 | sha256: 6ad725f620b3f0c1d82bba878498630e8be4fa1f0b0bf035a7fe9ffb36f5a735 11 | acoustic_model/feature_transform: 12 | bytes: 5660 13 | sha256: e3a849fff7abb021d903c7d9f704245aadf96660cdfc2dc7bc68715067ba401c 14 | acoustic_model/mdef: 15 | bytes: 18320672 16 | sha256: 1532954d9d723d351f7fd5679fa013a96176c15e2380be37ca28a26c103c07fb 17 | acoustic_model/means: 18 | bytes: 25694276 19 | sha256: 02d2c7114e06edb50923ec0c16286373b384bea75ef8a9966344395bf566b72d 20 | acoustic_model/mixture_weights: 21 | bytes: 713792 22 | sha256: 5d07f870febec71849385f44d7f197f67b3fca242b61d8f8368e98eb7b687fe8 23 | acoustic_model/noisedict: 24 | bytes: 27 25 | sha256: e5ba928c9f8950b4afce4ea84e6be0d219e878b1f926e5edcc5ca329d06eecc9 26 | acoustic_model/transition_matrices: 27 | bytes: 9280 28 | sha256: a212fc8a1a4d375553de5a9b7ec79028217a771b7036017281de6091ed253542 29 | acoustic_model/variances: 30 | bytes: 25694276 31 | sha256: 94ea1a42adbf80441565c8b866a18ce21af58e8d17fbe192d1ccb781f825ad3d 32 | base_dictionary.txt.gz: 33 | bytes: 1187984 34 | file-name: base_dictionary.txt 35 | sha256: c491a549c62c9c6dd1a084c94f8b22a7e936df191e5404d50978fb57e3d3512d 36 | profile.yml: 37 | bytes: 446 38 | sha256: 7f8248129f7af8a0df1b5f46c8be1ce78ed3a62a789d2eb3b2f64f411856a3c8 39 | slot_programs/rhasspy/number: 40 | bytes: 651 41 | sha256: d1f10ccea552c2c9ba2a59dd69705f8e82545592c7db5ca7078b462ed9d08387 42 | stop_words.txt: 43 | bytes: 0 44 | 
sha256: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 45 | 46 | # Files needed to guess word pronunciations 47 | grapheme-to-phoneme: 48 | g2p.fst.gz: 49 | bytes: 1788316 50 | file-name: g2p.fst 51 | sha256: ee10315631b9cb09a6bd4d6d790d1c79a2a0062c482595f0d7df6dc9800eeabd 52 | 53 | # Files needed for open transcription (pre-built model) 54 | open-transcription: 55 | base_language_model.txt.gz: 56 | bytes: 4684931 57 | file-name: base_language_model.txt 58 | sha256: 8c64fddccf7336390c01612fe92e3dd654b9ea2f5aa496711310504e4f98ae91 59 | 60 | # Files needed for mixed language modeling (pre-built + custom) 61 | mixed-language-model: 62 | base_language_model.fst.gz: 63 | bytes: 4712455 64 | file-name: base_language_model.fst 65 | sha256: 51d4265a72a6736832d7fa310a6316695d08a98b8d9f97f92df85164fedfd57a 66 | 67 | # Files needed for text to speech 68 | text-to-speech: 69 | espeak_phonemes.txt: 70 | bytes: 1182 71 | sha256: d085df9e92bc5c7b7d9d74b2d8cbe1d298b33e3e7da3a9b83f8bab0e380a2dfb 72 | 73 | # Sample sentences, etc. 
74 | examples: 75 | sentences.ini: 76 | bytes: 55 77 | sha256: c30b9e32ac70e1e4de81f3ceeb0d47e2302d7792f2008569d0053f3cee2e406a 78 | -------------------------------------------------------------------------------- /etc/test/hey_mycroft.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/synesthesiam/voice2json/03996c94113a615182adbbb1c83834124833579a/etc/test/hey_mycroft.wav -------------------------------------------------------------------------------- /etc/test/turn_on_living_room_lamp.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/synesthesiam/voice2json/03996c94113a615182adbbb1c83834124833579a/etc/test/turn_on_living_room_lamp.wav -------------------------------------------------------------------------------- /etc/test/what_time_is_it.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/synesthesiam/voice2json/03996c94113a615182adbbb1c83834124833579a/etc/test/what_time_is_it.wav -------------------------------------------------------------------------------- /etc/test/would_you_please_turn_on_living_room_lamp.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/synesthesiam/voice2json/03996c94113a615182adbbb1c83834124833579a/etc/test/would_you_please_turn_on_living_room_lamp.wav -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: voice2json 2 | theme: 3 | name: material 4 | 5 | nav: 6 | - Home: index.md 7 | - Installation Guide: install.md 8 | - Command Line Tools: commands.md 9 | - Template Language: sentences.md 10 | - Data Formats: formats.md 11 | - Profiles: profiles.md 12 | - Recipes: recipes.md 13 | - Whitepaper: whitepaper.md 14 | - About: 
about.md 15 | markdown_extensions: 16 | - codehilite 17 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | 3 | [mypy-aioconsole.*] 4 | ignore_missing_imports = True 5 | 6 | [mypy-aiofiles.*] 7 | ignore_missing_imports = True 8 | 9 | [mypy-jsonlines.*] 10 | ignore_missing_imports = True 11 | 12 | [mypy-networkx.*] 13 | ignore_missing_imports = True 14 | 15 | [mypy-precise_runner.*] 16 | ignore_missing_imports = True 17 | 18 | [mypy-pydash.*] 19 | ignore_missing_imports = True 20 | 21 | [mypy-tqdm.*] 22 | ignore_missing_imports = True 23 | -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | [MESSAGES CONTROL] 2 | disable= 3 | format, 4 | abstract-class-little-used, 5 | abstract-method, 6 | cyclic-import, 7 | duplicate-code, 8 | global-statement, 9 | import-outside-toplevel, 10 | inconsistent-return-statements, 11 | locally-disabled, 12 | not-context-manager, 13 | redefined-variable-type, 14 | too-few-public-methods, 15 | too-many-arguments, 16 | too-many-branches, 17 | too-many-instance-attributes, 18 | too-many-lines, 19 | too-many-locals, 20 | too-many-public-methods, 21 | too-many-return-statements, 22 | too-many-statements, 23 | too-many-boolean-expressions, 24 | unnecessary-pass, 25 | unused-argument, 26 | broad-except, 27 | too-many-nested-blocks, 28 | invalid-name 29 | 30 | [FORMAT] 31 | expected-line-ending-format=LF -------------------------------------------------------------------------------- /recipes/fluent_dataset/Fluent Speech Commands Public License.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/synesthesiam/voice2json/03996c94113a615182adbbb1c83834124833579a/recipes/fluent_dataset/Fluent Speech Commands Public 
# Evaluate voice2json on the Fluent Speech Commands test set.
#
# Pipeline: test_files.txt -> transcriptions.jsonl -> intents.jsonl -> report.json
# Requires voice2json, GNU Parallel and jq on the PATH.

# Neither target names a real file, so never let a stray file called
# "all" or "clean" short-circuit them.
.PHONY: all clean

# Remove a target whose recipe exited with an error so that a truncated
# output file is not mistaken for an up-to-date result on the next run.
.DELETE_ON_ERROR:

# Value handed to GNU Parallel's -n option in the transcription step.
jobs := 10

all: results/report.json

# Compare recognized intents against the expected ones and pretty-print the report.
results/report.json: results/intents.jsonl
	voice2json test-examples --expected results/test_truth.jsonl --actual $< | jq . > $@

# Recognize an intent for every transcription.
results/intents.jsonl: results/transcriptions.jsonl
	voice2json recognize-intent < $< > $@

# Transcribe every WAV file listed in test_files.txt; -k keeps the output
# in the same order as the input list.
results/transcriptions.jsonl: test_files.txt
	cat $< | parallel -k --pipe -n $(jobs) \
	  'voice2json transcribe-wav --stdin-files' > $@

# Remove the intermediate results (the final report is left in place).
clean:
	rm -f results/intents.jsonl results/transcriptions.jsonl
Copy the `sentences.ini` from here to `$HOME/.config/voice2json/sentences.ini` and train the profile: 18 | 19 | ```bash 20 | $ voice2json train-profile 21 | ``` 22 | 23 | Next, [download the dataset](http://www.fluent.ai/research/fluent-speech-commands/) and extract the `wavs` directory here. 24 | 25 | Finally, run `make` in this directory: 26 | 27 | ```bash 28 | $ make 29 | ``` 30 | 31 | By default, this will run 10 parallel transcription/intent recognition processes and generate `results/report.json`. The top of mine looks like this: 32 | 33 | ```json 34 | "statistics": { 35 | "num_wavs": 3793, 36 | "num_words": 16523, 37 | "num_entities": 8140, 38 | "correct_transcriptions": 958, 39 | "correct_intent_names": 3780, 40 | "correct_words": 12231, 41 | "correct_entities": 8082, 42 | "transcription_accuracy": 0.740240876354173, 43 | "intent_accuracy": 0.9965726337991037, 44 | "entity_accuracy": 0.9928746928746929, 45 | "intent_entity_accuracy": 0.9883996836277353, 46 | "average_transcription_speedup": 3.392895180720205 47 | }, 48 | ... 49 | } 50 | ``` 51 | 52 | The `intent_entity_accuracy` metric is the number of examples where the recognized intent and entities matched **exactly** divided by the total number of examples. Note that this is actually higher than the transcription accuracy (95.2%); `voice2json` can recover from some transcription errors during intent recognition. 
53 | -------------------------------------------------------------------------------- /recipes/fluent_dataset/results/report.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/synesthesiam/voice2json/03996c94113a615182adbbb1c83834124833579a/recipes/fluent_dataset/results/report.json.gz -------------------------------------------------------------------------------- /recipes/fluent_dataset/sentences.ini: -------------------------------------------------------------------------------- 1 | [ActivateDeactivate] 2 | location = (bedroom | kitchen | washroom | bathroom:washroom){location} 3 | object = (heat | lamp | lights | music){object} 4 | action = (on:activate | off:deactivate){action} 5 | verb = (turn | switch | put) 6 | 7 | # heat off 8 | # lights on in the kitchen 9 | [in [the] ] 10 | 11 | # turn on the washroom lamp 12 | [the] [()] 13 | 14 | # turn off the heat in the bedroom 15 | [the] in [the] 16 | 17 | # switch the lamp on 18 | # switch the lamp on in the bedroom 19 | [the] [in [the] ] 20 | 21 | # put the bedroom music on 22 | [the] [()] 23 | 24 | # music off 25 | # bedroom lamp on 26 | \[()] 27 | 28 | # heat in the bedroom on 29 | in [the] () 30 | 31 | # start/stop the music 32 | (start:activate | stop:deactivate){action} [the] (music){object} 33 | 34 | # play 35 | # resume the music 36 | music_action = (play:activate | resume:activate | stop:deactivate | pause:deactivate){action} 37 | () [[the] (music){object}] 38 | () (:music){object} 39 | 40 | [Bring] 41 | object = (juice | newspaper | shoes | socks){object} 42 | verb = (bring | ([go] get) | fetch) 43 | 44 | # get me my shoes 45 | # bring me some juice 46 | [me] [my] [a | some | the] 47 | 48 | # ----------------------------------------------------------------------------- 49 | 50 | [ChangeLanguage] 51 | language = (chinese | english | german | korean){object} 52 | verb = (set | change | switch) 53 | 54 | # i need to practice my german 
switch the language 55 | i need to practice [my] [ [the] language] 56 | 57 | # set my phones language to chinese 58 | \[ok now] [the | (my [phones])] [main] language [to ] 59 | 60 | # switch my device to english 61 | my [phone | device] to 62 | 63 | switch languages 64 | \[open] language settings 65 | (use | allow) [a] different language 66 | 67 | # ----------------------------------------------------------------------------- 68 | 69 | [IncreaseDecrease] 70 | object = (([audio:] volume) | sound:volume | music:volume | heat | heating:heat | temperature:heat){object} 71 | action = (up:increase | down:decrease){action} 72 | 73 | # volume up/lower 74 | 75 | (lower:decrease) 76 | 77 | # turn down the kitchen heat 78 | turn [the] [] 79 | 80 | # turn up the music in the bedroom 81 | turn [the] in [the] [] 82 | 83 | # turn the temperature in the kitchen down 84 | turn [the] in [the] [] 85 | 86 | # turn the music down in the bedroom 87 | turn [the] [in [the] ] 88 | 89 | # turn the bedroom sound up 90 | turn [the] [] 91 | 92 | # could you increase the bedroom temperature please 93 | \[could you] (increase | decrease){action} [the] [] [please] 94 | 95 | # could you increase the temperature in the kitchen please 96 | \[could you] (increase | decrease){action} [the] [in [the] ] [please] 97 | 98 | # its too loud 99 | \[its] (quiet | quieter | too loud | too high){action:decrease} (:volume){object} 100 | 101 | # its too quiet 102 | \[its] (loud | louder | too quiet | too low){action:increase} (:volume){object} 103 | 104 | # make it louder 105 | make [it] (loud | louder){action:increase} (:volume){object} 106 | \[thats | its] too (quiet){action:increase} (:volume){object} 107 | 108 | # make it softer 109 | make [it] (quiet | quieter | softer){action:decrease} (:volume){object} 110 | \[thats | its] too (loud){action:decrease} (:volume){object} 111 | 112 | # lower the volume 113 | (lower | reduce | mute){action:decrease} [the] (volume){object} 114 | 115 | # i cant hear that 116 | i 
cant (hear){action:increase} (that){object:volume} 117 | 118 | # volume max 119 | (volume){object} (max){action:increase} 120 | 121 | # make it hotter/colder 122 | make [it] (hot | hotter){action:increase} (:heat){object} 123 | make [it] (cold | colder | cooler){action:decrease} (:heat){object} 124 | 125 | # more/less heat 126 | (more:increase){action} (heat){object} 127 | (less:decrease){action} (heat){object} 128 | 129 | # kitchen volume down 130 | \[] 131 | 132 | # volume mute 133 | (volume){object} (mute){action:decrease} 134 | 135 | # i need volume 136 | i need (volume){object} (:increase){action} 137 | 138 | # turn it down 139 | turn (it){object:volume} 140 | 141 | # ----------------------------------------------------------------------------- 142 | 143 | # Action 144 | # ----- 145 | # activate 146 | # bring 147 | # change language 148 | # deactivate 149 | # decrease 150 | # increase 151 | 152 | # Object 153 | # ------ 154 | # Chinese 155 | # English 156 | # German 157 | # heat 158 | # juice 159 | # Korean 160 | # lamp 161 | # lights 162 | # music 163 | # newspaper 164 | # none 165 | # shoes 166 | # socks 167 | # volume 168 | 169 | # Location 170 | #--------- 171 | # bedroom 172 | # kitchen 173 | # none 174 | # washroom -------------------------------------------------------------------------------- /recipes/launch_program/README.md: -------------------------------------------------------------------------------- 1 | # Launch a Program via Voice 2 | 3 | Listens for a wake word, then launches a program based on a voice command. 4 | 5 | ## Setup 6 | 7 | Once you have `voice2json` installed and a profile downloaded, copy `sentences.ini` and the `slots` directory into your profile directory (probably `$HOME/.config/voice2json`). Depending on whether you're using a Pocketsphinx or Kaldi-based profile, copy `custom_words.pocketsphinx.txt` or `custom_words.kaldi.txt` (respectively) to `custom_words.txt` in your profile. 
#!/usr/bin/env bash
# Listen for a wake word, record a voice command and launch the program
# named in the recognized LaunchProgram intent.
#
# Requires voice2json, aplay and jq on the PATH.
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Train profile first
voice2json train-profile

# Use a temporary WAV file for recorded command.
# Clean up when this script is interrupted or terminated.
temp_wav="$(mktemp)"
function finish {
    rm -rf "${temp_wav}"
    exit 0
}

trap finish SIGTERM SIGINT

# ----------

while true;
do
    # Wait until the wake word has been spoken, then exit
    echo 'Waiting for wake word...'
    voice2json wait-wake --exit-count 1

    # Play a sound to tell the user we're recording
    aplay "${this_dir}/beep_hi.wav"

    # Record voice command until silence
    echo 'Recording voice command...'
    voice2json record-command > "${temp_wav}"

    # Play a sound to tell the user we're done recording
    aplay "${this_dir}/beep_lo.wav" &

    # 1. Transcribe the WAV file.
    # 2. Recognize the intent from the transcription.
    # 3. Extract the name of the program to launch
    echo 'Recognizing intent...'
    voice2json transcribe-wav "${temp_wav}" | \
        voice2json recognize-intent | \
        while read -r intent_json;
        do
            echo "${intent_json}"

            # Verify intent is LaunchProgram
            intent_name="$(echo "${intent_json}" | jq -r .intent.name)"
            if [[ "${intent_name}" == 'LaunchProgram' ]]; then

                # Extract "program" slot value.
                # "// empty" makes jq print nothing when the slot is missing;
                # a plain ".slots.program" would print the literal string
                # "null", which is non-empty and would be launched as
                # /usr/bin/null.
                program="$(echo "${intent_json}" | jq -r '.slots.program // empty')"
                if [[ -n "${program}" ]]; then
                    # Run the program.
                    # For simplicity, we assume it's the name of a binary in /usr/bin.
                    program_exe="/usr/bin/${program}"
                    echo "Running ${program_exe}"

                    # Detach the process from this terminal, so it will keep running.
                    nohup "${program_exe}" &
                else
                    echo 'No program was found in intent'
                fi
            else
                echo "Expected LaunchProgram intent, got ${intent_name}"
            fi
        done
done
4 | 5 | ## Setup 6 | 7 | This recipe assumes you have the `mosquitto_sub` and `mosquitto_pub` commands available. They can be installed with: 8 | 9 | ```bash 10 | $ sudo apt-get install mosquitto-clients 11 | ``` 12 | 13 | Once you have `voice2json` installed and a profile downloaded, copy `sentences.ini` into your profile directory (probably `$HOME/.config/voice2json`). Make sure to back up your profile first if you've done any customization! 14 | 15 | Next, run the `listen_timer.sh` script: 16 | 17 | ```bash 18 | $ ./listen_timer.sh 19 | ``` 20 | 21 | The script is waiting for an MQTT message on the `timer/wake-up` topic. You could send this from a [Node-RED](https://nodered.org) flow or other IoT software. For now, we'll just use `mosquitto_pub`. 22 | 23 | From a terminal, run: 24 | 25 | ```bash 26 | $ mosquitto_pub -t 'timer/wake-up' -m '' 27 | ``` 28 | 29 | You should hear a beep from `listen_timer.sh`. Now say a command like "set a timer for five seconds". After 5 seconds, you should hear an alarm sound (three short beeps). An MQTT message should also have been published to `timer/alarm`. 
#!/usr/bin/env python3
"""Wait out timers described by intents read from standard input.

Each non-blank input line is a JSON object with a "slots" mapping that may
contain "hours", "minutes" and "seconds".  For every such line the script
prints how long it is going to wait and then sleeps for that long.
"""
import json
import sys
import time
from typing import Callable, Iterable, Optional


def total_seconds(intent: dict) -> int:
    """Return the timer length in seconds described by *intent*.

    Missing "hours", "minutes" or "seconds" slots count as zero.  Slot values
    are passed through ``int()``, so numeric strings are accepted as well.
    """
    slots = intent["slots"]
    hours = int(slots.get("hours", 0))
    minutes = int(slots.get("minutes", 0))
    seconds = int(slots.get("seconds", 0))
    return (hours * 60 * 60) + (minutes * 60) + seconds


def main(
    lines: Optional[Iterable[str]] = None,
    sleep: Callable[[float], None] = time.sleep,
) -> None:
    """Process one JSON intent per line, waiting for each timer in turn.

    Args:
        lines: Source of JSON lines; defaults to standard input.
        sleep: Function called with the number of seconds to wait.
    """
    if lines is None:
        lines = sys.stdin

    for line in lines:
        # A stray blank line is not an intent; skip it instead of letting
        # json.loads raise.
        if not line.strip():
            continue

        wait = total_seconds(json.loads(line))

        # Flush so the message is visible before the (possibly long) sleep
        # even when stdout is not a terminal.
        print(f"Waiting for {wait} second(s)", flush=True)
        sleep(wait)


if __name__ == "__main__":
    main()
#!/usr/bin/env bash
# Voice-driven timer loop: wait for an MQTT wake-up message, record a spoken
# command, and for a SetTimer intent run the timer, publish the intent to
# timer/alarm and play an alarm sound.
here="$( cd "$( dirname "$0" )" && pwd )"

# The profile has to be trained before anything can be recognized.
voice2json train-profile

# Scratch file for the most recent recording; removed on SIGTERM/SIGINT.
recording="$(mktemp)"
cleanup() {
    rm -rf "${recording}"
    exit 0
}

trap cleanup SIGTERM SIGINT

# ----------

while true; do
    # Block until a message arrives on the wake-up topic
    echo 'Waiting for MQTT message on timer/wake-up'
    mosquitto_sub -t 'timer/wake-up' -C 1

    # High beep: recording starts now
    aplay "${here}/beep_hi.wav"

    # Capture the spoken command (ends on silence)
    echo 'Recording voice command...'
    voice2json record-command > "${recording}"

    # Low beep in the background: recording is over
    aplay "${here}/beep_lo.wav" &

    # Transcribe -> show the transcription text on stderr -> recognize the
    # intent -> handle every recognized intent line.
    echo 'Recognizing intent...'
    voice2json transcribe-wav "${recording}" \
        | tee >(jq --raw-output '.text' > /dev/stderr) \
        | voice2json recognize-intent \
        | while read -r intent_line; do
            echo "${intent_line}"

            # Anything other than SetTimer is ignored
            detected="$(jq -r .intent.name <<< "${intent_line}")"
            [[ "${detected}" == 'SetTimer' ]] || continue

            # Sleep for the requested duration
            python3 "${here}/do_timer.py" <<< "${intent_line}"

            # Tell MQTT listeners that the timer has elapsed
            mosquitto_pub -t 'timer/alarm' -m "${intent_line}"

            # Audible alarm
            aplay "${here}/alarm.wav"
        done
done
(second | seconds) 5 | 6 | time_expr = ((<hour_expr> [[and] <minute_expr>] [[and] <second_expr>]) | (<minute_expr> [[and] <second_expr>]) | <second_expr>) 7 | 8 | set [a] timer for <time_expr> 9 | -------------------------------------------------------------------------------- /recipes/train_rasa/README.md: -------------------------------------------------------------------------------- 1 | # Train a Rasa NLU Bot 2 | 3 | Creates a basic [Rasa NLU assistant](https://rasa.com/docs/rasa/nlu/about/) using examples generated from `voice2json`. 4 | 5 | ## Setup 6 | 7 | This recipe assumes you have [Docker](https://docker.com/) installed. If you don't, please follow the [Docker installation instructions](https://docs.docker.com/install/). 8 | 9 | Once you have `voice2json` installed and a profile downloaded, copy `sentences.ini` into your profile directory (probably `$HOME/.config/voice2json`). Make sure to back up your profile first if you've done any customization! 10 | 11 | Next, run the `train.sh` script: 12 | 13 | ```bash 14 | $ ./train.sh 15 | ``` 16 | 17 | This script generates 5,000 random examples and converts them to Rasa NLU's [Markdown training data format](https://rasa.com/docs/rasa/nlu/training-data-format/#id5). An assistant is then trained using the [pretrained_embeddings_spacy pipeline](https://rasa.com/docs/rasa/nlu/choosing-a-pipeline/#id7). 18 | 19 | If all goes well, you should next run the `recognize.sh` script: 20 | 21 | ```bash 22 | $ ./recognize.sh 23 | ``` 24 | 25 | This will start a shell where you can type in sentences and see the JSON output. If you want to recognize intents remotely, you should use Rasa's [HTTP Server](https://rasa.com/docs/rasa/user-guide/running-the-server/). 
26 | 27 | ```bash 28 | $ ./rasa run -m models --enable-api 29 | ``` 30 | 31 | With that running, you can `POST` some JSON to port 5005 and get a JSON response: 32 | 33 | ```bash 34 | $ curl -X POST -d '{ "text": "turn on the living room lamp" }' localhost:5005/model/parse 35 | ``` 36 | 37 | You can easily combine this with `voice2json`: 38 | 39 | ```bash 40 | $ voice2json transcribe-wav \ 41 | ../../etc/test/turn_on_living_room_lamp.wav | \ 42 | curl -X POST -d @- localhost:5005/model/parse 43 | ``` 44 | -------------------------------------------------------------------------------- /recipes/train_rasa/config.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: "pretrained_embeddings_spacy" 4 | -------------------------------------------------------------------------------- /recipes/train_rasa/examples_to_rasa.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | import json 4 | from collections import defaultdict 5 | 6 | examples_by_intent = defaultdict(list) 7 | 8 | # Gather all examples by intent name 9 | for line in sys.stdin: 10 | example = json.loads(line) 11 | intent_name = example["intent"]["name"] 12 | examples_by_intent[intent_name].append(example) 13 | 14 | # Write data in RasaNLU markdown training format 15 | for intent_name, examples in examples_by_intent.items(): 16 | print(f"## intent:{intent_name}") 17 | 18 | for example in examples: 19 | # Create mapping from start/stop character indexes to example entities 20 | entities_by_start = {e["raw_start"]: e for e in example["entities"]} 21 | entities_by_end = {e["raw_end"]: e for e in example["entities"]} 22 | 23 | # Current character index 24 | char_idx = 0 25 | 26 | # Final list of tokens that will be printed for the example 27 | tokens_to_print = [] 28 | 29 | # Current entity 30 | entity = None 31 | 32 | # Tokens that belong to the current entity 33 | 
entity_tokens = [] 34 | 35 | # Process "raw" tokens without substitutions 36 | for token in example["raw_tokens"]: 37 | if char_idx in entities_by_start: 38 | # Start entity 39 | entity = entities_by_start[char_idx] 40 | entity_tokens = [] 41 | 42 | if entity is None: 43 | # Use token as-is 44 | tokens_to_print.append(token) 45 | else: 46 | # Accumulate into entity token list 47 | entity_tokens.append(token) 48 | 49 | # Advance character index in raw text 50 | char_idx += len(token) + 1 # space 51 | 52 | if (char_idx - 1) in entities_by_end: 53 | # Finish entity 54 | entity_str = entity["entity"] 55 | if entity["value"] != entity["raw_value"]: 56 | # Include substitution 57 | entity_str += f":{entity['value']}" 58 | 59 | # Create Markdown-style entity 60 | token_str = "[" + " ".join(entity_tokens) + f"]({entity_str})" 61 | tokens_to_print.append(token_str) 62 | entity = None 63 | 64 | # Print example 65 | print("-", " ".join(tokens_to_print)) 66 | 67 | # Blank line between intents 68 | print("") 69 | -------------------------------------------------------------------------------- /recipes/train_rasa/rasa: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker run -it -v "$(pwd):/app" -p 5005:5005 rasa/rasa:latest-spacy-en "$@" 3 | 4 | -------------------------------------------------------------------------------- /recipes/train_rasa/recognize.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | if [[ -z "$(which docker)" ]]; then 5 | echo "You will need Docker to run this example" 6 | exit 1 7 | fi 8 | 9 | echo "Loading. Please wait..." 
10 | bash rasa shell nlu 11 | -------------------------------------------------------------------------------- /recipes/train_rasa/sentences.ini: -------------------------------------------------------------------------------- 1 | [GetTime] 2 | what time is it 3 | tell me the time 4 | 5 | [GetTemperature] 6 | whats the temperature 7 | how (hot | cold) is it 8 | 9 | [GetGarageState] 10 | is the garage door (open | closed) 11 | 12 | [ChangeLightState] 13 | light_name = ((living room lamp | garage light) {name}) | 14 | light_state = (on | off) {state} 15 | 16 | turn [the] 17 | turn [the] 18 | 19 | [ChangeLightColor] 20 | light_name = (bedroom light) {name} 21 | color = (red | green | blue) {color} 22 | 23 | set [the] [to] 24 | make [the] 25 | -------------------------------------------------------------------------------- /recipes/train_rasa/train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | if [[ -z "$(which docker)" ]]; then 5 | echo "You will need Docker to run this example" 6 | exit 1 7 | fi 8 | 9 | # Number of examples to generate for training. 10 | # More is better. 11 | num_examples=5000 12 | 13 | # Ensure profile has been trained 14 | voice2json train-profile 15 | 16 | # Generate training examples 17 | echo "Generating ${num_examples} example(s)..." 
18 | mkdir -p data 19 | voice2json generate-examples \ 20 | --number "${num_examples}" | \ 21 | python3 examples_to_rasa.py \ 22 | > data/training-data.md 23 | 24 | # Train a Rasa NLU bot 25 | mkdir -p models 26 | bash rasa train nlu \ 27 | --verbose 28 | 29 | echo "Done" 30 | 31 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aioconsole==0.1.16 2 | aiofiles==0.4.0 3 | aiohttp==3.7.4 4 | jsonlines==1.2.0 5 | networkx==2.4 6 | pydash==4.7.6 7 | pyyaml==5.4 8 | rhasspy-asr-deepspeech~=0.4.0 9 | rhasspy-asr-kaldi~=0.6.0 10 | rhasspy-asr-pocketsphinx~=0.3.0 11 | rhasspy-nlu~=0.3.0 12 | rhasspy-silence~=0.4.0 13 | tqdm~=4.45.0 14 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | black==19.10b0 2 | coverage==5.0.4 3 | flake8==3.7.9 4 | mkdocs>=1.1 5 | mkdocs-material==5.1.1 6 | mypy==0.910 7 | pylint==2.10.2 8 | pytest==5.4.1 9 | pytest-cov==2.8.1 10 | -------------------------------------------------------------------------------- /scripts/build-debian.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | if [[ -z "$1" ]]; then 5 | echo "Usage: build-debian dist/" 6 | exit 1 7 | fi 8 | 9 | dist_dir="$(realpath "$1")" 10 | mkdir -p "${dist_dir}" 11 | shift 12 | 13 | # Directory of *this* script 14 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 15 | src_dir="$(realpath "${this_dir}/..")" 16 | 17 | version="$(cat "${src_dir}/VERSION")" 18 | out_version='2.1' 19 | 20 | # ----------------------------------------------------------------------------- 21 | 22 | : "${PLATFORMS=linux/amd64,linux/arm/v7,linux/arm64}" 23 | 24 | DOCKERFILE="${src_dir}/Dockerfile.debian" 25 | 26 | if [[ -z "${NO_PROXY}" ]]; then 27 | export 
PROXY_IP="$(hostname -I | awk '{print $1}')" 28 | export PROXY_PORT=3142 29 | export PROXY="${PROXY_IP}:${PROXY_PORT}" 30 | export PYPI_PORT=4000 31 | export PYPI="${PROXY_IP}:${PYPI_PORT}" 32 | export PYPI_HOST="${PROXY_IP}" 33 | 34 | # Use temporary file instead 35 | temp_dockerfile="$(mktemp -p "${src_dir}")" 36 | function cleanup { 37 | rm -f "${temp_dockerfile}" 38 | } 39 | 40 | trap cleanup EXIT 41 | 42 | # Run through pre-processor to replace variables 43 | "${src_dir}/docker/preprocess.sh" < "${DOCKERFILE}" > "${temp_dockerfile}" 44 | DOCKERFILE="${temp_dockerfile}" 45 | fi 46 | 47 | # ------------------------------------------------------------------------------ 48 | 49 | rm -f "${dist_dir}/voice2json_"*.deb 50 | 51 | echo "Building..." 52 | docker buildx build \ 53 | "${src_dir}" \ 54 | -f "${DOCKERFILE}" \ 55 | "--platform=${PLATFORMS}" \ 56 | --output "type=local,dest=${dist_dir}" 57 | 58 | # Manually copy out 59 | in_amd64="${dist_dir}/linux_amd64/voice2json_${version}_amd64.deb" 60 | out_amd64="${dist_dir}/voice2json_${out_version}_amd64.deb" 61 | if [[ -f "${in_amd64}" ]]; then 62 | cp "${in_amd64}" "${out_amd64}" 63 | fi 64 | 65 | in_armhf="${dist_dir}/linux_arm_v7/voice2json_${version}_armhf.deb" 66 | out_armhf="${dist_dir}/voice2json_${out_version}_armhf.deb" 67 | if [[ -f "${in_armhf}" ]]; then 68 | cp "${in_armhf}" "${out_armhf}" 69 | fi 70 | 71 | in_arm64="${dist_dir}/linux_arm64/voice2json_${version}_arm64.deb" 72 | out_arm64="${dist_dir}/voice2json_${out_version}_arm64.deb" 73 | if [[ -f "${in_arm64}" ]]; then 74 | cp "${in_arm64}" "${out_arm64}" 75 | fi 76 | 77 | in_armel="${dist_dir}/linux_arm_v6/voice2json_${version}_armhf.deb" 78 | out_armel="${dist_dir}/voice2json_${out_version}_armel.deb" 79 | if [[ -f "${in_armel}" ]]; then 80 | cp "${in_armel}" "${out_armel}" 81 | fi 82 | -------------------------------------------------------------------------------- /scripts/build-docker.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | # Directory of *this* script 5 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 6 | src_dir="$(realpath "${this_dir}/..")" 7 | 8 | version="$(cat "${src_dir}/VERSION")" 9 | 10 | # ----------------------------------------------------------------------------- 11 | 12 | : "${PLATFORMS=linux/amd64,linux/arm/v7,linux/arm64}" 13 | : "${DOCKER_REGISTRY=docker.io}" 14 | 15 | DOCKERFILE="${src_dir}/Dockerfile" 16 | 17 | docker buildx build \ 18 | "${src_dir}" \ 19 | -f "${DOCKERFILE}" \ 20 | "--platform=${PLATFORMS}" \ 21 | --tag "${DOCKER_REGISTRY}/synesthesiam/voice2json:latest" \ 22 | --push \ 23 | "$@" 24 | -------------------------------------------------------------------------------- /scripts/build-docs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | # Directory of *this* script 5 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 6 | src_dir="$(realpath "${this_dir}/..")" 7 | 8 | venv="${src_dir}/.venv" 9 | if [[ -d "${venv}" ]]; then 10 | echo "Using virtual environment at ${venv}" 11 | source "${venv}/bin/activate" 12 | fi 13 | 14 | # ----------------------------------------------------------------------------- 15 | 16 | docs_dir="${src_dir}/site" 17 | mkdocs build --site-dir "${docs_dir}" 18 | 19 | # ----------------------------------------------------------------------------- 20 | 21 | echo "OK" 22 | -------------------------------------------------------------------------------- /scripts/build/build-julius.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | : "${MAKE_THREADS=4}" 4 | 5 | if [[ -z "$3" ]]; then 6 | echo "Usage: build-julius julius.tar.gz build/ output.tar.gz" 7 | exit 1 8 | fi 9 | 10 | julius_src="$(realpath "$1")" 11 | build_dir="$(realpath "$2")" 12 | output_file="$(realpath 
"$3")" 13 | 14 | # Julius 15 | julius_build="${build_dir}/julius" 16 | echo "Building Julius in ${julius_build} from ${julius_src}" 17 | mkdir -p "${julius_build}" 18 | tar -C "${julius_build}" --strip-components=1 -xf "${julius_src}" 19 | cd "${julius_build}" && \ 20 | mkdir -p build && \ 21 | ./configure "--prefix=${julius_build}/build" && \ 22 | make -j "${MAKE_THREADS}" && \ 23 | make install 24 | 25 | # Strip binaries 26 | echo "Tar-ing binary files to ${output_file}" 27 | cd "${julius_build}/build/bin" && \ 28 | (strip --strip-unneeded -- * || true) && \ 29 | tar -czf "${output_file}" -- * 30 | 31 | -------------------------------------------------------------------------------- /scripts/build/build-kaldi.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | : "${MAKE_THREADS=4}" 4 | 5 | if [[ -z "$3" ]]; then 6 | echo "Usage: build-kaldi kaldi.tar.gz build/ output.tar.gz" 7 | exit 1 8 | fi 9 | 10 | kaldi_src="$(realpath "$1")" 11 | build_dir="$(realpath "$2")" 12 | output_file="$(realpath "$3")" 13 | 14 | # Kaldi 15 | kaldi_build="${build_dir}/kaldi" 16 | echo "Building Kaldi in ${kaldi_build} from ${kaldi_src}" 17 | mkdir -p "${kaldi_build}" 18 | tar -C "${kaldi_build}" --strip-components=1 -xf "${kaldi_src}" 19 | 20 | cd "${kaldi_build}/tools" && \ 21 | make -j "${MAKE_THREADS}" 22 | 23 | cd "${kaldi_build}/src" && \ 24 | ./configure --shared --mathlib=ATLAS --use-cuda=no 25 | 26 | # Fix things for aarch64 (arm64v8) 27 | if [ "$(uname --m)" = "aarch64" ]; then 28 | sed -i 's/-msse -msse2/-ftree-vectorize/g' "${kaldi_build}/src/kaldi.mk" 29 | fi 30 | 31 | cd "${kaldi_build}/src" && \ 32 | make depend -j "${MAKE_THREADS}" && \ 33 | make -j "${MAKE_THREADS}" 34 | 35 | # Create dist 36 | dist_dir="${kaldi_build}/dist" 37 | mkdir -p "${dist_dir}/kaldi/egs" && \ 38 | cp -R "${kaldi_build}/egs/wsj" "${dist_dir}/kaldi/egs/" && \ 39 | rsync -av --exclude='*.o' --exclude='*.cc' 
"${kaldi_build}/src/bin/" "${dist_dir}/kaldi/" && \ 40 | cp "${kaldi_build}/src/lib"/*.so* "${dist_dir}/kaldi/" && \ 41 | rsync -av --include='*.so*' --include='fst' --exclude='*' "${kaldi_build}/tools/openfst/lib/" "${dist_dir}/kaldi/" && \ 42 | cp "${kaldi_build}/tools/openfst/bin/"* "${dist_dir}/kaldi/" 43 | 44 | # Fix rpaths 45 | # shellcheck disable=SC2016 46 | find "${dist_dir}/kaldi/" -type f -exec patchelf --set-rpath '$ORIGIN' {} \; 47 | 48 | # Strip binaries 49 | echo "Tar-ing binary files to ${output_file}" 50 | cd "${dist_dir}" && \ 51 | (strip --strip-unneeded kaldi/* || true) && \ 52 | tar -czf "${output_file}" kaldi 53 | 54 | -------------------------------------------------------------------------------- /scripts/build/build-kenlm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | : "${MAKE_THREADS=4}" 4 | 5 | if [[ -z "$4" ]]; then 6 | echo "Usage: build-kenlm kenlm.tar.gz eigen.tar.gz build/ output.tar.gz" 7 | exit 1 8 | fi 9 | 10 | kenlm_src="$(realpath "$1")" 11 | eigen_src="$(realpath "$2")" 12 | build_dir="$(realpath "$3")" 13 | output_file="$(realpath "$4")" 14 | 15 | # Eigen 16 | eigen_build="${build_dir}/eigen" 17 | echo "Building eigen in ${eigen_build} from ${eigen_src}" 18 | mkdir -p "${eigen_build}" 19 | tar -C "${eigen_build}" --strip-components=1 -xf "${eigen_src}" 20 | cd "${eigen_build}" && \ 21 | mkdir -p build && \ 22 | cd build && \ 23 | cmake .. && \ 24 | make -j "${MAKE_THREADS}" 25 | 26 | # KenLM 27 | kenlm_build="${build_dir}/kenlm" 28 | echo "Building KenLM in ${kenlm_build} from ${kenlm_src}" 29 | mkdir -p "${kenlm_build}" 30 | tar -C "${kenlm_build}" --strip-components=1 -xf "${kenlm_src}" 31 | cd "${kenlm_build}" && \ 32 | mkdir -p build && \ 33 | cd build && \ 34 | cmake .. 
&& \ 35 | make -j "${MAKE_THREADS}" 36 | 37 | # Strip binaries 38 | echo "Tar-ing binary files to ${output_file}" 39 | cd "${kenlm_build}/build/bin" && \ 40 | (strip --strip-unneeded -- * || true) && \ 41 | tar -czf "${output_file}" -- * 42 | 43 | -------------------------------------------------------------------------------- /scripts/build/build-opengrm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | : "${MAKE_THREADS=4}" 4 | 5 | if [[ -z "$4" ]]; then 6 | echo "Usage: build-opengrm opengrm.tar.gz openfst.tar.gz build/ output.tar.gz" 7 | exit 1 8 | fi 9 | 10 | opengrm_src="$(realpath "$1")" 11 | openfst_src="$(realpath "$2")" 12 | build_dir="$(realpath "$3")" 13 | output_file="$(realpath "$4")" 14 | 15 | # OpenFST 16 | openfst_build="${build_dir}/opengrm/openfst" 17 | echo "Building OpenFST in ${openfst_build} from ${openfst_src}" 18 | mkdir -p "${openfst_build}" 19 | tar -C "${openfst_build}" --strip-components=1 -xf "${openfst_src}" 20 | cd "${openfst_build}" && \ 21 | mkdir -p build && \ 22 | ./configure "--prefix=${openfst_build}/build" \ 23 | --enable-static=no \ 24 | --enable-far && \ 25 | make -j "${MAKE_THREADS}" && \ 26 | make install 27 | 28 | # Opengrm 29 | opengrm_build="${build_dir}/opengrm" 30 | echo "Building Opengrm in ${opengrm_build} from ${opengrm_src}" 31 | mkdir -p "${opengrm_build}" 32 | tar -C "${opengrm_build}" --strip-components=1 -xf "${opengrm_src}" 33 | cd "${opengrm_build}" && \ 34 | mkdir -p build && \ 35 | CXXFLAGS="-I${openfst_build}/build/include" LDFLAGS="-L${openfst_build}/build/lib" ./configure "--prefix=${opengrm_build}/build" && \ 36 | make -j "${MAKE_THREADS}" && \ 37 | make install 38 | 39 | # Strip binaries 40 | echo "Tar-ing binary files to ${output_file}" 41 | cd "${opengrm_build}/build" && \ 42 | cp "${openfst_build}/build/bin"/* bin/ && 43 | cp "${openfst_build}/build/lib"/*.so* lib/ && 44 | rm -f lib/*.a lib/fst/*.a && \ 45 | (strip 
--strip-unneeded -- bin/* lib/* lib/fst/* || true) && \ 46 | tar -czf "${output_file}" -- * 47 | 48 | -------------------------------------------------------------------------------- /scripts/build/build-phonetisaurus.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | : "${MAKE_THREADS=4}" 4 | 5 | if [[ -z "$4" ]]; then 6 | echo "Usage: build-phonetisaurus phonetisaurus.tar.gz openfst.tar.gz build/ output.tar.gz" 7 | exit 1 8 | fi 9 | 10 | phonetisaurus_src="$(realpath "$1")" 11 | openfst_src="$(realpath "$2")" 12 | build_dir="$(realpath "$3")" 13 | output_file="$(realpath "$4")" 14 | 15 | # OpenFST 16 | openfst_build="${build_dir}/phonetisaurus/openfst" 17 | echo "Building OpenFST in ${openfst_build} from ${openfst_src}" 18 | mkdir -p "${openfst_build}" 19 | tar -C "${openfst_build}" --strip-components=1 -xf "${openfst_src}" 20 | cd "${openfst_build}" && \ 21 | mkdir -p build && \ 22 | ./configure "--prefix=${openfst_build}/build" \ 23 | --enable-static \ 24 | --enable-shared \ 25 | --enable-far \ 26 | --enable-ngram-fsts && \ 27 | make -j "${MAKE_THREADS}" && \ 28 | make install 29 | 30 | # Phonetisaurus 31 | phonetisaurus_build="${build_dir}/phonetisaurus" 32 | echo "Building Phonetisaurus in ${phonetisaurus_build} from ${phonetisaurus_src}" 33 | mkdir -p "${phonetisaurus_build}" 34 | tar -C "${phonetisaurus_build}" --strip-components=1 -xf "${phonetisaurus_src}" 35 | cd "${phonetisaurus_build}" && \ 36 | mkdir -p build && \ 37 | ./configure "--prefix=${phonetisaurus_build}/build" \ 38 | "--with-openfst-includes=${openfst_build}/build/include" \ 39 | "--with-openfst-libs=${openfst_build}/build/lib" && \ 40 | make -j "${MAKE_THREADS}" && \ 41 | make install 42 | 43 | # Strip binaries 44 | echo "Tar-ing binary files to ${output_file}" 45 | cd "${phonetisaurus_build}/build" && \ 46 | mkdir -p bin lib && \ 47 | cp "${openfst_build}/build/bin"/* bin/ && 48 | cp "${openfst_build}/build/lib"/*.so* 
lib/ && 49 | rm -f lib/*.a lib/fst/*.a && \ 50 | (strip --strip-unneeded -- bin/* lib/* || true) && \ 51 | tar -czf "${output_file}" -- * 52 | 53 | -------------------------------------------------------------------------------- /scripts/check-code.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | # Directory of *this* script 5 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 6 | src_dir="$(realpath "${this_dir}/..")" 7 | 8 | venv="${src_dir}/.venv" 9 | if [[ -d "${venv}" ]]; then 10 | source "${venv}/bin/activate" 11 | fi 12 | 13 | code_dir="${src_dir}/voice2json" 14 | 15 | # ----------------------------------------------------------------------------- 16 | 17 | flake8 "${code_dir}"/*.py 18 | pylint "${code_dir}"/*.py 19 | mypy "${code_dir}"/*.py 20 | black --check "${code_dir}" 21 | isort --check-only "${code_dir}"/*.py 22 | 23 | # ----------------------------------------------------------------------------- 24 | 25 | echo "OK" 26 | -------------------------------------------------------------------------------- /scripts/create-venv.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | if [[ -z "${PIP_INSTALL}" ]]; then 5 | PIP_INSTALL='install --upgrade' 6 | fi 7 | 8 | # Directory of *this* script 9 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 10 | src_dir="$(realpath "${this_dir}/..")" 11 | 12 | # ----------------------------------------------------------------------------- 13 | 14 | target_arch="$1" 15 | if [[ -z "${target_arch}" ]]; then 16 | target_arch="$(bash "${src_dir}/architecture.sh")" 17 | fi 18 | 19 | if [[ -z "${venv}" ]]; then 20 | venv="${src_dir}/.venv" 21 | fi 22 | 23 | if [[ -z "${download}" ]]; then 24 | download="${src_dir}/download" 25 | fi 26 | 27 | # ----------------------------------------------------------------------------- 28 | 29 | if [[ ! 
-d "${venv}" ]]; then 30 | # Create virtual environment 31 | echo "Creating virtual environment at ${venv}" 32 | python3 -m venv "${venv}" 33 | fi 34 | 35 | source "${venv}/bin/activate" 36 | 37 | # Directory where pre-compiled binaries will be installed 38 | mkdir -p "${venv}/tools" 39 | 40 | # Install Python dependencies 41 | echo 'Installing Python dependencies' 42 | pip3 ${PIP_INSTALL} pip 43 | pip3 ${PIP_INSTALL} wheel setuptools 44 | 45 | # Install local Rhasspy dependencies if available 46 | grep '^rhasspy-' "${src_dir}/requirements.txt" | \ 47 | xargs pip3 ${PIP_INSTALL} -f "${download}" 48 | 49 | # Pocketsphinx 50 | if [[ -s "${download}/pocketsphinx-python.tar.gz" ]]; then 51 | echo 'Installing pocketsphinx' 52 | # Only install if not already present in venv 53 | if [[ -z "$(pip3 freeze | grep '^pocketsphinx==0.1.15$')" ]]; then 54 | pip3 ${PIP_INSTALL} "${download}/pocketsphinx-python.tar.gz" 55 | fi 56 | fi 57 | 58 | # Opengrm 59 | opengrm_file="${download}/opengrm-1.3.4-${target_arch}.tar.gz" 60 | if [[ -n "$(command -v ngramcount)" ]]; then 61 | echo 'Installing Opengrm' 62 | "${src_dir}/scripts/install-opengrm.sh" \ 63 | "${opengrm_file}" \ 64 | "${venv}/tools" 65 | fi 66 | 67 | # Phonetisaurus 68 | phonetisaurus_file="${download}/phonetisaurus-2019-${target_arch}.tar.gz" 69 | if [[ -n "$(command -v phonetisaurus-apply)" ]]; then 70 | echo 'Installing Phonetisaurus' 71 | "${src_dir}/scripts/install-phonetisaurus.sh" \ 72 | "${phonetisaurus_file}" \ 73 | "${venv}/tools" 74 | fi 75 | 76 | # Kaldi 77 | kaldi_file="${download}/kaldi-2021-${target_arch}.tar.gz" 78 | if [[ -s "${kaldi_file}" ]]; then 79 | echo 'Installing Kaldi' 80 | "${src_dir}/scripts/install-kaldi.sh" \ 81 | "${kaldi_file}" \ 82 | "${venv}/tools" 83 | fi 84 | 85 | # Mycroft Precise 86 | precise_file="${download}/precise-engine_0.3.0_${target_arch}.tar.gz" 87 | if [[ -s "${precise_file}" ]]; then 88 | echo 'Installing Mycroft Precise' 89 | "${src_dir}/scripts/install-precise.sh" \ 90 | 
"${precise_file}" \ 91 | "${venv}/tools" 92 | fi 93 | 94 | # Mozilla DeepSpeech 95 | deepspeech_file="${download}/native_client.${target_arch}.cpu.linux.0.9.3.tar.xz" 96 | if [[ -s "${deepspeech_file}" ]]; then 97 | echo 'Installing DeepSpeech Native Client' 98 | "${src_dir}/scripts/install-deepspeech.sh" \ 99 | "${deepspeech_file}" \ 100 | "${venv}/tools" 101 | fi 102 | 103 | # KenLM 104 | kenlm_file="${download}/kenlm-20210107_${target_arch}.tar.gz" 105 | if [[ -s "${kenlm_file}" ]]; then 106 | echo 'Installing Kenlm' 107 | "${src_dir}/scripts/install-kenlm.sh" \ 108 | "${kenlm_file}" \ 109 | "${venv}/tools" 110 | fi 111 | 112 | # Julius 113 | julius_file="${download}/julius-4.5_${target_arch}.tar.gz" 114 | if [[ -s "${julius_file}" ]]; then 115 | echo 'Installing Julius' 116 | "${src_dir}/scripts/install-julius.sh" \ 117 | "${julius_file}" \ 118 | "${venv}/tools" 119 | fi 120 | 121 | echo 'Installing requirements' 122 | pip3 ${PIP_INSTALL} -r requirements.txt 123 | 124 | # Optional development requirements 125 | echo 'Installing development requirements' 126 | if [[ -f 'requirements_dev.txt' ]]; then 127 | pip3 ${PIP_INSTALL} -r requirements_dev.txt || \ 128 | echo "Failed to install development requirements" 129 | fi 130 | 131 | # ----------------------------------------------------------------------------- 132 | 133 | echo "OK" 134 | -------------------------------------------------------------------------------- /scripts/format-code.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | # Directory of *this* script 5 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 6 | src_dir="$(realpath "${this_dir}/..")" 7 | 8 | venv="${src_dir}/.venv" 9 | if [[ -d "${venv}" ]]; then 10 | source "${venv}/bin/activate" 11 | fi 12 | 13 | code_dir="${src_dir}/voice2json" 14 | 15 | # ----------------------------------------------------------------------------- 16 | 17 | black "${code_dir}" 18 | isort 
"${code_dir}"/*.py 19 | 20 | # ----------------------------------------------------------------------------- 21 | 22 | echo "OK" 23 | -------------------------------------------------------------------------------- /scripts/install/install-deepspeech.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | if [[ -z "$2" ]]; then 5 | echo "Usage: install-deepspeech.sh native_client.tar.xz output-dir/" 6 | exit 1 7 | fi 8 | 9 | native_client="$(realpath "$1")" 10 | output="$(realpath "$2")" 11 | 12 | # ----------------------------------------------------------------------------- 13 | 14 | # Create a temporary directory for extraction 15 | temp_dir="$(mktemp -d)" 16 | 17 | function cleanup { 18 | rm -rf "${temp_dir}" 19 | } 20 | 21 | trap cleanup EXIT 22 | 23 | # ----------------------------------------------------------------------------- 24 | 25 | tar -C "${temp_dir}" -xf "${native_client}" 26 | install -D "--target-directory=${output}/bin" -- "${temp_dir}/generate_scorer_package" "${temp_dir}/deepspeech" 27 | install -D "--target-directory=${output}/lib" -- "${temp_dir}"/*.so* 28 | install -D "--target-directory=${output}/include" -- "${temp_dir}"/*.h 29 | -------------------------------------------------------------------------------- /scripts/install/install-julius.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | if [[ -z "$2" ]]; then 5 | echo "Usage: install-julius.sh julius.tar.gz output-dir/" 6 | exit 1 7 | fi 8 | 9 | julius="$(realpath "$1")" 10 | output="$(realpath "$2")" 11 | 12 | mkdir -p "${output}/bin" 13 | tar -C "${output}/bin" -xf "${julius}" 14 | -------------------------------------------------------------------------------- /scripts/install/install-kaldi.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | if [[ -z "$2" ]]; 
then 5 | echo "Usage: install-kaldi.sh kaldi.tar.gz output-dir/" 6 | exit 1 7 | fi 8 | 9 | kaldi="$(realpath "$1")" 10 | output="$(realpath "$2")" 11 | 12 | mkdir -p "${output}/lib/kaldi" 13 | tar -C "${output}/lib/kaldi" -xf "${kaldi}" --strip-components=1 14 | -------------------------------------------------------------------------------- /scripts/install/install-kenlm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | if [[ -z "$2" ]]; then 5 | echo "Usage: install-kenlm.sh kenlm.tar.gz output-dir/" 6 | exit 1 7 | fi 8 | 9 | kenlm="$(realpath "$1")" 10 | output="$(realpath "$2")" 11 | 12 | mkdir -p "${output}/bin" 13 | tar -C "${output}/bin" -xf "${kenlm}" 14 | -------------------------------------------------------------------------------- /scripts/install/install-opengrm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | if [[ -z "$2" ]]; then 5 | echo "Usage: install-opengrm.sh opengrm.tar.gz output-dir/" 6 | exit 1 7 | fi 8 | 9 | opengrm="$(realpath "$1")" 10 | output="$(realpath "$2")" 11 | 12 | # ----------------------------------------------------------------------------- 13 | 14 | # Create a temporary directory for extraction 15 | temp_dir="$(mktemp -d)" 16 | 17 | function cleanup { 18 | rm -rf "${temp_dir}" 19 | } 20 | 21 | trap cleanup EXIT 22 | 23 | # ----------------------------------------------------------------------------- 24 | 25 | tar -C "${temp_dir}" -xf "${opengrm}" 26 | install -D "--target-directory=${output}/bin" -- "${temp_dir}/bin"/* 27 | 28 | mkdir -p "${output}/lib" 29 | cp -a "${temp_dir}/lib"/*.so* "${output}/lib/" 30 | -------------------------------------------------------------------------------- /scripts/install/install-phonetisaurus.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | if [[ -z "$2" ]]; 
then 5 | echo "Usage: install-phonetisaurus.sh phonetisaurus.tar.gz output-dir/" 6 | exit 1 7 | fi 8 | 9 | phonetisaurus="$(realpath "$1")" 10 | output="$(realpath "$2")" 11 | 12 | # ----------------------------------------------------------------------------- 13 | 14 | # Create a temporary directory for extraction 15 | temp_dir="$(mktemp -d)" 16 | 17 | function cleanup { 18 | rm -rf "${temp_dir}" 19 | } 20 | 21 | trap cleanup EXIT 22 | 23 | # ----------------------------------------------------------------------------- 24 | 25 | tar -C "${temp_dir}" -xf "${phonetisaurus}" 26 | install -D "--target-directory=${output}/bin" -- "${temp_dir}/bin"/* 27 | 28 | mkdir -p "${output}/lib" 29 | cp -a "${temp_dir}/lib"/*.so* "${output}/lib/" 30 | -------------------------------------------------------------------------------- /scripts/install/install-precise.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | if [[ -z "$2" ]]; then 5 | echo "Usage: install-precise.sh precise-engine.tar.gz output-dir/" 6 | exit 1 7 | fi 8 | 9 | precise="$(realpath "$1")" 10 | output="$(realpath "$2")" 11 | 12 | mkdir -p "${output}/lib/precise" 13 | tar -C "${output}/lib/precise" -xf "${precise}" --strip-components=1 14 | ln -sf "${output}/lib/precise/precise-engine" "${output}/bin/precise-engine" 15 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | this_dir="$( cd "$( dirname "$0" )" && pwd )" 3 | profiles_dir="$(realpath "${this_dir}/../voice2json-profiles")" 4 | 5 | if [[ ! 
#!/usr/bin/env bash
#
# Train each requested profile in a scratch copy and run the per-command test
# scripts against it, then print a per-profile summary of the tests that failed.
#
# Usage: test-all.sh profiles-root [--mixed] [PROFILE] [PROFILE] ...
#
#   profiles-root  directory containing one sub-directory per profile
#   --mixed        additionally re-train with a base language model weight of
#                  0.05 and re-run the speech-to-text tests
#   PROFILE        profile directory names; a built-in list is used when none
#                  are given
set -e

if [[ -z "$1" ]]; then
    echo "Usage: test-all.sh profiles-root [PROFILE] [PROFILE] ..."
    exit 1
fi

this_dir="$( cd "$( dirname "$0" )" && pwd )"
src_dir="$(realpath "${this_dir}/../..")"

# Parse command-line
profiles_root=$(realpath "$1")
shift

do_mixed_tests=''
profiles=()
while [[ -n "$1" ]]; do
    case "$1" in
        --mixed)
            do_mixed_tests='yes'
            ;;
        *)
            profiles+=("$1")
            ;;
    esac

    shift
done

# -----------------------------------------------------------------------------

# Scratch directory holding the profile copies; removed when the script exits
temp_dir="$(mktemp -d)"

cleanup() {
    rm -rf "${temp_dir}"
}

trap cleanup EXIT

# -----------------------------------------------------------------------------

# Fall back to the built-in profile list when none were named
if [[ "${#profiles[@]}" -eq 0 ]]; then
    profiles=('en-us_pocketsphinx-cmu' 'en-us_kaldi-zamia' 'en-us_deepspeech-mozilla')
fi

stt_tests=('transcribe-wav' 'open-transcription')
other_tests=('recognize-intent' 'wait-wake')
all_tests=("${stt_tests[@]}" "${other_tests[@]}")

# profile name -> space-separated names of the tests that failed
declare -A test_errors

for profile in "${profiles[@]}"; do
    profile_errors=()
    src_profile_dir="${profiles_root}/${profile}"
    dest_profile_dir="${temp_dir}/${profile}"

    # Work on a fresh copy so training never touches the source profile
    echo "${src_profile_dir} => ${dest_profile_dir}"
    rm -rf "${dest_profile_dir}"
    cp -R "${src_profile_dir}" "${dest_profile_dir}"

    # Train (a training failure aborts the whole run because of set -e)
    echo 'Training...'
    voice2json -p "${dest_profile_dir}" --debug train-profile

    # Test: a failing test script is recorded, not fatal
    echo 'Testing...'
    for test_name in "${all_tests[@]}"; do
        echo "${test_name}"
        if ! "${src_dir}/scripts/test/test-${test_name}.sh" --profile "${dest_profile_dir}" --debug; then
            profile_errors+=("${test_name}")
        fi
    done

    if [[ -n "${do_mixed_tests}" ]]; then
        # Train (mixed)
        echo 'Training (mixed)...'
        voice2json --profile "${dest_profile_dir}" \
                   --setting 'training.base-language-model-weight' '0.05' \
                   --debug train-profile

        # Test (mixed): only the speech-to-text tests are repeated
        echo 'Testing (mixed)...'
        for test_name in "${stt_tests[@]}"; do
            echo "${test_name} (mixed)"
            if ! "${src_dir}/scripts/test/test-${test_name}.sh" -p "${dest_profile_dir}" --debug; then
                profile_errors+=("mixed-${test_name}")
            fi
        done
    fi

    # -------------------------------------------------------------------------

    test_errors["${profile}"]="${profile_errors[*]}"

    echo '-------'
    echo ''
done

# -----------------------------------------------------------------------------

echo 'Summary'
echo '-------'
for profile in "${profiles[@]}"; do
    echo "${profile}: ${test_errors["${profile}"]}"
done
#!/usr/bin/env bash
#
# Build and push the Debian test image for the requested platforms, then run
# test-all.sh against that image once per platform through a generated
# `voice2json` wrapper script placed first on PATH.
#
# Usage: test-debian.sh [PLATFORM ...] [-- PROFILE ...]
#
#   PLATFORM  docker platform strings (e.g. linux/amd64); a built-in list is
#             used when none are given
#   PROFILE   profile names forwarded to test-all.sh
#
# Environment:
#   DOCKER_REGISTRY  registry prefix for the image (default: docker.io)
set -e

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"
src_dir="$(realpath "${this_dir}/../..")"

version="$(cat "${src_dir}/VERSION")"

# Same default as test-docker.sh. Exported because the generated wrapper below
# is written with single quotes and expands ${DOCKER_REGISTRY} only when it is
# executed as a child process.
: "${DOCKER_REGISTRY=docker.io}"
export DOCKER_REGISTRY

# -----------------------------------------------------------------------------

profiles=()
platforms=()
on_profiles=''

# Arguments before '--' are platforms, arguments after it are profiles
while [[ -n "$1" ]]; do
    if [[ "$1" = '--' ]]; then
        on_profiles='yes'
    elif [[ -z "${on_profiles}" ]]; then
        platforms+=("$1")
    else
        profiles+=("$1")
    fi

    shift
done

if [[ "${#platforms[@]}" -eq 0 ]]; then
    # No platform named (no arguments at all, or only "-- PROFILE ...")
    platforms=('linux/amd64' 'linux/arm/v7' 'linux/arm64')
fi

# -----------------------------------------------------------------------------

# --platform takes ONE comma-separated value; expanding the array directly
# would pass every platform after the first as a stray positional argument.
platform_list="$(IFS=','; echo "${platforms[*]}")"

docker buildx build "${src_dir}" \
       -f "${src_dir}/Dockerfile.test.debian" \
       --platform "${platform_list}" \
       --tag "${DOCKER_REGISTRY}/synesthesiam/voice2json-debian-test" \
       --push

# -----------------------------------------------------------------------------

# Create a temporary directory for testing
temp_dir="$(mktemp -d)"

function cleanup {
    rm -rf "${temp_dir}"
}

trap cleanup EXIT

# -----------------------------------------------------------------------------

# docker platform string -> short target name used for the scratch directory
declare -A platform_to_target
platform_to_target=(['linux/amd64']='amd64' ['linux/arm/v6']='armv6' ['linux/arm/v7']='armv7' ['linux/arm64']='arm64')

for platform in "${platforms[@]}"; do
    echo "${platform}"
    target="${platform_to_target["${platform}"]}"
    if [[ -z "${target}" ]]; then
        # Platform is not in the table above
        echo "ERROR: ${platform}"
        exit 1
    fi

    docker pull \
           --platform "${platform}" \
           "${DOCKER_REGISTRY}/synesthesiam/voice2json-debian-test"

    target_dir="${temp_dir}/${target}"
    rm -rf "${target_dir}"
    mkdir -p "${target_dir}"

    # Create voice2json script for testing.
    # Single quotes: every ${...} below is expanded when the wrapper runs.
    echo '#!/usr/bin/env bash
docker run -i \
       --platform "${voice2json_platform}" \
       -v "${HOME}:${HOME}" \
       -e "HOME=${HOME}" \
       --user "$(id -u):$(id -g)" \
       "${DOCKER_REGISTRY}/synesthesiam/voice2json-debian-test" "$@"
' > "${target_dir}/voice2json"
    chmod +x "${target_dir}/voice2json"

    export voice2json_platform="${platform}"
    export voice2json_version="${version}"

    # Execute test scripts with the wrapper first on PATH.
    # TMPDIR is placed under HOME, which is the directory mounted into the container.
    PATH="${target_dir}:${PATH}" \
        TMPDIR="${HOME}/.cache" \
        "${src_dir}/scripts/test/test-all.sh" "${HOME}/opt/voice2json-profiles/english" "${profiles[@]}"
done
#!/usr/bin/env bash
#
# Pull the voice2json image for each platform and run test-all.sh against it
# through a generated `voice2json` wrapper script placed first on PATH.
#
# Environment:
#   PLATFORMS        comma-separated docker platforms
#                    (default: linux/amd64,linux/arm/v7,linux/arm64)
#   DOCKER_REGISTRY  registry prefix for the image (default: docker.io)
set -e

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"
src_dir="$(realpath "${this_dir}/../..")"

# -----------------------------------------------------------------------------

: "${PLATFORMS=linux/amd64,linux/arm/v7,linux/arm64}"
: "${DOCKER_REGISTRY=docker.io}"

# The generated wrapper below is written with single quotes, so it expands
# ${DOCKER_REGISTRY} in a child process. Without the export, the default set
# above would be visible to `docker pull` here but not to the wrapper.
export DOCKER_REGISTRY

IFS=',' read -ra platforms <<< "${PLATFORMS}"

# -----------------------------------------------------------------------------

# Create a temporary directory for testing
temp_dir="$(mktemp -d)"

function cleanup {
    rm -rf "${temp_dir}"
}

trap cleanup EXIT

# -----------------------------------------------------------------------------

# docker platform string -> short target name used for the scratch directory
declare -A platform_to_target
platform_to_target=(['linux/amd64']='amd64' ['linux/arm/v6']='armv6' ['linux/arm/v7']='armv7' ['linux/arm64']='arm64')

for platform in "${platforms[@]}"; do
    echo "${platform}"
    target="${platform_to_target["${platform}"]}"
    if [[ -z "${target}" ]]; then
        # Platform is not in the table above
        echo "ERROR: ${platform}"
        exit 1
    fi

    docker pull \
           --platform "${platform}" \
           "${DOCKER_REGISTRY}/synesthesiam/voice2json"

    target_dir="${temp_dir}/${target}"
    rm -rf "${target_dir}"
    mkdir -p "${target_dir}"

    # Create voice2json script for testing.
    # Single quotes: every ${...} below is expanded when the wrapper runs.
    echo '#!/usr/bin/env bash
docker run -i \
       --platform "${voice2json_platform}" \
       -v "${HOME}:${HOME}" \
       -e "HOME=${HOME}" \
       --user "$(id -u):$(id -g)" \
       "${DOCKER_REGISTRY}/synesthesiam/voice2json" "$@"
' > "${target_dir}/voice2json"
    chmod +x "${target_dir}/voice2json"

    export voice2json_platform="${platform}"

    # Execute test scripts with the wrapper first on PATH.
    # TMPDIR is placed under HOME, which is the directory mounted into the container.
    PATH="${target_dir}:${PATH}" \
        TMPDIR="${HOME}/.cache" \
        "${src_dir}/scripts/test/test-all.sh" "${HOME}/opt/voice2json-profiles/english"
done
#!/usr/bin/env bash
#
# Check that `voice2json print-version` prints exactly the contents of the
# VERSION file at the root of the source tree. Any arguments are passed
# through to voice2json ahead of the sub-command.
set -e

this_dir="$( cd "$( dirname "$0" )" && pwd )"
src_dir="$(realpath "${this_dir}/../..")"

version_file="${src_dir}/VERSION"
expected="$(cat "${version_file}")"
actual="$(voice2json "$@" print-version)"

# Matching output: nothing to report
[[ "${actual}" == "${expected}" ]] && exit 0

echo "Expected '${expected}' got '${actual}'"
exit 1
#!/usr/bin/env bash
#
# Stream the "hey mycroft" test recording into `voice2json wait-wake` as raw
# 16 kHz, signed-integer, mono audio and fail when no keyword is reported.
# Any arguments are passed through to voice2json ahead of the sub-command.
set -e

this_dir="$( cd "$( dirname "$0" )" && pwd )"
src_dir="$(realpath "${this_dir}/../..")"

# `.keyword // empty` makes jq print nothing when the key is missing or null.
# A plain `.keyword` would print the literal string "null", which is not
# empty and would let the check below pass without any detection.
#
# NOTE(review): pipefail is deliberately left off. With --exit-count 1,
# voice2json presumably exits while sox is still writing, which would make
# the pipeline report a failure. Confirm before enabling it.
actual="$(sox "${src_dir}/etc/test/hey_mycroft.wav" -r 16000 -e signed-integer -c 1 -t raw - | voice2json "$@" wait-wake --exit-count 1 --audio-source - | jq -r '.keyword // empty')"

if [[ -z "${actual}" ]]; then
    echo "Got empty result"
    exit 1
fi
"""Setup script for voice2json"""
import os
from pathlib import Path

import setuptools

this_dir = Path(__file__).parent

# -----------------------------------------------------------------------------

# Load README in as long description.
# The encoding is given explicitly so the build does not depend on the locale
# of the machine running it.
long_description: str = ""
readme_path = this_dir / "README.md"
if readme_path.is_file():
    long_description = readme_path.read_text(encoding="utf-8")

# -----------------------------------------------------------------------------


def is_yes(s):
    """True if string is yes (from configure.ac)"""
    return s.lower().strip() == "yes"


# -----------------------------------------------------------------------------

requirements_path = this_dir / "requirements.txt"
with open(requirements_path, "r", encoding="utf-8") as requirements_file:
    requirements = requirements_file.read().splitlines()

# The @...@ placeholders are substituted before this file is used; each flag
# decides whether the matching speech-to-text requirement is kept.
enable_pocketsphinx = is_yes("@ENABLE_POCKETSPHINX@")
enable_kaldi = is_yes("@ENABLE_KALDI@")
enable_deepspeech = is_yes("@ENABLE_DEEPSPEECH@")

# Requirement-name prefixes of the back ends that were switched off
disabled_prefixes = tuple(
    prefix
    for prefix, enabled in (
        ("rhasspy-asr-pocketsphinx", enable_pocketsphinx),
        ("rhasspy-asr-kaldi", enable_kaldi),
        ("rhasspy-asr-deepspeech", enable_deepspeech),
    )
    if not enabled
)

if disabled_prefixes:
    # str.startswith accepts a tuple, so one pass drops every disabled back end
    requirements = [r for r in requirements if not r.startswith(disabled_prefixes)]

# -----------------------------------------------------------------------------

setuptools.setup(
    name="@PACKAGE_NAME@",
    version="@PACKAGE_VERSION@",
    author="Michael Hansen",
    author_email="@PACKAGE_BUGREPORT@",
    url="https://voice2json.org",
    packages=setuptools.find_packages(),
    package_data={"@PACKAGE_NAME@": ["py.typed"]},
    install_requires=requirements,
    classifiers=[
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "License :: OSI Approved :: MIT License",
    ],
    long_description=long_description,
    long_description_content_type="text/markdown",
    python_requires=">=3.7",
)
#!/usr/bin/env bash
#
# Launcher for voice2json. The @...@ placeholders are substituted before this
# file is used. When IN_PLACE is 'yes' the launcher next to this script
# (bin/voice2json) is run; otherwise the installation under the configured
# prefix is used, optionally as a virtual environment.
in_place="@IN_PLACE@"
if [[ "${in_place}" == 'yes' ]]; then
    # Run directly from source code
    this_dir="$( cd "$( dirname "$0" )" && pwd )"

    # exec: the launcher has nothing left to do, so replace it instead of
    # keeping an extra shell process around
    exec "${this_dir}/bin/voice2json" "$@"
else
    # Use virtual environment as a prefix
    prefix="@prefix@"
    virtualenv="@VIRTUALENV@"
    export voice2json_dir="${prefix}/share/voice2json"

    # Interpreter may be overridden through the environment
    : "${PYTHON=python3}"

    if [[ "${virtualenv}" = 'yes' ]]; then
        # Use virtual environment
        source "${prefix}/bin/activate"
        PYTHON="${prefix}/bin/python"
    fi

    # Append the previous value only when it is non-empty. A bare trailing ':'
    # would add an empty entry to the library search path, which the dynamic
    # linker treats as the current working directory.
    export LD_LIBRARY_PATH="${prefix}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
    export PATH="${prefix}/bin:${PATH}"
    export KALDI_DIR="${prefix}/lib/kaldi"

    exec "${PYTHON}" -m voice2json "$@"
fi
# -*- mode: python -*-
# PyInstaller specification for the voice2json executable.
# Analysis, PYZ, EXE and COLLECT are not imported here; they are expected to
# be provided by PyInstaller when it evaluates this file.
import os
import site
from pathlib import Path

from PyInstaller.utils.hooks import copy_metadata

block_cipher = None

prefix = Path("@prefix@")

# Directories to search: the interpreter's own site-packages plus every
# python*/site-packages directory that exists below <prefix>/lib.
site_dirs = site.getsitepackages()
lib_dir = prefix / "lib"
site_dirs.extend(
    candidate
    for candidate in (
        python_dir / "site-packages" for python_dir in lib_dir.glob("python*")
    )
    if candidate.is_dir()
)

# Look for compiled artifacts
artifacts = ["_webrtcvad.*.so"]
found_artifacts = {}
for site_dir in map(Path, site_dirs):
    for artifact in artifacts:
        # First match in this directory; a match in a later directory
        # replaces one found in an earlier directory.
        first_match = next(iter(site_dir.glob(artifact)), None)
        if first_match is not None:
            found_artifacts[artifact] = first_match

# Every pattern must have been found somewhere
missing_artifacts = set(artifacts) - set(found_artifacts)
assert not missing_artifacts, missing_artifacts

a = Analysis(
    [Path.cwd() / "__main__.py"],
    pathex=["."],
    binaries=[(artifact_path, ".") for artifact_path in found_artifacts.values()],
    datas=copy_metadata("webrtcvad"),
    hiddenimports=["networkx"],
    hookspath=[],
    runtime_hooks=[],
    excludes=[],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name="voice2json",
    debug=False,
    bootloader_ignore_signals=False,
    strip=True,
    upx=True,
    console=True,
)
coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=True,
    upx=True,
    name="voice2json",
)
"""Methods for generating examples."""
import argparse
import dataclasses
import gzip
import itertools
import logging
import pickle
import typing

from .core import Voice2JsonCore
from .utils import dag_paths_random, itershuffle, print_json

_LOGGER = logging.getLogger("voice2json.generate")

# -----------------------------------------------------------------------------


def _format_iob(intent: typing.Dict[str, typing.Any]) -> str:
    """Return one IOB-tagged example line for an intent dictionary.

    The line has three tab-separated fields: the tokens wrapped in ``BS`` and
    ``ES`` markers, one tag per token (the markers are tagged ``O``), and the
    intent name.

    A running offset is advanced by ``len(word) + 1`` per token, i.e. as if
    the tokens were joined by single spaces. An entity whose ``start`` equals
    the offset before a token opens a ``B-`` tag; an entity whose ``end``
    equals the offset just past a token closes it.
    """
    entity_start = {ev["start"]: ev for ev in intent["entities"]}
    entity_end = {ev["end"] for ev in intent["entities"]}

    token_idx = 0
    entity = None
    words: typing.List[str] = []
    tags: typing.List[str] = []

    for word in intent["tokens"]:
        # Inside an open entity unless a new one starts exactly here
        tag = f"I-{entity}" if entity else "O"
        if token_idx in entity_start:
            entity = entity_start[token_idx]["entity"]
            tag = f"B-{entity}"

        words.append(str(word))
        tags.append(tag)

        # Skip over the word and the single space that follows it
        token_idx += len(word) + 1

        if (token_idx - 1) in entity_end:
            entity = None

    return "\t".join(
        (
            " ".join(["BS", *words, "ES"]),
            " ".join(["O", *tags, "O"]),
            str(intent["intent"]["name"]),
        )
    )


async def generate(args: argparse.Namespace, core: Voice2JsonCore) -> None:
    """Generate randomish examples from intent graph.

    Reads ``args.number`` (maximum number of paths to visit; zero, negative
    or ``None`` means no limit), ``args.raw_symbols`` (print the output
    labels of each path instead of an intent) and ``args.iob`` (print IOB
    lines instead of JSON).

    Raises:
        AssertionError: the profile is not trained, or the intent graph has
            no start/end node.
    """
    import rhasspynlu

    # Make sure profile has been trained
    assert core.check_trained(), "Not trained"

    # Load settings
    intent_graph_path = core.ppath(
        "intent-recognition.intent-graph", "intent.pickle.gz"
    )

    # Load intent graph.
    # pickle.load is used directly because networkx.readwrite.gpickle was
    # removed in NetworkX 3.0.
    # NOTE: unpickling runs code embedded in the file; only load intent graphs
    # that come from a trusted profile.
    _LOGGER.debug("Loading %s", intent_graph_path)
    with gzip.GzipFile(intent_graph_path, mode="rb") as graph_gzip:
        intent_graph = pickle.load(graph_gzip)

    start_node, end_node = rhasspynlu.jsgf_graph.get_start_end_nodes(intent_graph)
    assert (start_node is not None) and (
        end_node is not None
    ), "Missing start/end node(s)"

    # None means "no limit" for islice
    limit = args.number if (args.number is not None and args.number > 0) else None

    # Iterate through all paths. The limit counts visited paths, so a path
    # that is skipped below still uses up one slot.
    paths = itershuffle(dag_paths_random(intent_graph, start_node, end_node))
    for path in itertools.islice(paths, limit):
        if args.raw_symbols:
            # Output labels directly from intent graph
            symbols = []
            for from_node, to_node in rhasspynlu.utils.pairwise(path):
                olabel = intent_graph.edges[(from_node, to_node)].get("olabel")
                if olabel:
                    symbols.append(olabel)

            print(" ".join(symbols))
            continue

        # Convert to intent
        _, recognition = rhasspynlu.fsticuffs.path_to_recognition(path, intent_graph)
        if not recognition:
            _LOGGER.warning("Recognition failed for path: %s", path)
            continue

        intent = dataclasses.asdict(recognition)

        # Add slots (a later entity with the same name overwrites an earlier one)
        intent["slots"] = {ev["entity"]: ev["value"] for ev in intent["entities"]}

        if args.iob:
            # IOB format, intent name last
            print(_format_iob(intent))
        else:
            # Write as jsonl
            print_json(intent)
"""Text to speech methods for voice2json command-line interface."""
import argparse
import asyncio
import logging
import shlex
import sys
import typing

import pydash

from .core import Voice2JsonCore

_LOGGER = logging.getLogger("voice2json.tts")

# -----------------------------------------------------------------------------


async def speak(args: argparse.Namespace, core: Voice2JsonCore) -> None:
    """Speak one or more sentences using text to speech.

    MaryTTS is used when ``args.marytts`` is set, eSpeak otherwise.

    Raises:
        AssertionError: MaryTTS was requested but the profile has no
            ``text-to-speech.marytts.voice`` setting.
    """
    if args.marytts:
        marytts_voice = pydash.get(core.profile, "text-to-speech.marytts.voice")
        assert marytts_voice, "No MaryTTS voice"
        await speak_marytts(args, core, marytts_voice)
    else:
        await speak_espeak(args, core)


# -----------------------------------------------------------------------------


async def _output_wav(
    args: argparse.Namespace,
    sentence: str,
    wav_data: bytes,
    play_command: typing.List[str],
) -> None:
    """Write WAV data to ``args.wav_sink``, or play it when no sink is set.

    A sink of ``"-"`` means standard output; any other value is a file path.
    The file is opened with mode ``"wb"`` each time, so every call replaces
    the previous contents. When no sink is set, the sentence is printed and
    the WAV data is piped to the standard input of ``play_command``.
    """
    if args.wav_sink is not None:
        # Write WAV output somewhere
        if args.wav_sink == "-":
            # STDOUT (not ours to close)
            sys.stdout.buffer.write(wav_data)
            sys.stdout.buffer.flush()
        else:
            # File output; closed as soon as the data is written
            with open(args.wav_sink, "wb") as wav_file:
                wav_file.write(wav_data)

        return

    _LOGGER.debug(play_command)

    # Speak sentence
    print(sentence)
    play_process = await asyncio.create_subprocess_exec(
        *play_command, stdin=asyncio.subprocess.PIPE
    )
    await play_process.communicate(input=wav_data)


# -----------------------------------------------------------------------------


async def speak_espeak(args: argparse.Namespace, core: Voice2JsonCore) -> None:
    """Speak one or more sentences using eSpeak.

    Sentences come from ``args.sentence`` or, when that is empty, from the
    lines of standard input. Each sentence is substituted into the profile's
    ``text-to-speech.espeak.speak-command`` format string, and the command is
    run with ``--stdout`` so its WAV output can be captured.
    """
    voice = pydash.get(core.profile, "text-to-speech.espeak.voice")
    espeak_cmd_format = pydash.get(core.profile, "text-to-speech.espeak.speak-command")
    play_command = shlex.split(pydash.get(core.profile, "audio.play-command"))

    # Process sentence(s); fall back to STDIN when none were given
    sentences = args.sentence or sys.stdin

    for sentence in sentences:
        sentence = sentence.strip()
        espeak_cmd = shlex.split(espeak_cmd_format.format(sentence=sentence))
        espeak_cmd.append("--stdout")

        if voice is not None:
            espeak_cmd.extend(["-v", str(voice)])

        _LOGGER.debug(espeak_cmd)
        speak_process = await asyncio.create_subprocess_exec(
            *espeak_cmd, stdout=asyncio.subprocess.PIPE
        )
        wav_data, _ = await speak_process.communicate()

        await _output_wav(args, sentence, wav_data, play_command)


# -----------------------------------------------------------------------------


async def speak_marytts(
    args: argparse.Namespace, core: Voice2JsonCore, marytts_voice: str
) -> None:
    """Speak one or more sentences using MaryTTS.

    Sentences come from ``args.sentence`` or, when that is empty, from the
    lines of standard input. One GET request is sent per sentence to the
    profile's ``text-to-speech.marytts.process-url`` (default
    ``http://localhost:59125/process``).

    Raises whatever ``response.raise_for_status()`` raises when the server
    answers with an error status; the response body is logged first.
    """
    play_command = shlex.split(pydash.get(core.profile, "audio.play-command"))

    # Locale falls back to the profile's language code
    marytts_locale = pydash.get(
        core.profile,
        "text-to-speech.marytts.locale",
        pydash.get(core.profile, "language.code"),
    )
    marytts_url = str(
        pydash.get(
            core.profile,
            "text-to-speech.marytts.process-url",
            "http://localhost:59125/process",
        )
    )

    # Set up default params
    marytts_params: typing.Dict[str, str] = {
        "INPUT_TEXT": "",
        "INPUT_TYPE": "TEXT",
        "AUDIO": "WAVE",
        "OUTPUT_TYPE": "AUDIO",
        "VOICE": marytts_voice,
    }

    if marytts_locale is not None:
        marytts_params["LOCALE"] = marytts_locale

    # Process sentence(s); fall back to STDIN when none were given
    sentences = args.sentence or sys.stdin

    for sentence in sentences:
        sentence = sentence.strip()
        marytts_params["INPUT_TEXT"] = sentence

        # Do GET requests
        _LOGGER.debug("%s %s", marytts_url, marytts_params)
        async with core.http_session.get(
            marytts_url, params=marytts_params, ssl=core.ssl_context
        ) as response:
            wav_data = await response.read()
            if response.status != 200:
                # Print error message
                _LOGGER.error(wav_data.decode())

            response.raise_for_status()

        # The body is fully read, so the response is released before output
        await _output_wav(args, sentence, wav_data, play_command)