├── .env ├── renovate.json ├── nemo ├── gpu.conf └── Dockerfile.rocm ├── hitz ├── gpu.conf └── Dockerfile.rocm ├── whisper ├── gpu.conf ├── cpu.conf ├── Dockerfile ├── Dockerfile.cuda └── Dockerfile.rocm ├── mynorthai ├── cpu.conf ├── gpu.conf ├── Dockerfile ├── Dockerfile.cuda └── Dockerfile.rocm ├── project-aina-whisper ├── gpu.conf ├── Dockerfile.cuda └── Dockerfile.rocm ├── fasterwhisper ├── cpu.conf ├── gpu.conf ├── Dockerfile ├── Dockerfile.rocm └── Dockerfile.cuda ├── vosk └── Dockerfile ├── chromium └── Dockerfile ├── deepgram └── Dockerfile ├── base ├── Dockerfile.rocm ├── Dockerfile └── Dockerfile.cuda ├── citrinet └── Dockerfile ├── docker-compose.cuda.yml ├── docker-compose.yml ├── docker-compose.rocm.yml └── README.md /.env: -------------------------------------------------------------------------------- 1 | CONFIG_FOLDER=~/ovos-tts-stt/config 2 | OVOS_USER=ovos 3 | TZ=America/Montreal 4 | VERSION=alpha 5 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": [ 4 | "config:recommended" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /nemo/gpu.conf: -------------------------------------------------------------------------------- 1 | { 2 | "stt": { 3 | "module": "ovos-stt-plugin-nemo", 4 | "ovos-stt-plugin-nemo": { 5 | "use_cuda":true 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /hitz/gpu.conf: -------------------------------------------------------------------------------- 1 | { 2 | "stt": { 3 | "module": "ovos-stt-plugin-nemo", 4 | "ovos-stt-plugin-nemo": { 5 | "model": "stt_eu_conformer_ctc_large", 6 | "use_cuda":true 7 | } 8 | } 9 | } -------------------------------------------------------------------------------- /whisper/gpu.conf: 
-------------------------------------------------------------------------------- 1 | { 2 | "stt": { 3 | "module": "ovos-stt-plugin-whisper", 4 | "ovos-stt-plugin-whisper": { 5 | "model": "openai/whisper-large-v3-turbo", 6 | "use_cuda": true 7 | } 8 | } 9 | } -------------------------------------------------------------------------------- /mynorthai/cpu.conf: -------------------------------------------------------------------------------- 1 | { 2 | "stt": { 3 | "module": "ovos-stt-plugin-whisper", 4 | "ovos-stt-plugin-whisper": { 5 | "model": "my-north-ai/whisper-small-v3-pt", 6 | "use_cuda": false 7 | } 8 | } 9 | } -------------------------------------------------------------------------------- /mynorthai/gpu.conf: -------------------------------------------------------------------------------- 1 | { 2 | "stt": { 3 | "module": "ovos-stt-plugin-whisper", 4 | "ovos-stt-plugin-whisper": { 5 | "model": "my-north-ai/whisper-large-v3-pt", 6 | "use_cuda": true 7 | } 8 | } 9 | } -------------------------------------------------------------------------------- /whisper/cpu.conf: -------------------------------------------------------------------------------- 1 | { 2 | "stt": { 3 | "module": "ovos-stt-plugin-whisper", 4 | "ovos-stt-plugin-whisper": { 5 | "model": "openai/whisper-large-v3-turbo", 6 | "use_cuda": false 7 | } 8 | } 9 | } -------------------------------------------------------------------------------- /project-aina-whisper/gpu.conf: -------------------------------------------------------------------------------- 1 | { 2 | "stt": { 3 | "module": "ovos-stt-plugin-whisper", 4 | "ovos-stt-plugin-whisper": { 5 | "model": "projecte-aina/whisper-large-v3-ca-3catparla", 6 | "use_cuda": true 7 | } 8 | } 9 | } -------------------------------------------------------------------------------- /fasterwhisper/cpu.conf: -------------------------------------------------------------------------------- 1 | { 2 | "stt": { 3 | "module": "ovos-stt-plugin-fasterwhisper", 4 | 
"ovos-stt-plugin-fasterwhisper": { 5 | "model": "whisper-medium", 6 | "beam_size": 5, 7 | "cpu_threads": 4 8 | } 9 | } 10 | } -------------------------------------------------------------------------------- /fasterwhisper/gpu.conf: -------------------------------------------------------------------------------- 1 | { 2 | "stt": { 3 | "module": "ovos-stt-plugin-fasterwhisper", 4 | "ovos-stt-plugin-fasterwhisper": { 5 | "model": "whisper-large-v3-turbo", 6 | "use_cuda": true, 7 | "compute_type": "float16", 8 | "beam_size": 5, 9 | "cpu_threads": 4 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /vosk/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG TAG=alpha 2 | FROM smartgic/ovos-stt-server-base:${TAG} 3 | 4 | ARG BUILD_DATE=unknown 5 | ARG VERSION=unknown 6 | 7 | LABEL org.opencontainers.image.title="Open Voice OS OCI STT Vosk image" 8 | LABEL org.opencontainers.image.description="Vosk is a speech recognition toolkit" 9 | LABEL org.opencontainers.image.version=${VERSION} 10 | LABEL org.opencontainers.image.created=${BUILD_DATE} 11 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 12 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 13 | LABEL org.opencontainers.image.vendor="Open Voice OS" 14 | 15 | ARG ALPHA=false 16 | ARG USER=ovos 17 | 18 | SHELL ["/bin/bash", "-c"] 19 | 20 | RUN pip3 install aiohttp \ 21 | && if [ "${ALPHA}" == "true" ]; then \ 22 | pip3 install ovos-stt-http-server SpeechRecognition ovos-stt-plugin-vosk --pre; \ 23 | else \ 24 | pip3 install ovos-stt-http-server SpeechRecognition ovos-stt-plugin-vosk; \ 25 | fi \ 26 | && mkdir -p /home/${USER}/.local/share/vosk \ 27 | && rm -rf ${HOME}/.cache 28 | 29 | ENV PATH=/home/${USER}/.venv/bin:$PATH 30 | ENV VIRTUAL_ENV=/home/${USER}/.venv 31 | 32 | ENTRYPOINT ovos-stt-server --engine ovos-stt-plugin-vosk 33 | 34 | 
EXPOSE 8080 35 | -------------------------------------------------------------------------------- /chromium/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG TAG=alpha 2 | FROM smartgic/ovos-stt-server-base:${TAG} 3 | 4 | ARG BUILD_DATE=unknown 5 | ARG VERSION=unknown 6 | 7 | LABEL org.opencontainers.image.title="Open Voice OS OCI STT Chromium image" 8 | LABEL org.opencontainers.image.description="A STT plugin for OVOS using the Google Chrome browser API" 9 | LABEL org.opencontainers.image.version=${VERSION} 10 | LABEL org.opencontainers.image.created=${BUILD_DATE} 11 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 12 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 13 | LABEL org.opencontainers.image.vendor="Open Voice OS" 14 | 15 | ARG ALPHA=false 16 | ARG USER=ovos 17 | 18 | SHELL ["/bin/bash", "-c"] 19 | 20 | RUN pip3 install aiohttp \ 21 | && if [ "${ALPHA}" == "true" ]; then \ 22 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/OpenVoiceOS/ovos-stt-plugin-chromium.git --pre; \ 23 | else \ 24 | pip3 install ovos-stt-http-server SpeechRecognition ovos-stt-plugin-chromium; \ 25 | fi \ 26 | && rm -rf ${HOME}/.cache 27 | 28 | USER $USER 29 | 30 | ENV PATH=/home/${USER}/.venv/bin:$PATH 31 | ENV VIRTUAL_ENV=/home/${USER}/.venv 32 | 33 | ENTRYPOINT ovos-stt-server --engine ovos-stt-plugin-chromium 34 | 35 | EXPOSE 8080 36 | -------------------------------------------------------------------------------- /deepgram/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG TAG=alpha 2 | FROM smartgic/ovos-stt-server-base:${TAG} 3 | 4 | ARG BUILD_DATE=unknown 5 | ARG VERSION=unknown 6 | 7 | LABEL org.opencontainers.image.title="Open Voice OS OCI STT Deepgram image" 8 | LABEL org.opencontainers.image.description="Unmatched accuracy. Blazing fast. Enterprise scale. 
Hands-down the best price. Everything developers need to build with confidence and ship faster" 9 | LABEL org.opencontainers.image.version=${VERSION} 10 | LABEL org.opencontainers.image.created=${BUILD_DATE} 11 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 12 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 13 | LABEL org.opencontainers.image.vendor="Open Voice OS" 14 | 15 | ARG ALPHA=false 16 | ARG USER=ovos 17 | 18 | SHELL ["/bin/bash", "-c"] 19 | 20 | RUN pip3 install aiohttp \ 21 | && if [ "${ALPHA}" == "true" ]; then \ 22 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/OpenVoiceOS/ovos-stt-plugin-deepgram.git --pre; \ 23 | else \ 24 | pip3 install ovos-stt-http-server SpeechRecognition ovos-stt-plugin-deepgram; \ 25 | fi \ 26 | && rm -rf ${HOME}/.cache 27 | 28 | USER $USER 29 | 30 | ENTRYPOINT ovos-stt-server --engine ovos-stt-plugin-deepgram 31 | 32 | EXPOSE 8080 33 | -------------------------------------------------------------------------------- /nemo/Dockerfile.rocm: -------------------------------------------------------------------------------- 1 | ARG TAG=alpha 2 | FROM smartgic/ovos-stt-server-base-rocm:${TAG} 3 | 4 | ARG BUILD_DATE=unknown 5 | ARG VERSION=unknown 6 | 7 | LABEL org.opencontainers.image.title="Open Voice OS OCI STT Nemo image with AMD ROCm support" 8 | LABEL org.opencontainers.image.description="" 9 | LABEL org.opencontainers.image.version=${VERSION} 10 | LABEL org.opencontainers.image.created=${BUILD_DATE} 11 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 12 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 13 | LABEL org.opencontainers.image.vendor="Open Voice OS" 14 | 15 | ARG ALPHA=false 16 | ARG USER=ovos 17 | 18 | SHELL ["/bin/bash", "-c"] 19 | 20 | RUN pip3 install aiohttp \ 21 | && if [ "${ALPHA}" == "true" ]; then \ 22 | pip3 
install ovos-stt-http-server SpeechRecognition git+https://github.com/OpenVoiceOS/ovos-stt-plugin-nemo combo-lock==0.2.6 --pre; \ 23 | else \ 24 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/OpenVoiceOS/ovos-stt-plugin-nemo; \ 25 | fi \ 26 | && mkdir -p ${HOME}/flagged \ 27 | && rm -rf ${HOME}/.cache/* 28 | 29 | ENV PATH=/home/${USER}/.venv/bin:$PATH 30 | ENV VIRTUAL_ENV=/home/${USER}/.venv 31 | 32 | COPY gpu.conf /etc/mycroft/mycroft.conf 33 | 34 | ENTRYPOINT ovos-stt-server --engine ovos-stt-plugin-nemo 35 | 36 | EXPOSE 8080 37 | -------------------------------------------------------------------------------- /fasterwhisper/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG TAG=alpha 2 | FROM smartgic/ovos-stt-server-base:${TAG} 3 | 4 | ARG BUILD_DATE=unknown 5 | ARG VERSION=unknown 6 | 7 | LABEL org.opencontainers.image.title="Open Voice OS OCI STT FasterWhisper image" 8 | LABEL org.opencontainers.image.description="High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model" 9 | LABEL org.opencontainers.image.version=${VERSION} 10 | LABEL org.opencontainers.image.created=${BUILD_DATE} 11 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 12 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 13 | LABEL org.opencontainers.image.vendor="Open Voice OS" 14 | 15 | ARG ALPHA=false 16 | ARG USER=ovos 17 | 18 | SHELL ["/bin/bash", "-c"] 19 | 20 | COPY cpu.conf /etc/mycroft/mycroft.conf 21 | 22 | RUN pip3 install aiohttp \ 23 | && if [ "${ALPHA}" == "true" ]; then \ 24 | pip3 install ovos-stt-http-server SpeechRecognition ovos-stt-plugin-fasterwhisper --pre; \ 25 | else \ 26 | pip3 install ovos-stt-http-server SpeechRecognition ovos-stt-plugin-fasterwhisper; \ 27 | fi \ 28 | && mkdir -p ${HOME}/flagged \ 29 | && rm -rf ${HOME}/.cache/* 30 | 31 | ENV PATH=/home/${USER}/.venv/bin:$PATH 32 
| ENV VIRTUAL_ENV=/home/${USER}/.venv 33 | 34 | ENTRYPOINT ovos-stt-server --engine ovos-stt-plugin-fasterwhisper 35 | 36 | EXPOSE 8080 37 | -------------------------------------------------------------------------------- /base/Dockerfile.rocm: -------------------------------------------------------------------------------- 1 | FROM rocm/pytorch:latest 2 | 3 | ARG BUILD_DATE=unknown 4 | ARG VERSION=unknown 5 | 6 | LABEL org.opencontainers.image.title="Open Voice OS OCI Speech-to-Text base image with ROCm support" 7 | LABEL org.opencontainers.image.description="Used as base layer for other OCI STT images that require AMD ROCm support" 8 | LABEL org.opencontainers.image.version=${VERSION} 9 | LABEL org.opencontainers.image.created=${BUILD_DATE} 10 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 11 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 12 | LABEL org.opencontainers.image.vendor="Open Voice OS" 13 | 14 | ARG ALPHA=false 15 | ARG USER=ovos 16 | 17 | ENV DEBIAN_FRONTEND=noninteractive 18 | 19 | SHELL ["/bin/bash", "-c"] 20 | 21 | RUN apt-get update \ 22 | && apt-get install -y vim git python3 python3-venv python3-pip curl ffmpeg && rm -rf /var/lib/apt/lists/* 23 | RUN useradd --no-log-init $USER -m -c "Open Voice OS user" \ 24 | && python3 -m venv --system-site-packages /home/${USER}/.venv \ 25 | && . 
/home/${USER}/.venv/bin/activate \ 26 | && mkdir -p /home/${USER}/.config/mycroft /home/${USER}/.cache /home/${USER}/gradio_cached_examples \ 27 | && chown ${USER}:${USER} -R /home/${USER} 28 | 29 | USER $USER 30 | 31 | ENV PATH=/home/${USER}/.venv/bin:$PATH 32 | ENV VIRTUAL_ENV=/home/${USER}/.venv 33 | 34 | WORKDIR /home/${USER} 35 | -------------------------------------------------------------------------------- /fasterwhisper/Dockerfile.rocm: -------------------------------------------------------------------------------- 1 | ARG TAG=alpha 2 | FROM smartgic/ovos-stt-server-base-rocm:${TAG} 3 | 4 | ARG BUILD_DATE=unknown 5 | ARG VERSION=unknown 6 | 7 | LABEL org.opencontainers.image.title="Open Voice OS OCI STT FasterWhisper image with AMD ROCm support" 8 | LABEL org.opencontainers.image.description="High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model" 9 | LABEL org.opencontainers.image.version=${VERSION} 10 | LABEL org.opencontainers.image.created=${BUILD_DATE} 11 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 12 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 13 | LABEL org.opencontainers.image.vendor="Open Voice OS" 14 | 15 | ARG ALPHA=false 16 | ARG USER=ovos 17 | 18 | SHELL ["/bin/bash", "-c"] 19 | 20 | COPY gpu.conf /etc/mycroft/mycroft.conf 21 | 22 | RUN pip3 install aiohttp \ 23 | && if [ "${ALPHA}" == "true" ]; then \ 24 | pip3 install ovos-stt-http-server SpeechRecognition ovos-stt-plugin-fasterwhisper --pre; \ 25 | else \ 26 | pip3 install ovos-stt-http-server SpeechRecognition ovos-stt-plugin-fasterwhisper; \ 27 | fi \ 28 | && mkdir -p ${HOME}/flagged \ 29 | && rm -rf ${HOME}/.cache/* 30 | 31 | ENV PATH=/home/${USER}/.venv/bin:$PATH 32 | ENV VIRTUAL_ENV=/home/${USER}/.venv 33 | 34 | ENTRYPOINT ovos-stt-server --engine ovos-stt-plugin-fasterwhisper 35 | 36 | EXPOSE 8080 37 | 
-------------------------------------------------------------------------------- /whisper/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG TAG=alpha 2 | FROM smartgic/ovos-stt-server-base:${TAG} 3 | 4 | ARG BUILD_DATE=unknown 5 | ARG VERSION=unknown 6 | 7 | LABEL org.opencontainers.image.title="Open Voice OS OCI STT Whisper image" 8 | LABEL org.opencontainers.image.description="OpenAI's Whisper automatic speech recognition (ASR) model" 9 | LABEL org.opencontainers.image.version=${VERSION} 10 | LABEL org.opencontainers.image.created=${BUILD_DATE} 11 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 12 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 13 | LABEL org.opencontainers.image.vendor="Open Voice OS" 14 | 15 | ARG ALPHA=true 16 | ARG USER=ovos 17 | 18 | SHELL ["/bin/bash", "-c"] 19 | 20 | COPY cpu.conf /etc/mycroft/mycroft.conf 21 | 22 | RUN pip3 install aiohttp \ 23 | && if [ "${ALPHA}" == "true" ]; then \ 24 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/TigreGotico/ovos-stt-plugin-whisper.git torch --pre; \ 25 | else \ 26 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/TigreGotico/ovos-stt-plugin-whisper.git torch; \ 27 | fi \ 28 | && mkdir -p ${HOME}/flagged \ 29 | && rm -rf ${HOME}/.cache/* 30 | 31 | ENV PATH=/home/${USER}/.venv/bin:$PATH 32 | ENV VIRTUAL_ENV=/home/${USER}/.venv 33 | 34 | ENTRYPOINT ovos-stt-server --engine ovos-stt-plugin-whisper 35 | 36 | EXPOSE 8080 37 | -------------------------------------------------------------------------------- /fasterwhisper/Dockerfile.cuda: -------------------------------------------------------------------------------- 1 | ARG TAG=alpha 2 | FROM smartgic/ovos-stt-server-base-cuda:${TAG} 3 | 4 | ARG BUILD_DATE=unknown 5 | ARG VERSION=unknown 6 | 7 | LABEL org.opencontainers.image.title="Open Voice OS OCI STT 
FasterWhisper image with Nvidia CUDA support" 8 | LABEL org.opencontainers.image.description="High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model" 9 | LABEL org.opencontainers.image.version=${VERSION} 10 | LABEL org.opencontainers.image.created=${BUILD_DATE} 11 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 12 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 13 | LABEL org.opencontainers.image.vendor="Open Voice OS" 14 | 15 | ARG ALPHA=false 16 | ARG USER=ovos 17 | 18 | SHELL ["/bin/bash", "-c"] 19 | 20 | COPY gpu.conf /etc/mycroft/mycroft.conf 21 | 22 | RUN pip3 install aiohttp \ 23 | && if [ "${ALPHA}" == "true" ]; then \ 24 | pip3 install ovos-stt-http-server SpeechRecognition ovos-stt-plugin-fasterwhisper --pre; \ 25 | else \ 26 | pip3 install ovos-stt-http-server SpeechRecognition ovos-stt-plugin-fasterwhisper; \ 27 | fi \ 28 | && mkdir -p ${HOME}/flagged \ 29 | && rm -rf ${HOME}/.cache/* 30 | 31 | ENV PATH=/home/${USER}/.venv/bin:$PATH 32 | ENV VIRTUAL_ENV=/home/${USER}/.venv 33 | 34 | ENTRYPOINT ovos-stt-server --engine ovos-stt-plugin-fasterwhisper 35 | 36 | EXPOSE 8080 37 | -------------------------------------------------------------------------------- /mynorthai/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG TAG=alpha 2 | FROM smartgic/ovos-stt-server-base:${TAG} 3 | 4 | ARG BUILD_DATE=unknown 5 | ARG VERSION=unknown 6 | 7 | LABEL org.opencontainers.image.title="Open Voice OS OCI STT MyNorthAI image" 8 | LABEL org.opencontainers.image.description="MyNorthAI is a STT specialized with Portuguese language." 
9 | LABEL org.opencontainers.image.version=${VERSION} 10 | LABEL org.opencontainers.image.created=${BUILD_DATE} 11 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 12 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 13 | LABEL org.opencontainers.image.vendor="Open Voice OS" 14 | 15 | ARG ALPHA=false 16 | ARG USER=ovos 17 | 18 | SHELL ["/bin/bash", "-c"] 19 | 20 | COPY cpu.conf /etc/mycroft/mycroft.conf 21 | 22 | RUN pip3 install aiohttp \ 23 | && if [ "${ALPHA}" == "true" ]; then \ 24 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/TigreGotico/ovos-stt-plugin-whisper.git torch --pre; \ 25 | else \ 26 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/TigreGotico/ovos-stt-plugin-whisper.git torch; \ 27 | fi \ 28 | && mkdir -p ${HOME}/flagged \ 29 | && rm -rf ${HOME}/.cache/* 30 | 31 | ENV PATH=/home/${USER}/.venv/bin:$PATH 32 | ENV VIRTUAL_ENV=/home/${USER}/.venv 33 | 34 | ENTRYPOINT ovos-stt-server --engine ovos-stt-plugin-whisper 35 | 36 | EXPOSE 8080 37 | -------------------------------------------------------------------------------- /whisper/Dockerfile.cuda: -------------------------------------------------------------------------------- 1 | ARG TAG=alpha 2 | FROM smartgic/ovos-stt-server-base-cuda:${TAG} 3 | 4 | ARG BUILD_DATE=unknown 5 | ARG VERSION=unknown 6 | 7 | LABEL org.opencontainers.image.title="Open Voice OS OCI STT Whisper image with Nvidia CUDA support" 8 | LABEL org.opencontainers.image.description="OpenAI's Whisper automatic speech recognition (ASR) model" 9 | LABEL org.opencontainers.image.version=${VERSION} 10 | LABEL org.opencontainers.image.created=${BUILD_DATE} 11 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 12 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 13 | LABEL org.opencontainers.image.vendor="Open Voice OS" 
14 | 15 | ARG ALPHA=false 16 | ARG USER=ovos 17 | 18 | SHELL ["/bin/bash", "-c"] 19 | 20 | COPY gpu.conf /etc/mycroft/mycroft.conf 21 | 22 | RUN pip3 install aiohttp \ 23 | && if [ "${ALPHA}" == "true" ]; then \ 24 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/TigreGotico/ovos-stt-plugin-whisper.git torch --pre; \ 25 | else \ 26 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/TigreGotico/ovos-stt-plugin-whisper.git torch; \ 27 | fi \ 28 | && mkdir -p ${HOME}/flagged \ 29 | && rm -rf ${HOME}/.cache/* 30 | 31 | ENV PATH=/home/${USER}/.venv/bin:$PATH 32 | ENV VIRTUAL_ENV=/home/${USER}/.venv 33 | 34 | ENTRYPOINT ovos-stt-server --engine ovos-stt-plugin-whisper 35 | 36 | EXPOSE 8080 37 | -------------------------------------------------------------------------------- /mynorthai/Dockerfile.cuda: -------------------------------------------------------------------------------- 1 | ARG TAG=alpha 2 | FROM smartgic/ovos-stt-server-base-cuda:${TAG} 3 | 4 | ARG BUILD_DATE=unknown 5 | ARG VERSION=unknown 6 | 7 | LABEL org.opencontainers.image.title="Open Voice OS OCI STT MyNorthAI image with Nvidia CUDA support" 8 | LABEL org.opencontainers.image.description="MyNorthAI is a STT specialized with Portuguese language." 
9 | LABEL org.opencontainers.image.version=${VERSION} 10 | LABEL org.opencontainers.image.created=${BUILD_DATE} 11 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 12 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 13 | LABEL org.opencontainers.image.vendor="Open Voice OS" 14 | 15 | ARG ALPHA=false 16 | ARG USER=ovos 17 | 18 | SHELL ["/bin/bash", "-c"] 19 | 20 | COPY gpu.conf /etc/mycroft/mycroft.conf 21 | 22 | RUN pip3 install aiohttp \ 23 | && if [ "${ALPHA}" == "true" ]; then \ 24 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/TigreGotico/ovos-stt-plugin-whisper.git torch --pre; \ 25 | else \ 26 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/TigreGotico/ovos-stt-plugin-whisper.git torch combo-lock==0.2.6; \ 27 | fi \ 28 | && mkdir -p ${HOME}/flagged \ 29 | && rm -rf ${HOME}/.cache/* 30 | 31 | ENV PATH=/home/${USER}/.venv/bin:$PATH 32 | ENV VIRTUAL_ENV=/home/${USER}/.venv 33 | 34 | ENTRYPOINT ovos-stt-server --engine ovos-stt-plugin-whisper 35 | 36 | EXPOSE 8080 37 | -------------------------------------------------------------------------------- /hitz/Dockerfile.rocm: -------------------------------------------------------------------------------- 1 | ARG TAG=alpha 2 | FROM smartgic/ovos-stt-server-base-rocm:${TAG} 3 | 4 | ARG BUILD_DATE=unknown 5 | ARG VERSION=unknown 6 | 7 | LABEL org.opencontainers.image.title="Open Voice OS OCI STT HiTZ image with AMD ROCm support" 8 | LABEL org.opencontainers.image.description="HiTZ is a STT specialized with Basque language." 
9 | LABEL org.opencontainers.image.version=${VERSION} 10 | LABEL org.opencontainers.image.created=${BUILD_DATE} 11 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 12 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 13 | LABEL org.opencontainers.image.vendor="Open Voice OS" 14 | 15 | ARG ALPHA=false 16 | ARG USER=ovos 17 | 18 | SHELL ["/bin/bash", "-c"] 19 | 20 | RUN pip3 install aiohttp setuptools wheel Cython uv \ 21 | && if [ "${ALPHA}" == "true" ]; then \ 22 | uv pip install ovos-stt-http-server SpeechRecognition git+https://github.com/OpenVoiceOS/ovos-stt-plugin-nemo combo-lock==0.2.6 --pre; \ 23 | else \ 24 | uv pip install ovos-stt-http-server SpeechRecognition git+https://github.com/OpenVoiceOS/ovos-stt-plugin-nemo combo-lock==0.2.6; \ 25 | fi \ 26 | && mkdir -p ${HOME}/flagged \ 27 | && rm -rf ${HOME}/.cache/* 28 | 29 | ENV PATH=/home/${USER}/.venv/bin:$PATH 30 | ENV VIRTUAL_ENV=/home/${USER}/.venv 31 | 32 | COPY gpu.conf /etc/mycroft/mycroft.conf 33 | 34 | ENTRYPOINT ovos-stt-server --engine ovos-stt-plugin-nemo 35 | 36 | EXPOSE 8080 37 | -------------------------------------------------------------------------------- /whisper/Dockerfile.rocm: -------------------------------------------------------------------------------- 1 | ARG TAG=alpha 2 | FROM smartgic/ovos-stt-server-base-rocm:${TAG} 3 | 4 | ARG BUILD_DATE=unknown 5 | ARG VERSION=unknown 6 | 7 | LABEL org.opencontainers.image.title="Open Voice OS OCI STT Whisper image with AMD ROCm support" 8 | LABEL org.opencontainers.image.description="OpenAI's Whisper automatic speech recognition (ASR) model" 9 | LABEL org.opencontainers.image.version=${VERSION} 10 | LABEL org.opencontainers.image.created=${BUILD_DATE} 11 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 12 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 13 | LABEL 
org.opencontainers.image.vendor="Open Voice OS" 14 | 15 | ARG ALPHA=true 16 | ARG USER=ovos 17 | 18 | SHELL ["/bin/bash", "-c"] 19 | 20 | RUN pip3 install aiohttp \ 21 | && if [ "${ALPHA}" == "true" ]; then \ 22 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/TigreGotico/ovos-stt-plugin-whisper.git combo-lock==0.2.6 --pre; \ 23 | else \ 24 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/TigreGotico/ovos-stt-plugin-whisper.git combo-lock==0.2.6; \ 25 | fi \ 26 | && mkdir -p ${HOME}/flagged \ 27 | && rm -rf ${HOME}/.cache/* 28 | 29 | ENV PATH=/home/${USER}/.venv/bin:$PATH 30 | ENV VIRTUAL_ENV=/home/${USER}/.venv 31 | 32 | COPY gpu.conf /etc/mycroft/mycroft.conf 33 | 34 | ENTRYPOINT ovos-stt-server --engine ovos-stt-plugin-whisper 35 | 36 | EXPOSE 8080 37 | -------------------------------------------------------------------------------- /mynorthai/Dockerfile.rocm: -------------------------------------------------------------------------------- 1 | ARG TAG=alpha 2 | FROM smartgic/ovos-stt-server-base-rocm:${TAG} 3 | 4 | ARG BUILD_DATE=unknown 5 | ARG VERSION=unknown 6 | 7 | LABEL org.opencontainers.image.title="Open Voice OS OCI STT MyNorthAI image with AMD ROCm support" 8 | LABEL org.opencontainers.image.description="MyNorthAI is a STT specialized with Portuguese language." 
9 | LABEL org.opencontainers.image.version=${VERSION} 10 | LABEL org.opencontainers.image.created=${BUILD_DATE} 11 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 12 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 13 | LABEL org.opencontainers.image.vendor="Open Voice OS" 14 | 15 | ARG ALPHA=false 16 | ARG USER=ovos 17 | 18 | SHELL ["/bin/bash", "-c"] 19 | 20 | RUN pip3 install aiohttp \ 21 | && if [ "${ALPHA}" == "true" ]; then \ 22 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/TigreGotico/ovos-stt-plugin-whisper.git combo-lock==0.2.6 --pre; \ 23 | else \ 24 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/TigreGotico/ovos-stt-plugin-whisper.git combo-lock==0.2.6; \ 25 | fi \ 26 | && mkdir -p ${HOME}/flagged \ 27 | && rm -rf ${HOME}/.cache/* 28 | 29 | COPY gpu.conf /etc/mycroft/mycroft.conf 30 | 31 | ENV PATH=/home/${USER}/.venv/bin:$PATH 32 | ENV VIRTUAL_ENV=/home/${USER}/.venv 33 | 34 | ENTRYPOINT ovos-stt-server --engine ovos-stt-plugin-whisper 35 | 36 | EXPOSE 8080 37 | -------------------------------------------------------------------------------- /base/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:trixie-slim 2 | 3 | ARG BUILD_DATE=unknown 4 | ARG VERSION=unknown 5 | 6 | LABEL org.opencontainers.image.title="Open Voice OS OCI Speech-to-Text base image" 7 | LABEL org.opencontainers.image.description="Used as base layer for other OCI STT images" 8 | LABEL org.opencontainers.image.version=${VERSION} 9 | LABEL org.opencontainers.image.created=${BUILD_DATE} 10 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 11 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 12 | LABEL org.opencontainers.image.vendor="Open Voice OS" 13 | 14 | ARG ALPHA=false 15 | ARG USER=ovos 16 | 17 | 
ENV DEBIAN_FRONTEND=noninteractive 18 | 19 | SHELL ["/bin/bash", "-c"] 20 | 21 | RUN apt-get update \ 22 | && apt-get install -y vim git python3 python3-venv python3-pip curl ffmpeg \ 23 | && c_rehash \ 24 | && useradd --no-log-init $USER -m -c "Open Voice OS user" \ 25 | && python3 -m venv /home/${USER}/.venv \ 26 | && . /home/${USER}/.venv/bin/activate \ 27 | && mkdir -p /home/${USER}/.config/mycroft /home/${USER}/.cache /home/${USER}/gradio_cached_examples \ 28 | && pip3 install aiohttp \ 29 | && chown ${USER}:${USER} -R /home/${USER} \ 30 | && apt-get --purge autoremove -y \ 31 | && rm -rf ${HOME}/.cache/* /var/lib/apt /var/log/{apt,dpkg.log} 32 | 33 | USER $USER 34 | 35 | ENV PATH=/home/${USER}/.venv/bin:$PATH 36 | ENV VIRTUAL_ENV=/home/${USER}/.venv 37 | 38 | WORKDIR /home/${USER} 39 | -------------------------------------------------------------------------------- /project-aina-whisper/Dockerfile.cuda: -------------------------------------------------------------------------------- 1 | ARG TAG=alpha 2 | FROM smartgic/ovos-stt-server-base-cuda:${TAG} 3 | 4 | ARG BUILD_DATE=unknown 5 | ARG VERSION=unknown 6 | 7 | LABEL org.opencontainers.image.title="Open Voice OS OCI STT project AINA Whisper image with Nvidia CUDA support" 8 | LABEL org.opencontainers.image.description="project AINA Whisper is a STT specialized with Catalan language." 
9 | LABEL org.opencontainers.image.version=${VERSION} 10 | LABEL org.opencontainers.image.created=${BUILD_DATE} 11 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 12 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 13 | LABEL org.opencontainers.image.vendor="Open Voice OS" 14 | 15 | ARG ALPHA=true 16 | ARG USER=ovos 17 | 18 | SHELL ["/bin/bash", "-c"] 19 | 20 | COPY gpu.conf /etc/mycroft/mycroft.conf 21 | 22 | RUN pip3 install aiohttp \ 23 | && if [ "${ALPHA}" == "true" ]; then \ 24 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/TigreGotico/ovos-stt-plugin-whisper.git torch --pre; \ 25 | else \ 26 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/TigreGotico/ovos-stt-plugin-whisper.git torch; \ 27 | fi \ 28 | && mkdir -p ${HOME}/flagged \ 29 | && rm -rf ${HOME}/.cache/* 30 | 31 | ENV PATH=/home/${USER}/.venv/bin:$PATH 32 | ENV VIRTUAL_ENV=/home/${USER}/.venv 33 | 34 | ENTRYPOINT ovos-stt-server --engine ovos-stt-plugin-whisper 35 | 36 | EXPOSE 8080 37 | -------------------------------------------------------------------------------- /project-aina-whisper/Dockerfile.rocm: -------------------------------------------------------------------------------- 1 | ARG TAG=alpha 2 | FROM smartgic/ovos-stt-server-base-rocm:${TAG} 3 | 4 | ARG BUILD_DATE=unknown 5 | ARG VERSION=unknown 6 | 7 | LABEL org.opencontainers.image.title="Open Voice OS OCI STT project AINA Whisper image with AMD ROCm support" 8 | LABEL org.opencontainers.image.description="project AINA Whisper is a STT specialized with Catalan language." 
9 | LABEL org.opencontainers.image.version=${VERSION} 10 | LABEL org.opencontainers.image.created=${BUILD_DATE} 11 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 12 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 13 | LABEL org.opencontainers.image.vendor="Open Voice OS" 14 | 15 | ARG ALPHA=true 16 | ARG USER=ovos 17 | 18 | SHELL ["/bin/bash", "-c"] 19 | 20 | RUN pip3 install aiohttp \ 21 | && if [ "${ALPHA}" == "true" ]; then \ 22 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/TigreGotico/ovos-stt-plugin-whisper.git combo-lock==0.2.6 --pre; \ 23 | else \ 24 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/TigreGotico/ovos-stt-plugin-whisper.git combo-lock==0.2.6; \ 25 | fi \ 26 | && mkdir -p ${HOME}/flagged \ 27 | && rm -rf ${HOME}/.cache/* 28 | 29 | ENV PATH=/home/${USER}/.venv/bin:$PATH 30 | ENV VIRTUAL_ENV=/home/${USER}/.venv 31 | 32 | COPY gpu.conf /etc/mycroft/mycroft.conf 33 | # Exec form: the server is started directly (no shell wrapper), so it receives stop signals and accepts extra runtime arguments 34 | ENTRYPOINT ["ovos-stt-server", "--engine", "ovos-stt-plugin-whisper"] 35 | 36 | EXPOSE 8080 37 | -------------------------------------------------------------------------------- /citrinet/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG TAG=alpha 2 | FROM smartgic/ovos-stt-server-base:${TAG} 3 | 4 | ARG BUILD_DATE=unknown 5 | ARG VERSION=unknown 6 | 7 | LABEL org.opencontainers.image.title="Open Voice OS OCI STT Citrinet image" 8 | LABEL org.opencontainers.image.description="Citrinet is a version of QuartzNet [ASR-MODELS6] that extends ContextNet [ASR-MODELS2], utilizing subword encoding (via Word Piece tokenization) and Squeeze-and-Excitation mechanism [ASR-MODELS4] to obtain highly accurate audio transcripts while utilizing a non-autoregressive CTC based decoding scheme for efficient inference."
9 | LABEL org.opencontainers.image.version=${VERSION} 10 | LABEL org.opencontainers.image.created=${BUILD_DATE} 11 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 12 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 13 | LABEL org.opencontainers.image.vendor="Open Voice OS" 14 | 15 | ARG ALPHA=false 16 | ARG USER=ovos 17 | 18 | SHELL ["/bin/bash", "-c"] 19 | 20 | RUN pip3 install aiohttp \ 21 | && if [ "${ALPHA}" == "true" ]; then \ 22 | pip3 install ovos-stt-http-server SpeechRecognition git+https://github.com/OpenVoiceOS/ovos-stt-plugin-citrinet.git --pre; \ 23 | else \ 24 | pip3 install ovos-stt-http-server SpeechRecognition ovos-stt-plugin-citrinet; \ 25 | fi \ 26 | && mkdir -p ${HOME}/flagged \ 27 | && rm -rf ${HOME}/.cache/* 28 | # key=value form: the space-separated ENV syntax is a legacy format 29 | ENV PATH=/home/${USER}/.venv/bin:$PATH 30 | ENV VIRTUAL_ENV=/home/${USER}/.venv 31 | # Exec form: the server is started directly (no shell wrapper), so it receives stop signals and accepts extra runtime arguments 32 | ENTRYPOINT ["ovos-stt-server", "--engine", "ovos-stt-plugin-citrinet"] 33 | 34 | EXPOSE 8080 35 | -------------------------------------------------------------------------------- /base/Dockerfile.cuda: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 2 | 3 | ARG BUILD_DATE=unknown 4 | ARG VERSION=unknown 5 | 6 | LABEL org.opencontainers.image.title="Open Voice OS OCI Speech-to-Text base image with CUDA support" 7 | LABEL org.opencontainers.image.description="Used as base layer for other OCI STT images that require Nvidia CUDA 12.6.x support" 8 | LABEL org.opencontainers.image.version=${VERSION} 9 | LABEL org.opencontainers.image.created=${BUILD_DATE} 10 | LABEL org.opencontainers.image.documentation="https://openvoiceos.github.io/community-docs" 11 | LABEL org.opencontainers.image.source="https://github.com/OpenVoiceOS/ovos-docker-stt" 12 | LABEL org.opencontainers.image.vendor="Open Voice OS" 13 | 14 | ARG ALPHA=false 15 | ARG USER=ovos 16 | ARG LIBNVRCT="find
/usr/local/cuda/lib64/ -type f -name 'libnvrtc.so*'" 17 | 18 | ENV DEBIAN_FRONTEND=noninteractive 19 | 20 | SHELL ["/bin/bash", "-c"] 21 | # Create the unprivileged user and its venv, then add an unversioned libnvrtc.so symlink only when the LIBNVRCT lookup actually finds a library 22 | RUN apt-get update \ 23 | && apt-get install -y vim git python3 python3-venv python3-pip curl ffmpeg \ 24 | && c_rehash \ 25 | && useradd --no-log-init $USER -m -c "Open Voice OS user" \ 26 | && python3 -m venv /home/${USER}/.venv \ 27 | && . /home/${USER}/.venv/bin/activate \ 28 | && mkdir -p /home/${USER}/.config/mycroft /home/${USER}/.cache /home/${USER}/gradio_cached_examples \ 29 | && chown ${USER}:${USER} -R /home/${USER} \ 30 | && NVRTC_LIB="$(eval "$LIBNVRCT" | head -n 1)" \ 31 | && if [ -n "${NVRTC_LIB}" ] && [ -f "${NVRTC_LIB}" ]; then \ 32 | ln -s -f "$(basename "${NVRTC_LIB}")" /usr/local/cuda/lib64/libnvrtc.so; \ 33 | fi \ 34 | && apt-get --purge autoremove -y \ 35 | && rm -rf ${HOME}/.cache/* /var/lib/apt /var/log/{apt,dpkg.log} 36 | 37 | USER $USER 38 | 39 | ENV PATH=/home/${USER}/.venv/bin:$PATH 40 | ENV VIRTUAL_ENV=/home/${USER}/.venv 41 | 42 | WORKDIR /home/${USER} 43 | -------------------------------------------------------------------------------- /docker-compose.cuda.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: "3.9" 3 | 4 | x-podman: &podman 5 | userns_mode: keep-id 6 | security_opt: 7 | - "label=disable" 8 | 9 | x-logging: &default-logging 10 | driver: json-file 11 | options: 12 | mode: non-blocking 13 | max-buffer-size: 4m 14 | 15 | volumes: 16 | ovos_stt_fasterwhisper_cache: 17 | name: ovos_stt_fasterwhisper_cache 18 | driver: local 19 | ovos_stt_fasterwshiper_gradio_cache: 20 | name: ovos_stt_fasterwshiper_gradio_cache 21 | driver: local 22 | ovos_stt_mynorthai_cache: 23 | name: ovos_stt_mynorthai_cache 24 | driver: local 25 | ovos_stt_mynorthai_gradio_cache: 26 | name: ovos_stt_mynorthai_gradio_cache 27 | driver: local 28 | 29 | services: 30 | ovos_stt_mynorthai: 31 | <<: *podman 32 | build: 33 | context: ./mynorthai 34 | dockerfile: Dockerfile.cuda 35 | container_name:
ovos_stt_mynorthai 36 | hostname: ovos_stt_mynorthai 37 | restart: unless-stopped 38 | image: docker.io/smartgic/ovos-stt-server-mynorthai-cuda:${VERSION} 39 | logging: *default-logging 40 | pull_policy: always 41 | tty: true 42 | deploy: 43 | resources: 44 | reservations: 45 | devices: 46 | - driver: nvidia 47 | count: 1 48 | capabilities: 49 | - gpu 50 | environment: 51 | TZ: $TZ 52 | ports: 53 | - "8084:8080" 54 | volumes: 55 | - ${CONFIG_FOLDER}:/home/${OVOS_USER}/.config/mycroft:ro,z 56 | - ovos_stt_mynorthai_cache:/home/${OVOS_USER}/.local/share 57 | - ovos_stt_mynorthai_gradio_cache:/home/${OVOS_USER}/gradio_cached_examples 58 | 59 | ovos_stt_fasterwhisper: 60 | <<: *podman 61 | build: 62 | context: ./fasterwhisper 63 | dockerfile: Dockerfile.cuda 64 | container_name: ovos_stt_fasterwhisper 65 | hostname: ovos_stt_fasterwhisper 66 | restart: unless-stopped 67 | image: docker.io/smartgic/ovos-stt-server-fasterwhisper-cuda:${VERSION} 68 | logging: *default-logging 69 | pull_policy: always 70 | tty: true 71 | deploy: 72 | resources: 73 | reservations: 74 | devices: 75 | - driver: nvidia 76 | count: 1 77 | capabilities: 78 | - gpu 79 | environment: 80 | TZ: $TZ 81 | CT2_CUDA_ALLOW_FP16: 1 82 | ports: 83 | - "8080:8080" 84 | volumes: 85 | - ${CONFIG_FOLDER}:/home/${OVOS_USER}/.config/mycroft:ro,z 86 | - ovos_stt_fasterwhisper_cache:/home/${OVOS_USER}/.cache 87 | - ovos_stt_fasterwshiper_gradio_cache:/home/${OVOS_USER}/gradio_cached_examples 88 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: "3.9" 3 | 4 | x-podman: &podman 5 | userns_mode: keep-id 6 | security_opt: 7 | - "label=disable" 8 | 9 | x-logging: &default-logging 10 | driver: json-file 11 | options: 12 | mode: non-blocking 13 | max-buffer-size: 4m 14 | 15 | volumes: 16 | ovos_stt_fasterwhisper_cache: 17 | name: ovos_stt_fasterwhisper_cache 18 | 
driver: local 19 | ovos_stt_fasterwshiper_gradio_cache: 20 | name: ovos_stt_fasterwshiper_gradio_cache 21 | driver: local 22 | ovos_stt_vosk_cache: 23 | name: ovos_stt_vosk_cache 24 | driver: local 25 | ovos_stt_vosk_gradio_cache: 26 | name: ovos_stt_vosk_gradio_cache 27 | driver: local 28 | ovos_stt_chromium_gradio_cache: 29 | name: ovos_stt_chromium_gradio_cache 30 | driver: local 31 | ovos_stt_deepgram_gradio_cache: 32 | name: ovos_stt_deepgram_gradio_cache 33 | driver: local 34 | ovos_stt_citrinet_cache: 35 | name: ovos_stt_citrinet_cache 36 | driver: local 37 | ovos_stt_citrinet_gradio_cache: 38 | name: ovos_stt_citrinet_gradio_cache 39 | driver: local 40 | 41 | services: 42 | ovos_stt_fasterwhisper: 43 | <<: *podman 44 | build: ./fasterwhisper 45 | container_name: ovos_stt_fasterwhisper 46 | hostname: ovos_stt_fasterwhisper 47 | restart: unless-stopped 48 | image: docker.io/smartgic/ovos-stt-server-fasterwhisper:${VERSION} 49 | logging: *default-logging 50 | pull_policy: always 51 | tty: true 52 | environment: 53 | TZ: $TZ 54 | ports: 55 | - "8080:8080" 56 | volumes: 57 | - ${CONFIG_FOLDER}:/home/${OVOS_USER}/.config/mycroft:ro,z 58 | - ovos_stt_fasterwhisper_cache:/home/${OVOS_USER}/.cache 59 | - ovos_stt_fasterwshiper_gradio_cache:/home/${OVOS_USER}/gradio_cached_examples 60 | 61 | ovos_stt_vosk: 62 | <<: *podman 63 | build: ./vosk 64 | container_name: ovos_stt_vosk 65 | hostname: ovos_stt_vosk 66 | restart: unless-stopped 67 | image: docker.io/smartgic/ovos-stt-server-vosk:${VERSION} 68 | logging: *default-logging 69 | pull_policy: always 70 | tty: true 71 | environment: 72 | TZ: $TZ 73 | ports: 74 | - "8081:8080" 75 | volumes: 76 | - ${CONFIG_FOLDER}:/home/${OVOS_USER}/.config/mycroft:ro,z 77 | - ovos_stt_vosk_cache:/home/${OVOS_USER}/.local/share/vosk 78 | - ovos_stt_vosk_gradio_cache:/home/${OVOS_USER}/gradio_cached_examples 79 | 80 | ovos_stt_chromium: 81 | <<: *podman 82 | build: ./chromium 83 | container_name: ovos_stt_chromium 84 | hostname: 
ovos_stt_chromium 85 | restart: unless-stopped 86 | image: docker.io/smartgic/ovos-stt-server-chromium:${VERSION} 87 | logging: *default-logging 88 | pull_policy: always 89 | tty: true 90 | environment: 91 | TZ: $TZ 92 | ports: 93 | - "8082:8080" 94 | volumes: 95 | - ${CONFIG_FOLDER}:/home/${OVOS_USER}/.config/mycroft:ro,z 96 | - ovos_stt_chromium_gradio_cache:/home/${OVOS_USER}/gradio_cached_examples 97 | 98 | ovos_stt_deepgram: 99 | <<: *podman 100 | container_name: ovos_stt_deepgram 101 | hostname: ovos_stt_deepgram 102 | restart: unless-stopped 103 | image: docker.io/smartgic/ovos-stt-server-deepgram:${VERSION} 104 | logging: *default-logging 105 | pull_policy: always 106 | tty: true 107 | environment: 108 | TZ: $TZ 109 | ports: 110 | - "8083:8080" 111 | volumes: 112 | - ${CONFIG_FOLDER}:/home/${OVOS_USER}/.config/mycroft:ro,z 113 | - ovos_stt_deepgram_gradio_cache:/home/${OVOS_USER}/gradio_cached_examples 114 | 115 | ovos_stt_citrinet: 116 | <<: *podman 117 | build: ./citrinet 118 | container_name: ovos_stt_citrinet 119 | hostname: ovos_stt_citrinet 120 | restart: unless-stopped 121 | image: docker.io/smartgic/ovos-stt-server-citrinet:${VERSION} 122 | logging: *default-logging 123 | pull_policy: always 124 | tty: true 125 | environment: 126 | TZ: $TZ 127 | ports: 128 | - "8084:8080" 129 | volumes: 130 | - ${CONFIG_FOLDER}:/home/${OVOS_USER}/.config/mycroft:ro,z 131 | - ovos_stt_citrinet_cache:/home/${OVOS_USER}/.cache 132 | - ovos_stt_citrinet_gradio_cache:/home/${OVOS_USER}/gradio_cached_examples 133 | -------------------------------------------------------------------------------- /docker-compose.rocm.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: "3.9" 3 | 4 | x-podman: &podman 5 | userns_mode: keep-id 6 | security_opt: 7 | - "label=disable" 8 | 9 | x-logging: &default-logging 10 | driver: json-file 11 | options: 12 | mode: non-blocking 13 | max-buffer-size: 4m 14 | 15 | volumes: 16 | 
ovos_stt_whisper_cache: 17 | name: ovos_stt_whisper_cache 18 | driver: local 19 | ovos_stt_whisper_gradio_cache: 20 | name: ovos_stt_whisper_gradio_cache 21 | driver: local 22 | ovos_stt_hitz_cache: 23 | name: ovos_stt_hitz_cache 24 | driver: local 25 | ovos_stt_hitz_gradio_cache: 26 | name: ovos_stt_hitz_gradio_cache 27 | driver: local 28 | ovos_stt_mynorthai_cache: 29 | name: ovos_stt_mynorthai_cache 30 | driver: local 31 | ovos_stt_mynorthai_gradio_cache: 32 | name: ovos_stt_mynorthai_gradio_cache 33 | driver: local 34 | ovos_stt_aina_whisper_cache: 35 | name: ovos_stt_aina_whisper_cache 36 | driver: local 37 | ovos_stt_aina_whisper_gradio_cache: 38 | name: ovos_stt_aina_whisper_gradio_cache 39 | driver: local 40 | 41 | services: 42 | # The mynorthai service mounts its own ovos_stt_mynorthai_* volumes so its caches are not shared with the AINA Whisper service 43 | ovos_stt_mynorthai_rocm: 44 | <<: *podman 45 | build: 46 | context: ./mynorthai 47 | dockerfile: Dockerfile.rocm 48 | container_name: ovos_stt_mynorthai_rocm 49 | hostname: ovos_stt_mynorthai 50 | restart: unless-stopped 51 | image: smartgic/ovos-stt-server-mynorthai-rocm:${VERSION} 52 | logging: *default-logging 53 | #pull_policy: always 54 | stdin_open: true 55 | tty: true 56 | cap_add: 57 | - SYS_PTRACE 58 | security_opt: 59 | - seccomp=unconfined 60 | group_add: 61 | - video 62 | ipc: host 63 | devices: 64 | - /dev/kfd 65 | - /dev/dri 66 | environment: 67 | TZ: $TZ 68 | HSA_OVERRIDE_GFX_VERSION: 11.0.0 # YMMV - I needed this for AMD Radeon RX 7600 XT 69 | ports: 70 | - "9081:8080" 71 | volumes: 72 | - ${CONFIG_FOLDER}:/home/${OVOS_USER}/.config/mycroft:ro,z 73 | - ovos_stt_mynorthai_cache:/home/${OVOS_USER}/.cache 74 | - ovos_stt_mynorthai_gradio_cache:/home/${OVOS_USER}/gradio_cached_examples 75 | 76 | ovos_stt_aina_whisper_rocm: 77 | <<: *podman 78 | build: 79 | context: ./project-aina-whisper 80 | dockerfile: Dockerfile.rocm 81 | container_name: ovos_stt_aina_whisper_rocm 82 | hostname: ovos_stt_aina_whisper 83 | restart: unless-stopped 84 | image: smartgic/ovos-stt-server-aina-whisper-rocm:${VERSION} 85 |
logging: *default-logging 86 | #pull_policy: always 87 | stdin_open: true 88 | tty: true 89 | cap_add: 90 | - SYS_PTRACE 91 | security_opt: 92 | - seccomp=unconfined 93 | group_add: 94 | - video 95 | ipc: host 96 | devices: 97 | - /dev/kfd 98 | - /dev/dri 99 | environment: 100 | TZ: $TZ 101 | HSA_OVERRIDE_GFX_VERSION: 11.0.0 # YMMV - I needed this for AMD Radeon RX 7600 XT 102 | ports: 103 | - "9082:8080" 104 | volumes: 105 | - ${CONFIG_FOLDER}:/home/${OVOS_USER}/.config/mycroft:ro,z 106 | - ovos_stt_aina_whisper_cache:/home/${OVOS_USER}/.cache 107 | - ovos_stt_aina_whisper_gradio_cache:/home/${OVOS_USER}/gradio_cached_examples 108 | 109 | ovos_stt_hitz_rocm: 110 | <<: *podman 111 | build: 112 | context: ./hitz 113 | dockerfile: Dockerfile.rocm 114 | container_name: ovos_stt_hitz_rocm 115 | hostname: ovos_stt_hitz 116 | restart: unless-stopped 117 | image: smartgic/ovos-stt-server-hitz-rocm:${VERSION} 118 | logging: *default-logging 119 | #pull_policy: always 120 | stdin_open: true 121 | tty: true 122 | cap_add: 123 | - SYS_PTRACE 124 | security_opt: 125 | - seccomp=unconfined 126 | group_add: 127 | - video 128 | ipc: host 129 | devices: 130 | - /dev/kfd 131 | - /dev/dri 132 | environment: 133 | TZ: $TZ 134 | HSA_OVERRIDE_GFX_VERSION: 11.0.0 # YMMV - I needed this for AMD Radeon RX 7600 XT 135 | ports: 136 | - "9083:8080" 137 | volumes: 138 | - ${CONFIG_FOLDER}:/home/${OVOS_USER}/.config/mycroft:ro,z 139 | - ovos_stt_hitz_cache:/home/${OVOS_USER}/.cache 140 | - ovos_stt_hitz_gradio_cache:/home/${OVOS_USER}/gradio_cached_examples 141 | 142 | ovos_stt_whisper_rocm: 143 | <<: *podman 144 | build: 145 | context: ./whisper 146 | dockerfile: Dockerfile.rocm 147 | container_name: ovos_stt_whisper_rocm 148 | hostname: ovos_stt_whisper 149 | restart: unless-stopped 150 | image: smartgic/ovos-stt-server-whisper-rocm:${VERSION} 151 | logging: *default-logging 152 | #pull_policy: always 153 | stdin_open: true 154 | tty: true 155 | cap_add: 156 | - SYS_PTRACE 157 | 
security_opt: 158 | - seccomp=unconfined 159 | group_add: 160 | - video 161 | ipc: host 162 | devices: 163 | - /dev/kfd 164 | - /dev/dri 165 | environment: 166 | TZ: $TZ 167 | HSA_OVERRIDE_GFX_VERSION: 11.0.0 # YMMV - I needed this for AMD Radeon RX 7600 XT 168 | ports: 169 | - "9080:8080" 170 | volumes: 171 | - ${CONFIG_FOLDER}:/home/${OVOS_USER}/.config/mycroft:ro,z 172 | - ovos_stt_whisper_cache:/home/${OVOS_USER}/.cache 173 | - ovos_stt_whisper_gradio_cache:/home/${OVOS_USER}/gradio_cached_examples 174 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Open Voice OS Speech-to-Text (STT) on Docker or Podman 2 | 3 | ## What's a Speech-to-Text (STT)? 4 | 5 | *According to :* 6 | 7 | > Speech to text is a speech recognition software that enables the recognition and translation of spoken language into text through computational linguistics. It is also known as speech recognition or computer speech recognition. Specific applications, tools, and devices can transcribe audio streams in real-time to display text and act on it. 8 | 9 | Open Voice OS provides support for different STT engines via a plugin mechanism exposing HTTP endpoints to be consumed by the voice assistant. 10 | 11 | ## Containerized STT plugins 12 | 13 | To facilitate the installation and the adoption of local Speech-to-Text engine, we build a set of OCI images compatible with Docker, Podman and Kubernetes as well. 14 | 15 | | Image | Port | Description | 16 | |--------------------------------------| --- | --- | 17 | | `ovos-stt-plugin-chromium` | 8082 | A STT plugin for OVOS using the Google Chrome browser API | 18 | | `ovos-stt-plugin-deepgram` | 8083 | Unmatched accuracy. Blazing fast. Enterprise scale. Hands-down the best price. 
Everything developers need to build with confidence and ship faster | 19 | | `ovos-stt-plugin-fasterwhisper` | 8080 | High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model | 20 | | `ovos-stt-plugin-fasterwhisper-cuda` | 8080 | High-performance inference of OpenAI's Whisper automatic speech recognition (ASR) model supporting Nvidia CUDA | 21 | | `ovos-stt-plugin-citrinet` | 8084 | Conversational AI toolkit built for researchers working on automatic speech recognition (ASR), natural language processing (NLP), and text-to-speech synthesis (TTS) | 22 | | `ovos-stt-plugin-vosk` | 8081 | Vosk is a speech recognition toolkit supporting more than 20 languages and dialects, works offline and is able to run on lightweight devices | 23 | 24 | Using this approach also allows you to decentralize the STT server, which means that it doesn't have to run locally on the voice assistant but can run on a remote server with more compute power using CPU and/or GPU. 25 | 26 | ### Image alternatives 27 | 28 | There are two *(2)* different implementations for the Faster Whisper STT plugin. 29 | 30 | - `ovos-stt-plugin-fasterwhisper` image using only the CPU to transcribe *(default)* 31 | - `ovos-stt-plugin-fasterwhisper-cuda` image using only the GPU to transcribe 32 | 33 | To use `ovos-stt-plugin-fasterwhisper-cuda`, please review the `docker-compose.cuda.yml` file. 34 | 35 | **Only one implementation can be selected at a time.** 36 | 37 | ## Requirements 38 | 39 | ### Docker or Podman 40 | 41 | Docker or Podman *(rootless)* is of course required and `docker compose` *(not `docker-compose`!!)* or `podman-compose` is nice to have to simplify the whole process of deploying the whole stack by using the `docker-compose.yml` files *(for Docker, this command will be embedded depending on the version, for Podman, `podman-compose` command comes from a different package)*.
42 | 43 | **If you plan to pass through GPUs in order to leverage Nvidia CUDA with Docker or Podman, please make sure you have configured your container engine properly to support GPUs.** 44 | 45 | ## How to build these images 46 | 47 | The `base` image is the main layer for the other images, for example the `fasterwhisper` image requires the `base` image to be built. 48 | 49 | ```bash 50 | git clone https://github.com/OpenVoiceOS/ovos-docker-stt.git 51 | cd ovos-docker-stt 52 | docker buildx build fasterwhisper/ -t smartgic/ovos-stt-server-fasterwhisper:alpha --build-arg BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') --no-cache 53 | # Or: 54 | podman buildx build fasterwhisper/ -t smartgic/ovos-stt-server-fasterwhisper:alpha --build-arg BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') --no-cache 55 | ``` 56 | 57 | ### Arguments 58 | 59 | There is a list of available arguments that can be used during the image build process. 60 | 61 | | Name | Value | Default | Description | 62 | | --- | --- |-----------| --- | 63 | | `ALPHA` | `true` | `false` | Using the alpha releases from PyPI built from the `dev` branches | 64 | | `BUILD_DATE` | `$(date -u +'%Y-%m-%dT%H:%M:%SZ')` | `unknown` | Used as `LABEL` within the Dockerfile to determine the build date | 65 | | `TAG` | `dev` | `alpha` | OCI image tag, (e.g. `docker pull smartgic/ovos-stt-server-base:dev`) | 66 | | `VERSION` | `0.0.8a` | `unknown` | Used as `LABEL` within the Dockerfile to determine the version | 67 | 68 | Pre-built images are already available [here](https://hub.docker.com/u/smartgic) and are the default referenced within the `docker-compose.yml` file. 69 | 70 | ## How to use these images 71 | 72 | The `docker-compose.yml` file provides an easy way to provision the container stack *(volumes and services)* with the required configuration for each of them. `docker compose` or `podman-compose` both support environment files, check the `.env` file.
73 | 74 | ```bash 75 | git clone https://github.com/OpenVoiceOS/ovos-docker-stt.git 76 | mkdir -p ~/ovos-tts-stt/config 77 | chown ${USER}:${USER} -R ~/ovos-tts-stt 78 | cd ovos-docker-stt 79 | docker compose up -d 80 | # Or: 81 | podman-compose up -d 82 | ``` 83 | 84 | To reduce the potential overhead due to the image downloads and extracts, the `--parallel` option can be used in order to process the images by batch of `x` *(where `x` is an integer)*. 85 | 86 | ```bash 87 | docker compose --parallel 3 up -d 88 | # Or: 89 | podman-compose --parallel 3 up -d 90 | ``` 91 | 92 | If you only plan to use the Faster Whisper STT server then you could reference it on the command line. 93 | 94 | ```bash 95 | docker compose up -d ovos_stt_fasterwhisper 96 | # Or: 97 | podman-compose up -d ovos_stt_fasterwhisper 98 | ``` 99 | 100 | Some variables might need to be tuned to match your setup such as the timezone, the directories, *etc...*, have a look into the `.env` file before running `docker compose` or `podman-compose`. 101 | 102 | The `OVOS_USER` variable should be changed **only** if you build the Docker images with a different user than `ovos`. 103 | 104 | ## How to update the current stack 105 | 106 | The easiest way to update a stack already deployed by `docker compose` or `podman-compose` is to use `docker compose` or `podman-compose`. :relaxed: 107 | 108 | Because the `pull_policy` option of each service is set to `always`, every time a new image is uploaded with the same tag, `docker compose` or `podman-compose` will pull it and re-create the container based on this new image. 109 | 110 | ```bash 111 | docker compose up -d 112 | # Or: 113 | podman-compose up -d 114 | ``` 115 | 116 | If you want to change the tag to deploy, update the `.env` file with the new value. 117 | 118 | ## Configuring the STT plugins 119 | 120 | The `mycroft.conf` file located in `CONFIG_FOLDER` *(`~/ovos-tts-stt/config` by default, see the `.env` file)* is used to configure the STT plugin.
Make sure to adapt the sample below to fit your requirements. 121 | 122 | ```json 123 | { 124 | "logs": { 125 | "path": "stdout" 126 | }, 127 | "stt": { 128 | "module": "ovos-stt-plugin-fasterwhisper", 129 | "ovos-stt-plugin-fasterwhisper": { 130 | "model": "whisper-large-v3-turbo", 131 | "compute_type": "float16", 132 | "use_cuda": true, 133 | "cpu_thread": 8 134 | }, 135 | "ovos-stt-plugin-vosk-streaming": { 136 | "model": "https://alphacephei.com/vosk/models/vosk-model-en-us-0.42-gigaspeech.zip", 137 | "verbose": false 138 | }, 139 | "ovos-stt-plugin-vosk": { 140 | "model": "http://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip", 141 | "verbose": false 142 | }, 143 | "ovos-stt-plugin-deepgram": { 144 | "key": "GET A KEY FROM DEEPGRAM WEBSITE :)" 145 | }, 146 | "ovos-stt-plugin-chromium": { 147 | "lang": "en-US", 148 | "pfilter": false, 149 | "debug": false 150 | } 151 | } 152 | } 153 | ``` 154 | 155 | If you don't plan to use Nvidia CUDA with the STT Faster Whisper plugin, then `use_cuda` should be set to `false` and `compute_type` set to `int8`. 156 | 157 | ## Configure the voice assistant 158 | 159 | Once the STT servers are up and running, the voice assistant must be configured to reference them. Please make sure to add the section below to your `~/ovos/config/mycroft.conf` configuration file. 160 | 161 | ```json 162 | { 163 | "stt": { 164 | "module": "ovos-stt-plugin-server", 165 | "fallback_module": "ovos-stt-plugin-vosk", 166 | "ovos-stt-plugin-server": { 167 | "urls": [ 168 | "http://192.168.1.227:8080/stt", 169 | "http://192.168.1.227:8081/stt", 170 | "http://192.168.1.227:8082/stt", 171 | "http://192.168.1.227:8083/stt", 172 | "http://192.168.1.227:8084/stt", 173 | "https://stt.openvoiceos.org/stt" 174 | ] 175 | } 176 | } 177 | } 178 | ``` 179 | 180 | The configuration means that `ovos-stt-plugin-server` will be used as default STT plugin. 
The plugin has a list of six *(6)* STT servers; if one is down, the plugin moves on to the next one, and so on. 181 | 182 | If all the STT servers from `ovos-stt-plugin-server` are down then the voice assistant will fall back to the `ovos-stt-plugin-vosk` STT server running locally to the voice assistant. 183 | 184 | ## Debug 185 | 186 | ### Is the STT alive? 187 | 188 | In order to check if a STT server is up and running, the `/status` endpoint should be called *(the `jq` command is not mandatory, just nice to have)*. 189 | 190 | ```bash 191 | curl -v http://192.168.1.227:8080/status | jq 192 | ``` 193 | 194 | ### Logging 195 | 196 | Enable debug mode in `~/ovos/config/mycroft.conf` to get more verbosity from the logs. All containers will have to be restarted to receive the configuration change. 197 | 198 | ```json 199 | { 200 | "debug": true, 201 | "log_level": "DEBUG", 202 | "logs": { 203 | "path": "stdout" 204 | } 205 | } 206 | ``` 207 | 208 | ### Container debugging 209 | 210 | To access all the container logs at the same time, run the following command *(make sure it matches the `docker compose` or `podman-compose` command you ran to deploy the stack)*: 211 | 212 | ```bash 213 | docker compose logs -f --tail 200 214 | # Or: 215 | podman-compose logs -n -f --tail 200 216 | ``` 217 | 218 | To access the logs of a specific container, run the following command: 219 | 220 | ```bash 221 | docker logs -f --tail 200 ovos_stt_fasterwhisper 222 | # Or: 223 | podman logs -f --tail 200 ovos_stt_fasterwhisper 224 | ``` 225 | 226 | To go inside a container and run multiple commands, run the following command *(where `bash` is the available shell in there)*: 227 | 228 | ```bash 229 | docker exec -ti ovos_stt_fasterwhisper bash 230 | # Or: 231 | podman exec -ti ovos_stt_fasterwhisper bash 232 | ``` 233 | 234 | If the configuration file is not valid JSON, `jq` will return something like this: 235 | 236 | ```text 237 | parse error: Expected another key-value pair at line 81, column 3
238 | ``` 239 | 240 | To get the CPU, memory and I/O consumption per container, run the following command: 241 | 242 | ```bash 243 | docker stats -a --no-trunc 244 | # Or: 245 | podman stats -a --no-trunc 246 | ``` 247 | 248 | ### Validate configuration 249 | 250 | Make sure the `mycroft.conf` configuration file is valid JSON by using the `jq` command. 251 | 252 | ```bash 253 | cat ~/ovos/config/mycroft.conf | jq 254 | ``` 255 | 256 | ## Support 257 | 258 | - [Matrix channel](https://matrix.to/#/#openvoiceos:matrix.org) 259 | - [Open Voice OS documentation](https://openvoiceos.github.io/community-docs/) 260 | - [Contribute to Open Voice OS](https://openvoiceos.github.io/community-docs/contributing/) 261 | - [Report bugs related to these Docker images](https://github.com/OpenVoiceOS/ovos-docker-stt/issues) 262 | --------------------------------------------------------------------------------