├── deepspeech_server ├── __init__.py ├── cli.py ├── config.py ├── decoding.py ├── coqui.py └── server.py ├── tox.ini ├── requirements.txt ├── config.sample.yaml ├── .github └── workflows │ ├── pythonpublish.yml │ └── pythonpackage.yml ├── setup.py ├── .gitignore ├── README.rst └── LICENSE /deepspeech_server/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deepspeech_server/cli.py: -------------------------------------------------------------------------------- 1 | from deepspeech_server import server 2 | 3 | 4 | def main(): 5 | server.main() 6 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py3 3 | 4 | [testenv] 5 | deps=nose 6 | commands= 7 | nosetests \ 8 | [] 9 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | rx>=3.0 2 | scipy>=1.0 3 | cyclotron>=1.2 4 | cyclotron-aiohttp>=1.0 5 | cyclotron-std>=1.0 6 | av==9.2.0 7 | numpy>=1.19 8 | stt>=1.3 9 | pydantic>=1.9 10 | PyYAML>=6.0 11 | -------------------------------------------------------------------------------- /config.sample.yaml: -------------------------------------------------------------------------------- 1 | coqui: 2 | model: coqui-1.0.tflite 3 | scorer: huge-vocabulary.scorer 4 | beam_width: 500 5 | server: 6 | http: 7 | host: "0.0.0.0" 8 | port: 8080 9 | request_max_size: 1048576 10 | log: 11 | level: 12 | - logger: deepspeech_server 13 | level: DEBUG 14 | -------------------------------------------------------------------------------- /.github/workflows/pythonpublish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python package 2 | 3 | 
on: [push] 4 | 5 | jobs: 6 | publish: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - uses: actions/checkout@v2 11 | - name: Set up Python 12 | uses: actions/setup-python@v2 13 | with: 14 | python-version: 3.9 15 | - name: Install dependencies 16 | run: | 17 | sudo apt-get install ffmpeg libavformat-dev libavdevice-dev 18 | python -m pip install --upgrade pip 19 | pip install twine 20 | pip install -r requirements.txt 21 | - name: Publish on Pypi 22 | if: startsWith(github.ref, 'refs/tags/') 23 | env: 24 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 25 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 26 | run: | 27 | python setup.py sdist 28 | twine upload dist/*.tar.gz 29 | -------------------------------------------------------------------------------- /.github/workflows/pythonpackage.yml: -------------------------------------------------------------------------------- 1 | name: Python package 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ['3.6', '3.7', '3.8', '3.9', '3.10'] 11 | 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Set up Python ${{ matrix.python-version }} 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: ${{ matrix.python-version }} 18 | - name: Install dependencies 19 | run: | 20 | sudo apt-get install ffmpeg libavformat-dev libavdevice-dev 21 | python -m pip install --upgrade pip 22 | pip install -r requirements.txt 23 | python setup.py install 24 | - name: Lint with flake8 25 | run: | 26 | pip install flake8 27 | # stop the build if there are Python syntax errors or undefined names 28 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 29 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 30 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 31 | -------------------------------------------------------------------------------- /deepspeech_server/config.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import yaml 3 | from typing import List, Optional 4 | from xml.etree.ElementPath import ops 5 | from pydantic import BaseModel 6 | 7 | import rx.operators as ops 8 | 9 | 10 | class Coqui(BaseModel): 11 | model: str 12 | scorer: Optional[str] 13 | beam_width: Optional[int] 14 | lm_alpha: Optional[float] 15 | lm_beta: Optional[float] 16 | 17 | 18 | class HttpServer(BaseModel): 19 | host: str 20 | port: int 21 | request_max_size: int 22 | 23 | 24 | class Server(BaseModel): 25 | http: HttpServer 26 | 27 | 28 | class LogLevel(BaseModel): 29 | logger: str 30 | level: str 31 | 32 | class Log(BaseModel): 33 | level: List[LogLevel] 34 | 35 | class Config(BaseModel): 36 | coqui: Coqui 37 | server: Server 38 | log: Log 39 | 40 | 41 | def parse_config(config_data): 42 | ''' takes a stream with the content of the configuration file as input 43 | and returns a (hot) stream of arguments . 
44 | ''' 45 | config = config_data.pipe( 46 | ops.filter(lambda i: i.id == "config"), 47 | ops.flat_map(lambda i: i.data), 48 | ops.map(lambda i: yaml.load( 49 | i, 50 | Loader=yaml.FullLoader 51 | )), 52 | ops.map(lambda i: Config(**i)), 53 | ops.share(), 54 | ) 55 | 56 | return config 57 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | try: 3 | from setuptools import setup, find_packages 4 | use_setuptools = True 5 | except ImportError: 6 | from distutils.core import setup 7 | use_setuptools = False 8 | 9 | try: 10 | with open('README.rst', 'rt') as readme: 11 | description = '\n' + readme.read() 12 | except IOError: 13 | # maybe running setup.py from some other dir 14 | description = '' 15 | 16 | python_requires='>=3.6' 17 | install_requires = [ 18 | 'rx>=3.0', 19 | 'scipy>=1.0', 20 | 'cyclotron>=1.2', 21 | 'cyclotron-aiohttp>=1.0', 22 | 'cyclotron-std>=1.0', 23 | 'stt>=1.3', 24 | 'pydantic>=1.9', 25 | 'PyYAML>=6.0', 26 | ] 27 | 28 | setup( 29 | name="deepspeech-server", 30 | version='3.0.1', 31 | url='https://github.com/MainRo/deepspeech-server.git', 32 | license='MPL-2.0', 33 | description="server for mozilla deepspeech", 34 | long_description=description, 35 | long_description_content_type='text/x-rst', 36 | author='Romain Picard', 37 | author_email='romain.picard@oakbits.com', 38 | packages=find_packages(), 39 | install_requires=install_requires, 40 | include_package_data=True, 41 | platforms='any', 42 | classifiers=[ 43 | 'Development Status :: 3 - Alpha', 44 | 'License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)', 45 | 'Intended Audience :: Developers', 46 | 'Programming Language :: Python :: 3', 47 | 'Topic :: Scientific/Engineering :: Artificial Intelligence' 48 | ], 49 | entry_points={ 50 | 'console_scripts': ['deepspeech-server=deepspeech_server.cli:main'], 51 | } 52 | ) 53 | 
-------------------------------------------------------------------------------- /deepspeech_server/decoding.py: -------------------------------------------------------------------------------- 1 | """ 2 | Audio decoding module. 3 | """ 4 | 5 | import logging 6 | 7 | import scipy.io.wavfile as wav 8 | import numpy as np 9 | 10 | 11 | def decode_audio_pyav(file): 12 | """ 13 | Resample the input audio to the format that DeepSpeech expects. 14 | 15 | This one uses PyAV. 16 | 17 | :returns: A 1-dimensional NumPy array. 18 | """ 19 | 20 | audio = av.open(file) 21 | if len(audio.streams.audio) > 1: 22 | logging.warning("Audio has more than one stream. Only one of them will be used.") 23 | 24 | resampler = av.audio.resampler.AudioResampler( 25 | format="s16", layout="mono", rate=16000 26 | ) 27 | resampled_frames = [] 28 | for frame in audio.decode(audio=0): 29 | # As of PyAV 9.0, one input frame may be resampled to multiple outputs. 30 | # Convert each into a numpy ndarray... 31 | iterable_frames = map(lambda f:f.to_ndarray(), resampler.resample(frame)) 32 | # ...then flatten each of those arrays down to 1D 33 | flat_frames = list(map(lambda f:f.flatten(), iterable_frames)) 34 | # Add all of the resampled frames to our output list 35 | resampled_frames.extend(flat_frames) 36 | 37 | return np.concatenate(resampled_frames) 38 | 39 | 40 | def decode_audio_scipy(file): 41 | """ 42 | Resample the input audio to the format that DeepSpeech expects. 43 | 44 | This one uses SciPy. 45 | 46 | :returns: A 1-dimensional NumPy array. 47 | """ 48 | _, audio = wav.read(file) 49 | # convert to mono. 50 | # todo: move to a component or just a function here 51 | if len(audio.shape) > 1: 52 | audio = audio[:, 0] 53 | return audio 54 | 55 | 56 | try: 57 | import av 58 | except ImportError as e: 59 | logging.warning("PyAV was not found. 
Falling back to SciPy...") 60 | decode_audio = decode_audio_scipy 61 | else: 62 | logging.debug("Found PyAV!") 63 | decode_audio = decode_audio_pyav 64 | 65 | __all__ = ("decode_audio",) 66 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Custom config file 2 | config.yaml 3 | asset 4 | 5 | # IDE's 6 | .vscode 7 | 8 | # >>>> Python >>>> 9 | # Byte-compiled / optimized / DLL files 10 | __pycache__/ 11 | *.py[cod] 12 | *$py.class 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | cover/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | db.sqlite3 70 | db.sqlite3-journal 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | .pybuilder/ 84 | target/ 85 | 86 | # Jupyter Notebook 87 | .ipynb_checkpoints 88 | 89 | # IPython 90 | profile_default/ 91 | ipython_config.py 92 | 93 | # pyenv 94 | # For a library or package, you might want to ignore these files since the code is 95 | # intended to run in multiple environments; otherwise, check them in: 96 | # .python-version 97 | 98 | # pipenv 99 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 100 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 101 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 102 | # install all needed dependencies. 103 | #Pipfile.lock 104 | 105 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 106 | __pypackages__/ 107 | 108 | # Celery stuff 109 | celerybeat-schedule 110 | celerybeat.pid 111 | 112 | # SageMath parsed files 113 | *.sage.py 114 | 115 | # Environments 116 | .env 117 | .venv 118 | env/ 119 | venv/ 120 | ENV/ 121 | env.bak/ 122 | venv.bak/ 123 | 124 | # Spyder project settings 125 | .spyderproject 126 | .spyproject 127 | 128 | # Rope project settings 129 | .ropeproject 130 | 131 | # mkdocs documentation 132 | /site 133 | 134 | # mypy 135 | .mypy_cache/ 136 | .dmypy.json 137 | dmypy.json 138 | 139 | # Pyre type checker 140 | .pyre/ 141 | 142 | # pytype static type analyzer 143 | .pytype/ 144 | 145 | # Cython debug symbols 146 | cython_debug/ 147 | # <<<< Python <<<< -------------------------------------------------------------------------------- /deepspeech_server/coqui.py: -------------------------------------------------------------------------------- 1 | import io 2 | import logging 3 | from collections import namedtuple 4 | 5 | import rx 6 | from cyclotron import Component 7 | from cyclotron_std.logging import Log 8 | from stt import Model 9 | 10 | import deepspeech_server.decoding as decoding 11 | 12 | 13 | Sink = namedtuple('Sink', ['speech']) 14 | Source = namedtuple('Source', ['text', 'log']) 15 | 16 | # Sink events 17 | Scorer = namedtuple('Scorer', ['scorer', 'lm_alpha', 'lm_beta']) 18 | Scorer.__new__.__defaults__ = (None, None, None) 19 | 20 | Initialize = namedtuple('Initialize', ['model', 'scorer', 'beam_width']) 21 | Initialize.__new__.__defaults__ = (None,) 22 | 23 | SpeechToText = namedtuple('SpeechToText', ['data', 'context']) 24 | 25 | # Source events 26 | TextResult = namedtuple('TextResult', ['text', 'context']) 27 | TextError = namedtuple('TextError', ['error', 'context']) 28 | 29 | 30 | def make_driver(loop=None): 31 | def driver(sink): 32 | model = None 33 | log_observer = None 34 | 35 | def on_log_subscribe(observer, scheduler): 36 | nonlocal log_observer 37 | log_observer = observer 38 
| 39 | def log(message, level=logging.DEBUG): 40 | if log_observer is not None: 41 | log_observer.on_next(Log( 42 | logger=__name__, 43 | level=level, 44 | message=message, 45 | )) 46 | 47 | def setup_model(model_path, scorer, beam_width): 48 | log("creating model {} with scorer {}...".format(model_path, scorer)) 49 | model = Model(model_path) 50 | 51 | if scorer.scorer is not None: 52 | model.enableExternalScorer(scorer.scorer) 53 | if scorer.lm_alpha is not None and scorer.lm_beta is not None: 54 | if model.setScorerAlphaBeta(scorer.lm_alpha, scorer.lm_beta) != 0: 55 | raise RuntimeError("Unable to set scorer parameters") 56 | 57 | if beam_width is not None: 58 | if model.setBeamWidth(beam_width) != 0: 59 | raise RuntimeError("Unable to set beam width") 60 | 61 | log("model is ready.") 62 | return model 63 | 64 | def subscribe(observer, scheduler): 65 | def on_coqui_request(item): 66 | nonlocal model 67 | 68 | if type(item) is SpeechToText: 69 | if model is not None: 70 | try: 71 | audio = decoding.decode_audio(io.BytesIO(item.data)) 72 | text = model.stt(audio) 73 | log("STT result: {}".format(text)) 74 | observer.on_next(rx.just(TextResult( 75 | text=text, 76 | context=item.context, 77 | ))) 78 | except Exception as e: 79 | log("STT error: {}".format(e), level=logging.ERROR) 80 | observer.on_next(rx.throw(TextError( 81 | error=e, 82 | context=item.context, 83 | ))) 84 | elif type(item) is Initialize: 85 | log("initialize: {}".format(item)) 86 | model = setup_model( 87 | item.model, item.scorer, item.beam_width) 88 | else: 89 | log("unknown item: {}".format(item), level=logging.CRITICAL) 90 | observer.on_error( 91 | "Unknown item type: {}".format(type(item))) 92 | 93 | sink.speech.subscribe(lambda item: on_coqui_request(item)) 94 | 95 | return Source( 96 | text=rx.create(subscribe), 97 | log=rx.create(on_log_subscribe), 98 | ) 99 | 100 | return Component(call=driver, input=Sink) 101 | 
-------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ================== 2 | DeepSpeech Server 3 | ================== 4 | 5 | .. image:: https://github.com/MainRo/deepspeech-server/actions/workflows/pythonpackage.yml/badge.svg 6 | :target: https://github.com/MainRo/deepspeech-server/actions/workflows/pythonpackage.yml 7 | 8 | .. image:: https://badge.fury.io/py/deepspeech-server.svg 9 | :target: https://badge.fury.io/py/deepspeech-server 10 | 11 | Key Features 12 | ============ 13 | 14 | This is an HTTP server that can be used to test the Coqui STT project (the 15 | successor of the Mozilla DeepSpeech project). You need an environment with 16 | DeepSpeech or Coqui to run this server. 17 | 18 | This code uses the Coqui STT 1.0 APIs. 19 | 20 | Installation 21 | ============= 22 | 23 | The server is available on PyPI, so you can install it with pip: 24 | 25 | .. code-block:: console 26 | 27 | pip3 install deepspeech-server 28 | 29 | 30 | You can also install deepspeech server from source: 31 | 32 | .. code-block:: console 33 | 34 | python3 setup.py install 35 | 36 | Note that Python 3.6 is the minimum version required to run the server. 37 | 38 | Starting the server 39 | ==================== 40 | 41 | .. code-block:: console 42 | 43 | deepspeech-server --config config.yaml 44 | 45 | What is an STT model? 46 | --------------------- 47 | 48 | The quality of the speech-to-text engine depends heavily on which models it 49 | loads at runtime. Think of them as a sort of pattern that controls how the 50 | engine works. 51 | 52 | How to use a specific STT model 53 | ------------------------------- 54 | 55 | You can use Coqui without training a model. 
Pre-trained models are on 56 | offer at the Coqui Model Zoo (make sure the STT Models tab is selected): 57 | 58 | https://coqui.ai/models 59 | 60 | Once you've downloaded a pre-trained model, make a copy of the sample 61 | configuration file. Edit the `"model"` and `"scorer"` fields in your new file 62 | for the engine you want to use so that they match the downloaded files: 63 | 64 | .. code-block:: console 65 | 66 | cp config.sample.yaml config.yaml 67 | $EDITOR config.yaml 68 | 69 | Lastly, start the server: 70 | 71 | .. code-block:: console 72 | 73 | deepspeech-server --config config.yaml 74 | 75 | Server configuration 76 | ===================== 77 | 78 | The server is configured with a YAML file, passed with the "--config" argument. 79 | It has the following structure: 80 | 81 | .. code-block:: yaml 82 | 83 | coqui: 84 | model: coqui-1.0.tflite 85 | scorer: huge-vocabulary.scorer 86 | beam_width: 500 87 | server: 88 | http: 89 | host: "0.0.0.0" 90 | port: 8080 91 | request_max_size: 1048576 92 | log: 93 | level: 94 | - logger: deepspeech_server 95 | level: DEBUG 96 | 97 | The configuration file contains several sections and sub-sections. 98 | 99 | coqui section configuration 100 | --------------------------- 101 | 102 | The "coqui" section contains the configuration of the Coqui STT engine: 103 | 104 | **model**: The model that was trained by Coqui. Must be a tflite (TensorFlow Lite) file. 105 | 106 | **scorer**: [Optional] The scorer file. Use this to tune the STT to understand certain phrases better. 107 | 108 | **lm_alpha**: [Optional] The alpha hyperparameter for the scorer. 109 | 110 | **lm_beta**: [Optional] The beta hyperparameter for the scorer. 111 | 112 | **beam_width**: [Optional] The size of the beam search. Corresponds directly to how long decoding takes. 113 | 114 | http section configuration 115 | -------------------------- 116 | 117 | **request_max_size** (default value: 1048576, i.e. 1MiB) is the maximum payload 118 | size allowed by the server. 
A request whose payload exceeds this threshold is 119 | rejected with a "413: Request Entity Too Large" error. 120 | 121 | **host** The listen address of the HTTP server. 122 | 123 | **port** The listening port of the HTTP server. 124 | 125 | log section configuration 126 | ------------------------- 127 | 128 | The log section can be used to set the log levels of the server. This section 129 | contains a list of log entries. Each log entry contains the name of a **logger** 130 | and its **level**. Both follow the convention of the Python logging module. 131 | 132 | 133 | Using the server 134 | ================ 135 | 136 | Inference on the model is done via HTTP POST requests, for example with the 137 | following curl command: 138 | 139 | .. code-block:: console 140 | 141 | curl -X POST --data-binary @testfile.wav http://localhost:8080/stt 142 | -------------------------------------------------------------------------------- /deepspeech_server/server.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections import namedtuple 3 | from functools import partial 4 | import asyncio 5 | import rx 6 | import rx.operators as ops 7 | from rx.scheduler import ImmediateScheduler 8 | from rx.scheduler.eventloop import AsyncIOScheduler 9 | 10 | from cyclotron import Component 11 | from cyclotron.router import make_error_router 12 | 13 | from cyclotron.asyncio.runner import run 14 | import cyclotron_aiohttp.httpd as httpd 15 | import cyclotron_std.sys.argv as argv 16 | import cyclotron_std.io.file as file 17 | import cyclotron_std.argparse as argparse 18 | import cyclotron_std.logging as logging 19 | 20 | import deepspeech_server.coqui as coqui 21 | from .config import parse_config 22 | 23 | from multidict import MultiDict 24 | 25 | #from cyclotron.debug import trace_observable 26 | 27 | DeepspeechSink = namedtuple('DeepspeechSink', [ 28 | 'logging', 'file', 'stt', 'httpd' 29 | ]) 30 | DeepspeechSource = 
namedtuple('DeepspeechSource', [ 31 | 'stt', 'httpd', 'file', 'argv' 32 | ]) 33 | DeepspeechDrivers = namedtuple('DeepspeechServerDrivers', [ 34 | 'logging', 'stt', 'httpd', 'file', 'argv' 35 | ]) 36 | 37 | 38 | def parse_arguments(argv): 39 | parser = argparse.ArgumentParser("deepspeech server") 40 | parser.add_argument( 41 | '--config', required=True, 42 | help="Path of the server configuration file") 43 | 44 | return argv.pipe( 45 | ops.skip(1), 46 | argparse.parse(parser), 47 | ) 48 | 49 | 50 | def deepspeech_server(aio_scheduler, sources): 51 | argv = sources.argv.argv 52 | stt = sources.httpd.route 53 | stt_response = sources.stt.text 54 | ds_logs = sources.stt.log 55 | 56 | http_ds_error, route_ds_error = make_error_router() 57 | 58 | args = parse_arguments(argv) 59 | 60 | read_request, read_response = args.pipe( 61 | ops.map(lambda i: file.Read(id='config', path=i.value)), 62 | file.read(sources.file.response), 63 | ) 64 | read_request = read_request.pipe( 65 | ops.subscribe_on(aio_scheduler), 66 | ) 67 | config = parse_config(read_response) 68 | 69 | logs_config = config.pipe( 70 | ops.flat_map(lambda i: rx.from_(i.log.level, scheduler=ImmediateScheduler())), 71 | ops.map(lambda i: logging.SetLevel(logger=i.logger, level=i.level)), 72 | ) 73 | logs = rx.merge(logs_config, ds_logs) 74 | 75 | ds_stt = stt.pipe( 76 | ops.flat_map(lambda i: i.request), 77 | ops.map(lambda i: coqui.SpeechToText(data=i.data, context=i.context)), 78 | ) 79 | 80 | # config is hot, the combine operator allows to keep its last value 81 | # until logging is initialized 82 | ds_arg = config.pipe( 83 | ops.map(lambda i: coqui.Initialize( 84 | model=i.coqui.model, 85 | scorer=coqui.Scorer( 86 | scorer=getattr(i.coqui, 'scorer', None), 87 | lm_alpha=getattr(i.coqui, 'lm_alpha', None), 88 | lm_beta=getattr(i.coqui, 'lm_beta', None), 89 | ), 90 | beam_width=getattr(i.coqui, 'beam_width', None), 91 | )), 92 | ) 93 | ds = rx.merge(ds_stt, ds_arg) 94 | 95 | http_init = config.pipe( 96 | 
ops.flat_map(lambda i: rx.from_([ 97 | httpd.Initialize(request_max_size=i.server.http.request_max_size), 98 | httpd.AddRoute( 99 | methods=['POST'], 100 | path='/stt', 101 | id='stt', 102 | headers=MultiDict([('Content-Type', 'text/plain')]), 103 | ), 104 | httpd.StartServer( 105 | host=i.server.http.host, 106 | port=i.server.http.port), 107 | ])), 108 | ) 109 | 110 | http_response = stt_response.pipe( 111 | route_ds_error( 112 | error_map=lambda e: httpd.Response( 113 | data="Speech to text error".encode('utf-8'), 114 | context=e.args[0].context, 115 | status=500 116 | )), 117 | ops.map(lambda i: httpd.Response( 118 | data=i.text.encode('utf-8'), 119 | context=i.context, 120 | )), 121 | ) 122 | 123 | http = rx.merge(http_init, http_response, http_ds_error) 124 | 125 | return DeepspeechSink( 126 | file=file.Sink(request=read_request), 127 | logging=logging.Sink(request=logs), 128 | stt=coqui.Sink(speech=ds), 129 | httpd=httpd.Sink(control=http) 130 | ) 131 | 132 | 133 | def main(): 134 | loop = asyncio.get_event_loop() 135 | # loop.set_debug(True) 136 | aio_scheduler = AsyncIOScheduler(loop=loop) 137 | run( 138 | Component( 139 | call=partial(deepspeech_server, aio_scheduler), 140 | input=DeepspeechSource), 141 | DeepspeechDrivers( 142 | stt=coqui.make_driver(), 143 | httpd=httpd.make_driver(), 144 | argv=argv.make_driver(), 145 | logging=logging.make_driver(), 146 | file=file.make_driver() 147 | ), 148 | loop=loop, 149 | ) 150 | 151 | 152 | if __name__ == '__main__': 153 | main() 154 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mozilla Public License Version 2.0 2 | ================================== 3 | 4 | 1. Definitions 5 | -------------- 6 | 7 | 1.1. "Contributor" 8 | means each individual or legal entity that creates, contributes to 9 | the creation of, or owns Covered Software. 10 | 11 | 1.2. 
"Contributor Version" 12 | means the combination of the Contributions of others (if any) used 13 | by a Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. "Contribution" 16 | means Covered Software of a particular Contributor. 17 | 18 | 1.4. "Covered Software" 19 | means Source Code Form to which the initial Contributor has attached 20 | the notice in Exhibit A, the Executable Form of such Source Code 21 | Form, and Modifications of such Source Code Form, in each case 22 | including portions thereof. 23 | 24 | 1.5. "Incompatible With Secondary Licenses" 25 | means 26 | 27 | (a) that the initial Contributor has attached the notice described 28 | in Exhibit B to the Covered Software; or 29 | 30 | (b) that the Covered Software was made available under the terms of 31 | version 1.1 or earlier of the License, but not also under the 32 | terms of a Secondary License. 33 | 34 | 1.6. "Executable Form" 35 | means any form of the work other than Source Code Form. 36 | 37 | 1.7. "Larger Work" 38 | means a work that combines Covered Software with other material, in 39 | a separate file or files, that is not Covered Software. 40 | 41 | 1.8. "License" 42 | means this document. 43 | 44 | 1.9. "Licensable" 45 | means having the right to grant, to the maximum extent possible, 46 | whether at the time of the initial grant or subsequently, any and 47 | all of the rights conveyed by this License. 48 | 49 | 1.10. "Modifications" 50 | means any of the following: 51 | 52 | (a) any file in Source Code Form that results from an addition to, 53 | deletion from, or modification of the contents of Covered 54 | Software; or 55 | 56 | (b) any new file in Source Code Form that contains any Covered 57 | Software. 58 | 59 | 1.11. 
"Patent Claims" of a Contributor 60 | means any patent claim(s), including without limitation, method, 61 | process, and apparatus claims, in any patent Licensable by such 62 | Contributor that would be infringed, but for the grant of the 63 | License, by the making, using, selling, offering for sale, having 64 | made, import, or transfer of either its Contributions or its 65 | Contributor Version. 66 | 67 | 1.12. "Secondary License" 68 | means either the GNU General Public License, Version 2.0, the GNU 69 | Lesser General Public License, Version 2.1, the GNU Affero General 70 | Public License, Version 3.0, or any later versions of those 71 | licenses. 72 | 73 | 1.13. "Source Code Form" 74 | means the form of the work preferred for making modifications. 75 | 76 | 1.14. "You" (or "Your") 77 | means an individual or a legal entity exercising rights under this 78 | License. For legal entities, "You" includes any entity that 79 | controls, is controlled by, or is under common control with You. For 80 | purposes of this definition, "control" means (a) the power, direct 81 | or indirect, to cause the direction or management of such entity, 82 | whether by contract or otherwise, or (b) ownership of more than 83 | fifty percent (50%) of the outstanding shares or beneficial 84 | ownership of such entity. 85 | 86 | 2. License Grants and Conditions 87 | -------------------------------- 88 | 89 | 2.1. 
Grants 90 | 91 | Each Contributor hereby grants You a world-wide, royalty-free, 92 | non-exclusive license: 93 | 94 | (a) under intellectual property rights (other than patent or trademark) 95 | Licensable by such Contributor to use, reproduce, make available, 96 | modify, display, perform, distribute, and otherwise exploit its 97 | Contributions, either on an unmodified basis, with Modifications, or 98 | as part of a Larger Work; and 99 | 100 | (b) under Patent Claims of such Contributor to make, use, sell, offer 101 | for sale, have made, import, and otherwise transfer either its 102 | Contributions or its Contributor Version. 103 | 104 | 2.2. Effective Date 105 | 106 | The licenses granted in Section 2.1 with respect to any Contribution 107 | become effective for each Contribution on the date the Contributor first 108 | distributes such Contribution. 109 | 110 | 2.3. Limitations on Grant Scope 111 | 112 | The licenses granted in this Section 2 are the only rights granted under 113 | this License. No additional rights or licenses will be implied from the 114 | distribution or licensing of Covered Software under this License. 115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 116 | Contributor: 117 | 118 | (a) for any code that a Contributor has removed from Covered Software; 119 | or 120 | 121 | (b) for infringements caused by: (i) Your and any other third party's 122 | modifications of Covered Software, or (ii) the combination of its 123 | Contributions with other software (except as part of its Contributor 124 | Version); or 125 | 126 | (c) under Patent Claims infringed by Covered Software in the absence of 127 | its Contributions. 128 | 129 | This License does not grant any rights in the trademarks, service marks, 130 | or logos of any Contributor (except as may be necessary to comply with 131 | the notice requirements in Section 3.4). 132 | 133 | 2.4. 
Subsequent Licenses 134 | 135 | No Contributor makes additional grants as a result of Your choice to 136 | distribute the Covered Software under a subsequent version of this 137 | License (see Section 10.2) or under the terms of a Secondary License (if 138 | permitted under the terms of Section 3.3). 139 | 140 | 2.5. Representation 141 | 142 | Each Contributor represents that the Contributor believes its 143 | Contributions are its original creation(s) or it has sufficient rights 144 | to grant the rights to its Contributions conveyed by this License. 145 | 146 | 2.6. Fair Use 147 | 148 | This License is not intended to limit any rights You have under 149 | applicable copyright doctrines of fair use, fair dealing, or other 150 | equivalents. 151 | 152 | 2.7. Conditions 153 | 154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 155 | in Section 2.1. 156 | 157 | 3. Responsibilities 158 | ------------------- 159 | 160 | 3.1. Distribution of Source Form 161 | 162 | All distribution of Covered Software in Source Code Form, including any 163 | Modifications that You create or to which You contribute, must be under 164 | the terms of this License. You must inform recipients that the Source 165 | Code Form of the Covered Software is governed by the terms of this 166 | License, and how they can obtain a copy of this License. You may not 167 | attempt to alter or restrict the recipients' rights in the Source Code 168 | Form. 169 | 170 | 3.2. 
Distribution of Executable Form 171 | 172 | If You distribute Covered Software in Executable Form then: 173 | 174 | (a) such Covered Software must also be made available in Source Code 175 | Form, as described in Section 3.1, and You must inform recipients of 176 | the Executable Form how they can obtain a copy of such Source Code 177 | Form by reasonable means in a timely manner, at a charge no more 178 | than the cost of distribution to the recipient; and 179 | 180 | (b) You may distribute such Executable Form under the terms of this 181 | License, or sublicense it under different terms, provided that the 182 | license for the Executable Form does not attempt to limit or alter 183 | the recipients' rights in the Source Code Form under this License. 184 | 185 | 3.3. Distribution of a Larger Work 186 | 187 | You may create and distribute a Larger Work under terms of Your choice, 188 | provided that You also comply with the requirements of this License for 189 | the Covered Software. If the Larger Work is a combination of Covered 190 | Software with a work governed by one or more Secondary Licenses, and the 191 | Covered Software is not Incompatible With Secondary Licenses, this 192 | License permits You to additionally distribute such Covered Software 193 | under the terms of such Secondary License(s), so that the recipient of 194 | the Larger Work may, at their option, further distribute the Covered 195 | Software under the terms of either this License or such Secondary 196 | License(s). 197 | 198 | 3.4. Notices 199 | 200 | You may not remove or alter the substance of any license notices 201 | (including copyright notices, patent notices, disclaimers of warranty, 202 | or limitations of liability) contained within the Source Code Form of 203 | the Covered Software, except that You may alter any license notices to 204 | the extent required to remedy known factual inaccuracies. 205 | 206 | 3.5. 
Application of Additional Terms 207 | 208 | You may choose to offer, and to charge a fee for, warranty, support, 209 | indemnity or liability obligations to one or more recipients of Covered 210 | Software. However, You may do so only on Your own behalf, and not on 211 | behalf of any Contributor. You must make it absolutely clear that any 212 | such warranty, support, indemnity, or liability obligation is offered by 213 | You alone, and You hereby agree to indemnify every Contributor for any 214 | liability incurred by such Contributor as a result of warranty, support, 215 | indemnity or liability terms You offer. You may include additional 216 | disclaimers of warranty and limitations of liability specific to any 217 | jurisdiction. 218 | 219 | 4. Inability to Comply Due to Statute or Regulation 220 | --------------------------------------------------- 221 | 222 | If it is impossible for You to comply with any of the terms of this 223 | License with respect to some or all of the Covered Software due to 224 | statute, judicial order, or regulation then You must: (a) comply with 225 | the terms of this License to the maximum extent possible; and (b) 226 | describe the limitations and the code they affect. Such description must 227 | be placed in a text file included with all distributions of the Covered 228 | Software under this License. Except to the extent prohibited by statute 229 | or regulation, such description must be sufficiently detailed for a 230 | recipient of ordinary skill to be able to understand it. 231 | 232 | 5. Termination 233 | -------------- 234 | 235 | 5.1. The rights granted under this License will terminate automatically 236 | if You fail to comply with any of its terms. 
However, if You become 237 | compliant, then the rights granted under this License from a particular 238 | Contributor are reinstated (a) provisionally, unless and until such 239 | Contributor explicitly and finally terminates Your grants, and (b) on an 240 | ongoing basis, if such Contributor fails to notify You of the 241 | non-compliance by some reasonable means prior to 60 days after You have 242 | come back into compliance. Moreover, Your grants from a particular 243 | Contributor are reinstated on an ongoing basis if such Contributor 244 | notifies You of the non-compliance by some reasonable means, this is the 245 | first time You have received notice of non-compliance with this License 246 | from such Contributor, and You become compliant prior to 30 days after 247 | Your receipt of the notice. 248 | 249 | 5.2. If You initiate litigation against any entity by asserting a patent 250 | infringement claim (excluding declaratory judgment actions, 251 | counter-claims, and cross-claims) alleging that a Contributor Version 252 | directly or indirectly infringes any patent, then the rights granted to 253 | You by any and all Contributors for the Covered Software under Section 254 | 2.1 of this License shall terminate. 255 | 256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 257 | end user license agreements (excluding distributors and resellers) which 258 | have been validly granted by You or Your distributors under this License 259 | prior to termination shall survive termination. 260 | 261 | ************************************************************************ 262 | * * 263 | * 6. 
Disclaimer of Warranty * 264 | * ------------------------- * 265 | * * 266 | * Covered Software is provided under this License on an "as is" * 267 | * basis, without warranty of any kind, either expressed, implied, or * 268 | * statutory, including, without limitation, warranties that the * 269 | * Covered Software is free of defects, merchantable, fit for a * 270 | * particular purpose or non-infringing. The entire risk as to the * 271 | * quality and performance of the Covered Software is with You. * 272 | * Should any Covered Software prove defective in any respect, You * 273 | * (not any Contributor) assume the cost of any necessary servicing, * 274 | * repair, or correction. This disclaimer of warranty constitutes an * 275 | * essential part of this License. No use of any Covered Software is * 276 | * authorized under this License except under this disclaimer. * 277 | * * 278 | ************************************************************************ 279 | 280 | ************************************************************************ 281 | * * 282 | * 7. Limitation of Liability * 283 | * -------------------------- * 284 | * * 285 | * Under no circumstances and under no legal theory, whether tort * 286 | * (including negligence), contract, or otherwise, shall any * 287 | * Contributor, or anyone who distributes Covered Software as * 288 | * permitted above, be liable to You for any direct, indirect, * 289 | * special, incidental, or consequential damages of any character * 290 | * including, without limitation, damages for lost profits, loss of * 291 | * goodwill, work stoppage, computer failure or malfunction, or any * 292 | * and all other commercial damages or losses, even if such party * 293 | * shall have been informed of the possibility of such damages. 
This * 294 | * limitation of liability shall not apply to liability for death or * 295 | * personal injury resulting from such party's negligence to the * 296 | * extent applicable law prohibits such limitation. Some * 297 | * jurisdictions do not allow the exclusion or limitation of * 298 | * incidental or consequential damages, so this exclusion and * 299 | * limitation may not apply to You. * 300 | * * 301 | ************************************************************************ 302 | 303 | 8. Litigation 304 | ------------- 305 | 306 | Any litigation relating to this License may be brought only in the 307 | courts of a jurisdiction where the defendant maintains its principal 308 | place of business and such litigation shall be governed by laws of that 309 | jurisdiction, without reference to its conflict-of-law provisions. 310 | Nothing in this Section shall prevent a party's ability to bring 311 | cross-claims or counter-claims. 312 | 313 | 9. Miscellaneous 314 | ---------------- 315 | 316 | This License represents the complete agreement concerning the subject 317 | matter hereof. If any provision of this License is held to be 318 | unenforceable, such provision shall be reformed only to the extent 319 | necessary to make it enforceable. Any law or regulation which provides 320 | that the language of a contract shall be construed against the drafter 321 | shall not be used to construe this License against a Contributor. 322 | 323 | 10. Versions of the License 324 | --------------------------- 325 | 326 | 10.1. New Versions 327 | 328 | Mozilla Foundation is the license steward. Except as provided in Section 329 | 10.3, no one other than the license steward has the right to modify or 330 | publish new versions of this License. Each version will be given a 331 | distinguishing version number. 332 | 333 | 10.2. 
Effect of New Versions 334 | 335 | You may distribute the Covered Software under the terms of the version 336 | of the License under which You originally received the Covered Software, 337 | or under the terms of any subsequent version published by the license 338 | steward. 339 | 340 | 10.3. Modified Versions 341 | 342 | If you create software not governed by this License, and you want to 343 | create a new license for such software, you may create and use a 344 | modified version of this License if you rename the license and remove 345 | any references to the name of the license steward (except to note that 346 | such modified license differs from this License). 347 | 348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 349 | Licenses 350 | 351 | If You choose to distribute Source Code Form that is Incompatible With 352 | Secondary Licenses under the terms of this version of the License, the 353 | notice described in Exhibit B of this License must be attached. 354 | 355 | Exhibit A - Source Code Form License Notice 356 | ------------------------------------------- 357 | 358 | This Source Code Form is subject to the terms of the Mozilla Public 359 | License, v. 2.0. If a copy of the MPL was not distributed with this 360 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 361 | 362 | If it is not possible or desirable to put the notice in a particular 363 | file, then You may include the notice in a location (such as a LICENSE 364 | file in a relevant directory) where a recipient would be likely to look 365 | for such a notice. 366 | 367 | You may add additional accurate notices of copyright ownership. 368 | 369 | Exhibit B - "Incompatible With Secondary Licenses" Notice 370 | --------------------------------------------------------- 371 | 372 | This Source Code Form is "Incompatible With Secondary Licenses", as 373 | defined by the Mozilla Public License, v. 2.0. 
374 | --------------------------------------------------------------------------------