├── .devcontainer └── devcontainer.json ├── .github └── workflows │ ├── build.yml │ └── tox.yml ├── .gitignore ├── .readthedocs.yaml ├── CHANGELOG.md ├── COPYING_Kaldi_Project ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── Makefile ├── cli_helper.py ├── make.bat ├── requirements.txt └── source │ ├── cli.rst │ ├── conf.py │ ├── index.rst │ ├── locale.rst │ ├── modules.rst │ ├── pydrobert.kaldi.eval.rst │ ├── pydrobert.kaldi.eval.util.rst │ ├── pydrobert.kaldi.io.argparse.rst │ ├── pydrobert.kaldi.io.corpus.rst │ ├── pydrobert.kaldi.io.duck_streams.rst │ ├── pydrobert.kaldi.io.enums.rst │ ├── pydrobert.kaldi.io.rst │ ├── pydrobert.kaldi.io.table_streams.rst │ ├── pydrobert.kaldi.io.util.rst │ └── pydrobert.kaldi.logging.rst ├── pyproject.toml ├── pytest.ini ├── setup.cfg ├── setup.py ├── src ├── base │ ├── io-funcs-inl.h │ ├── io-funcs.cc │ ├── io-funcs.h │ ├── kaldi-common.h │ ├── kaldi-error.cc │ ├── kaldi-error.h │ ├── kaldi-math.cc │ ├── kaldi-math.h │ ├── kaldi-types.h │ ├── kaldi-utils.cc │ ├── kaldi-utils.h │ ├── timer.cc │ ├── timer.h │ └── version.h ├── feat │ ├── feature-common-inl.h │ ├── feature-common.h │ ├── feature-fbank.cc │ ├── feature-fbank.h │ ├── feature-functions.cc │ ├── feature-functions.h │ ├── feature-mfcc.cc │ ├── feature-mfcc.h │ ├── feature-plp.cc │ ├── feature-plp.h │ ├── feature-spectrogram.cc │ ├── feature-spectrogram.h │ ├── feature-window.cc │ ├── feature-window.h │ ├── mel-computations.cc │ ├── mel-computations.h │ ├── online-feature.cc │ ├── online-feature.h │ ├── pitch-functions.cc │ ├── pitch-functions.h │ ├── resample.cc │ ├── resample.h │ ├── signal.cc │ ├── signal.h │ ├── wave-reader.cc │ └── wave-reader.h ├── itf │ ├── clusterable-itf.h │ ├── context-dep-itf.h │ ├── decodable-itf.h │ ├── online-feature-itf.h │ ├── optimizable-itf.h │ ├── options-itf.h │ └── transition-information.h ├── matrix │ ├── cblas-wrappers.h │ ├── compressed-matrix.cc │ ├── compressed-matrix.h │ ├── jama-eig.h │ ├── jama-svd.h │ ├── kaldi-blas.h 
│ ├── kaldi-gpsr.cc │ ├── kaldi-gpsr.h │ ├── kaldi-matrix-inl.h │ ├── kaldi-matrix.cc │ ├── kaldi-matrix.h │ ├── kaldi-vector-inl.h │ ├── kaldi-vector.cc │ ├── kaldi-vector.h │ ├── matrix-common.h │ ├── matrix-functions-inl.h │ ├── matrix-functions.cc │ ├── matrix-functions.h │ ├── matrix-lib.h │ ├── numpy-array.cc │ ├── numpy-array.h │ ├── optimization.cc │ ├── optimization.h │ ├── packed-matrix.cc │ ├── packed-matrix.h │ ├── qr.cc │ ├── sp-matrix-inl.h │ ├── sp-matrix.cc │ ├── sp-matrix.h │ ├── sparse-matrix.cc │ ├── sparse-matrix.h │ ├── srfft.cc │ ├── srfft.h │ ├── tp-matrix.cc │ └── tp-matrix.h ├── pydrobert │ └── kaldi │ │ ├── __init__.py │ │ ├── command_line.py │ │ ├── eval │ │ ├── __init__.py │ │ ├── command_line.py │ │ └── util.py │ │ ├── feat │ │ ├── __init__.py │ │ └── command_line.py │ │ ├── io │ │ ├── __init__.py │ │ ├── argparse.py │ │ ├── command_line.py │ │ ├── corpus.py │ │ ├── duck_streams.py │ │ ├── enums.py │ │ ├── table_streams.py │ │ └── util.py │ │ └── logging.py ├── transform │ ├── cmvn.cc │ └── cmvn.h └── util │ ├── basic-filebuf.h │ ├── common-utils.h │ ├── const-integer-set-inl.h │ ├── const-integer-set.h │ ├── edit-distance-inl.h │ ├── edit-distance.h │ ├── hash-list-inl.h │ ├── hash-list.h │ ├── kaldi-cygwin-io-inl.h │ ├── kaldi-holder-inl.h │ ├── kaldi-holder.cc │ ├── kaldi-holder.h │ ├── kaldi-io-inl.h │ ├── kaldi-io.cc │ ├── kaldi-io.h │ ├── kaldi-pipebuf.h │ ├── kaldi-semaphore.cc │ ├── kaldi-semaphore.h │ ├── kaldi-table-inl.h │ ├── kaldi-table.cc │ ├── kaldi-table.h │ ├── kaldi-thread.cc │ ├── kaldi-thread.h │ ├── parse-options.cc │ ├── parse-options.h │ ├── simple-io-funcs.cc │ ├── simple-io-funcs.h │ ├── simple-options.cc │ ├── simple-options.h │ ├── stl-utils.h │ ├── table-types.h │ ├── text-utils.cc │ └── text-utils.h ├── swig ├── numpy │ ├── numpy.i │ └── pyfragments.swg └── pydrobert │ ├── error.i │ ├── io │ ├── duck.i │ ├── tables │ │ ├── basic_tables.i │ │ ├── mv_tables.i │ │ ├── tables.i │ │ ├── token_tables.i │ │ └── 
wave_tables.i │ └── util.i │ └── kaldi.i ├── tests └── python │ ├── conftest.py │ ├── test_argparse.py │ ├── test_corpus.py │ ├── test_duck_streams.py │ ├── test_eval_commands.py │ ├── test_eval_util.py │ ├── test_feat_commands.py │ ├── test_io_commands.py │ ├── test_logging.py │ ├── test_metadata.py │ └── test_table_streams.py └── tox.ini /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | {"image":"mcr.microsoft.com/devcontainers/universal:2"} -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build PyPI 2 | 3 | # Build wheels and source distribution for Python 4 | # We only run a single command as a test to check for linker errors 5 | 6 | on: [pull_request, release] 7 | 8 | jobs: 9 | build_wheels: 10 | name: Build wheels on ${{ matrix.os }} 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | os: [ubuntu-latest, windows-latest, macos-latest] 15 | 16 | steps: 17 | - uses: actions/checkout@v4 18 | - uses: actions/setup-python@v5 19 | with: 20 | python-version: '3.13' 21 | 22 | - name: Install cibuildwheel 23 | run: python -m pip install cibuildwheel 24 | 25 | - name: Build wheels 26 | run: python -m cibuildwheel --output-dir wheelhouse 27 | env: 28 | CIBW_BEFORE_BUILD_WINDOWS: pip install delvewheel 29 | CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: delvewheel repair -w {dest_dir} {wheel} 30 | CIBW_ARCHS_LINUX: x86_64 31 | CIBW_SKIP: pp* *-win32 32 | CIBW_TEST_COMMAND: write-table-to-pickle --help 33 | 34 | - uses: actions/upload-artifact@v4 35 | with: 36 | name: wheels-${{ matrix.os }} 37 | path: ./wheelhouse/*.whl 38 | 39 | build_sdist: 40 | name: Build source distribution 41 | runs-on: ubuntu-latest 42 | steps: 43 | - uses: actions/checkout@v4 44 | 45 | - uses: actions/setup-python@v5 46 | with: 47 | python-version: '3.13' 48 | 49 | - name: Build 
sdist 50 | run: pipx run build --sdist 51 | 52 | - name: Install sdist 53 | run: pip install dist/*.tar.gz 54 | 55 | - name: Test sdist 56 | run: write-table-to-pickle --help 57 | 58 | - uses: actions/upload-artifact@v4 59 | with: 60 | name: sdist 61 | path: dist/*.tar.gz 62 | 63 | -------------------------------------------------------------------------------- /.github/workflows/tox.yml: -------------------------------------------------------------------------------- 1 | # adapted from https://github.com/tox-dev/tox-gh 2 | 3 | 4 | name: Run tox tests 5 | on: 6 | push: 7 | branches-ignore: 8 | - docs 9 | pull_request: 10 | schedule: 11 | # run every monday @ 8am 12 | - cron: "0 8 * * 1" 13 | 14 | concurrency: 15 | group: tox-${{ github.ref }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | test: 20 | name: test with ${{ matrix.env }} 21 | runs-on: ubuntu-latest 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | env: 26 | - "3.13" 27 | - "3.12" 28 | - "3.10" 29 | - "3.9" 30 | steps: 31 | - uses: actions/checkout@v4 32 | - name: Install the latest version of uv 33 | uses: astral-sh/setup-uv@v3 34 | - name: Install tox 35 | run: uv tool install --python-preference only-managed --python 3.13 tox --with tox-uv --with tox-gh 36 | - name: Install Python 37 | if: matrix.env != '3.13' 38 | run: uv python install --python-preference only-managed ${{ matrix.env }} 39 | - name: Setup test suite 40 | run: tox run -vv --notest --skip-missing-interpreters false 41 | env: 42 | TOX_GH_MAJOR_MINOR: ${{ matrix.env }} 43 | - name: Run test suite 44 | run: tox run --skip-pkg-install 45 | env: 46 | TOX_GH_MAJOR_MINOR: ${{ matrix.env }} 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 
11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Mac ds store 56 | .DS_Store 57 | 58 | # Sphinx documentation 59 | docs/_build/ 60 | doc/build 61 | 62 | # PyBuilder 63 | target/ 64 | 65 | #Ipython Notebook 66 | .ipynb_checkpoints 67 | 68 | # sdrobert: additional crap 69 | swig/pydrobert/internal.py 70 | swig/pydrobert/kaldi_wrap.cpp 71 | venv/ 72 | .ftpignore 73 | .ftpconfig 74 | .python-version 75 | .vscode/ 76 | _version.py -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3" 13 | # You can also specify other tool versions: 14 | # nodejs: "19" 15 | # rust: "1.64" 16 | # golang: "1.19" 17 | 18 | # Build documentation in the docs/ directory with Sphinx 19 | sphinx: 20 | builder: html 21 | 22 | # If using Sphinx, optionally build your docs in additional formats such as PDF 23 | # formats: 24 | # - pdf 25 | 26 | # Optionally declare the Python 
requirements required to build your docs 27 | python: 28 | install: 29 | - requirements: docs/requirements.txt 30 | - method: pip 31 | path: . -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | v0.6.4 2 | ------ 3 | - Python 3.13 builds 4 | - Replace AppVeyor CI with GitHub Actions. 5 | - Python 3.12 and musllinux builds. 6 | 7 | v0.6.3 8 | ------ 9 | - Fixed an annoying bug causing memory bloat. 10 | 11 | v0.6.2 12 | ------ 13 | - Added 3.11. Removed 32-bit wheels. 14 | - Fixed absolute paths in source distribution. 15 | - Cleaned up some of the package metadata. 16 | 17 | v0.6.1 18 | ------ 19 | 20 | - CHANGELOG and README are Markdown 21 | - Cut Python less than 3.7, added up to 3.10. 22 | - "No BLAS" is now the default build. None of the currently wrapped 23 | functionality actually uses BLAS right now. 24 | - Removed `KaldiLocaleWarning` and added a documentation page addressing 25 | locales. 26 | - Updated documentation, including a special page for the CLI. 27 | - Updated CI to only use [cibuildwheel](https://github.com/pypa/cibuildwheel/). 28 | Able to compile Win-64 wheels. 29 | - Updated Kaldi source. 30 | - All but extension is now in `setup.cfg` and `pyproject.toml`. 31 | - Got rid of Conda recipe. Will switch to 32 | [conda-forge](https://conda-forge.org/). 33 | 34 | v0.6.0 35 | ------ 36 | 37 | A considerable amount of refactoring occurred for this build, chiefly to get 38 | rid of Python 2.7 support. While the functionality did not change much for this 39 | version, we have switched from a `pkgutil`-style `pydrobert` namespace to 40 | PEP-420-style namespaces. 
As a result, *this package is not 41 | backwards-compatible with previous `pydrobert` packages!* Make sure that if 42 | any of the following are installed, they exceed the following version 43 | thresholds: 44 | 45 | - `pydrobert-param >0.2.0` 46 | - `pydrobert-pytorch >0.2.1` 47 | - `pydrobert-speech >0.1.0` 48 | 49 | Miscellaneous other changes include: 50 | 51 | - Type hints everywhere 52 | - Shifted python source to `src/`, alongside Kaldi source 53 | - Updated numpy swig bindings for numpy 1.11.3 54 | - Black-formatted remaining source 55 | - Removed `future` and `six`, `configparser` 56 | - Shifted a lot of the configuration to `setup.cfg`. There is still 57 | considerable work in `setup.py` due to the C extension 58 | - Shifted documentation source from `doc/` to `docs/` 59 | - Shuffled around the indexing of documentation 60 | - Added changelog :D 61 | 62 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-exclude .github * 2 | exclude .gitignore 3 | recursive-exclude recipe * 4 | recursive-include swig * -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build status](https://github.com/sdrobert/pydrobert-kaldi/actions/workflows/tox.yml/badge.svg)](https://github.com/sdrobert/pydrobert-kaldi/actions/workflows/tox.yml) 2 | [![Documentation Status](https://readthedocs.org/projects/pydrobert-kaldi/badge/?version=latest)](https://pydrobert-kaldi.readthedocs.io/en/latest/?badge=latest) 3 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 4 | 5 | 6 | # pydrobert-kaldi 7 | 8 | Some [Kaldi](http://kaldi-asr.org/) bindings for Python. 
I started this project 9 | because I wanted to seamlessly incorporate [Kaldi's I/O 10 | mechanism](http://kaldi-asr.org/doc/io.html) into the gamut of Python-based 11 | data science packages (e.g. Theano, Tensorflow, CNTK, PyTorch, etc.). The code 12 | base is expanding to wrap more of Kaldi's feature processing and mathematical 13 | functions, but is unlikely to include modelling or decoding. 14 | 15 | Eventually, I plan on adding hooks for Kaldi audio features and pre-/post- 16 | processing. However, I have no plans on porting any code involving modelling or 17 | decoding. 18 | 19 | **This is student-driven code, so don't expect a stable API. I'll try to use 20 | semantic versioning, but the best way to keep functionality stable is by 21 | forking.** 22 | 23 | ## Documentation 24 | 25 | - [Latest](https://pydrobert-kaldi.readthedocs.io/en/latest/) 26 | - [v0.6.3](https://pydrobert-kaldi.readthedocs.io/en/v0.6.3/) 27 | 28 | ## Input/Output 29 | 30 | Most I/O can be performed with the `pydrobert.kaldi.io.open` function: 31 | 32 | 33 | ``` python 34 | from pydrobert.kaldi import io 35 | with io.open('scp:foo.scp', 'bm') as f: 36 | for matrix in f: 37 | ... 38 | ``` 39 | 40 | `open` is a factory function that determines the appropriate underlying stream 41 | to open, much like Python's built-in `open`. The data types we can read (here, 42 | a `BaseMatrix`) are listed in `pydrobert.kaldi.io.enums.KaldiDataType`. Big 43 | data types, like matrices and vectors, are piped into Numpy arrays. Passing an 44 | extended filename (e.g. paths to files on discs, `'-'` for stdin/stdout, 45 | `'gzip -c a.ark.gz |'`, etc.) opens a stream from which data types can be read 46 | one-by-one and in the order they were written. 
Alternatively, prepending the 47 | extended filename with `'ark[,option_a[,option_b...]]:'` or `'scp[,...]:'` and 48 | specifying a data type allows one to open a Kaldi table for iterator-like 49 | sequential reading (`mode='r'`), dict-like random access reading (`mode='r+'`), 50 | or writing (`mode='w'`). For more information on the open function, consult the 51 | docstring. 52 | 53 | The submodule `pydrobert.kaldi.io.corpus` contains useful wrappers around Kaldi 54 | I/O to serve up batches of data to, say, a neural network: 55 | 56 | ``` python 57 | train = ShuffledData('scp:feats.scp', 'scp:labels.scp', batch_size=10) 58 | for feat_batch, label_batch in train: 59 | ... 60 | ``` 61 | 62 | ## Logging and CLI 63 | 64 | By default, Kaldi error, warning, and critical messages are piped to standard 65 | error. The `pydrobert.kaldi.logging` submodule provides hooks into python's 66 | native logging interface: the `logging` module. The `KaldiLogger` can 67 | handle stack traces from Kaldi C++ code, and there are a variety of decorators 68 | to finagle the kaldi logging patterns to python logging patterns, or vice 69 | versa. 70 | 71 | You'd likely want to explicitly handle logging when creating new kaldi-style 72 | commands for command line. `pydrobert.kaldi.io.argparse` provides 73 | `KaldiParser`, an `ArgumentParser` tailored to Kaldi 74 | inputs/outputs. It is used by a few command-line entry points added by this 75 | package. See the [Command-Line 76 | Interface](http://pydrobert-kaldi.readthedocs.io/en/latest/cli.html) page for 77 | details. 78 | 79 | ## Installation 80 | 81 | Prepackaged binaries of tagged versions of `pydrobert-kaldi` are available for 82 | most 64-bit platforms (Windows, Glibc Linux, OSX) and most active Python 83 | versions (3.7-3.11) on both [conda](https://docs.conda.io/en/latest/) and 84 | [PyPI](https://pypi.org/). 
85 | 86 | To install via [conda-forge](https://conda-forge.org/) 87 | 88 | ``` sh 89 | conda install -c conda-forge pydrobert-kaldi 90 | ``` 91 | 92 | If you only want to rely on Anaconda dependencies, you can install from the 93 | `sdrobert` channel instead. There is not yet a 3.11 build there. 94 | 95 | To install via [PyPI](https://pypi.org/) 96 | 97 | ``` sh 98 | pip install pydrobert-kaldi 99 | ``` 100 | 101 | You can also try building the cutting-edge version. To do so, you'll need to 102 | first install [SWIG 4.0](https://www.swig.org/) and an appropriate C++ 103 | compiler, then 104 | 105 | ``` sh 106 | pip install git+https://github.com/sdrobert/pydrobert-kaldi.git 107 | ``` 108 | 109 | The current version does not require a BLAS install, though it likely will in 110 | the future as more is wrapped. 111 | 112 | ## License 113 | 114 | This code is licensed under Apache 2.0. 115 | 116 | Code found under the `src/` directory has been primarily copied from Kaldi. 117 | `setup.py` is also strongly influenced by Kaldi's build configuration. Kaldi is 118 | also covered by the Apache 2.0 license; its specific license file was copied 119 | into `src/COPYING_Kaldi_Project` to live among its fellows. 120 | 121 | ## How to Cite 122 | 123 | Please see the [pydrobert page](https://github.com/sdrobert/pydrobert) for more 124 | details. 125 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = pydrobert-kaldi 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/cli_helper.py: -------------------------------------------------------------------------------- 1 | # Generate CLI page 2 | # Needs imports, so run with full 3 | 4 | import pydrobert.kaldi.command_line as cli 5 | import os 6 | from io import StringIO 7 | import sys 8 | import inspect 9 | import warnings 10 | 11 | warnings.simplefilter("ignore") 12 | 13 | # Modified from 14 | # https://stackoverflow.com/questions/16571150/how-to-capture-stdout-output-from-a-python-function-call 15 | class Capturing(list): 16 | def __enter__(self): 17 | self._stdout = sys.stdout 18 | self._stderr = sys.stderr 19 | sys.stdout = sys.stderr = self._stringio = StringIO() 20 | return self 21 | 22 | def __exit__(self, *args): 23 | self.extend(self._stringio.getvalue().splitlines()) 24 | del self._stringio # free up some memory 25 | sys.stdout = self._stdout 26 | sys.stderr = self._stderr 27 | 28 | 29 | DIR = os.path.dirname(__file__) 30 | CLI_RST = os.path.join(DIR, "source", "cli.rst") 31 | 32 | buff = "Command-Line Interface\n======================\n\n" 33 | for cmd_name in ( 34 | "write-table-to-pickle", 35 | "write-pickle-to-table", 36 | "compute-error-rate", 37 | "normalize-feat-lens", 38 | "write-table-to-torch-dir", 39 | "write-torch-dir-to-table", 40 | ): 41 | buff += cmd_name + "\n" + ("-" * len(cmd_name)) + "\n\n::\n\n " 42 | sys.argv[0] = cmd_name 43 | func = next( 44 | x[1] for x in inspect.getmembers(cli) if x[0] == cmd_name.replace("-", "_") 45 | ) 46 | with Capturing() as c: 47 | try: 48 | func(["-h"]) 49 | except SystemExit: 50 | 
pass 51 | buff += "\n ".join(c) + "\n\n" 52 | 53 | with open(CLI_RST, "w") as f: 54 | f.write(buff) 55 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | myst-parser 2 | sphinx_rtd_theme>=0.5 3 | sphinx-autodoc-typehints 4 | Sphinx>=4.4 -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../../README.md 2 | :parser: myst_parser.sphinx_ 3 | 4 | .. 
toctree:: 5 | :maxdepth: 3 6 | :caption: Contents: 7 | 8 | cli 9 | locale 10 | modules 11 | 12 | Indices and tables 13 | ================== 14 | 15 | * :ref:`genindex` 16 | * :ref:`modindex` 17 | * :ref:`search` 18 | -------------------------------------------------------------------------------- /docs/source/locale.rst: -------------------------------------------------------------------------------- 1 | Locale and Kaldi 2 | ================ 3 | 4 | After ``v0.6.0``, ``pydrobert.kaldi.io`` no longer issues a 5 | ``KaldiLocaleWarning`` when the system locale doesn't match the POSIX standard. 6 | *The long story short is that locale shouldn't matter much to what 7 | pydrobert-kaldi does,* so I no longer bug you about it. If you're hunting an 8 | error, however, read on. 9 | 10 | Most Kaldi shell scripts presume 11 | 12 | .. code-block:: shell 13 | 14 | export LC_ALL=C 15 | 16 | has been called some time prior to running the current script. This sets the 17 | locale to POSIX-style, which is going to ensure your various shell commands 18 | sort stuff like C does. The Kaldi codebase is written in C, so it's definitely 19 | going to sort this way. Here's an example of some weirdness involving the 20 | ``"s"`` flag in the file rxspecifier. It basically tells Kaldi that table 21 | entries are in sorted order, which allows Kaldi to take some shortcuts to save 22 | on read/write costs. 23 | 24 | .. 
code-block:: shell 25 | 26 | # I've previously installed the German and Russian locales on Ubuntu: 27 | # sudo locale-gen de_DE 28 | # sudo locale-gen ru_RU 29 | 30 | export LC_ALL=C 31 | 32 | python -c "print('f\xe4n a'); print('foo b')" | \ 33 | sort | \ 34 | python -c " 35 | from pydrobert.kaldi.io import open as kopen 36 | with kopen('ark,s:-', 't', 'r+') as f: 37 | print(f['foo']) 38 | " 39 | # outputs: b 40 | # sort sorts C-style ("foo" first), kaldi sorts C-style 41 | 42 | python -c "print('f\xe4n a'); print('foo b')" | \ 43 | LC_ALL=de_DE sort | \ 44 | python -c " 45 | from pydrobert.kaldi.io import open as kopen 46 | with kopen('ark,s:-', 't', 'r+') as f: 47 | print(f['foo']) 48 | " 49 | # KeyError: 'foo' 50 | # sort sorts German ("fän" first), kaldi sorts C-style 51 | 52 | python -c "print('f\xe4n a'); print('foo b')" | \ 53 | sort | \ 54 | LC_ALL=de_DE python -c " 55 | from pydrobert.kaldi.io import open as kopen 56 | with kopen('ark,s:-', 't', 'r+') as f: 57 | print(f['foo']) 58 | " 59 | # outputs: b 60 | # sort sorts C-style, kaldi ignores German encoding and sorts C-style 61 | 62 | These examples will lead to exceptions which can be caught and debugged. One 63 | can come up with more insidious errors which don't fail, mind you. 64 | 65 | For the most part, however, this is a non-issue, at least for 66 | `pydrobert-kaldi`. The only situation the library might mess up in that I know 67 | of involves sorting table keys, and the table keys are (as far as I can tell) 68 | exclusively ASCII. Also as far as I can tell, even locales which contain 69 | characters visually identical to those in the Latin alphabet are nonetheless 70 | encoded outside of the ASCII range. For example: 71 | 72 | .. 
code-block:: shell 73 | 74 | export LC_ALL=C 75 | echo $'M\nC' | LC_ALL=ru_RU sort 76 | # outputs: C, M 77 | # these are the ASCII characters 78 | echo $'М\nС' | LC_ALL=ru_RU sort 79 | # outputs: M, C 80 | # these are UTF characters 'U+0421' and 'U+0043', respectively 81 | 82 | Besides UTF, ISO-8859-1 maintains a contiguous ASCII range. Technically there's 83 | no guarantee that this will be the case for all encodings, though any such 84 | encoding would probably break all sorts of legacy code. If you have a 85 | counterexample of a Kaldi recipe that does otherwise, please let me know and 86 | I'll mention it here. 87 | 88 | Other than that, the library is quite agnostic to locale. An error involving 89 | locales is, more likely than not, something that occurred before or after the 90 | library was called. 91 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | pydrobert.kaldi API 2 | =================== 3 | 4 | .. automodule:: pydrobert.kaldi 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Submodules: 12 | 13 | pydrobert.kaldi.eval 14 | pydrobert.kaldi.io 15 | pydrobert.kaldi.logging 16 | -------------------------------------------------------------------------------- /docs/source/pydrobert.kaldi.eval.rst: -------------------------------------------------------------------------------- 1 | pydrobert.kaldi.eval 2 | ==================== 3 | 4 | .. automodule:: pydrobert.kaldi.eval 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | .. 
toctree:: 11 | :maxdepth: 1 12 | :caption: Submodules: 13 | 14 | pydrobert.kaldi.eval.util 15 | -------------------------------------------------------------------------------- /docs/source/pydrobert.kaldi.eval.util.rst: -------------------------------------------------------------------------------- 1 | pydrobert.kaldi.eval.util 2 | ========================= 3 | 4 | .. automodule:: pydrobert.kaldi.eval.util 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/pydrobert.kaldi.io.argparse.rst: -------------------------------------------------------------------------------- 1 | pydrobert.kaldi.io.argparse 2 | =========================== 3 | 4 | .. automodule:: pydrobert.kaldi.io.argparse 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/pydrobert.kaldi.io.corpus.rst: -------------------------------------------------------------------------------- 1 | pydrobert.kaldi.io.corpus 2 | ========================= 3 | 4 | .. automodule:: pydrobert.kaldi.io.corpus 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/pydrobert.kaldi.io.duck_streams.rst: -------------------------------------------------------------------------------- 1 | pydrobert.kaldi.io.duck_streams 2 | =============================== 3 | 4 | .. automodule:: pydrobert.kaldi.io.duck_streams 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/pydrobert.kaldi.io.enums.rst: -------------------------------------------------------------------------------- 1 | pydrobert.kaldi.io.enums 2 | ======================== 3 | 4 | .. 
automodule:: pydrobert.kaldi.io.enums 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/pydrobert.kaldi.io.rst: -------------------------------------------------------------------------------- 1 | pydrobert.kaldi.io 2 | ================== 3 | 4 | .. automodule:: pydrobert.kaldi.io 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | 10 | .. toctree:: 11 | :maxdepth: 1 12 | :caption: Submodules: 13 | 14 | pydrobert.kaldi.io.argparse 15 | pydrobert.kaldi.io.corpus 16 | pydrobert.kaldi.io.duck_streams 17 | pydrobert.kaldi.io.enums 18 | pydrobert.kaldi.io.table_streams 19 | pydrobert.kaldi.io.util 20 | -------------------------------------------------------------------------------- /docs/source/pydrobert.kaldi.io.table_streams.rst: -------------------------------------------------------------------------------- 1 | pydrobert.kaldi.io.table_streams 2 | ================================ 3 | 4 | .. automodule:: pydrobert.kaldi.io.table_streams 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/pydrobert.kaldi.io.util.rst: -------------------------------------------------------------------------------- 1 | pydrobert.kaldi.io.util 2 | ======================= 3 | 4 | .. automodule:: pydrobert.kaldi.io.util 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/pydrobert.kaldi.logging.rst: -------------------------------------------------------------------------------- 1 | pydrobert.kaldi.logging 2 | ----------------------- 3 | 4 | .. 
automodule:: pydrobert.kaldi.logging 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=45", 4 | "wheel", 5 | "setuptools_scm>=6.2", 6 | "numpy>=1.25", 7 | "swig" 8 | ] 9 | build-backend = "setuptools.build_meta" 10 | 11 | [tool.setuptools_scm] 12 | write_to = "src/pydrobert/kaldi/_version.py" -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | pytorch : Optional tests involving the pytorch package 4 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = pydrobert-kaldi 3 | description = Python wrapper for Kaldi 4 | long_description = file: README.md 5 | long_description_content_type = text/markdown 6 | license = Apache-2.0 7 | license_files = 8 | LICENSE 9 | COPYING_Kaldi_Project 10 | url = https://github.com/sdrobert/pydrobert-kaldi 11 | project_urls = 12 | Documentation = https://pydrobert-kaldi.readthedocs.io 13 | author = Sean Robertson 14 | author_email = sdrobert@cs.toronto.edu 15 | classifiers = 16 | Development Status :: 3 - Alpha 17 | License :: OSI Approved :: Apache Software License 18 | Programming Language :: Python :: 3 19 | 20 | [options] 21 | zip_safe = False 22 | packages = find_namespace: 23 | package_dir = 24 | = src 25 | python_requires = >= 3.9 26 | install_requires = 27 | numpy 28 | 29 | [options.entry_points] 30 | console_scripts = 31 | write-table-to-pickle = pydrobert.kaldi.command_line:write_table_to_pickle 32 | write-pickle-to-table = pydrobert.kaldi.command_line:write_pickle_to_table 33 | 
compute-error-rate = pydrobert.kaldi.command_line:compute_error_rate 34 | normalize-feat-lens = pydrobert.kaldi.command_line:normalize_feat_lens 35 | write-table-to-torch-dir = pydrobert.kaldi.command_line:write_table_to_torch_dir [pytorch] 36 | write-torch-dir-to-table = pydrobert.kaldi.command_line:write_torch_dir_to_table [pytorch] 37 | 38 | [options.extras_require] 39 | pytorch = torch 40 | 41 | [options.packages.find] 42 | where = src 43 | -------------------------------------------------------------------------------- /src/base/kaldi-common.h: -------------------------------------------------------------------------------- 1 | // base/kaldi-common.h 2 | 3 | // Copyright 2009-2011 Microsoft Corporation 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 
19 | 20 | #ifndef KALDI_BASE_KALDI_COMMON_H_ 21 | #define KALDI_BASE_KALDI_COMMON_H_ 1 22 | 23 | #include 24 | #include 25 | #include // C string stuff like strcpy 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | #include "base/kaldi-utils.h" 35 | #include "base/kaldi-error.h" 36 | #include "base/kaldi-types.h" 37 | #include "base/io-funcs.h" 38 | #include "base/kaldi-math.h" 39 | #include "base/timer.h" 40 | 41 | #endif // KALDI_BASE_KALDI_COMMON_H_ 42 | -------------------------------------------------------------------------------- /src/base/kaldi-math.cc: -------------------------------------------------------------------------------- 1 | // base/kaldi-math.cc 2 | 3 | // Copyright 2009-2011 Microsoft Corporation; Yanmin Qian; 4 | // Saarland University; Jan Silovsky 5 | 6 | // See ../../COPYING for clarification regarding multiple authors 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 16 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 17 | // MERCHANTABLITY OR NON-INFRINGEMENT. 18 | // See the Apache 2 License for the specific language governing permissions and 19 | // limitations under the License. 
20 | 21 | #include "base/kaldi-math.h" 22 | #ifndef _MSC_VER 23 | #include 24 | #include 25 | #endif 26 | #include 27 | #include 28 | 29 | namespace kaldi { 30 | // These routines are tested in matrix/matrix-test.cc 31 | 32 | int32 RoundUpToNearestPowerOfTwo(int32 n) { 33 | KALDI_ASSERT(n > 0); 34 | n--; 35 | n |= n >> 1; 36 | n |= n >> 2; 37 | n |= n >> 4; 38 | n |= n >> 8; 39 | n |= n >> 16; 40 | return n+1; 41 | } 42 | 43 | static std::mutex _RandMutex; 44 | 45 | int Rand(struct RandomState* state) { 46 | #if !defined(_POSIX_THREAD_SAFE_FUNCTIONS) 47 | // On Windows and Cygwin, just call rand() 48 | return rand(); 49 | #else 50 | if (state) { 51 | return rand_r(&(state->seed)); 52 | } else { 53 | std::lock_guard lock(_RandMutex); 54 | return rand(); 55 | } 56 | #endif 57 | } 58 | 59 | RandomState::RandomState() { 60 | // we initialize it as Rand() + 27437 instead of just Rand(), because on some 61 | // systems, e.g. at the very least Mac OSX Yosemite and later, it seems to be 62 | // the case that rand_r when initialized with rand() will give you the exact 63 | // same sequence of numbers that rand() will give if you keep calling rand() 64 | // after that initial call. This can cause problems with repeated sequences. 65 | // For example if you initialize two RandomState structs one after the other 66 | // without calling rand() in between, they would give you the same sequence 67 | // offset by one (if we didn't have the "+ 27437" in the code). 27437 is just 68 | // a randomly chosen prime number. 69 | seed = unsigned(Rand()) + 27437; 70 | } 71 | 72 | bool WithProb(BaseFloat prob, struct RandomState* state) { // returns true with (approximately) probability prob, drawing from Rand(state) 73 | KALDI_ASSERT(prob >= 0 && prob <= 1.1); // prob should be <= 1.0, 74 | // but we allow slightly larger values that could arise from roundoff in 75 | // previous calculations. 
76 | KALDI_COMPILE_TIME_ASSERT(RAND_MAX > 128 * 128); 77 | if (prob == 0) return false; 78 | else if (prob == 1.0) return true; 79 | else if (prob * RAND_MAX < 128.0) { 80 | // prob is very small but nonzero, and the "main algorithm" 81 | // wouldn't work that well. So: with probability 1/128, we 82 | // return WithProb (prob * 128), else return false. 83 | if (Rand(state) < RAND_MAX / 128) { // with probability 1/128... 84 | // Note: we know that prob * 128.0 < 1.0, because 85 | // we asserted RAND_MAX > 128 * 128. 86 | return WithProb(prob * 128.0, state); // forward state: the original call omitted it and fell back to the default argument 87 | } else { 88 | return false; 89 | } 90 | } else { 91 | return (Rand(state) < ((RAND_MAX + static_cast<BaseFloat>(1.0)) * prob)); 92 | } 93 | } 94 | 95 | int32 RandInt(int32 min_val, int32 max_val, struct RandomState* state) { 96 | // This is not exact. 97 | KALDI_ASSERT(max_val >= min_val); 98 | if (max_val == min_val) return min_val; 99 | 100 | #ifdef _MSC_VER 101 | // RAND_MAX is quite small on Windows -> may need to handle larger numbers. 102 | if (RAND_MAX > (max_val-min_val)*8) { 103 | // *8 to avoid large inaccuracies in probability, from the modulus... 104 | return min_val + 105 | ((unsigned int)Rand(state) % (unsigned int)(max_val+1-min_val)); 106 | } else { 107 | if ((unsigned int)(RAND_MAX*RAND_MAX) > 108 | (unsigned int)((max_val+1-min_val)*8)) { 109 | // *8 to avoid inaccuracies in probability, from the modulus... 110 | return min_val + ( (unsigned int)( (Rand(state)+RAND_MAX*Rand(state))) 111 | % (unsigned int)(max_val+1-min_val)); 112 | } else { 113 | KALDI_ERR << "rand_int failed because we do not support such large " 114 | "random numbers. (Extend this function)."; 115 | } 116 | } 117 | #else 118 | return min_val + 119 | (static_cast(Rand(state)) % static_cast(max_val+1-min_val)); 120 | #endif 121 | } 122 | 123 | // Returns poisson-distributed random number. 124 | // Take care: this takes time proportional 125 | // to lambda. Faster algorithms exist but are more complex. 
126 | int32 RandPoisson(float lambda, struct RandomState* state) { 127 | // Knuth's algorithm. 128 | KALDI_ASSERT(lambda >= 0); 129 | float L = expf(-lambda), p = 1.0; 130 | int32 k = 0; 131 | do { 132 | k++; 133 | float u = RandUniform(state); 134 | p *= u; 135 | } while (p > L); 136 | return k-1; 137 | } 138 | 139 | void RandGauss2(float *a, float *b, RandomState *state) { 140 | KALDI_ASSERT(a); 141 | KALDI_ASSERT(b); 142 | float u1 = RandUniform(state); 143 | float u2 = RandUniform(state); 144 | u1 = sqrtf(-2.0f * logf(u1)); 145 | u2 = 2.0f * M_PI * u2; 146 | *a = u1 * cosf(u2); 147 | *b = u1 * sinf(u2); 148 | } 149 | 150 | void RandGauss2(double *a, double *b, RandomState *state) { 151 | KALDI_ASSERT(a); 152 | KALDI_ASSERT(b); 153 | float a_float, b_float; 154 | // Just because we're using doubles doesn't mean we need super-high-quality 155 | // random numbers, so we just use the floating-point version internally. 156 | RandGauss2(&a_float, &b_float, state); 157 | *a = a_float; 158 | *b = b_float; 159 | } 160 | 161 | 162 | } // end namespace kaldi 163 | -------------------------------------------------------------------------------- /src/base/kaldi-types.h: -------------------------------------------------------------------------------- 1 | // base/kaldi-types.h 2 | 3 | // Copyright 2009-2011 Microsoft Corporation; Saarland University; 4 | // Jan Silovsky; Yanmin Qian 5 | 6 | // See ../../COPYING for clarification regarding multiple authors 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 
10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 16 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 17 | // MERCHANTABLITY OR NON-INFRINGEMENT. 18 | // See the Apache 2 License for the specific language governing permissions and 19 | // limitations under the License. 20 | 21 | #ifndef KALDI_BASE_KALDI_TYPES_H_ 22 | #define KALDI_BASE_KALDI_TYPES_H_ 1 23 | 24 | namespace kaldi { 25 | // TYPEDEFS: BaseFloat is double when KALDI_DOUBLEPRECISION is non-zero, float otherwise. 26 | #if (KALDI_DOUBLEPRECISION != 0) 27 | typedef double BaseFloat; 28 | #else 29 | typedef float BaseFloat; 30 | #endif 31 | } 32 | 33 | #ifdef _MSC_VER 34 | #include 35 | #define ssize_t SSIZE_T 36 | #endif 37 | 38 | // we can do this a different way if some platform 39 | // we find in the future lacks stdint.h 40 | #include 41 | 42 | // for discussion on what to do if you need to compile Kaldi 43 | // without OpenFST, see the bottom of this file 44 | //#include 45 | // 46 | //namespace kaldi { 47 | // using ::int16; 48 | // using ::int32; 49 | // using ::int64; 50 | // using ::uint16; 51 | // using ::uint32; 52 | // using ::uint64; 53 | // typedef float float32; 54 | // typedef double double64; 55 | //} // end namespace kaldi 56 | 57 | // If you compile Kaldi without OpenFST, comment out the previous namespace block 58 | // and use the following one (this copy does so: the block above is commented out). 59 | namespace kaldi { 60 | typedef int8_t int8; 61 | typedef int16_t int16; 62 | typedef int32_t int32; 63 | typedef int64_t int64; 64 | 65 | typedef uint8_t uint8; 66 | typedef uint16_t uint16; 67 | typedef uint32_t uint32; 68 | typedef uint64_t uint64; 69 | typedef float float32; 70 | typedef double double64; 71 | } // end namespace kaldi 72 | 73 | #endif // 
KALDI_BASE_KALDI_TYPES_H_ 74 | -------------------------------------------------------------------------------- /src/base/kaldi-utils.cc: -------------------------------------------------------------------------------- 1 | // base/kaldi-utils.cc 2 | // Copyright 2009-2011 Karel Vesely; Yanmin Qian; Microsoft Corporation 3 | 4 | // Modified by Sean Robertson 2022. Updates listed below. 5 | 6 | // See ../../COPYING for clarification regarding multiple authors 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 16 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 17 | // MERCHANTABLITY OR NON-INFRINGEMENT. 18 | // See the Apache 2 License for the specific language governing permissions and 19 | // limitations under the License. 20 | 21 | #include "base/kaldi-utils.h" 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | // sdrobert: Not guaranteed to have been included beforehand on all MSVC for 28 | // some reason 29 | #include 30 | 31 | // sdrobert: std::snprintf did not exist pre-2015 on MSVC 32 | #if defined(_MSC_VER) && _MSC_VER < 1900 33 | #define mysnprintf c99_snprintf 34 | #else 35 | #define mysnprintf std::snprintf 36 | #endif 37 | 38 | namespace kaldi { 39 | 40 | std::string CharToString(const char &c) { 41 | char buf[20]; 42 | if (std::isprint(c)) 43 | mysnprintf(buf, sizeof(buf), "\'%c\'", c); 44 | else 45 | mysnprintf(buf, sizeof(buf), "[character %d]", static_cast(c)); 46 | return buf; 47 | } 48 | 49 | void Sleep(double sec) { 50 | // duration_cast<> rounds down, add 0.5 to compensate. 
51 | auto dur_nanos = std::chrono::duration(sec * 1E9 + 0.5); 52 | auto dur_syshires = std::chrono::duration_cast< 53 | typename std::chrono::high_resolution_clock::duration>(dur_nanos); 54 | std::this_thread::sleep_for(dur_syshires); 55 | } 56 | 57 | #undef mysnprintf 58 | 59 | } // end namespace kaldi 60 | -------------------------------------------------------------------------------- /src/base/timer.cc: -------------------------------------------------------------------------------- 1 | // base/timer.cc 2 | 3 | // Copyright 2018 Johns Hopkins University (author: Daniel Povey) 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 
19 | 20 | #include "base/timer.h" 21 | #include "base/kaldi-error.h" 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | namespace kaldi { 28 | 29 | class ProfileStats { 30 | public: 31 | void AccStats(const char *function_name, double elapsed) { 32 | std::unordered_map::iterator 33 | iter = map_.find(function_name); 34 | if (iter == map_.end()) { 35 | map_[function_name] = ProfileStatsEntry(function_name); 36 | map_[function_name].total_time = elapsed; 37 | } else { 38 | iter->second.total_time += elapsed; 39 | } 40 | } 41 | ~ProfileStats() { 42 | // This map makes sure we agglomerate the time if there were any duplicate 43 | // addresses of strings. 44 | std::unordered_map total_time; 45 | for (auto iter = map_.begin(); iter != map_.end(); iter++) 46 | total_time[iter->second.name] += iter->second.total_time; 47 | 48 | ReverseSecondComparator comp; 49 | std::vector > pairs(total_time.begin(), 50 | total_time.end()); 51 | std::sort(pairs.begin(), pairs.end(), comp); 52 | for (size_t i = 0; i < pairs.size(); i++) { 53 | KALDI_LOG << "Time taken in " << pairs[i].first << " is " 54 | << std::fixed << std::setprecision(2) << pairs[i].second << "s."; 55 | } 56 | } 57 | private: 58 | 59 | struct ProfileStatsEntry { 60 | std::string name; 61 | double total_time; 62 | ProfileStatsEntry() { } 63 | ProfileStatsEntry(const char *name): name(name) { } 64 | }; 65 | 66 | struct ReverseSecondComparator { 67 | bool operator () (const std::pair &a, 68 | const std::pair &b) { 69 | return a.second > b.second; 70 | } 71 | }; 72 | 73 | // Note: this map is keyed on the address of the string, there is no proper 74 | // hash function. The assumption is that the strings are compile-time 75 | // constants. 
76 | std::unordered_map map_; 77 | }; 78 | 79 | ProfileStats g_profile_stats; 80 | 81 | Profiler::~Profiler() { 82 | g_profile_stats.AccStats(name_, tim_.Elapsed()); 83 | } 84 | 85 | } // namespace kaldi 86 | -------------------------------------------------------------------------------- /src/base/timer.h: -------------------------------------------------------------------------------- 1 | // base/timer.h 2 | 3 | // Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 19 | #ifndef KALDI_BASE_TIMER_H_ 20 | #define KALDI_BASE_TIMER_H_ 21 | 22 | #include "base/kaldi-utils.h" 23 | #include "base/kaldi-error.h" 24 | 25 | 26 | #if defined(_MSC_VER) || defined(MINGW) 27 | 28 | namespace kaldi { 29 | class Timer { 30 | public: 31 | Timer() { Reset(); } 32 | 33 | // You can initialize with bool to control whether or not you want the time to 34 | // be set when the object is created. 35 | explicit Timer(bool set_timer) { if (set_timer) Reset(); } 36 | 37 | void Reset() { 38 | QueryPerformanceCounter(&time_start_); 39 | } 40 | double Elapsed() const { 41 | LARGE_INTEGER time_end; 42 | LARGE_INTEGER freq; 43 | QueryPerformanceCounter(&time_end); 44 | 45 | if (QueryPerformanceFrequency(&freq) == 0) { 46 | // Hardware does not support this. 
47 | return 0.0; 48 | } 49 | return (static_cast(time_end.QuadPart) - 50 | static_cast(time_start_.QuadPart)) / 51 | (static_cast(freq.QuadPart)); 52 | } 53 | private: 54 | LARGE_INTEGER time_start_; 55 | }; 56 | 57 | 58 | #else 59 | #include 60 | #include 61 | 62 | namespace kaldi { 63 | class Timer { 64 | public: 65 | Timer() { Reset(); } 66 | 67 | // You can initialize with bool to control whether or not you want the time to 68 | // be set when the object is created. 69 | explicit Timer(bool set_timer) { if (set_timer) Reset(); } 70 | 71 | void Reset() { gettimeofday(&this->time_start_, &time_zone_); } 72 | 73 | /// Returns time in seconds. 74 | double Elapsed() const { 75 | struct timeval time_end; 76 | struct timezone time_zone; 77 | gettimeofday(&time_end, &time_zone); 78 | double t1, t2; 79 | t1 = static_cast(time_start_.tv_sec) + 80 | static_cast(time_start_.tv_usec)/(1000*1000); 81 | t2 = static_cast(time_end.tv_sec) + 82 | static_cast(time_end.tv_usec)/(1000*1000); 83 | return t2-t1; 84 | } 85 | 86 | private: 87 | struct timeval time_start_; 88 | struct timezone time_zone_; 89 | }; 90 | 91 | #endif 92 | 93 | class Profiler { 94 | public: 95 | // Caution: the 'const char' should always be a string constant; for speed, 96 | // internally the profiling code uses the address of it as a lookup key. 97 | Profiler(const char *function_name): name_(function_name) { } 98 | ~Profiler(); 99 | private: 100 | Timer tim_; 101 | const char *name_; 102 | }; 103 | 104 | // To add timing info for a function, you just put 105 | // KALDI_PROFILE; 106 | // at the beginning of the function. Caution: this doesn't 107 | // include the class name. 
108 | #define KALDI_PROFILE Profiler _profiler(__func__) 109 | 110 | 111 | 112 | } // namespace kaldi 113 | 114 | 115 | #endif // KALDI_BASE_TIMER_H_ 116 | -------------------------------------------------------------------------------- /src/base/version.h: -------------------------------------------------------------------------------- 1 | // This file was automatically created by ./get_version.sh. 2 | // It is only included by ./kaldi-error.cc. 3 | #define KALDI_VERSION "5.3.11-35288" 4 | #define KALDI_GIT_HEAD "35288fae9009de7f0b9e47eae0205adb44fe90fd" 5 | -------------------------------------------------------------------------------- /src/feat/feature-common-inl.h: -------------------------------------------------------------------------------- 1 | // feat/feature-common-inl.h 2 | 3 | // Copyright 2016 Johns Hopkins University (author: Daniel Povey) 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 19 | 20 | #ifndef KALDI_FEAT_FEATURE_COMMON_INL_H_ 21 | #define KALDI_FEAT_FEATURE_COMMON_INL_H_ 22 | 23 | #include "feat/resample.h" 24 | // Do not include this file directly. 
It is included by feat/feature-common.h 25 | 26 | namespace kaldi { 27 | 28 | template 29 | void OfflineFeatureTpl::ComputeFeatures( 30 | const VectorBase &wave, 31 | BaseFloat sample_freq, 32 | BaseFloat vtln_warp, 33 | Matrix *output) { 34 | KALDI_ASSERT(output != NULL); 35 | BaseFloat new_sample_freq = computer_.GetFrameOptions().samp_freq; 36 | if (sample_freq == new_sample_freq) { 37 | Compute(wave, vtln_warp, output); 38 | } else { 39 | if (new_sample_freq < sample_freq && 40 | ! computer_.GetFrameOptions().allow_downsample) 41 | KALDI_ERR << "Waveform and config sample Frequency mismatch: " 42 | << sample_freq << " .vs " << new_sample_freq 43 | << " (use --allow-downsample=true to allow " 44 | << " downsampling the waveform)."; 45 | else if (new_sample_freq > sample_freq && 46 | ! computer_.GetFrameOptions().allow_upsample) 47 | KALDI_ERR << "Waveform and config sample Frequency mismatch: " 48 | << sample_freq << " .vs " << new_sample_freq 49 | << " (use --allow-upsample=true option to allow " 50 | << " upsampling the waveform)."; 51 | // Resample the waveform. 52 | Vector resampled_wave(wave); 53 | ResampleWaveform(sample_freq, wave, 54 | new_sample_freq, &resampled_wave); 55 | Compute(resampled_wave, vtln_warp, output); 56 | } 57 | } 58 | 59 | template 60 | void OfflineFeatureTpl::Compute( 61 | const VectorBase &wave, 62 | BaseFloat vtln_warp, 63 | Matrix *output) { 64 | KALDI_ASSERT(output != NULL); 65 | int32 rows_out = NumFrames(wave.Dim(), computer_.GetFrameOptions()), 66 | cols_out = computer_.Dim(); 67 | if (rows_out == 0) { 68 | output->Resize(0, 0); 69 | return; 70 | } 71 | output->Resize(rows_out, cols_out); 72 | Vector window; // windowed waveform. 73 | bool use_raw_log_energy = computer_.NeedRawLogEnergy(); 74 | for (int32 r = 0; r < rows_out; r++) { // r is frame index. 75 | BaseFloat raw_log_energy = 0.0; 76 | ExtractWindow(0, wave, r, computer_.GetFrameOptions(), 77 | feature_window_function_, &window, 78 | (use_raw_log_energy ? 
&raw_log_energy : NULL)); 79 | 80 | SubVector output_row(*output, r); 81 | computer_.Compute(raw_log_energy, vtln_warp, &window, &output_row); 82 | } 83 | } 84 | 85 | template 86 | void OfflineFeatureTpl::Compute( 87 | const VectorBase &wave, 88 | BaseFloat vtln_warp, 89 | Matrix *output) const { 90 | OfflineFeatureTpl temp(*this); 91 | // call the non-const version of Compute() on a temporary copy of this object. 92 | // This is a workaround for const-ness that may sometimes be useful in 93 | // multi-threaded code, although it's not optimally efficient. 94 | temp.Compute(wave, vtln_warp, output); 95 | } 96 | 97 | } // end namespace kaldi 98 | 99 | #endif 100 | -------------------------------------------------------------------------------- /src/feat/feature-fbank.cc: -------------------------------------------------------------------------------- 1 | // feat/feature-fbank.cc 2 | 3 | // Copyright 2009-2012 Karel Vesely 4 | // 2016 Johns Hopkins University (author: Daniel Povey) 5 | 6 | // See ../../COPYING for clarification regarding multiple authors 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 16 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 17 | // MERCHANTABLITY OR NON-INFRINGEMENT. 18 | // See the Apache 2 License for the specific language governing permissions and 19 | // limitations under the License. 
20 | 21 | 22 | #include "feat/feature-fbank.h" 23 | 24 | namespace kaldi { 25 | 26 | FbankComputer::FbankComputer(const FbankOptions &opts): 27 | opts_(opts), srfft_(NULL) { 28 | if (opts.energy_floor > 0.0) 29 | log_energy_floor_ = Log(opts.energy_floor); 30 | 31 | int32 padded_window_size = opts.frame_opts.PaddedWindowSize(); 32 | if ((padded_window_size & (padded_window_size-1)) == 0) // Is a power of two... 33 | srfft_ = new SplitRadixRealFft(padded_window_size); 34 | 35 | // We'll definitely need the filterbanks info for VTLN warping factor 1.0. 36 | // [note: this call caches it.] 37 | GetMelBanks(1.0); 38 | } 39 | 40 | FbankComputer::FbankComputer(const FbankComputer &other): 41 | opts_(other.opts_), log_energy_floor_(other.log_energy_floor_), 42 | mel_banks_(other.mel_banks_), srfft_(NULL) { 43 | for (std::map::iterator iter = mel_banks_.begin(); 44 | iter != mel_banks_.end(); 45 | ++iter) 46 | iter->second = new MelBanks(*(iter->second)); 47 | if (other.srfft_) 48 | srfft_ = new SplitRadixRealFft(*(other.srfft_)); 49 | } 50 | 51 | FbankComputer::~FbankComputer() { 52 | for (std::map::iterator iter = mel_banks_.begin(); 53 | iter != mel_banks_.end(); ++iter) 54 | delete iter->second; 55 | delete srfft_; 56 | } 57 | 58 | const MelBanks* FbankComputer::GetMelBanks(BaseFloat vtln_warp) { 59 | MelBanks *this_mel_banks = NULL; 60 | std::map::iterator iter = mel_banks_.find(vtln_warp); 61 | if (iter == mel_banks_.end()) { 62 | this_mel_banks = new MelBanks(opts_.mel_opts, 63 | opts_.frame_opts, 64 | vtln_warp); 65 | mel_banks_[vtln_warp] = this_mel_banks; 66 | } else { 67 | this_mel_banks = iter->second; 68 | } 69 | return this_mel_banks; 70 | } 71 | 72 | void FbankComputer::Compute(BaseFloat signal_raw_log_energy, 73 | BaseFloat vtln_warp, 74 | VectorBase *signal_frame, 75 | VectorBase *feature) { 76 | 77 | const MelBanks &mel_banks = *(GetMelBanks(vtln_warp)); 78 | 79 | KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() && 80 | 
feature->Dim() == this->Dim()); 81 | 82 | 83 | // Compute energy after window function (not the raw one). 84 | if (opts_.use_energy && !opts_.raw_energy) 85 | signal_raw_log_energy = Log(std::max(VecVec(*signal_frame, *signal_frame), 86 | std::numeric_limits::epsilon())); 87 | 88 | if (srfft_ != NULL) // Compute FFT using split-radix algorithm. 89 | srfft_->Compute(signal_frame->Data(), true); 90 | else // An alternative algorithm that works for non-powers-of-two. 91 | RealFft(signal_frame, true); 92 | 93 | // Convert the FFT into a power spectrum. 94 | ComputePowerSpectrum(signal_frame); 95 | SubVector power_spectrum(*signal_frame, 0, 96 | signal_frame->Dim() / 2 + 1); 97 | 98 | // Use magnitude instead of power if requested. 99 | if (!opts_.use_power) 100 | power_spectrum.ApplyPow(0.5); 101 | 102 | int32 mel_offset = ((opts_.use_energy && !opts_.htk_compat) ? 1 : 0); 103 | SubVector mel_energies(*feature, 104 | mel_offset, 105 | opts_.mel_opts.num_bins); 106 | 107 | // Sum with mel fiterbanks over the power spectrum 108 | mel_banks.Compute(power_spectrum, &mel_energies); 109 | if (opts_.use_log_fbank) { 110 | // Avoid log of zero (which should be prevented anyway by dithering). 111 | mel_energies.ApplyFloor(std::numeric_limits::epsilon()); 112 | mel_energies.ApplyLog(); // take the log. 113 | } 114 | 115 | // Copy energy as first value (or the last, if htk_compat == true). 116 | if (opts_.use_energy) { 117 | if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_) { 118 | signal_raw_log_energy = log_energy_floor_; 119 | } 120 | int32 energy_index = opts_.htk_compat ? 
opts_.mel_opts.num_bins : 0; 121 | (*feature)(energy_index) = signal_raw_log_energy; 122 | } 123 | } 124 | 125 | } // namespace kaldi 126 | -------------------------------------------------------------------------------- /src/feat/feature-spectrogram.cc: -------------------------------------------------------------------------------- 1 | // feat/feature-spectrogram.cc 2 | 3 | // Copyright 2009-2012 Karel Vesely 4 | // Copyright 2012 Navdeep Jaitly 5 | 6 | // See ../../COPYING for clarification regarding multiple authors 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 16 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 17 | // MERCHANTABLITY OR NON-INFRINGEMENT. 18 | // See the Apache 2 License for the specific language governing permissions and 19 | // limitations under the License. 
20 | 21 | 22 | #include "feat/feature-spectrogram.h" 23 | 24 | 25 | namespace kaldi { 26 | 27 | SpectrogramComputer::SpectrogramComputer(const SpectrogramOptions &opts) 28 | : opts_(opts), srfft_(NULL) { 29 | if (opts.energy_floor > 0.0) 30 | log_energy_floor_ = Log(opts.energy_floor); 31 | 32 | int32 padded_window_size = opts.frame_opts.PaddedWindowSize(); 33 | if ((padded_window_size & (padded_window_size-1)) == 0) // Is a power of two 34 | srfft_ = new SplitRadixRealFft(padded_window_size); 35 | } 36 | 37 | SpectrogramComputer::SpectrogramComputer(const SpectrogramComputer &other): 38 | opts_(other.opts_), log_energy_floor_(other.log_energy_floor_), srfft_(NULL) { 39 | if (other.srfft_ != NULL) 40 | srfft_ = new SplitRadixRealFft(*other.srfft_); 41 | } 42 | 43 | SpectrogramComputer::~SpectrogramComputer() { 44 | delete srfft_; 45 | } 46 | 47 | void SpectrogramComputer::Compute(BaseFloat signal_raw_log_energy, 48 | BaseFloat vtln_warp, 49 | VectorBase *signal_frame, 50 | VectorBase *feature) { 51 | KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() && 52 | feature->Dim() == this->Dim()); 53 | 54 | 55 | // Compute energy after window function (not the raw one) 56 | if (!opts_.raw_energy) 57 | signal_raw_log_energy = Log(std::max(VecVec(*signal_frame, *signal_frame), 58 | std::numeric_limits::epsilon())); 59 | 60 | if (srfft_ != NULL) // Compute FFT using split-radix algorithm. 61 | srfft_->Compute(signal_frame->Data(), true); 62 | else // An alternative algorithm that works for non-powers-of-two 63 | RealFft(signal_frame, true); 64 | 65 | if (opts_.return_raw_fft) { 66 | feature->CopyFromVec(*signal_frame); 67 | return; 68 | } 69 | 70 | // Convert the FFT into a power spectrum. 
71 | ComputePowerSpectrum(signal_frame); 72 | SubVector power_spectrum(*signal_frame, 73 | 0, signal_frame->Dim() / 2 + 1); 74 | 75 | power_spectrum.ApplyFloor(std::numeric_limits::epsilon()); 76 | power_spectrum.ApplyLog(); 77 | 78 | feature->CopyFromVec(power_spectrum); 79 | 80 | if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_) 81 | signal_raw_log_energy = log_energy_floor_; 82 | // The zeroth spectrogram component is always set to the signal energy, 83 | // instead of the square of the constant component of the signal. 84 | (*feature)(0) = signal_raw_log_energy; 85 | } 86 | 87 | } // namespace kaldi 88 | -------------------------------------------------------------------------------- /src/feat/feature-spectrogram.h: -------------------------------------------------------------------------------- 1 | // feat/feature-spectrogram.h 2 | 3 | // Copyright 2009-2012 Karel Vesely 4 | // Copyright 2012 Navdeep Jaitly 5 | 6 | // See ../../COPYING for clarification regarding multiple authors 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 16 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 17 | // MERCHANTABLITY OR NON-INFRINGEMENT. 18 | // See the Apache 2 License for the specific language governing permissions and 19 | // limitations under the License. 
20 | 21 | #ifndef KALDI_FEAT_FEATURE_SPECTROGRAM_H_ 22 | #define KALDI_FEAT_FEATURE_SPECTROGRAM_H_ 23 | 24 | 25 | #include 26 | 27 | #include "feat/feature-common.h" 28 | #include "feat/feature-functions.h" 29 | #include "feat/feature-window.h" 30 | 31 | namespace kaldi { 32 | /// @addtogroup feat FeatureExtraction 33 | /// @{ 34 | 35 | 36 | /// SpectrogramOptions contains basic options for computing spectrogram 37 | /// features. 38 | struct SpectrogramOptions { 39 | FrameExtractionOptions frame_opts; 40 | BaseFloat energy_floor; 41 | bool raw_energy; // If true, compute energy before preemphasis and windowing 42 | bool return_raw_fft; // If true, return the raw FFT spectrum 43 | // Note that in that case the Dim() will return double 44 | // the expected dimension (because of the complex domain of it) 45 | 46 | SpectrogramOptions() : 47 | energy_floor(0.0), 48 | raw_energy(true), 49 | return_raw_fft(false) {} 50 | 51 | void Register(OptionsItf *opts) { 52 | frame_opts.Register(opts); 53 | opts->Register("energy-floor", &energy_floor, 54 | "Floor on energy (absolute, not relative) in Spectrogram " 55 | "computation. Caution: this floor is applied to the zeroth " 56 | "component, representing the total signal energy. The " 57 | "floor on the individual spectrogram elements is fixed at " 58 | "std::numeric_limits::epsilon()."); 59 | opts->Register("raw-energy", &raw_energy, 60 | "If true, compute energy before preemphasis and windowing"); 61 | opts->Register("return-raw-fft", &return_raw_fft, 62 | "If true, return raw FFT complex numbers instead of log magnitudes"); 63 | } 64 | }; 65 | 66 | /// Class for computing spectrogram features. 
67 | class SpectrogramComputer { 68 | public: 69 | typedef SpectrogramOptions Options; 70 | explicit SpectrogramComputer(const SpectrogramOptions &opts); 71 | SpectrogramComputer(const SpectrogramComputer &other); 72 | 73 | const FrameExtractionOptions& GetFrameOptions() const { 74 | return opts_.frame_opts; 75 | } 76 | 77 | int32 Dim() const { 78 | if (opts_.return_raw_fft) { 79 | return opts_.frame_opts.PaddedWindowSize(); 80 | } else { 81 | return opts_.frame_opts.PaddedWindowSize() / 2 + 1; 82 | } 83 | } 84 | 85 | bool NeedRawLogEnergy() const { return opts_.raw_energy; } 86 | 87 | 88 | /** 89 | Function that computes one frame of spectrogram features from 90 | one frame of signal. 91 | 92 | @param [in] signal_raw_log_energy The log-energy of the frame of the signal 93 | prior to windowing and pre-emphasis, or 94 | log(numeric_limits::min()), whichever is greater. Must be 95 | ignored by this function if this class returns false from 96 | this->NeedsRawLogEnergy(). 97 | @param [in] vtln_warp This is ignored by this function, it's only 98 | needed for interface compatibility. 99 | @param [in] signal_frame One frame of the signal, 100 | as extracted using the function ExtractWindow() using the options 101 | returned by this->GetFrameOptions(). The function will use the 102 | vector as a workspace, which is why it's a non-const pointer. 103 | @param [out] feature Pointer to a vector of size this->Dim(), to which 104 | the computed feature will be written. 105 | */ 106 | void Compute(BaseFloat signal_raw_log_energy, 107 | BaseFloat vtln_warp, 108 | VectorBase *signal_frame, 109 | VectorBase *feature); 110 | 111 | ~SpectrogramComputer(); 112 | 113 | private: 114 | SpectrogramOptions opts_; 115 | BaseFloat log_energy_floor_; 116 | SplitRadixRealFft *srfft_; 117 | 118 | // Disallow assignment. 
119 | SpectrogramComputer &operator=(const SpectrogramComputer &other); 120 | }; 121 | 122 | typedef OfflineFeatureTpl Spectrogram; 123 | 124 | 125 | /// @} End of "addtogroup feat" 126 | } // namespace kaldi 127 | 128 | 129 | #endif // KALDI_FEAT_FEATURE_SPECTROGRAM_H_ 130 | -------------------------------------------------------------------------------- /src/feat/signal.cc: -------------------------------------------------------------------------------- 1 | // feat/signal.cc 2 | 3 | // Copyright 2015 Tom Ko 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 
19 | 20 | #include "base/kaldi-common.h" 21 | #include "util/common-utils.h" 22 | #include "feat/signal.h" 23 | 24 | namespace kaldi { 25 | 26 | void ElementwiseProductOfFft(const Vector &a, Vector *b) { 27 | int32 num_fft_bins = a.Dim() / 2; 28 | for (int32 i = 0; i < num_fft_bins; i++) { 29 | // do complex multiplication 30 | ComplexMul(a(2*i), a(2*i + 1), &((*b)(2*i)), &((*b)(2*i + 1))); 31 | } 32 | } 33 | 34 | void ConvolveSignals(const Vector &filter, Vector *signal) { 35 | int32 signal_length = signal->Dim(); 36 | int32 filter_length = filter.Dim(); 37 | int32 output_length = signal_length + filter_length - 1; 38 | Vector signal_padded(output_length); 39 | signal_padded.SetZero(); 40 | for (int32 i = 0; i < signal_length; i++) { 41 | for (int32 j = 0; j < filter_length; j++) { 42 | signal_padded(i + j) += (*signal)(i) * filter(j); 43 | } 44 | } 45 | signal->Resize(output_length); 46 | signal->CopyFromVec(signal_padded); 47 | } 48 | 49 | 50 | void FFTbasedConvolveSignals(const Vector &filter, Vector *signal) { 51 | int32 signal_length = signal->Dim(); 52 | int32 filter_length = filter.Dim(); 53 | int32 output_length = signal_length + filter_length - 1; 54 | 55 | int32 fft_length = RoundUpToNearestPowerOfTwo(output_length); 56 | KALDI_VLOG(1) << "fft_length for full signal convolution is " << fft_length; 57 | 58 | SplitRadixRealFft srfft(fft_length); 59 | 60 | Vector filter_padded(fft_length); 61 | filter_padded.Range(0, filter_length).CopyFromVec(filter); 62 | srfft.Compute(filter_padded.Data(), true); 63 | 64 | Vector signal_padded(fft_length); 65 | signal_padded.Range(0, signal_length).CopyFromVec(*signal); 66 | srfft.Compute(signal_padded.Data(), true); 67 | 68 | ElementwiseProductOfFft(filter_padded, &signal_padded); 69 | 70 | srfft.Compute(signal_padded.Data(), false); 71 | signal_padded.Scale(1.0 / fft_length); 72 | 73 | signal->Resize(output_length); 74 | signal->CopyFromVec(signal_padded.Range(0, output_length)); 75 | } 76 | 77 | void 
FFTbasedBlockConvolveSignals(const Vector &filter, Vector *signal) { 78 | int32 signal_length = signal->Dim(); 79 | int32 filter_length = filter.Dim(); 80 | int32 output_length = signal_length + filter_length - 1; 81 | signal->Resize(output_length, kCopyData); 82 | 83 | KALDI_VLOG(1) << "Length of the filter is " << filter_length; 84 | 85 | int32 fft_length = RoundUpToNearestPowerOfTwo(4 * filter_length); 86 | KALDI_VLOG(1) << "Best FFT length is " << fft_length; 87 | 88 | int32 block_length = fft_length - filter_length + 1; 89 | KALDI_VLOG(1) << "Block size is " << block_length; 90 | SplitRadixRealFft srfft(fft_length); 91 | 92 | Vector filter_padded(fft_length); 93 | filter_padded.Range(0, filter_length).CopyFromVec(filter); 94 | srfft.Compute(filter_padded.Data(), true); 95 | 96 | Vector temp_pad(filter_length - 1); 97 | temp_pad.SetZero(); 98 | Vector signal_block_padded(fft_length); 99 | 100 | for (int32 po = 0; po < output_length; po += block_length) { 101 | // get a block of the signal 102 | int32 process_length = std::min(block_length, output_length - po); 103 | signal_block_padded.SetZero(); 104 | signal_block_padded.Range(0, process_length).CopyFromVec(signal->Range(po, process_length)); 105 | 106 | srfft.Compute(signal_block_padded.Data(), true); 107 | 108 | ElementwiseProductOfFft(filter_padded, &signal_block_padded); 109 | 110 | srfft.Compute(signal_block_padded.Data(), false); 111 | signal_block_padded.Scale(1.0 / fft_length); 112 | 113 | // combine the block 114 | if (po + block_length < output_length) { // current block is not the last block 115 | signal->Range(po, block_length).CopyFromVec(signal_block_padded.Range(0, block_length)); 116 | signal->Range(po, filter_length - 1).AddVec(1.0, temp_pad); 117 | temp_pad.CopyFromVec(signal_block_padded.Range(block_length, filter_length - 1)); 118 | } else { 119 | signal->Range(po, output_length - po).CopyFromVec( 120 | signal_block_padded.Range(0, output_length - po)); 121 | if (filter_length - 1 < 
output_length - po) 122 | signal->Range(po, filter_length - 1).AddVec(1.0, temp_pad); 123 | else 124 | signal->Range(po, output_length - po).AddVec(1.0, temp_pad.Range(0, output_length - po)); 125 | } 126 | } 127 | } 128 | } 129 | 130 | -------------------------------------------------------------------------------- /src/feat/signal.h: -------------------------------------------------------------------------------- 1 | // feat/signal.h 2 | 3 | // Copyright 2015 Tom Ko 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 19 | 20 | #ifndef KALDI_FEAT_SIGNAL_H_ 21 | #define KALDI_FEAT_SIGNAL_H_ 22 | 23 | #include "base/kaldi-common.h" 24 | #include "util/common-utils.h" 25 | 26 | namespace kaldi { 27 | 28 | /* 29 | The following three functions are having the same functionality but 30 | different implementations so as the efficiency. After the convolution, 31 | the length of the signal will be extended to (original signal length + 32 | filter length - 1). 33 | */ 34 | 35 | /* 36 | This function implements a simple non-FFT-based convolution of two signals. 37 | It is suggested to use the FFT-based convolution function which is more 38 | efficient. 
39 | */ 40 | void ConvolveSignals(const Vector &filter, Vector *signal); 41 | 42 | /* 43 | This function implements FFT-based convolution of two signals. 44 | However this should be an inefficient version of BlockConvolveSignals() 45 | as it processes the entire signal with a single FFT. 46 | */ 47 | void FFTbasedConvolveSignals(const Vector &filter, Vector *signal); 48 | 49 | /* 50 | This function implements FFT-based block convolution of two signals using 51 | overlap-add method. This is an efficient way to evaluate the discrete 52 | convolution of a long signal with a finite impulse response filter. 53 | */ 54 | void FFTbasedBlockConvolveSignals(const Vector &filter, Vector *signal); 55 | 56 | } // namespace kaldi 57 | 58 | #endif // KALDI_FEAT_SIGNAL_H_ 59 | -------------------------------------------------------------------------------- /src/itf/clusterable-itf.h: -------------------------------------------------------------------------------- 1 | // itf/clusterable-itf.h 2 | 3 | // Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc. 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 
19 | 20 | 21 | #ifndef KALDI_ITF_CLUSTERABLE_ITF_H_ 22 | #define KALDI_ITF_CLUSTERABLE_ITF_H_ 1 23 | 24 | #include 25 | #include "base/kaldi-common.h" 26 | 27 | namespace kaldi { 28 | 29 | 30 | /** \addtogroup clustering_group 31 | @{ 32 | A virtual class for clusterable objects; see \ref clustering for an 33 | explanation if its function. 34 | */ 35 | 36 | 37 | 38 | class Clusterable { 39 | public: 40 | /// \name Functions that must be overridden 41 | /// @{ 42 | 43 | /// Return a copy of this object. 44 | virtual Clusterable *Copy() const = 0; 45 | /// Return the objective function associated with the stats 46 | /// [assuming ML estimation] 47 | virtual BaseFloat Objf() const = 0; 48 | /// Return the normalizer (typically, count) associated with the stats 49 | virtual BaseFloat Normalizer() const = 0; 50 | /// Set stats to empty. 51 | virtual void SetZero() = 0; 52 | /// Add other stats. 53 | virtual void Add(const Clusterable &other) = 0; 54 | /// Subtract other stats. 55 | virtual void Sub(const Clusterable &other) = 0; 56 | /// Scale the stats by a positive number f [not mandatory to supply this]. 57 | virtual void Scale(BaseFloat f) { 58 | KALDI_ERR << "This Clusterable object does not implement Scale()."; 59 | } 60 | 61 | /// Return a string that describes the inherited type. 62 | virtual std::string Type() const = 0; 63 | 64 | /// Write data to stream. 65 | virtual void Write(std::ostream &os, bool binary) const = 0; 66 | 67 | /// Read data from a stream and return the corresponding object (const 68 | /// function; it's a class member because we need access to the vtable 69 | /// so generic code can read derived types). 70 | virtual Clusterable* ReadNew(std::istream &os, bool binary) const = 0; 71 | 72 | virtual ~Clusterable() {} 73 | 74 | /// @} 75 | 76 | /// \name Functions that have default implementations 77 | /// @{ 78 | 79 | // These functions have default implementations (but may be overridden for 80 | // speed). 
Implementatons in tree/clusterable-classes.cc 81 | 82 | /// Return the objective function of the combined object this + other. 83 | virtual BaseFloat ObjfPlus(const Clusterable &other) const; 84 | /// Return the objective function of the subtracted object this - other. 85 | virtual BaseFloat ObjfMinus(const Clusterable &other) const; 86 | /// Return the objective function decrease from merging the two 87 | /// clusters, negated to be a positive number (or zero). 88 | virtual BaseFloat Distance(const Clusterable &other) const; 89 | /// @} 90 | 91 | }; 92 | /// @} end of "ingroup clustering_group" 93 | 94 | } // end namespace kaldi 95 | 96 | #endif // KALDI_ITF_CLUSTERABLE_ITF_H_ 97 | 98 | -------------------------------------------------------------------------------- /src/itf/context-dep-itf.h: -------------------------------------------------------------------------------- 1 | // itf/context-dep-itf.h 2 | 3 | // Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc. 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 
19 | 20 | 21 | #ifndef KALDI_ITF_CONTEXT_DEP_ITF_H_ 22 | #define KALDI_ITF_CONTEXT_DEP_ITF_H_ 23 | #include "base/kaldi-common.h" 24 | 25 | namespace kaldi { 26 | /// @ingroup tree_group 27 | /// @{ 28 | 29 | /// context-dep-itf.h provides a link between 30 | /// the tree-building code in ../tree/, and the FST code in ../fstext/ 31 | /// (particularly, ../fstext/context-dep.h). It is an abstract 32 | /// interface that describes an object that can map from a 33 | /// phone-in-context to a sequence of integer leaf-ids. 34 | class ContextDependencyInterface { 35 | public: 36 | /// ContextWidth() returns the value N (e.g. 3 for triphone models) that says how many phones 37 | /// are considered for computing context. 38 | virtual int ContextWidth() const = 0; 39 | 40 | /// Central position P of the phone context, in 0-based numbering, e.g. P = 1 for typical 41 | /// triphone system. We have to see if we can do without this function. 42 | virtual int CentralPosition() const = 0; 43 | 44 | /// The "new" Compute interface. For typical topologies, 45 | /// pdf_class would be 0, 1, 2. 46 | /// Returns success or failure; outputs the pdf-id. 47 | /// 48 | /// "Compute" is the main function of this interface, that takes a 49 | /// sequence of N phones (and it must be N phones), possibly 50 | /// including epsilons (symbol id zero) but only at positions other 51 | /// than P [these represent unknown phone context due to end or 52 | /// begin of sequence]. We do not insist that Compute must always 53 | /// output (into stateseq) a nonempty sequence of states, but we 54 | /// anticipate that stateseq will always be nonempty at output in 55 | /// typical use cases. "Compute" returns false if expansion somehow 56 | /// failed. Normally the calling code should raise an exception if 57 | /// this happens. We can define a different interface later in 58 | /// order to handle other kinds of information-- the underlying 59 | /// data-structures from event-map.h are very flexible. 
60 | virtual bool Compute(const std::vector &phoneseq, int32 pdf_class, 61 | int32 *pdf_id) const = 0; 62 | 63 | /// GetPdfInfo returns a vector indexed by pdf-id, saying for each pdf which 64 | /// pairs of (phone, pdf-class) it can correspond to. (Usually just one). 65 | /// c.f. hmm/hmm-topology.h for meaning of pdf-class. 66 | /// This is the old, simpler interface of GetPdfInfo(), and that this one can 67 | /// only be called if the HmmTopology object's IsHmm() function call returns 68 | /// true. 69 | virtual void GetPdfInfo( 70 | const std::vector &phones, // list of phones 71 | const std::vector &num_pdf_classes, // indexed by phone, 72 | std::vector > > *pdf_info) 73 | const = 0; 74 | 75 | /// This function outputs information about what possible pdf-ids can 76 | /// be generated for HMM-states; it covers the general case where 77 | /// the self-loop pdf-class may be different from the forward-transition 78 | /// pdf-class, so we are asking not about the set of possible pdf-ids 79 | /// for a given (phone, pdf-class), but the set of possible ordered pairs 80 | /// (forward-transition-pdf, self-loop-pdf) for a given (phone, 81 | /// forward-transition-pdf-class, self-loop-pdf-class). 82 | /// Note: 'phones' is a list of integer ids of phones, and 83 | /// 'pdf-class-pairs', indexed by phone, is a list of pairs 84 | /// (forward-transition-pdf-class, self-loop-pdf-class) that we can have for 85 | /// that phone. 86 | /// The output 'pdf_info' is indexed first by phone and then by the 87 | /// same index that indexes each element of 'pdf_class_pairs', 88 | /// and tells us for each pair in 'pdf_class_pairs', what is the 89 | /// list of possible (forward-transition-pdf-id, self-loop-pdf-id) that 90 | /// we can have. 91 | /// This is less efficient than the other version of GetPdfInfo(). 
92 | virtual void GetPdfInfo( 93 | const std::vector &phones, 94 | const std::vector > > &pdf_class_pairs, 95 | std::vector > > > *pdf_info) 96 | const = 0; 97 | 98 | 99 | /// NumPdfs() returns the number of acoustic pdfs (they are numbered 0.. NumPdfs()-1). 100 | virtual int32 NumPdfs() const = 0; 101 | 102 | virtual ~ContextDependencyInterface() {}; 103 | ContextDependencyInterface() {} 104 | 105 | /// Returns pointer to new object which is copy of current one. 106 | virtual ContextDependencyInterface *Copy() const = 0; 107 | private: 108 | KALDI_DISALLOW_COPY_AND_ASSIGN(ContextDependencyInterface); 109 | }; 110 | /// @} 111 | } // namespace Kaldi 112 | 113 | 114 | #endif 115 | -------------------------------------------------------------------------------- /src/itf/optimizable-itf.h: -------------------------------------------------------------------------------- 1 | // itf/optimizable-itf.h 2 | 3 | // Copyright 2009-2011 Go Vivace Inc.; Microsoft Corporation; Georg Stemmer 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 
19 | #ifndef KALDI_ITF_OPTIMIZABLE_ITF_H_ 20 | #define KALDI_ITF_OPTIMIZABLE_ITF_H_ 21 | 22 | #include "base/kaldi-common.h" 23 | #include "matrix/matrix-lib.h" 24 | 25 | namespace kaldi { 26 | /// @ingroup Interfaces 27 | /// @{ 28 | 29 | /// OptimizableInterface provides 30 | /// a virtual class for optimizable objects. 31 | /// E.g. a class that computed a likelihood function and 32 | /// its gradient using some parameter 33 | /// that has to be optimized on data 34 | /// could inherit from it. 35 | template 36 | class OptimizableInterface { 37 | public: 38 | /// computes gradient for a parameter params and returns it 39 | /// in gradient_out 40 | virtual void ComputeGradient(const Vector ¶ms, 41 | Vector *gradient_out) = 0; 42 | /// computes the function value for a parameter params 43 | /// and returns it 44 | virtual Real ComputeValue(const Vector ¶ms) = 0; 45 | 46 | virtual ~OptimizableInterface() {} 47 | }; 48 | /// @} end of "Interfaces" 49 | } // end namespace kaldi 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /src/itf/options-itf.h: -------------------------------------------------------------------------------- 1 | // itf/options-itf.h 2 | 3 | // Copyright 2013 Tanel Alumae, Tallinn University of Technology 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_ITF_OPTIONS_ITF_H_
#define KALDI_ITF_OPTIONS_ITF_H_ 1
#include "base/kaldi-common.h"

namespace kaldi {

/// Abstract interface through which an options struct registers its options.
/// Each Register() overload takes the option's name, a pointer to the variable
/// that holds its value, and a documentation string; there is one overload
/// per supported value type (bool, int32, uint32, float, double, std::string).
class OptionsItf {
 public:

  virtual void Register(const std::string &name,
                bool *ptr, const std::string &doc) = 0;
  virtual void Register(const std::string &name,
                int32 *ptr, const std::string &doc) = 0;
  virtual void Register(const std::string &name,
                uint32 *ptr, const std::string &doc) = 0;
  virtual void Register(const std::string &name,
                float *ptr, const std::string &doc) = 0;
  virtual void Register(const std::string &name,
                double *ptr, const std::string &doc) = 0;
  virtual void Register(const std::string &name,
                std::string *ptr, const std::string &doc) = 0;

  virtual ~OptionsItf() {}
};

}  // namespace kaldi

#endif  // KALDI_ITF_OPTIONS_ITF_H_



--------------------------------------------------------------------------------
/src/itf/transition-information.h:
--------------------------------------------------------------------------------
// itf/transition-information.h

// Copyright 2021 NVIDIA (author: Daniel Galvez)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 19 | 20 | #ifndef KALDI_ITF_TRANSITION_INFORMATION_H_ 21 | #define KALDI_ITF_TRANSITION_INFORMATION_H_ 22 | 23 | #include 24 | #include 25 | 26 | namespace kaldi { 27 | 28 | /** 29 | * Class that abstracts out TransitionModel's methods originally used 30 | * in the lat/ directory. By instantiating a subclass of this abstract 31 | * class other than TransitionModel, you can use kaldi's lattice tools 32 | * without a dependency on the hmm/ and tree/ directories. For 33 | * example, you can consider creating a subclass that implements these 34 | * via extracting information from Eesen's CTC T.fst object. 35 | * 36 | * TransitionId values must be contiguous, and starting from 1, rather 37 | * than 0, since 0 corresponds to epsilon in OpenFST. 38 | */ 39 | class TransitionInformation { 40 | public: 41 | virtual ~TransitionInformation() {}; 42 | /** 43 | * Returns true if trans_id1 and trans_id2 can correspond to the 44 | * same phone when trans_id1 immediately precedes trans_id2 (i.e., 45 | * trans_id1 occurss at timestep t, and trans_id2 ocurs at timestep 46 | * 2) (possibly with epsilons between trans_id1 and trans_id2) OR 47 | * trans_id1 ocurs before trans_id2, with some number of 48 | * trans_id_{k} values, all of which fulfill 49 | * TransitionIdsEquivalent(trans_id1, trans_id_{k}) 50 | * 51 | * If trans_id1 == trans_id2, it must be the case that 52 | * TransitionIdsEquivalent(trans_id1, trans_id2) == true 53 | */ 54 | virtual bool TransitionIdsEquivalent(int32_t trans_id1, int32_t trans_id2) const = 0; 55 | /** 56 | * Returns true if this trans_id corresponds to the start of a 57 | * phone. 
58 | */ 59 | virtual bool TransitionIdIsStartOfPhone(int32_t trans_id) const = 0; 60 | /** 61 | * Phone is a historical term, and should be understood in a wider 62 | * sense that also includes graphemes, word pieces, etc.: any 63 | * minimal entity in your problem domain which is represented by a 64 | * sequence of transitions with a PDF assigned to each of them by 65 | * the model. In this sense, Token is a better word. Since 66 | * TransitionInformation was added to subsume TransitionModel, we 67 | * did not want to change the call site of every 68 | * TransitionModel::TransitionIdToPhone to 69 | * TransitionInformation::TransitionIdToToken. 70 | */ 71 | virtual int32_t TransitionIdToPhone(int32_t trans_id) const = 0; 72 | /** 73 | * Returns true if the destination of any edge with this trans_id 74 | * as its ilabel is a final state (or if a final state is 75 | * epsilon-reachable from its destination state). 76 | */ 77 | virtual bool IsFinal(int32_t trans_id) const = 0; 78 | /** 79 | * Returns true if *all* of the FST edge labeled by this trans_id 80 | * have the same start and end states. 81 | */ 82 | virtual bool IsSelfLoop(int32_t trans_id) const = 0; 83 | int32_t TransitionIdToPdf(int32_t trans_id) const { 84 | return TransitionIdToPdfArray()[trans_id]; 85 | } 86 | /** 87 | * Returns the contiguous array that backs calls to 88 | * TransitionIdToPdf(). 89 | * 90 | * Ideally, this would return a std::span, but it doesn't because 91 | * kaldi doesn't support C++20 at the time this interface was 92 | * written. 93 | */ 94 | virtual const std::vector& TransitionIdToPdfArray() const = 0; 95 | int32_t NumTransitionIds() const { 96 | return TransitionIdToPdfArray().size() - 1; 97 | } 98 | /** 99 | * Return the number of distinct outputs from 100 | * TransitionIdToPdf(). Another way to look at this is as the number 101 | * of outputs over which your acoustic model does a softmax. 
102 | */ 103 | virtual int32_t NumPdfs() const = 0; 104 | }; 105 | 106 | } // namespace kaldi 107 | 108 | #endif // KALDI_TRANSITION_INFORMATION_H_ 109 | -------------------------------------------------------------------------------- /src/matrix/kaldi-matrix-inl.h: -------------------------------------------------------------------------------- 1 | // matrix/kaldi-matrix-inl.h 2 | 3 | // Copyright 2009-2011 Microsoft Corporation; Haihua Xu 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 
19 | 20 | #ifndef KALDI_MATRIX_KALDI_MATRIX_INL_H_ 21 | #define KALDI_MATRIX_KALDI_MATRIX_INL_H_ 1 22 | 23 | #include "matrix/kaldi-vector.h" 24 | 25 | namespace kaldi { 26 | 27 | /// Empty constructor 28 | template 29 | Matrix::Matrix(): MatrixBase(NULL, 0, 0, 0) { } 30 | 31 | 32 | template<> 33 | template<> 34 | void MatrixBase::AddVecVec(const float alpha, const VectorBase &ra, const VectorBase &rb); 35 | 36 | template<> 37 | template<> 38 | void MatrixBase::AddVecVec(const double alpha, const VectorBase &ra, const VectorBase &rb); 39 | 40 | template 41 | inline std::ostream & operator << (std::ostream & os, const MatrixBase & M) { 42 | M.Write(os, false); 43 | return os; 44 | } 45 | 46 | template 47 | inline std::istream & operator >> (std::istream & is, Matrix & M) { 48 | M.Read(is, false); 49 | return is; 50 | } 51 | 52 | 53 | template 54 | inline std::istream & operator >> (std::istream & is, MatrixBase & M) { 55 | M.Read(is, false); 56 | return is; 57 | } 58 | 59 | }// namespace kaldi 60 | 61 | 62 | #endif // KALDI_MATRIX_KALDI_MATRIX_INL_H_ 63 | 64 | -------------------------------------------------------------------------------- /src/matrix/kaldi-vector-inl.h: -------------------------------------------------------------------------------- 1 | // matrix/kaldi-vector-inl.h 2 | 3 | // Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; 4 | // Haihua Xu 5 | 6 | // See ../../COPYING for clarification regarding multiple authors 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 
10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 16 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 17 | // MERCHANTABLITY OR NON-INFRINGEMENT. 18 | // See the Apache 2 License for the specific language governing permissions and 19 | // limitations under the License. 20 | 21 | // This is an internal header file, included by other library headers. 22 | // You should not attempt to use it directly. 23 | 24 | #ifndef KALDI_MATRIX_KALDI_VECTOR_INL_H_ 25 | #define KALDI_MATRIX_KALDI_VECTOR_INL_H_ 1 26 | 27 | namespace kaldi { 28 | // Stream output of a VectorBase: calls rv.Write(os, false) and returns os. 29 | template 30 | std::ostream & operator << (std::ostream &os, const VectorBase &rv) { 31 | rv.Write(os, false); 32 | return os; 33 | } 34 | // Stream input into a VectorBase: calls rv.Read(is, false) and returns is. 35 | template 36 | std::istream &operator >> (std::istream &is, VectorBase &rv) { 37 | rv.Read(is, false); 38 | return is; 39 | } 40 | // Stream input into a Vector: calls rv.Read(is, false) and returns is. 41 | template 42 | std::istream &operator >> (std::istream &is, Vector &rv) { 43 | rv.Read(is, false); 44 | return is; 45 | } 46 | // Declarations only: AddVec is declared here for a float alpha and for a double alpha; no body appears in this header. 47 | template<> 48 | template<> 49 | void VectorBase::AddVec(const float alpha, const VectorBase &rv); 50 | 51 | template<> 52 | template<> 53 | void VectorBase::AddVec(const double alpha, 54 | const VectorBase &rv); 55 | 56 | } // namespace kaldi 57 | 58 | #endif // KALDI_MATRIX_KALDI_VECTOR_INL_H_ 59 | -------------------------------------------------------------------------------- /src/matrix/matrix-common.h: -------------------------------------------------------------------------------- 1 | // matrix/matrix-common.h 2 | 3 | // Copyright 2009-2011 Microsoft Corporation 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the 
License. 9 | // You may obtain a copy of the License at 10 | 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 19 | #ifndef KALDI_MATRIX_MATRIX_COMMON_H_ 20 | #define KALDI_MATRIX_MATRIX_COMMON_H_ 21 | 22 | // This file contains some #includes, forward declarations 23 | // and typedefs that are needed by all the main header 24 | // files in this directory. 25 | 26 | #include "base/kaldi-common.h" 27 | 28 | namespace kaldi { 29 | // These enum values equal the CblasTrans and CblasNoTrans constants from the CBLAS library. 30 | // We write them as literals because we don't want to include matrix/kaldi-blas.h here: 31 | // it puts many symbols into global scope (like "real") via the header f2c.h 32 | typedef enum { 33 | kTrans = 112, // = CblasTrans 34 | kNoTrans = 111 // = CblasNoTrans 35 | } MatrixTransposeType; 36 | 37 | typedef enum { 38 | kSetZero, 39 | kUndefined, 40 | kCopyData 41 | } MatrixResizeType; 42 | 43 | 44 | typedef enum { 45 | kDefaultStride, 46 | kStrideEqualNumCols, 47 | } MatrixStrideType; 48 | 49 | typedef enum { 50 | kTakeLower, 51 | kTakeUpper, 52 | kTakeMean, 53 | kTakeMeanAndCheck 54 | } SpCopyType; 55 | // Forward declarations of the matrix and vector class templates of this directory. 56 | template class VectorBase; 57 | template class Vector; 58 | template class SubVector; 59 | template class MatrixBase; 60 | template class SubMatrix; 61 | template class Matrix; 62 | template class SpMatrix; 63 | template class TpMatrix; 64 | template class PackedMatrix; 65 | template class SparseMatrix; 66 | 67 | // These are classes that won't be defined in this 68 | // directory; they're mostly needed for friend declarations. 
69 | template class CuMatrixBase; 70 | template class CuSubMatrix; 71 | template class CuMatrix; 72 | template class CuVectorBase; 73 | template class CuSubVector; 74 | template class CuVector; 75 | template class CuPackedMatrix; 76 | template class CuSpMatrix; 77 | template class CuTpMatrix; 78 | template class CuSparseMatrix; 79 | 80 | class CompressedMatrix; 81 | class GeneralMatrix; 82 | 83 | /// This class provides a way for switching between double and float types. 84 | template class OtherReal { }; // useful in reading+writing routines 85 | // to switch double and float. 86 | /// A specialized class for switching from float to double. 87 | template<> class OtherReal { 88 | public: 89 | typedef double Real; 90 | }; 91 | /// A specialized class for switching from double to float. 92 | template<> class OtherReal { 93 | public: 94 | typedef float Real; 95 | }; 96 | 97 | // Index types used by the matrix classes: 32-bit signed, plus an unsigned 32-bit counterpart. 98 | typedef int32 MatrixIndexT; 99 | typedef int32 SignedMatrixIndexT; 100 | typedef uint32 UnsignedMatrixIndexT; 101 | 102 | // If you want to use size_t for the index type, do as follows instead: 103 | //typedef size_t MatrixIndexT; 104 | //typedef ssize_t SignedMatrixIndexT; 105 | //typedef size_t UnsignedMatrixIndexT; 106 | 107 | } // namespace kaldi 108 | 109 | 110 | 111 | #endif // KALDI_MATRIX_MATRIX_COMMON_H_ 112 | -------------------------------------------------------------------------------- /src/matrix/matrix-functions-inl.h: -------------------------------------------------------------------------------- 1 | // matrix/matrix-functions-inl.h 2 | 3 | // Copyright 2009-2011 Microsoft Corporation 4 | // 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 
9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | // (*) incorporates, with permission, FFT code from the book 21 | // "Signal Processing with Lapped Transforms", Artech, 1992 (presumably H. S. Malvar's; the author's name is missing from this header). 22 | 23 | 24 | 25 | #ifndef KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_ 26 | #define KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_ 27 | 28 | namespace kaldi { 29 | 30 | //! ComplexMul implements, inline, the complex multiplication b *= a. 31 | template inline void ComplexMul(const Real &a_re, const Real &a_im, 32 | Real *b_re, Real *b_im) { 33 | Real tmp_re = (*b_re * a_re) - (*b_im * a_im); // new real part is kept aside: the old *b_re is still read on the next line 34 | *b_im = *b_re * a_im + *b_im * a_re; 35 | *b_re = tmp_re; 36 | } 37 | //! ComplexAddProduct implements, inline, the complex operation c += a * b. 38 | template inline void ComplexAddProduct(const Real &a_re, const Real &a_im, 39 | const Real &b_re, const Real &b_im, 40 | Real *c_re, Real *c_im) { 41 | *c_re += b_re*a_re - b_im*a_im; 42 | *c_im += b_re*a_im + b_im*a_re; 43 | } 44 | 45 | //! ComplexImExp sets a = exp(i*x), i.e. *a_re = cos(x) and *a_im = sin(x). 46 | template inline void ComplexImExp(Real x, Real *a_re, Real *a_im) { 47 | *a_re = std::cos(x); 48 | *a_im = std::sin(x); 49 | } 50 | 51 | 52 | } // end namespace kaldi 53 | 54 | 55 | #endif // KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_ 56 | 57 | -------------------------------------------------------------------------------- /src/matrix/matrix-lib.h: -------------------------------------------------------------------------------- 1 | // matrix/matrix-lib.h 2 | 3 | // Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Haihua Xu 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed 
under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 19 | 20 | // Include everything from this directory. 21 | // These files include other stuff that we need. 22 | #ifndef KALDI_MATRIX_MATRIX_LIB_H_ 23 | #define KALDI_MATRIX_MATRIX_LIB_H_ 24 | 25 | #include "base/kaldi-common.h" 26 | #include "matrix/kaldi-vector.h" 27 | #include "matrix/kaldi-matrix.h" 28 | #include "matrix/sp-matrix.h" 29 | #include "matrix/tp-matrix.h" 30 | #include "matrix/matrix-functions.h" 31 | #include "matrix/srfft.h" 32 | #include "matrix/compressed-matrix.h" 33 | #include "matrix/sparse-matrix.h" 34 | #include "matrix/optimization.h" 35 | #include "matrix/numpy-array.h" 36 | 37 | #endif 38 | 39 | -------------------------------------------------------------------------------- /src/matrix/numpy-array.h: -------------------------------------------------------------------------------- 1 | // matrix/numpy-array.h 2 | 3 | // Copyright 2020 Mobvoi AI Lab, Beijing, China (author: Fangjun Kuang) 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 
9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 19 | 20 | #ifndef KALDI_MATRIX_NUMPY_ARRAY_H_ 21 | #define KALDI_MATRIX_NUMPY_ARRAY_H_ 1 22 | 23 | #include 24 | #include 25 | 26 | #include "matrix/kaldi-matrix.h" 27 | #include "matrix/kaldi-vector.h" 28 | 29 | namespace kaldi { 30 | 31 | /// \addtogroup matrix_group 32 | /// @{ 33 | 34 | /** NumpyArray for reading and writing *.npy files. 35 | * 36 | * This class implements the format described at 37 | * https://github.com/numpy/numpy/blob/master/numpy/lib/format.py 38 | */ 39 | template 40 | class NumpyArray { 41 | public: 42 | NumpyArray() = default; 43 | NumpyArray(const NumpyArray&) = delete; 44 | NumpyArray& operator=(const NumpyArray&) = delete; 45 | // data_ is owned by this object and released with delete[]; copy construction and copy assignment are deleted above. 46 | ~NumpyArray() { delete[] data_; } 47 | 48 | void Read(std::istream& in, bool binary); 49 | 50 | void Write(std::ostream& out, bool binary) const; 51 | // num_elements_ is stored as uint32_t and returned here as int. 52 | int NumElements() const { return num_elements_; } 53 | const std::vector& Shape() const { return shape_; } 54 | 55 | const Real* Data() const { return data_; } 56 | Real* Data() { return data_; } 57 | // Iterator-style access over the flat buffer [data_, data_ + num_elements_). 58 | Real* begin() { return data_; } 59 | Real* end() { return data_ + num_elements_; } 60 | 61 | const Real* begin() const { return data_; } 62 | const Real* end() const { return data_ + num_elements_; } 63 | // Conversions from and to Kaldi matrix/vector types; only declared in this header. 64 | NumpyArray(const MatrixBase& m); 65 | NumpyArray(const VectorBase& v); 66 | operator SubVector(); 67 | operator SubMatrix(); 68 | // Flat indexing into data_; no bounds check is performed here. 69 | Real operator[](int i) const { return data_[i]; } 70 | Real& operator[](int i) { return data_[i]; } 71 | 
72 | private: 73 | // for version 1.0 74 | static uint32_t ReadHeaderLen10(std::istream& in); 75 | 76 | // for version 2.0 and 3.0 77 | static uint32_t ReadHeaderLen20And30(std::istream& in); 78 | 79 | // return true if the data is saved in little endian 80 | // return false if the data is saved in big endian 81 | bool ParseHeader(const std::string& header); 82 | 83 | private: 84 | std::vector shape_; // returned by Shape() 85 | Real* data_ = nullptr; // owned buffer, freed with delete[] in the destructor 86 | uint32_t num_elements_ = 0; // element count; end() is data_ + num_elements_ 87 | }; 88 | 89 | /// @} end of \addtogroup matrix_group 90 | 91 | } // namespace kaldi 92 | 93 | #endif // KALDI_MATRIX_NUMPY_ARRAY_H_ 94 | -------------------------------------------------------------------------------- /src/matrix/sp-matrix-inl.h: -------------------------------------------------------------------------------- 1 | // matrix/sp-matrix-inl.h 2 | 3 | // Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Haihua Xu 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 19 | 20 | #ifndef KALDI_MATRIX_SP_MATRIX_INL_H_ 21 | #define KALDI_MATRIX_SP_MATRIX_INL_H_ 22 | 23 | #include "matrix/tp-matrix.h" 24 | 25 | namespace kaldi { 26 | 27 | // All the lines in this file seem to be declaring template specializations. 
28 | // These tell the compiler that we'll implement the templated function 29 | // separately for the different template arguments (float, double). 30 | 31 | template<> 32 | double SolveQuadraticProblem(const SpMatrix &H, const VectorBase &g, 33 | const SolverOptions &opts, VectorBase *x); 34 | 35 | template<> 36 | float SolveQuadraticProblem(const SpMatrix &H, const VectorBase &g, 37 | const SolverOptions &opts, VectorBase *x); 38 | 39 | } // namespace kaldi 40 | 41 | 42 | #endif // KALDI_MATRIX_SP_MATRIX_INL_H_ 43 | -------------------------------------------------------------------------------- /src/matrix/srfft.h: -------------------------------------------------------------------------------- 1 | // matrix/srfft.h 2 | 3 | // Copyright 2009-2011 Microsoft Corporation; Go Vivace Inc. 4 | // 2014 Daniel Povey 5 | // 6 | // See ../../COPYING for clarification regarding multiple authors 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 16 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 17 | // MERCHANTABLITY OR NON-INFRINGEMENT. 18 | // See the Apache 2 License for the specific language governing permissions and 19 | // limitations under the License. 20 | // 21 | // This file includes a modified version of code originally published in Malvar, 22 | // H., "Signal processing with lapped transforms, " Artech House, Inc., 1992. The 23 | // current copyright holder of the original code, Henrique S. Malvar, has given 24 | // his permission for the release of this modified version under the Apache 25 | // License v2.0. 
26 | 27 | #ifndef KALDI_MATRIX_SRFFT_H_ 28 | #define KALDI_MATRIX_SRFFT_H_ 29 | 30 | #include "matrix/kaldi-vector.h" 31 | #include "matrix/kaldi-matrix.h" 32 | 33 | namespace kaldi { 34 | 35 | /// @addtogroup matrix_funcs_misc 36 | /// @{ 37 | 38 | 39 | // This class is based on code by Henrique (Rico) Malvar, from his book 40 | // "Signal Processing with Lapped Transforms" (1992). Copied with 41 | // permission, optimized by Go Vivace Inc., and converted into C++ by 42 | // Microsoft Corporation. 43 | // This is a more efficient way of doing the complex FFT than ComplexFft 44 | // (declared in matrix-functions.h), but it only works for powers of 2. 45 | // Note: in multi-threaded code, you would need to have one of these objects per 46 | // thread, because multiple calls to Compute in parallel would not work. 47 | template 48 | class SplitRadixComplexFft { 49 | public: 50 | typedef MatrixIndexT Integer; 51 | 52 | // N is the number of complex points (must be a power of two, or this 53 | // will crash). Note that the constructor does some work so it's best to 54 | // initialize the object once and do the computation many times. 55 | SplitRadixComplexFft(Integer N); 56 | 57 | // Copy constructor 58 | SplitRadixComplexFft(const SplitRadixComplexFft &other); 59 | 60 | // Does the FFT computation, given pointers to the real and 61 | // imaginary parts. If "forward", do the forward FFT; else 62 | // do the inverse FFT (without the 1/N factor). 63 | // xr and xi are pointers to zero-based arrays of size N, 64 | // containing the real and imaginary parts 65 | // respectively. 66 | void Compute(Real *xr, Real *xi, bool forward) const; 67 | 68 | // This version of Compute takes a single array of size N*2, 69 | // containing [ r0 im0 r1 im1 ... ]. Otherwise its behavior is the 70 | // same as the version above. 
71 | void Compute(Real *x, bool forward); 72 | 73 | 74 | // This version of Compute is const; it operates on an array of size N*2 75 | // containing [ r0 im0 r1 im1 ... ], but it uses the argument "temp_buffer" as 76 | // temporary storage instead of a class-member variable. It will allocate it if 77 | // needed. 78 | void Compute(Real *x, bool forward, std::vector *temp_buffer) const; 79 | 80 | ~SplitRadixComplexFft(); 81 | 82 | protected: 83 | // temp_buffer_ is allocated only if someone calls Compute with only one Real* 84 | // argument and we need a temporary buffer while creating interleaved data. 85 | std::vector temp_buffer_; 86 | private: 87 | void ComputeTables(); 88 | void ComputeRecursive(Real *xr, Real *xi, Integer logn) const; 89 | void BitReversePermute(Real *x, Integer logn) const; 90 | 91 | Integer N_; 92 | Integer logn_; // log(N); presumably base 2 since N must be a power of two -- confirm in the implementation 93 | 94 | Integer *brseed_; 95 | // brseed_ is Evans' seed table (Ref: D. M. W. 96 | // Evans, "An improved digit-reversal permutation algorithm ...", 97 | // IEEE Trans. ASSP, Aug. 1987, pp. 1120-1125). 98 | Real **tab_; // Tables of butterfly coefficients. 99 | 100 | // Disallow assignment. 101 | SplitRadixComplexFft &operator =(const SplitRadixComplexFft &other); 102 | }; 103 | // Real-input FFT: privately inherits from the complex FFT, which it constructs with N/2 points. 104 | template 105 | class SplitRadixRealFft: private SplitRadixComplexFft { 106 | public: 107 | SplitRadixRealFft(MatrixIndexT N): // will fail unless N>=4 and N is a power of 2. 108 | SplitRadixComplexFft (N/2), N_(N) { } 109 | 110 | // Copy constructor 111 | SplitRadixRealFft(const SplitRadixRealFft &other): 112 | SplitRadixComplexFft(other), N_(other.N_) { } 113 | 114 | /// If forward == true, this function transforms from a sequence of N real points to its complex Fourier 115 | /// transform; otherwise it goes in the reverse direction. If you call it 116 | /// in the forward and then reverse direction and multiply by 1.0/N, you 117 | /// will get back the original data. 
118 | /// The interpretation of the complex-FFT data is as follows: the array 119 | /// is a sequence of complex numbers C_n of length N/2 with (real, im) format, 120 | /// i.e. [real0, real_{N/2}, real1, im1, real2, im2, real3, im3, ...]. 121 | void Compute(Real *x, bool forward); 122 | 123 | 124 | /// This is as the other Compute() function, but it is a const version that 125 | /// uses a user-supplied buffer. 126 | void Compute(Real *x, bool forward, std::vector *temp_buffer) const; 127 | 128 | private: 129 | // Disallow assignment. 130 | SplitRadixRealFft &operator =(const SplitRadixRealFft &other); 131 | int N_; // the N given to the constructor (the complex base class is built with N/2) 132 | }; 133 | 134 | 135 | /// @} end of "addtogroup matrix_funcs_misc" 136 | 137 | } // end namespace kaldi 138 | 139 | 140 | #endif // KALDI_MATRIX_SRFFT_H_ 141 | 142 | -------------------------------------------------------------------------------- /src/matrix/tp-matrix.cc: -------------------------------------------------------------------------------- 1 | // matrix/tp-matrix.cc 2 | 3 | // Copyright 2009-2011 Ondrej Glembek; Lukas Burget; Microsoft Corporation 4 | // Saarland University; Yanmin Qian; Haihua Xu 5 | 6 | // See ../../COPYING for clarification regarding multiple authors 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 16 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 17 | // MERCHANTABLITY OR NON-INFRINGEMENT. 18 | // See the Apache 2 License for the specific language governing permissions and 19 | // limitations under the License. 
20 | 21 | #include "matrix/tp-matrix.h" 22 | #include "matrix/sp-matrix.h" 23 | #include "matrix/kaldi-matrix.h" 24 | #include "matrix/cblas-wrappers.h" 25 | 26 | 27 | namespace kaldi { 28 | 29 | #ifndef HAVE_ATLAS 30 | template 31 | void TpMatrix::Invert() { 32 | // these are CLAPACK types (this version of Invert is compiled when HAVE_ATLAS is not defined) 33 | KaldiBlasInt result; 34 | KaldiBlasInt rows = static_cast(this->num_rows_); 35 | 36 | // clapack call; it works in place on this->data_ 37 | // NOTE: Even though "U" is for upper, lapack assumes column-wise storage 38 | // of the data. We have a row-wise storage, therefore, we need to "invert" 39 | clapack_Xtptri(&rows, this->data_, &result); 40 | // result < 0 is reported as a failed call, result > 0 as a singular matrix. 41 | if (result < 0) { 42 | KALDI_ERR << "Call to CLAPACK stptri_ function failed"; 43 | } else if (result > 0) { 44 | KALDI_ERR << "Matrix is singular"; 45 | } 46 | } 47 | #else 48 | template 49 | void TpMatrix::Invert() { 50 | // ATLAS doesn't implement triangular matrix inversion in packed 51 | // format, so we temporarily put in non-packed format. 52 | Matrix tmp(*this); 53 | int rows = static_cast(this->num_rows_); 54 | 55 | // ATLAS call. It's really row-major ordering and a lower triangular matrix, 56 | // but there is some weirdness with Fortran-style indexing that we need to 57 | // take account of, so everything gets swapped. 58 | int result = clapack_Xtrtri( rows, tmp.Data(), tmp.Stride()); 59 | // Let's hope ATLAS has the same return value conventions as clapack. 60 | // I couldn't find any documentation online. 
61 | if (result < 0) { 62 | KALDI_ERR << "Call to ATLAS strtri function failed"; 63 | } else if (result > 0) { 64 | KALDI_ERR << "Matrix is singular"; 65 | } 66 | (*this).CopyFromMat(tmp); 67 | } 68 | #endif 69 | 70 | template 71 | Real TpMatrix::Determinant() { 72 | double det = 1.0; 73 | for (MatrixIndexT i = 0; iNumRows(); i++) { 74 | det *= (*this)(i, i); 75 | } 76 | return static_cast(det); 77 | } 78 | 79 | 80 | template 81 | void TpMatrix::Swap(TpMatrix *other) { 82 | std::swap(this->data_, other->data_); 83 | std::swap(this->num_rows_, other->num_rows_); 84 | } 85 | 86 | 87 | template 88 | void TpMatrix::Cholesky(const SpMatrix &orig) { 89 | KALDI_ASSERT(orig.NumRows() == this->NumRows()); 90 | MatrixIndexT n = this->NumRows(); 91 | this->SetZero(); 92 | Real *data = this->data_, *jdata = data; // start of j'th row of matrix. 93 | const Real *orig_jdata = orig.Data(); // start of j'th row of the input matrix. 94 | for (MatrixIndexT j = 0; j < n; j++, jdata += j, orig_jdata += j) { 95 | Real *kdata = data; // start of k'th row of matrix. 96 | Real d(0.0); 97 | for (MatrixIndexT k = 0; k < j; k++, kdata += k) { 98 | Real s = cblas_Xdot(k, kdata, 1, jdata, 1); 99 | // (*this)(j, k) = s = (orig(j, k) - s)/(*this)(k, k); 100 | jdata[k] = s = (orig_jdata[k] - s)/kdata[k]; 101 | d = d + s*s; 102 | } 103 | // d = orig(j, j) - d; 104 | d = orig_jdata[j] - d; 105 | // d == 0 is accepted; only a negative d is reported as a failure. 106 | if (d >= 0.0) { 107 | // (*this)(j, j) = std::sqrt(d); 108 | jdata[j] = std::sqrt(d); 109 | } else { 110 | KALDI_ERR << "Cholesky decomposition failed. 
Maybe matrix " 111 | "is not positive definite."; 112 | } 113 | } 114 | } 115 | 116 | template 117 | void TpMatrix::CopyFromMat(const MatrixBase &M, 118 | MatrixTransposeType Trans) { 119 | if (Trans == kNoTrans) { 120 | KALDI_ASSERT(this->NumRows() == M.NumRows() && M.NumRows() == M.NumCols()); 121 | MatrixIndexT D = this->NumRows(); 122 | const Real *in_i = M.Data(); 123 | MatrixIndexT stride = M.Stride(); 124 | Real *out_i = this->data_; 125 | for (MatrixIndexT i = 0; i < D; i++, in_i += stride, out_i += i) 126 | for (MatrixIndexT j = 0; j <= i; j++) 127 | out_i[j] = in_i[j]; 128 | } else { 129 | KALDI_ASSERT(this->NumRows() == M.NumRows() && M.NumRows() == M.NumCols()); 130 | MatrixIndexT D = this->NumRows(); 131 | const Real *in_i = M.Data(); 132 | MatrixIndexT stride = M.Stride(); 133 | Real *out_i = this->data_; 134 | for (MatrixIndexT i = 0; i < D; i++, in_i++, out_i += i) { 135 | for (MatrixIndexT j = 0; j <= i; j++) 136 | out_i[j] = in_i[stride*j]; 137 | } 138 | } 139 | } 140 | 141 | 142 | template class TpMatrix; 143 | template class TpMatrix; 144 | 145 | } // namespace kaldi 146 | -------------------------------------------------------------------------------- /src/matrix/tp-matrix.h: -------------------------------------------------------------------------------- 1 | // matrix/tp-matrix.h 2 | 3 | // Copyright 2009-2011 Ondrej Glembek; Lukas Burget; Microsoft Corporation; 4 | // Saarland University; Yanmin Qian; Haihua Xu 5 | // 2013 Johns Hopkins Universith (author: Daniel Povey) 6 | 7 | 8 | // See ../../COPYING for clarification regarding multiple authors 9 | // 10 | // Licensed under the Apache License, Version 2.0 (the "License"); 11 | // you may not use this file except in compliance with the License. 
12 | // You may obtain a copy of the License at 13 | 14 | // http://www.apache.org/licenses/LICENSE-2.0 15 | 16 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 18 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 19 | // MERCHANTABLITY OR NON-INFRINGEMENT. 20 | // See the Apache 2 License for the specific language governing permissions and 21 | // limitations under the License. 22 | #ifndef KALDI_MATRIX_TP_MATRIX_H_ 23 | #define KALDI_MATRIX_TP_MATRIX_H_ 24 | 25 | 26 | #include "matrix/packed-matrix.h" 27 | 28 | namespace kaldi { 29 | /// \addtogroup matrix_group 30 | /// @{ 31 | 32 | template class TpMatrix; 33 | 34 | /// @brief Packed triangular matrix class: only entries with c <= r are stored, at data_[r*(r+1)/2 + c]. 35 | template 36 | class TpMatrix : public PackedMatrix { 37 | friend class CuTpMatrix; 38 | friend class CuTpMatrix; 39 | public: 40 | TpMatrix() : PackedMatrix() {} 41 | explicit TpMatrix(MatrixIndexT r, MatrixResizeType resize_type = kSetZero) 42 | : PackedMatrix(r, resize_type) {} 43 | TpMatrix(const TpMatrix& orig) : PackedMatrix(orig) {} 44 | 45 | /// Copy constructor from CUDA TpMatrix 46 | /// This is defined in ../cudamatrix/cu-tp-matrix.cc 47 | explicit TpMatrix(const CuTpMatrix &cu); 48 | 49 | 50 | template explicit TpMatrix(const TpMatrix& orig) 51 | : PackedMatrix(orig) {} 52 | /// Read-only element access; entries above the diagonal (c > r) read as 0 after a bounds check on c. 53 | Real operator() (MatrixIndexT r, MatrixIndexT c) const { 54 | if (static_cast(c) > 55 | static_cast(r)) { 56 | KALDI_ASSERT(static_cast(c) < 57 | static_cast(this->num_rows_)); 58 | return 0; 59 | } 60 | KALDI_ASSERT(static_cast(r) < 61 | static_cast(this->num_rows_)); 62 | // c<=r now so don't have to check c. 
63 | return *(this->data_ + (r*(r+1)) / 2 + c); 64 | // Duplicating code from PackedMatrix.h 65 | } 66 | /// Writable element access; asserts r < num_rows_ and c <= r, so only the stored lower triangle can be addressed. 67 | Real &operator() (MatrixIndexT r, MatrixIndexT c) { 68 | KALDI_ASSERT(static_cast(r) < 69 | static_cast(this->num_rows_)); 70 | KALDI_ASSERT(static_cast(c) <= 71 | static_cast(r) && 72 | "you cannot access the upper triangle of TpMatrix using " 73 | "a non-const matrix object."); 74 | return *(this->data_ + (r*(r+1)) / 2 + c); 75 | // Duplicating code from PackedMatrix.h 76 | } 77 | // Note: Cholesky may throw KaldiFatalError. 78 | void Cholesky(const SpMatrix& orig); 79 | 80 | void Invert(); 81 | 82 | // Inverts in double precision: copies *this into a temporary, inverts that, and copies the result back. 83 | void InvertDouble() { 84 | TpMatrix dmat(*this); 85 | dmat.Invert(); 86 | (*this).CopyFromTp(dmat); 87 | } 88 | 89 | /// Shallow swap 90 | void Swap(TpMatrix *other); 91 | 92 | /// Returns the determinant of the matrix (product of diagonals) 93 | Real Determinant(); 94 | 95 | /// CopyFromMat copies the lower triangle of M into *this 96 | /// (or the upper triangle, if Trans == kTrans). 97 | void CopyFromMat(const MatrixBase &M, 98 | MatrixTransposeType Trans = kNoTrans); 99 | 100 | /// This is implemented in ../cudamatrix/cu-tp-matrix.cc 101 | void CopyFromMat(const CuTpMatrix &other); 102 | 103 | /// CopyFromTp copies another triangular matrix into this one. 104 | void CopyFromTp(const TpMatrix &other) { 105 | PackedMatrix::CopyFromPacked(other); 106 | } 107 | 108 | template void CopyFromTp(const TpMatrix &other) { 109 | PackedMatrix::CopyFromPacked(other); 110 | } 111 | 112 | /// AddTp does *this += alpha * M. 
113 | void AddTp(const Real alpha, const TpMatrix &M) { 114 | this->AddPacked(alpha, M); 115 | } 116 | 117 | TpMatrix& operator=(const TpMatrix &other) { 118 | PackedMatrix::operator=(other); 119 | return *this; 120 | } 121 | 122 | using PackedMatrix::Scale; 123 | 124 | void Resize(MatrixIndexT nRows, MatrixResizeType resize_type = kSetZero) { 125 | PackedMatrix::Resize(nRows, resize_type); 126 | } 127 | }; 128 | 129 | /// @} end of "addtogroup matrix_group". 130 | 131 | } // namespace kaldi 132 | 133 | 134 | #endif // KALDI_MATRIX_TP_MATRIX_H_ 135 | -------------------------------------------------------------------------------- /src/pydrobert/kaldi/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Sean Robertson 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Python access to kaldi""" 16 | 17 | __author__ = "Sean Robertson" 18 | __email__ = "sdrobert@cs.toronto.edu" 19 | __license__ = "Apache 2.0" 20 | __copyright__ = "Copyright 2021 Sean Robertson" 21 | 22 | __all__ = [ 23 | "eval", 24 | "feat", 25 | "io", 26 | "logging", 27 | ] 28 | 29 | try: 30 | from ._version import version as __version__ # type: ignore 31 | except ImportError: 32 | __version__ = "inplace" 33 | -------------------------------------------------------------------------------- /src/pydrobert/kaldi/command_line.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Sean Robertson 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Command line entry points for pydrobert.kaldi 16 | 17 | This submodule simply collects the command line entry points from other 18 | submodules 19 | """ 20 | 21 | import pydrobert.kaldi.eval.command_line as _eval_command_line 22 | import pydrobert.kaldi.feat.command_line as _feat_command_line 23 | import pydrobert.kaldi.io.command_line as _io_command_line 24 | 25 | from pydrobert.kaldi.eval.command_line import * 26 | from pydrobert.kaldi.feat.command_line import * 27 | from pydrobert.kaldi.io.command_line import * 28 | 29 | 30 | __all__ = ( 31 | _eval_command_line.__all__ + _feat_command_line.__all__ + _io_command_line.__all__ 32 | ) 33 | -------------------------------------------------------------------------------- /src/pydrobert/kaldi/eval/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Sean Robertson 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Tools related to evaluating models""" 16 | 17 | __all__ = ["util"] 18 | -------------------------------------------------------------------------------- /src/pydrobert/kaldi/eval/util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Sean Robertson 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
def edit_distance(
    ref: Sequence,
    hyp: Sequence,
    insertion_cost: int = 1,
    deletion_cost: int = 1,
    substitution_cost: int = 1,
    return_tables: bool = False,
) -> Union[int, Tuple[int, dict, dict, dict, dict]]:
    """Levenshtein (edit) distance

    Parameters
    ----------
    ref
        Sequence of tokens of reference text (source)
    hyp
        Sequence of tokens of hypothesis text (target)
    insertion_cost
        Penalty for `hyp` inserting a token to ref
    deletion_cost
        Penalty for `hyp` deleting a token from ref
    substitution_cost
        Penalty for `hyp` swapping tokens in ref
    return_tables
        See below

    Returns
    -------
    distances : int or (int, dict, dict, dict, dict)
        Returns the edit distance of `hyp` from `ref`. If `return_tables` is
        `True`, this returns a tuple of the edit distance, a dict of insertion
        counts per inserted `hyp` token, a dict of deletion counts per `ref`
        token, a dict of substitution counts per `ref` token, and a dict of
        counts of `ref` tokens. Any tokens with count 0 are excluded from the
        dictionaries.

    Notes
    -----
    The distance table is integer-typed, so non-integer costs are truncated
    when they are stored in it.

    When several operations are equally cheap at a cell, the backtrack prefers
    insertions to deletions to substitutions.
    """
    num_ref, num_hyp = len(ref), len(hyp)
    # we keep track of the whole matrix in case we need to backtrack (for
    # `return_tables`). Should be okay for WER/PER, since the number of tokens
    # per vector will be on the order of tens
    distances = np.zeros((num_ref + 1, num_hyp + 1), dtype=int)
    distances[0, :] = tuple(insertion_cost * x for x in range(num_hyp + 1))
    distances[:, 0] = tuple(deletion_cost * x for x in range(num_ref + 1))
    for hyp_idx in range(1, num_hyp + 1):
        hyp_token = hyp[hyp_idx - 1]
        for ref_idx in range(1, num_ref + 1):
            ref_token = ref[ref_idx - 1]
            sub_cost = 0 if hyp_token == ref_token else substitution_cost
            distances[ref_idx, hyp_idx] = min(
                distances[ref_idx - 1, hyp_idx] + deletion_cost,
                distances[ref_idx, hyp_idx - 1] + insertion_cost,
                distances[ref_idx - 1, hyp_idx - 1] + sub_cost,
            )
    # hand back a plain Python int, as the signature promises, rather than a
    # numpy scalar
    distance = int(distances[-1, -1])
    if not return_tables:
        return distance
    # backtrack to get a count of insertions, deletions, and subs
    inserts, deletes, subs, totals = {}, {}, {}, {}
    for token in ref:
        totals[token] = totals.get(token, 0) + 1
    ref_idx, hyp_idx = num_ref, num_hyp
    while ref_idx or hyp_idx:
        if not ref_idx:
            # only hypothesis tokens remain: they must all be insertions
            hyp_idx -= 1
            inserts[hyp[hyp_idx]] = inserts.get(hyp[hyp_idx], 0) + 1
        elif not hyp_idx:
            # only reference tokens remain: they must all be deletions
            ref_idx -= 1
            deletes[ref[ref_idx]] = deletes.get(ref[ref_idx], 0) + 1
        elif ref[ref_idx - 1] == hyp[hyp_idx - 1]:
            # matching tokens cost nothing; step diagonally
            hyp_idx -= 1
            ref_idx -= 1
        else:
            # Re-evaluate the same three candidates the forward pass
            # minimised over. Comparing the neighbouring cells *without* their
            # operation costs (as was done previously) can select an operation
            # that is not on any optimal path whenever the costs differ.
            via_insert = distances[ref_idx, hyp_idx - 1] + insertion_cost
            via_delete = distances[ref_idx - 1, hyp_idx] + deletion_cost
            via_sub = distances[ref_idx - 1, hyp_idx - 1] + substitution_cost
            best = min(via_insert, via_delete, via_sub)
            if via_insert == best:
                hyp_idx -= 1
                inserts[hyp[hyp_idx]] = inserts.get(hyp[hyp_idx], 0) + 1
            elif via_delete == best:
                ref_idx -= 1
                deletes[ref[ref_idx]] = deletes.get(ref[ref_idx], 0) + 1
            else:
                hyp_idx -= 1
                ref_idx -= 1
                subs[ref[ref_idx]] = subs.get(ref[ref_idx], 0) + 1
    return distance, inserts, deletes, subs, totals
namespace kaldi {

// Declarations of the cepstral mean and variance normalization (CMVN)
// helpers: initializing and accumulating statistics, applying them to
// features (forwards or in reverse), and neutralizing selected dimensions.
//
// NOTE(review): in this copy the template arguments of Matrix, MatrixBase,
// VectorBase and std::vector are not visible (they look like they were lost
// in extraction). Confirm the element types against the upstream header
// before compiling.

/// This function initializes the matrix to dimension 2 by (dim+1);
/// 1st "dim" elements of 1st row are mean stats, 1st "dim" elements
/// of 2nd row are var stats, last element of 1st row is count,
/// last element of 2nd row is zero.
void InitCmvnStats(int32 dim, Matrix *stats);

/// Accumulation from a single frame (weighted).
void AccCmvnStats(const VectorBase &feat,
                  BaseFloat weight,
                  MatrixBase *stats);

/// Accumulation from a feature file (possibly weighted-- useful in excluding silence).
/// "weights" may be NULL, per the parameter comment below.
void AccCmvnStats(const MatrixBase &feats,
                  const VectorBase *weights,  // or NULL
                  MatrixBase *stats);

/// Apply cepstral mean and variance normalization to a matrix of features.
/// If norm_vars == true, expects stats to be of dimension 2 by (dim+1), but
/// if norm_vars == false, will accept stats of dimension 1 by (dim+1); these
/// are produced by the balanced-cmvn code when it computes an offset and
/// represents it as "fake stats".
void ApplyCmvn(const MatrixBase &stats,
               bool norm_vars,
               MatrixBase *feats);

/// This is as ApplyCmvn, but does so in the reverse sense, i.e. applies a transform
/// that would take zero-mean, unit-variance input and turn it into output with the
/// stats of "stats".  This can be useful if you trained without CMVN but later want
/// to correct a mismatch, so you would first apply CMVN and then do the "reverse"
/// CMVN with the summed stats of your training data.
void ApplyCmvnReverse(const MatrixBase &stats,
                      bool norm_vars,
                      MatrixBase *feats);


/// Modify the stats so that for some dimensions (specified in "dims"), we
/// replace them with "fake" stats that have zero mean and unit variance; this
/// is done to disable CMVN for those dimensions.
void FakeStatsForSomeDims(const std::vector &dims,
                          MatrixBase *stats);



}  // namespace kaldi
19 | #ifndef KALDI_UTIL_COMMON_UTILS_H_ 20 | #define KALDI_UTIL_COMMON_UTILS_H_ 21 | 22 | #include "base/kaldi-common.h" 23 | #include "util/parse-options.h" 24 | #include "util/kaldi-io.h" 25 | #include "util/simple-io-funcs.h" 26 | #include "util/kaldi-holder.h" 27 | #include "util/kaldi-table.h" 28 | #include "util/table-types.h" 29 | #include "util/text-utils.h" 30 | 31 | #endif // KALDI_UTIL_COMMON_UTILS_H_ 32 | -------------------------------------------------------------------------------- /src/util/const-integer-set-inl.h: -------------------------------------------------------------------------------- 1 | // util/const-integer-set-inl.h 2 | 3 | // Copyright 2009-2011 Microsoft Corporation 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 19 | 20 | 21 | #ifndef KALDI_UTIL_CONST_INTEGER_SET_INL_H_ 22 | #define KALDI_UTIL_CONST_INTEGER_SET_INL_H_ 23 | 24 | // Do not include this file directly. It is included by const-integer-set.h 25 | 26 | 27 | namespace kaldi { 28 | 29 | template 30 | void ConstIntegerSet::InitInternal() { 31 | KALDI_ASSERT_IS_INTEGER_TYPE(I); 32 | quick_set_.clear(); // just in case we previously had data. 
33 | if (slow_set_.size() == 0) { 34 | lowest_member_=(I) 1; 35 | highest_member_=(I) 0; 36 | contiguous_ = false; 37 | quick_ = false; 38 | } else { 39 | lowest_member_ = slow_set_.front(); 40 | highest_member_ = slow_set_.back(); 41 | size_t range = highest_member_ + 1 - lowest_member_; 42 | if (range == slow_set_.size()) { 43 | contiguous_ = true; 44 | quick_= false; 45 | } else { 46 | contiguous_ = false; 47 | // If it would be more compact to store as bool 48 | if (range < slow_set_.size() * 8 * sizeof(I)) { 49 | // (assuming 1 bit per element)... 50 | quick_set_.resize(range, false); 51 | for (size_t i = 0;i < slow_set_.size();i++) 52 | quick_set_[slow_set_[i] - lowest_member_] = true; 53 | quick_ = true; 54 | } else { 55 | quick_ = false; 56 | } 57 | } 58 | } 59 | } 60 | 61 | template 62 | int ConstIntegerSet::count(I i) const { 63 | if (i < lowest_member_ || i > highest_member_) { 64 | return 0; 65 | } else { 66 | if (contiguous_) return true; 67 | if (quick_) { 68 | return (quick_set_[i-lowest_member_] ? 1 : 0); 69 | } else { 70 | bool ans = std::binary_search(slow_set_.begin(), slow_set_.end(), i); 71 | return (ans ? 
1 : 0); 72 | } 73 | } 74 | } 75 | 76 | template 77 | void ConstIntegerSet::Write(std::ostream &os, bool binary) const { 78 | WriteIntegerVector(os, binary, slow_set_); 79 | } 80 | 81 | template 82 | void ConstIntegerSet::Read(std::istream &is, bool binary) { 83 | ReadIntegerVector(is, binary, &slow_set_); 84 | InitInternal(); 85 | } 86 | 87 | 88 | 89 | } // end namespace kaldi 90 | 91 | #endif // KALDI_UTIL_CONST_INTEGER_SET_INL_H_ 92 | -------------------------------------------------------------------------------- /src/util/const-integer-set.h: -------------------------------------------------------------------------------- 1 | // util/const-integer-set.h 2 | 3 | // Copyright 2009-2011 Microsoft Corporation 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 19 | 20 | 21 | #ifndef KALDI_UTIL_CONST_INTEGER_SET_H_ 22 | #define KALDI_UTIL_CONST_INTEGER_SET_H_ 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include "util/stl-utils.h" 29 | 30 | /* ConstIntegerSet is a way to efficiently test whether something is in a 31 | supplied set of integers. It can be initialized from a vector or set, but 32 | never changed after that. It either uses a sorted vector or an array of 33 | bool, depending on the input. 
namespace kaldi {

/// Immutable set of integers of type I that supports fast membership tests
/// through count().  The members are kept as a vector (slow_set_, which is
/// also what begin()/end() iterate over); InitInternal() additionally sets up
/// the bounds, the contiguous_/quick_ flags and the quick_set_ bitmap that
/// count() consults.
template<class I> class ConstIntegerSet {
 public:
  /// Creates an empty set.  contiguous_ and quick_ are now initialized too;
  /// previously they were left indeterminate until InitInternal() ran.
  ConstIntegerSet()
      : lowest_member_(1), highest_member_(0),
        contiguous_(false), quick_(false) { }

  /// Replaces the contents with the sorted, de-duplicated elements of input.
  void Init(const std::vector<I> &input) {
    slow_set_ = input;
    SortAndUniq(&slow_set_);
    InitInternal();
  }

  /// Replaces the contents with the elements of input.
  void Init(const std::set<I> &input) {
    CopySetToVector(input, &slow_set_);
    InitInternal();
  }

  explicit ConstIntegerSet(const std::vector<I> &input): slow_set_(input) {
    SortAndUniq(&slow_set_);
    InitInternal();
  }
  explicit ConstIntegerSet(const std::set<I> &input) {
    CopySetToVector(input, &slow_set_);
    InitInternal();
  }
  /// Copies only the member list and rebuilds the derived members.
  explicit ConstIntegerSet(const ConstIntegerSet<I> &other):
      slow_set_(other.slow_set_) {
    InitInternal();
  }

  int count(I i) const;  // returns 1 or 0.

  typedef typename std::vector<I>::const_iterator iterator;
  iterator begin() const { return slow_set_.begin(); }
  iterator end() const { return slow_set_.end(); }
  size_t size() const { return slow_set_.size(); }
  bool empty() const { return slow_set_.empty(); }

  void Write(std::ostream &os, bool binary) const;
  void Read(std::istream &is, bool binary);

 private:
  I lowest_member_;   // smallest member; set to 1 when the set is empty.
  I highest_member_;  // largest member; set to 0 when the set is empty.
  bool contiguous_;   // true if every value in [lowest, highest] is a member.
  bool quick_;        // true if quick_set_ is populated and should be used.
  std::vector<bool> quick_set_;  // indexed by (value - lowest_member_).
  std::vector<I> slow_set_;      // the members themselves.
  void InitInternal();
};

}  // end namespace kaldi
19 | 20 | 21 | #ifndef KALDI_UTIL_EDIT_DISTANCE_H_ 22 | #define KALDI_UTIL_EDIT_DISTANCE_H_ 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include "util/edit-distance-inl.h" 30 | #include "base/kaldi-types.h" 31 | 32 | namespace kaldi { 33 | 34 | // Compute the edit-distance between two strings. 35 | template 36 | int32 LevenshteinEditDistance(const std::vector &a, 37 | const std::vector &b); 38 | 39 | 40 | // edit distance calculation with conventional method. 41 | // note: noise word must be filtered out from the hypothesis and 42 | // reference sequence 43 | // before the following procedure conducted. 44 | template 45 | int32 LevenshteinEditDistance(const std::vector &ref, 46 | const std::vector &hyp, 47 | int32 *ins, int32 *del, int32 *sub); 48 | 49 | // This version of the edit-distance computation outputs the alignment 50 | // between the two. This is a vector of pairs of (symbol a, symbol b). 51 | // The epsilon symbol (eps_symbol) must not occur in sequences a or b. 52 | // Where one aligned to no symbol in the other (insertion or deletion), 53 | // epsilon will be the corresponding member of the pair. 54 | // It returns the edit-distance between the two strings. 
55 | 56 | template 57 | int32 LevenshteinAlignment(const std::vector &a, 58 | const std::vector &b, 59 | T eps_symbol, 60 | std::vector > *output); 61 | 62 | } // end namespace kaldi 63 | 64 | #endif // KALDI_UTIL_EDIT_DISTANCE_H_ 65 | -------------------------------------------------------------------------------- /src/util/kaldi-cygwin-io-inl.h: -------------------------------------------------------------------------------- 1 | // util/kaldi-cygwin-io-inl.h 2 | 3 | // Copyright 2015 Smart Action Company LLC (author: Kirill Katsnelson) 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 19 | #ifndef KALDI_UTIL_KALDI_CYGWIN_IO_INL_H_ 20 | #define KALDI_UTIL_KALDI_CYGWIN_IO_INL_H_ 21 | 22 | #ifndef _MSC_VER 23 | #error This is a Windows-compatibility file. Something went wery wrong. 24 | #endif 25 | 26 | #include 27 | 28 | // This file is included only into kaldi-io.cc, and only if 29 | // KALDI_CYGWIN_COMPAT is enabled. 30 | // 31 | // The routines map unix-ey paths passed to Windows programs from shell 32 | // scripts in egs. Since shell scripts run under cygwin, they use cygwin's 33 | // own mount table and a mapping to the file system. It is quite possible to 34 | // create quite an intricate mapping that only own cygwin API would be able 35 | // to untangle. 
Unfortunately, the API to map between filenames is not 36 | // available to non-cygwin programs. Running cygpath for every file operation 37 | // would as well be cumbersome. So this is only a simplistic path resolution, 38 | // assuming that the default cygwin prefix /cygdrive is used, and that all 39 | // resolved unix-style full paths end up prefixed with /cygdrive. This is 40 | // quite a sensible approach. We'll also try to map /dev/null and /tmp/**, 41 | // die on all other /dev/** and warn about all other rooted paths. 42 | 43 | namespace kaldi { 44 | 45 | static bool prefixp(const std::string& pfx, const std::string& str) { 46 | return pfx.length() <= str.length() && 47 | std::equal(pfx.begin(), pfx.end(), str.begin()); 48 | } 49 | 50 | static std::string cygprefix("/cygdrive/"); 51 | 52 | static std::string MapCygwinPathNoTmp(const std::string &filename) { 53 | // UNC(?), relative, native Windows and empty paths are ok already. 54 | if (prefixp("//", filename) || !prefixp("/", filename)) 55 | return filename; 56 | 57 | // /dev/... 58 | if (filename == "/dev/null") 59 | return "\\\\.\\nul"; 60 | if (prefixp("/dev/", filename)) { 61 | KALDI_ERR << "Unable to resolve path '" << filename 62 | << "' - only have /dev/null here."; 63 | return "\\\\.\\invalid"; 64 | } 65 | 66 | // /cygdrive/?[/....] 67 | int preflen = cygprefix.size(); 68 | if (prefixp(cygprefix, filename) 69 | && filename.size() >= preflen + 1 && isalpha(filename[preflen]) 70 | && (filename.size() == preflen + 1 || filename[preflen + 1] == '/')) { 71 | return std::string() + filename[preflen] + ':' + 72 | (filename.size() > preflen + 1 ? filename.substr(preflen + 1) : "/"); 73 | } 74 | 75 | KALDI_WARN << "Unable to resolve path '" << filename 76 | << "' - cannot map unix prefix. " 77 | << "Will go on, but breakage will likely ensue."; 78 | return filename; 79 | } 80 | 81 | // extern for unit testing. 82 | std::string MapCygwinPath(const std::string &filename) { 83 | // /tmp[/....] 
84 | if (filename != "/tmp" && !prefixp("/tmp/", filename)) { 85 | return MapCygwinPathNoTmp(filename); 86 | } 87 | char *tmpdir = std::getenv("TMP"); 88 | if (tmpdir == nullptr) 89 | tmpdir = std::getenv("TEMP"); 90 | if (tmpdir == nullptr) { 91 | KALDI_ERR << "Unable to resolve path '" << filename 92 | << "' - unable to find temporary directory. Set TMP."; 93 | return filename; 94 | } 95 | // Map the value of tmpdir again, as cygwin environment actually may contain 96 | // unix-style paths. 97 | return MapCygwinPathNoTmp(std::string(tmpdir) + filename.substr(4)); 98 | } 99 | 100 | // A popen implementation that passes the command line through cygwin 101 | // bash.exe. This is necessary since some piped commands are cygwin links 102 | // (e. g. fgrep is a soft link to grep), and some are #!-files, such as 103 | // gunzip which is a shell script that invokes gzip, or kaldi's own run.pl 104 | // which is a perl script. 105 | // 106 | // _popen uses cmd.exe or whatever shell is specified via the COMSPEC 107 | // variable. Unfortunately, it adds a hardcoded " /c " to it, so we cannot 108 | // just substitute the environment variable COMSPEC to point to bash.exe. 109 | // Instead, quote the command and pass it to bash via its -c switch. 110 | static FILE *CygwinCompatPopen(const char* command, const char* mode) { 111 | // To speed up command launch marginally, optionally accept full path 112 | // to bash.exe. This will not work if the path contains spaces, but 113 | // no sane person would install cygwin into a space-ridden path. 114 | const char* bash_exe = std::getenv("BASH_EXE"); 115 | std::string qcmd(bash_exe != nullptr ? 
bash_exe : "bash.exe"); 116 | qcmd += " -c \""; 117 | for (; *command; ++command) { 118 | if (*command == '\"') 119 | qcmd += '\"'; 120 | qcmd += *command; 121 | } 122 | qcmd += '\"'; 123 | 124 | return _popen(qcmd.c_str(), mode); 125 | } 126 | 127 | } // namespace kaldi 128 | 129 | #endif // KALDI_UTIL_KALDI_CYGWIN_IO_INL_H_ 130 | -------------------------------------------------------------------------------- /src/util/kaldi-io-inl.h: -------------------------------------------------------------------------------- 1 | // util/kaldi-io-inl.h 2 | 3 | // Copyright 2009-2011 Microsoft Corporation 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 19 | #ifndef KALDI_UTIL_KALDI_IO_INL_H_ 20 | #define KALDI_UTIL_KALDI_IO_INL_H_ 21 | 22 | #include 23 | 24 | namespace kaldi { 25 | 26 | bool Input::Open(const std::string &rxfilename, bool *binary) { 27 | return OpenInternal(rxfilename, true, binary); 28 | } 29 | 30 | bool Input::OpenTextMode(const std::string &rxfilename) { 31 | return OpenInternal(rxfilename, false, NULL); 32 | } 33 | 34 | bool Input::IsOpen() { 35 | return impl_ != NULL; 36 | } 37 | 38 | bool Output::IsOpen() { 39 | return impl_ != NULL; 40 | } 41 | 42 | 43 | } // end namespace kaldi. 
namespace kaldi {
// This class provides a way to initialize a filebuf with a FILE* pointer
// directly; it will not close the file pointer when it is deleted.
// The C++ standard does not allow implementations of C++ to provide
// this constructor within basic_filebuf, which makes it hard to deal
// with pipes using completely native C++.  This is a workaround.
//
// Three build configurations are distinguished below:
//  - _MSC_VER: nothing is defined here;
//  - _LIBCPP_VERSION (libc++): derives from the project's own basic_filebuf
//    (util/basic-filebuf.h) and calls its open(FILE*, mode);
//  - otherwise: derives from std::basic_filebuf and sets its _M_* members
//    directly.
//
// NOTE(review): the template parameter lists ("template >") and the template
// arguments of basic_filebuf/basic_pipebuf are not visible in this copy (they
// look like they were lost in extraction); restore them from the upstream
// header before compiling.

#ifdef _MSC_VER
#elif defined(_LIBCPP_VERSION)  // libc++
template >
class basic_pipebuf : public basic_filebuf {
 public:
  typedef basic_pipebuf ThisType;

 public:
  // Attaches the buffer to the already-open stream fptr with the given mode.
  // On failure only a warning is logged; the object is left not open.
  basic_pipebuf(FILE *fptr, std::ios_base::openmode mode)
      : basic_filebuf() {
    this->open(fptr, mode);
    if (!this->is_open()) {
      KALDI_WARN << "Error initializing pipebuf";  // probably indicates
      // code error, if the fptr was good.
      return;
    }
  }
};  // class basic_pipebuf
#else
template >
class basic_pipebuf : public std::basic_filebuf {
 public:
  typedef basic_pipebuf ThisType;

 public:
  // Attaches the buffer to the already-open stream fptr with the given mode.
  // On failure only a warning is logged; the object is left not open.
  // NOTE(review): the _M_* names below are presumably libstdc++
  // implementation details rather than standard API -- confirm they still
  // exist in the standard library version being targeted.
  basic_pipebuf(FILE *fptr, std::ios_base::openmode mode)
      : std::basic_filebuf() {
    this->_M_file.sys_open(fptr, mode);
    if (!this->is_open()) {
      KALDI_WARN << "Error initializing pipebuf";  // probably indicates
      // code error, if the fptr was good.
      return;
    }
    // Manual setup of the buffer state after adopting the file.
    this->_M_mode = mode;
    this->_M_buf_size = BUFSIZ;
    this->_M_allocate_internal_buffer();
    this->_M_reading = false;
    this->_M_writing = false;
    this->_M_set_buffer(-1);
  }
};  // class basic_pipebuf
#endif  // _MSC_VER

}  // namespace kaldi
10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 16 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 17 | // MERCHANTABLITY OR NON-INFRINGEMENT. 18 | // See the Apache 2 License for the specific language governing permissions and 19 | // limitations under the License. 20 | 21 | 22 | 23 | #include "base/kaldi-error.h" 24 | #include "util/kaldi-semaphore.h" 25 | 26 | namespace kaldi { 27 | 28 | Semaphore::Semaphore(int32 count) { 29 | KALDI_ASSERT(count >= 0); 30 | count_ = count; 31 | } 32 | 33 | Semaphore::~Semaphore() {} 34 | 35 | bool Semaphore::TryWait() { 36 | std::unique_lock lock(mutex_); 37 | if(count_) { 38 | count_--; 39 | return true; 40 | } 41 | return false; 42 | } 43 | 44 | void Semaphore::Wait() { 45 | std::unique_lock lock(mutex_); 46 | while(!count_) 47 | condition_variable_.wait(lock); 48 | count_--; 49 | } 50 | 51 | void Semaphore::Signal() { 52 | std::unique_lock lock(mutex_); 53 | count_++; 54 | condition_variable_.notify_one(); 55 | } 56 | 57 | } // namespace kaldi 58 | -------------------------------------------------------------------------------- /src/util/kaldi-semaphore.h: -------------------------------------------------------------------------------- 1 | // util/kaldi-semaphore.h 2 | 3 | // Copyright 2012 Karel Vesely (Brno University of Technology) 4 | // 2017 Dogan Can (University of Southern California) 5 | 6 | // See ../../COPYING for clarification regarding multiple authors 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 
namespace kaldi {

/// Counting semaphore built from a std::mutex and a std::condition_variable.
/// The member functions are defined in util/kaldi-semaphore.cc.
/// Copying and assignment are disabled via KALDI_DISALLOW_COPY_AND_ASSIGN.
class Semaphore {
 public:
  /// Constructs the semaphore with the given initial counter value
  /// (0 by default).
  Semaphore(int32 count = 0);

  ~Semaphore();

  bool TryWait();  ///< Returns true if Wait() goes through
  void Wait();  ///< decrease the counter
  void Signal();  ///< increase the counter

 private:
  int32 count_;  ///< the semaphore counter, 0 means block on Wait()

  std::mutex mutex_;  ///< guards count_
  std::condition_variable condition_variable_;  ///< signalled by Signal()
  KALDI_DISALLOW_COPY_AND_ASSIGN(Semaphore);
};

}  //namespace
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "base/kaldi-common.h"
#include "util/kaldi-thread.h"

namespace kaldi {
// Definition of the global g_num_threads; it starts out as 8.
// NOTE(review): presumably the default thread count consulted by the
// multi-threaded helpers declared in util/kaldi-thread.h -- confirm there.
int32 g_num_threads = 8;  // Initialize this global variable.

// Out-of-line destructor for MultiThreadable.
MultiThreadable::~MultiThreadable() {
  // default implementation does nothing
}



}  // end namespace kaldi

--------------------------------------------------------------------------------
/src/util/simple-io-funcs.cc:
--------------------------------------------------------------------------------
// util/simple-io-funcs.cc

// Copyright 2009-2011 Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
19 | #include "util/simple-io-funcs.h" 20 | #include "util/text-utils.h" 21 | 22 | namespace kaldi { 23 | 24 | bool WriteIntegerVectorSimple(const std::string &wxfilename, 25 | const std::vector &list) { 26 | kaldi::Output ko; 27 | // false, false is: text-mode, no Kaldi header. 28 | if (!ko.Open(wxfilename, false, false)) return false; 29 | for (size_t i = 0; i < list.size(); i++) ko.Stream() << list[i] << '\n'; 30 | return ko.Close(); 31 | } 32 | 33 | bool ReadIntegerVectorSimple(const std::string &rxfilename, 34 | std::vector *list) { 35 | kaldi::Input ki; 36 | if (!ki.OpenTextMode(rxfilename)) return false; 37 | std::istream &is = ki.Stream(); 38 | int32 i; 39 | list->clear(); 40 | while ( !(is >> i).fail() ) 41 | list->push_back(i); 42 | is >> std::ws; 43 | return is.eof(); // should be eof, or junk at end of file. 44 | } 45 | 46 | bool WriteIntegerVectorVectorSimple(const std::string &wxfilename, 47 | const std::vector > &list) { 48 | kaldi::Output ko; 49 | // false, false is: text-mode, no Kaldi header. 50 | if (!ko.Open(wxfilename, false, false)) return false; 51 | std::ostream &os = ko.Stream(); 52 | for (size_t i = 0; i < list.size(); i++) { 53 | for (size_t j = 0; j < list[i].size(); j++) { 54 | os << list[i][j]; 55 | if (j+1 < list[i].size()) os << ' '; 56 | } 57 | os << '\n'; 58 | } 59 | return ko.Close(); 60 | } 61 | 62 | bool ReadIntegerVectorVectorSimple(const std::string &rxfilename, 63 | std::vector > *list) { 64 | kaldi::Input ki; 65 | if (!ki.OpenTextMode(rxfilename)) return false; 66 | std::istream &is = ki.Stream(); 67 | list->clear(); 68 | std::string line; 69 | while (std::getline(is, line)) { 70 | std::vector v; 71 | if (!SplitStringToIntegers(line, " \t\r", true, &v)) { 72 | list->clear(); 73 | return false; 74 | } 75 | list->push_back(v); 76 | } 77 | return is.eof(); // if we're not at EOF, something weird happened. 
78 | } 79 | 80 | 81 | } // end namespace kaldi 82 | -------------------------------------------------------------------------------- /src/util/simple-io-funcs.h: -------------------------------------------------------------------------------- 1 | // util/simple-io-funcs.h 2 | 3 | // Copyright 2009-2011 Microsoft Corporation; Jan Silovsky 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 19 | #ifndef KALDI_UTIL_SIMPLE_IO_FUNCS_H_ 20 | #define KALDI_UTIL_SIMPLE_IO_FUNCS_H_ 21 | 22 | #include 23 | #include 24 | #include "util/kaldi-io.h" 25 | 26 | // This header contains some utilities for reading some common, simple text 27 | // formats:integers in files, one per line, and integers in files, possibly 28 | // multiple per line. these are not really fully native Kaldi formats; they are 29 | // mostly for small files that might be generated by scripts, and can be read 30 | // all at one time. for longer files of this type, we would probably use the 31 | // Table code. 32 | 33 | namespace kaldi { 34 | 35 | /// WriteToList attempts to write this list of integers, one per line, 36 | /// to the given file, in text format. 37 | /// returns true if succeeded. 
38 | bool WriteIntegerVectorSimple(const std::string &wxfilename, 39 | const std::vector &v); 40 | 41 | /// ReadFromList attempts to read this list of integers, one per line, 42 | /// from the given file, in text format. 43 | /// returns true if succeeded. 44 | bool ReadIntegerVectorSimple(const std::string &rxfilename, 45 | std::vector *v); 46 | 47 | // This is a file format like: 48 | // 1 2 49 | // 3 50 | // 51 | // 4 5 6 52 | // etc. 53 | bool WriteIntegerVectorVectorSimple(const std::string &wxfilename, 54 | const std::vector > &v); 55 | 56 | bool ReadIntegerVectorVectorSimple(const std::string &rxfilename, 57 | std::vector > *v); 58 | 59 | 60 | } // end namespace kaldi. 61 | 62 | 63 | #endif // KALDI_UTIL_SIMPLE_IO_FUNCS_H_ 64 | -------------------------------------------------------------------------------- /src/util/simple-options.h: -------------------------------------------------------------------------------- 1 | // util/simple-options.h 2 | 3 | // Copyright 2013 Tanel Alumae, Tallinn University of Technology 4 | 5 | // See ../../COPYING for clarification regarding multiple authors 6 | // 7 | // Licensed under the Apache License, Version 2.0 (the "License"); 8 | // you may not use this file except in compliance with the License. 9 | // You may obtain a copy of the License at 10 | // 11 | // http://www.apache.org/licenses/LICENSE-2.0 12 | // 13 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 15 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 16 | // MERCHANTABLITY OR NON-INFRINGEMENT. 17 | // See the Apache 2 License for the specific language governing permissions and 18 | // limitations under the License. 
19 | 20 | #ifndef KALDI_UTIL_SIMPLE_OPTIONS_H_ 21 | #define KALDI_UTIL_SIMPLE_OPTIONS_H_ 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | #include "base/kaldi-common.h" 28 | #include "itf/options-itf.h" 29 | 30 | namespace kaldi { 31 | 32 | 33 | /// The class SimpleOptions is an implementation of OptionsItf that allows 34 | /// setting and getting option values programmatically, i.e., via getter 35 | /// and setter methods. It doesn't provide any command line parsing 36 | /// functionality. 37 | /// The class ParseOptions should be used for command-line options. 38 | class SimpleOptions : public OptionsItf { 39 | public: 40 | SimpleOptions() { 41 | } 42 | 43 | virtual ~SimpleOptions() { 44 | } 45 | 46 | // Methods from the interface 47 | void Register(const std::string &name, bool *ptr, const std::string &doc); 48 | void Register(const std::string &name, int32 *ptr, const std::string &doc); 49 | void Register(const std::string &name, uint32 *ptr, const std::string &doc); 50 | void Register(const std::string &name, float *ptr, const std::string &doc); 51 | void Register(const std::string &name, double *ptr, const std::string &doc); 52 | void Register(const std::string &name, std::string *ptr, 53 | const std::string &doc); 54 | 55 | // set option with the specified key, return true if successful 56 | bool SetOption(const std::string &key, const bool &value); 57 | bool SetOption(const std::string &key, const int32 &value); 58 | bool SetOption(const std::string &key, const uint32 &value); 59 | bool SetOption(const std::string &key, const float &value); 60 | bool SetOption(const std::string &key, const double &value); 61 | bool SetOption(const std::string &key, const std::string &value); 62 | bool SetOption(const std::string &key, const char* value); 63 | 64 | // get option with the specified key and put to 'value', 65 | // return true if successful 66 | bool GetOption(const std::string &key, bool *value); 67 | bool GetOption(const std::string &key, int32 
*value); 68 | bool GetOption(const std::string &key, uint32 *value); 69 | bool GetOption(const std::string &key, float *value); 70 | bool GetOption(const std::string &key, double *value); 71 | bool GetOption(const std::string &key, std::string *value); 72 | 73 | enum OptionType { 74 | kBool, 75 | kInt32, 76 | kUint32, 77 | kFloat, 78 | kDouble, 79 | kString 80 | }; 81 | 82 | struct OptionInfo { 83 | OptionInfo(const std::string &doc, OptionType type) : 84 | doc(doc), type(type) { 85 | } 86 | std::string doc; 87 | OptionType type; 88 | }; 89 | 90 | std::vector > GetOptionInfoList(); 91 | 92 | /* 93 | * Puts the type of the option with name 'key' in the argument 'type'. 94 | * Return true if such option is found, false otherwise. 95 | */ 96 | bool GetOptionType(const std::string &key, OptionType *type); 97 | 98 | private: 99 | 100 | std::vector > option_info_list_; 101 | 102 | // maps for option variables 103 | std::map bool_map_; 104 | std::map int_map_; 105 | std::map uint_map_; 106 | std::map float_map_; 107 | std::map double_map_; 108 | std::map string_map_; 109 | }; 110 | 111 | } // namespace kaldi 112 | 113 | #endif // KALDI_UTIL_SIMPLE_OPTIONS_H_ 114 | -------------------------------------------------------------------------------- /swig/numpy/pyfragments.swg: -------------------------------------------------------------------------------- 1 | /*-*- C -*-*/ 2 | 3 | /**********************************************************************/ 4 | 5 | /* For numpy versions prior to 1.0, the names of certain data types 6 | * are different than in later versions. This fragment provides macro 7 | * substitutions that allow us to support old and new versions of 8 | * numpy. 9 | */ 10 | 11 | /**********************************************************************/ 12 | 13 | /* Override the SWIG_AsVal_frag(long) fragment so that it also checks 14 | * for numpy scalar array types. 
The code through the %#endif is
 * essentially cut-and-paste from pyprimtype.swg
 */

%fragment(SWIG_AsVal_frag(long), "header",
          fragment="SWIG_CanCastAsInteger",
          fragment="NumPy_Backward_Compatibility")
{
  SWIGINTERN int
  SWIG_AsVal_dec(long)(PyObject * obj, long * val)
  {
    if (PyInt_Check(obj)) {
      if (val) *val = PyInt_AsLong(obj);
      return SWIG_OK;
    } else if (PyLong_Check(obj)) {
      long v = PyLong_AsLong(obj);
      if (!PyErr_Occurred()) {
        if (val) *val = v;
        return SWIG_OK;
      } else {
        PyErr_Clear();
      }
    }
%#ifdef SWIG_PYTHON_CAST_MODE
    {
      int dispatch = 0;
      long v = PyInt_AsLong(obj);
      if (!PyErr_Occurred()) {
        if (val) *val = v;
        return SWIG_AddCast(SWIG_OK);
      } else {
        PyErr_Clear();
      }
      if (!dispatch) {
        double d;
        int res = SWIG_AddCast(SWIG_AsVal(double)(obj,&d));
        if (SWIG_IsOK(res) && SWIG_CanCastAsInteger(&d, LONG_MIN, LONG_MAX)) {
          if (val) *val = (long)(d);
          return res;
        }
      }
    }
%#endif
    if (!PyArray_IsScalar(obj,Integer)) return SWIG_TypeError;
    /* Nothing to store: this was only a type check. */
    if (!val) return SWIG_OK;
    {
      /* Acquire the descriptor only on the path that needs it and release
       * it afterwards; creating it at function entry leaked one reference
       * per call on every return path. */
      PyArray_Descr * longDescr = PyArray_DescrFromType(NPY_LONG);
      PyArray_CastScalarToCtype(obj, (void*)val, longDescr);
      Py_DECREF(longDescr);
    }
    return SWIG_OK;
  }
}


/* Override the SWIG_AsVal_frag(unsigned long) fragment so that it
 * also checks for numpy scalar array types. The code through the
 * %#endif is essentially cut-and-paste from pyprimtype.swg
 */

%fragment(SWIG_AsVal_frag(unsigned long),"header",
          fragment="SWIG_CanCastAsInteger",
          fragment="NumPy_Backward_Compatibility")
{
  SWIGINTERN int
  SWIG_AsVal_dec(unsigned long)(PyObject *obj, unsigned long *val)
  {
    %#if PY_VERSION_HEX < 0x03000000
    if (PyInt_Check(obj))
    {
      long v = PyInt_AsLong(obj);
      if (v >= 0)
      {
        if (val) *val = v;
        return SWIG_OK;
      }
      else
      {
        return SWIG_OverflowError;
      }
    } else
    %#endif
    if (PyLong_Check(obj)) {
      unsigned long v = PyLong_AsUnsignedLong(obj);
      if (!PyErr_Occurred()) {
        if (val) *val = v;
        return SWIG_OK;
      } else {
        PyErr_Clear();
      }
    }
%#ifdef SWIG_PYTHON_CAST_MODE
    {
      int dispatch = 0;
      unsigned long v = PyLong_AsUnsignedLong(obj);
      if (!PyErr_Occurred()) {
        if (val) *val = v;
        return SWIG_AddCast(SWIG_OK);
      } else {
        PyErr_Clear();
      }
      if (!dispatch) {
        double d;
        int res = SWIG_AddCast(SWIG_AsVal(double)(obj,&d));
        if (SWIG_IsOK(res) && SWIG_CanCastAsInteger(&d, 0, ULONG_MAX)) {
          if (val) *val = (unsigned long)(d);
          return res;
        }
      }
    }
%#endif
    if (!PyArray_IsScalar(obj,Integer)) return SWIG_TypeError;
    /* Nothing to store: this was only a type check. */
    if (!val) return SWIG_OK;
    {
      /* Same descriptor handling as the long fragment: acquire late,
       * release after the cast. */
      PyArray_Descr * ulongDescr = PyArray_DescrFromType(NPY_ULONG);
      PyArray_CastScalarToCtype(obj, (void*)val, ulongDescr);
      Py_DECREF(ulongDescr);
    }
    return SWIG_OK;
  }
}

--------------------------------------------------------------------------------
/swig/pydrobert/error.i:
--------------------------------------------------------------------------------
/* -*- C++ -*-

Copyright 2016 Sean Robertson

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

*/

// wrapping kaldi's logging to fit into python-o-sphere

%{

#include "base/kaldi-error.h"

namespace kaldi {
  // The Python callable currently receiving Kaldi log messages (owned
  // reference), or NULL when none is installed.  Only touched with the GIL
  // held.
  static PyObject *g_py_log_handler = NULL;

  // Installs 'py_func' as the receiver of Kaldi log messages, or removes the
  // Python handler when 'py_func' is NULL.  The callable is invoked as
  // py_func((severity, func, file, line), message_bytes).
  void SetPythonLogHandler(PyObject *py_func) {
    // Reference counts may only be modified while holding the GIL, so all of
    // the bookkeeping happens before/after the ALLOW_THREADS region.  Take
    // the new reference before dropping the old one so that re-registering
    // the same object can never free it in between.
    Py_XINCREF(py_func);
    PyObject *old_handler = g_py_log_handler;
    g_py_log_handler = py_func;

    // The GIL is released only around the call into Kaldi, as the original
    // code did.
    Py_BEGIN_ALLOW_THREADS;
    if (py_func) {
      SetLogHandler([]
        (const LogMessageEnvelope &envelope, const char * message)
        {
          PyGILState_STATE gstate;
          gstate = PyGILState_Ensure();
          // The handler may be swapped out while we wait for the GIL.
          if (g_py_log_handler) {
            PyObject *envelope_obj = Py_BuildValue(
              "(issi)",
              envelope.severity,
              envelope.func, envelope.file, envelope.line
            );
            // kaldi does not guarantee that the message is of a specific
            // encoding, so we send it as bytes and leave decoding to the
            // python side
            PyObject *arg_list = envelope_obj ?
              Py_BuildValue("(Oy)", envelope_obj, message) : NULL;
            PyObject *result = arg_list ?
              PyObject_CallObject(g_py_log_handler, arg_list) : NULL;
            // Any of these may be NULL if an earlier step failed.
            Py_XDECREF(result);
            Py_XDECREF(arg_list);
            Py_XDECREF(envelope_obj);
          }
          PyGILState_Release(gstate);
        }
      );
    } else {
      SetLogHandler(NULL);
    }
    Py_END_ALLOW_THREADS;

    Py_XDECREF(old_handler);
  }

  // Emits 'message' through Kaldi's verbose logging at level 'lvl'.
  void VerboseLog(long lvl, const char * message) {
    KALDI_VLOG(lvl) << message;
  }
}
%}

namespace kaldi {
  long GetVerboseLevel();
  void SetVerboseLevel(long i);
  void SetPythonLogHandler(PyObject *py_func);
  void VerboseLog(long lvl, const char * message);
}  // namespace kaldi

// NOTE(review): this typemap is defined after the SetPythonLogHandler
// declaration above; SWIG typemaps normally only affect declarations that
// follow them, so this callable check may never be applied -- confirm before
// moving it, since the python side may rely on passing non-callables.
%typemap(in) PyObject *py_func {
  if (!PyCallable_Check($input)) {
    PyErr_SetString(PyExc_TypeError, "Expected callable");
    return NULL;
  }
  $1 = $input;
}

--------------------------------------------------------------------------------
/swig/pydrobert/io/tables/basic_tables.i:
--------------------------------------------------------------------------------
/* -*- C++ -*-

Copyright 2017 Sean Robertson

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

*/

%{
#include "util/kaldi-table.h"
%}

namespace kaldi {
  template <class BasicType> class BasicHolder {};
  template <class BasicType> class BasicVectorHolder {};
  template <class BasicType> class BasicVectorVectorHolder {};
  template <class BasicType> class BasicPairVectorHolder {};
}

%define BASIC_TABLE_TEMPLATE_WITH_NAME_AND_TYPE(Name, HolderName, ValType...)
TEMPLATE_WITH_NAME_AND_HOLDER_TYPE(Name, HolderName);

%extend kaldi::SequentialTableReader<HolderName > {
  const ValType & Value() {
    return $self->Value();
  };
}
%extend kaldi::RandomAccessTableReaderMapped<HolderName > {
  const ValType & Value(const std::string& key) {
    return $self->Value(key);
  };
}
%extend kaldi::TableWriter<HolderName > {
  void Write(const std::string& key, const ValType & val) {
    $self->Write(key, val);
  };
}
%enddef

// NOTE(review): the template arguments in this file were lost in extraction
// and have been restored from the macro arguments next to them -- confirm
// against the upstream file.

// int
%template() kaldi::BasicHolder<int32_t >;
BASIC_TABLE_TEMPLATE_WITH_NAME_AND_TYPE(Int32, kaldi::BasicHolder<int32_t >, int32_t);
%template() kaldi::BasicVectorHolder<int32_t >;
BASIC_TABLE_TEMPLATE_WITH_NAME_AND_TYPE(Int32Vector, kaldi::BasicVectorHolder<int32_t >, std::vector<int32_t >);
%template() kaldi::BasicVectorVectorHolder<int32_t >;
BASIC_TABLE_TEMPLATE_WITH_NAME_AND_TYPE(Int32VectorVector, kaldi::BasicVectorVectorHolder<int32_t >, std::vector<std::vector<int32_t > >);
%template() kaldi::BasicPairVectorHolder<int32_t >;
BASIC_TABLE_TEMPLATE_WITH_NAME_AND_TYPE(Int32PairVector, kaldi::BasicPairVectorHolder<int32_t >, std::vector<std::pair<int32_t, int32_t > >);

// double
%template(DoubleHolder) kaldi::BasicHolder<double >;
BASIC_TABLE_TEMPLATE_WITH_NAME_AND_TYPE(Double, kaldi::BasicHolder<double >, double);

// base float
#if KALDI_DOUBLEPRECISION
typedef DoubleHolder BaseFloatHolder;
typedef SequentialDoubleReader SequentialBaseFloatReader;
typedef RandomAccessDoubleReader RandomAccessBaseFloatReader;
typedef DoubleWriter BaseFloatWriter;
%template() kaldi::BasicPairVectorHolder<double >;
BASIC_TABLE_TEMPLATE_WITH_NAME_AND_TYPE(BaseFloatPairVector, kaldi::BasicPairVectorHolder<double >, std::vector<std::pair<double, double > >);
#else
%template() kaldi::BasicHolder<float >;
BASIC_TABLE_TEMPLATE_WITH_NAME_AND_TYPE(BaseFloat, kaldi::BasicHolder<float >, float);
%template() kaldi::BasicPairVectorHolder<float >;
BASIC_TABLE_TEMPLATE_WITH_NAME_AND_TYPE(BaseFloatPairVector, kaldi::BasicPairVectorHolder<float >, std::vector<std::pair<float, float > >);
#endif

// bool
%template() kaldi::BasicHolder<bool >;
BASIC_TABLE_TEMPLATE_WITH_NAME_AND_TYPE(Bool, kaldi::BasicHolder<bool >, bool);

--------------------------------------------------------------------------------
/swig/pydrobert/io/tables/tables.i:
--------------------------------------------------------------------------------
/* -*- C++ -*-

Copyright 2017 Sean Robertson

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

*/

%{
#include "util/kaldi-holder.h"
#include "util/kaldi-table.h"
%}

// general table types
namespace kaldi {

  template <class KaldiType> class KaldiObjectHolder {
    public:
      typedef KaldiType T;
  };
  template <class Holder> class SequentialTableReader {
    public:
      typedef typename Holder::T T;
      bool Done();
      std::string Key();
      bool IsOpen() const;
      bool Close();
      bool Open(const std::string &rspecifier);
      void Next();
      // const T &Value();
      %extend {
        bool OpenThreaded(const std::string &rspecifier) {
          // If we're trying to open a "background" sequential table reader, we
          // have to initialize python threads. We do this lazily since there's
          // a performance penalty.
bool ret;
          // Release the GIL while the (possibly blocking) open runs.
          Py_BEGIN_ALLOW_THREADS;
          ret = $self->Open(rspecifier);
          Py_END_ALLOW_THREADS;
          return ret;
        };

        void NextThreaded() {
          Py_BEGIN_ALLOW_THREADS;
          $self->Next();
          Py_END_ALLOW_THREADS;
        };

        bool CloseThreaded() {
          bool ret;
          Py_BEGIN_ALLOW_THREADS;
          ret = $self->Close();
          Py_END_ALLOW_THREADS;
          return ret;
        }
      }
  };
  template <class Holder> class RandomAccessTableReaderMapped {
    public:
      typedef typename Holder::T T;
      bool Open(const std::string &table_rxfilename,
                const std::string &utt2spk_rxfilename);
      bool IsOpen() const;
      bool Close();
      bool HasKey(const std::string &key);
  };
  template <class Holder> class TableWriter {
    public:
      typedef typename Holder::T T;
      bool Open(const std::string &wspecifier);
      bool IsOpen() const;
      bool Close();
  };
}

// Adds a static IsBinary() to RWName<HolderName>, forwarding to the holder's
// IsReadInBinary().
%define EXTEND_RW_WITH_IS_BINARY(RWName, HolderName)
%extend RWName ## < ## HolderName > {
  static bool IsBinary() {
    return HolderName ## ::IsReadInBinary();
  };
}
%enddef

// Instantiates Sequential<Name>Reader, RandomAccess<Name>Reader and
// <Name>Writer for the given holder type.
%define TEMPLATE_WITH_NAME_AND_HOLDER_TYPE(Name, HolderName)
%template(Sequential ## Name ## Reader) kaldi::SequentialTableReader<HolderName >;
%template(RandomAccess ## Name ## Reader) kaldi::RandomAccessTableReaderMapped<HolderName >;
%template(Name ## Writer) kaldi::TableWriter<HolderName >;
EXTEND_RW_WITH_IS_BINARY(kaldi::SequentialTableReader, HolderName);
EXTEND_RW_WITH_IS_BINARY(kaldi::RandomAccessTableReaderMapped, HolderName);
EXTEND_RW_WITH_IS_BINARY(kaldi::TableWriter, HolderName);
%enddef

%define TEMPLATE_WITH_KOBJECT_NAME_AND_TYPE(Name, Type)
%template() Type;
%template() kaldi::KaldiObjectHolder<Type >;
TEMPLATE_WITH_NAME_AND_HOLDER_TYPE(Name, kaldi::KaldiObjectHolder<Type >);
%enddef

%include "pydrobert/io/tables/mv_tables.i"
%include "pydrobert/io/tables/token_tables.i"
%include "pydrobert/io/tables/wave_tables.i"
%include "pydrobert/io/tables/basic_tables.i"

--------------------------------------------------------------------------------
/swig/pydrobert/io/tables/token_tables.i:
--------------------------------------------------------------------------------
/* -*- C++ -*-

Copyright 2016 Sean Robertson

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

*/

%{

#include "util/text-utils.h"

%}

namespace kaldi {
  class TokenHolder {
    public:
      typedef std::string T;
  };
  class TokenVectorHolder {
    public:
      typedef std::vector<std::string > T;
  };
}

// Reject values that are not tokens with a python ValueError before they
// reach the kaldi writer.
%extend kaldi::TableWriter<kaldi::TokenHolder > {
  void Write(const std::string& key, const std::string& token) {
    if (!kaldi::IsToken(token)) {
      PyErr_SetString(PyExc_ValueError, "Value is not a token");
      return;
    }
    $self->Write(key, token);
  };
}

%extend kaldi::SequentialTableReader<kaldi::TokenHolder > {
  const std::string& Value() {
    return $self->Value();
  };
}

%extend kaldi::RandomAccessTableReaderMapped<kaldi::TokenHolder > {
  const std::string& Value(const std::string& key) {
    return $self->Value(key);
  };
}

%extend kaldi::TableWriter<kaldi::TokenVectorHolder > {
  void Write(const std::string& key, const std::vector<std::string >& token_vec) {
    for (std::vector<std::string >::const_iterator iter = token_vec.begin();
         iter != token_vec.end(); ++iter) {
      if (!kaldi::IsToken(*iter))
{
        PyErr_SetString(PyExc_ValueError, "At least one element is not a token");
        return;
      }
    }
    $self->Write(key, token_vec);
  };
}

%extend kaldi::SequentialTableReader<kaldi::TokenVectorHolder > {
  const std::vector<std::string >& Value() {
    return $self->Value();
  };
}

%extend kaldi::RandomAccessTableReaderMapped<kaldi::TokenVectorHolder > {
  const std::vector<std::string >& Value(const std::string& key) {
    return $self->Value(key);
  };
}

TEMPLATE_WITH_NAME_AND_HOLDER_TYPE(Token, kaldi::TokenHolder);
TEMPLATE_WITH_NAME_AND_HOLDER_TYPE(TokenVector, kaldi::TokenVectorHolder);

--------------------------------------------------------------------------------
/swig/pydrobert/io/util.i:
--------------------------------------------------------------------------------
/* -*- C++ -*-

Copyright 2017 Sean Robertson

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

*/

%{
  #include "base/kaldi-error.h"
  #include "util/kaldi-table.h"

  namespace kaldi {
    // Classifies 'rspecifier' and returns a new python tuple:
    //   (rspecifier type, rxfilename, input type,
    //    once, sorted, called_sorted, permissive, background)
    // where the last five entries are python bools taken from the parsed
    // RspecifierOptions.
    PyObject* ParseInputPath(const std::string &rspecifier) {
      std::string rxfilename = rspecifier;
      RspecifierOptions options;
      RspecifierType rspecifier_type = ClassifyRspecifier(rspecifier,
                                                          &rxfilename, &options);
      InputType input_type = ClassifyRxfilename(rxfilename);
      // "O" takes its own reference to each argument, so passing the
      // borrowed Py_True/Py_False singletons is fine.
      PyObject *ret = Py_BuildValue("(isiOOOOO)",
        rspecifier_type,
        rxfilename.c_str(),
        input_type,
        options.once ? Py_True : Py_False,
        options.sorted ? Py_True : Py_False,
        options.called_sorted ? Py_True : Py_False,
        options.permissive ? Py_True : Py_False,
        options.background ? Py_True : Py_False
      );
      return ret;
    }

    PyObject* ParseOutputPath(const std::string &wspecifier) {
      std::string arch_wxfilename, script_wxfilename;
      WspecifierOptions options;
      WspecifierType wspecifier_type = ClassifyWspecifier(wspecifier,
                                                          &arch_wxfilename,
                                                          &script_wxfilename,
                                                          &options);
      PyObject *ret;
      switch (wspecifier_type) {
        case kArchiveWspecifier:
          ret = Py_BuildValue("(isiOOO)",
            wspecifier_type,
            arch_wxfilename.c_str(),
            ClassifyWxfilename(arch_wxfilename),
            options.binary ? Py_True : Py_False,
            options.flush ? Py_True : Py_False,
            options.permissive ? Py_True : Py_False
          );
          break;
        case kScriptWspecifier:
          ret = Py_BuildValue("(isiOOO)",
            wspecifier_type,
            script_wxfilename.c_str(),
            ClassifyWxfilename(script_wxfilename),
            options.binary ? Py_True : Py_False,
            options.flush ? Py_True : Py_False,
            options.permissive ? Py_True : Py_False
          );
          break;
        case kBothWspecifier:
          ret = Py_BuildValue("(issiiOOO)",
            wspecifier_type,
            arch_wxfilename.c_str(),
            script_wxfilename.c_str(),
            ClassifyWxfilename(arch_wxfilename),
            ClassifyWxfilename(script_wxfilename),
            options.binary ? Py_True : Py_False,
            options.flush ? Py_True : Py_False,
            options.permissive ?
Py_True : Py_False
          );
          break;
        case kNoWspecifier: default:
          // Unrecognised specifier: report the raw string and its
          // classification as a plain wxfilename, without option flags.
          ret = Py_BuildValue("(isi)",
            wspecifier_type,
            wspecifier.c_str(),
            ClassifyWxfilename(wspecifier)
          );
          break;
      }
      return ret;
    }
  }
%}

namespace kaldi {
  PyObject* ParseInputPath(const std::string &rspecifier);
  PyObject* ParseOutputPath(const std::string &wspecifier);
}

--------------------------------------------------------------------------------
/swig/pydrobert/kaldi.i:
--------------------------------------------------------------------------------
/* -*- C++ -*-

Copyright 2017 Sean Robertson

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

*/
%module(package="pydrobert.kaldi") internal

%{
  #define SWIG_FILE_WITH_INIT
%}

#include "base/version.h"

%include "stdint.i"
%include "typemaps.i"
%include "std_string.i"
%include "std_vector.i"
%include "std_pair.i"
%include "numpy/numpy.i"
%include "exception.i"

// Translate C++ exceptions into python ones, and propagate a python error
// that a wrapped function set itself.
%exception {
  try {
    $action
    if (PyErr_Occurred()) return 0;
  } catch (const std::invalid_argument& e) {
    SWIG_exception(SWIG_TypeError, e.what());
  } catch (const std::out_of_range& e) {
    SWIG_exception(SWIG_IndexError, e.what());
  } catch (const std::exception& e) {
    SWIG_exception(SWIG_RuntimeError, e.what());
  } catch (...) {
    SWIG_exception(SWIG_RuntimeError, "unknown error");
  }
}

%init %{
  import_array();
%}

// NOTE(review): the template arguments of the %template lines below were
// lost in extraction.  They are restored from the value types used by the
// token and basic table wrappers; the order within the list is assumed --
// confirm against the upstream file.
%template() std::vector<std::string >;
// we support the combinations of basic types/vectors that have typedefs in
// table-types.h
%template() std::vector<int32_t >;
%template() std::vector<std::vector<int32_t > >;
%template() std::pair<int32_t, int32_t >;
%template() std::vector<std::pair<int32_t, int32_t > >;
%template() std::pair<double, double >;
%template() std::vector<std::pair<double, double > >;
%template() std::pair<float, float >;
%template() std::vector<std::pair<float, float > >;

// to determine BaseFloat in python wrapper
#if KALDI_DOUBLEPRECISION
%constant bool kDoubleIsBase = true;
namespace kaldi {
  typedef double BaseFloat;
}
%numpy_typemaps(kaldi::BaseFloat, NPY_DOUBLE, kaldi::MatrixIndexT);
#else
%constant bool kDoubleIsBase = false;
namespace kaldi {
  typedef float BaseFloat;
}
%numpy_typemaps(kaldi::BaseFloat, NPY_FLOAT, kaldi::MatrixIndexT);
#endif

namespace kaldi {
  typedef int MatrixIndexT;
  typedef int SignedMatrixIndexT;
  typedef unsigned int UnsignedMatrixIndexT;
}

%numpy_typemaps(double, NPY_DOUBLE, kaldi::MatrixIndexT);
%numpy_typemaps(float, NPY_FLOAT, kaldi::MatrixIndexT);

%include "pydrobert/error.i"
%include "pydrobert/io/util.i"
%include "pydrobert/io/tables/tables.i"
%include "pydrobert/io/duck.i"

--------------------------------------------------------------------------------
/tests/python/conftest.py:
--------------------------------------------------------------------------------
# Copyright 2021 Sean Robertson

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Fixtures for pytests""" 16 | 17 | import os 18 | import locale 19 | 20 | import pytest 21 | 22 | from tempfile import NamedTemporaryFile 23 | from tempfile import mkdtemp 24 | from shutil import rmtree 25 | 26 | locale.setlocale(locale.LC_ALL, "C") 27 | 28 | 29 | def pytest_runtest_setup(item): 30 | if any(mark.name == "pytorch" for mark in item.iter_markers()): 31 | pytest.importorskip("torch") 32 | 33 | 34 | @pytest.fixture 35 | def temp_file_1_name(): 36 | temp = NamedTemporaryFile(delete=False, suffix="_1") 37 | temp.close() 38 | yield temp.name 39 | os.remove(temp.name) 40 | 41 | 42 | @pytest.fixture 43 | def temp_file_2_name(): 44 | temp = NamedTemporaryFile(delete=False, suffix="_2") 45 | temp.close() 46 | yield temp.name 47 | os.remove(temp.name) 48 | 49 | 50 | @pytest.fixture 51 | def temp_file_3_name(): 52 | temp = NamedTemporaryFile(suffix="_2", delete=False) 53 | temp.close() 54 | yield temp.name 55 | os.remove(temp.name) 56 | 57 | 58 | @pytest.fixture 59 | def temp_dir(): 60 | dir_name = mkdtemp() 61 | yield dir_name 62 | rmtree(dir_name) 63 | 64 | 65 | @pytest.fixture(autouse=True) 66 | def logging_cleanup(): 67 | yield 68 | from pydrobert.kaldi.logging import deregister_all_loggers_for_kaldi 69 | 70 | deregister_all_loggers_for_kaldi() 71 | -------------------------------------------------------------------------------- /tests/python/test_argparse.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Sean Robertson 2 | 3 | # Licensed under the Apache 
License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Pytests for `pydrobert.kaldi.argparse`""" 16 | 17 | import logging 18 | 19 | from pydrobert.kaldi.io import argparse 20 | from pydrobert.kaldi.logging import kaldi_lvl_to_logging_lvl 21 | 22 | 23 | def test_can_parse_equals(): 24 | parser = argparse.KaldiParser() 25 | parser.add_argument("--foo", type=int, default=1) 26 | assert parser.parse_args([]).foo == 1 27 | assert parser.parse_args(["--foo", "2"]).foo == 2 28 | assert parser.parse_args(["--foo=2"]).foo == 2 29 | 30 | 31 | def test_config(temp_file_1_name): 32 | with open(temp_file_1_name, mode="w") as conf_file: 33 | conf_file.write("--foo 2\n") 34 | conf_file.write("#--foo 3\n") 35 | conf_file.write("#--foo 4\n") 36 | parser = argparse.KaldiParser() 37 | parser.add_argument("--foo", type=int, default=1) 38 | assert parser.parse_args([]).foo == 1 39 | assert parser.parse_args(["--config", temp_file_1_name]).foo == 2 40 | assert parser.parse_args(["--foo", "4", "--config", temp_file_1_name]).foo == 4 41 | 42 | 43 | def test_can_parse_kaldi_types(): 44 | parser = argparse.KaldiParser() 45 | parser.add_argument("a", type="kaldi_bool") 46 | parser.add_argument("b", type="kaldi_rspecifier") 47 | parser.add_argument("c", type="kaldi_wspecifier") 48 | parser.add_argument("d", type="kaldi_rxfilename") 49 | parser.add_argument("e", type="kaldi_wxfilename") 50 | parser.add_argument("f", type="kaldi_dtype") 51 | parser.add_argument("g", type="numpy_dtype") 52 | 
parser.parse_args(["true", "ark:-", "ark:-", "-", "-", "bm", "int32"]) 53 | 54 | 55 | def test_verbosity(): 56 | logger = logging.getLogger("this_should_not_be_used") 57 | parser = argparse.KaldiParser(logger=logger) 58 | assert logger.level == kaldi_lvl_to_logging_lvl(0) 59 | parser.parse_args(["-v", "-1"]) 60 | assert logger.level == kaldi_lvl_to_logging_lvl(-1) 61 | parser.parse_args(["-v", "9"]) 62 | assert logger.level == kaldi_lvl_to_logging_lvl(9) 63 | -------------------------------------------------------------------------------- /tests/python/test_duck_streams.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Sean Robertson 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Tests for `pydrobert.kaldi.io.duck_streams`""" 16 | 17 | from random import shuffle 18 | 19 | import numpy as np 20 | import pytest 21 | 22 | from pydrobert.kaldi.io import open as io_open 23 | 24 | 25 | def test_chained(temp_file_1_name): 26 | # wholly too limited a test 27 | obj_list = [ 28 | ("iv", tuple(x for x in range(1000))), 29 | ("fm", [[1, 2.5], [1e-10, 4]]), 30 | ("dv", np.random.random(1)), 31 | ("dm", np.random.random((100, 20))), 32 | ("t", "fiddlesticks"), 33 | ("t", "munsters"), 34 | ] 35 | shuffle(obj_list) 36 | with io_open(temp_file_1_name, mode="w") as outp: 37 | for dtype, obj in obj_list: 38 | outp.write(obj, dtype) 39 | with io_open(temp_file_1_name) as inp: 40 | for dtype, obj in obj_list: 41 | read = inp.read(dtype) 42 | if dtype in ("fv", "fm", "dv", "dm"): 43 | assert np.allclose(read, obj) 44 | else: 45 | assert read == obj 46 | 47 | 48 | @pytest.mark.parametrize( 49 | "ktype,value", 50 | [ 51 | ("bv", []), 52 | ("bm", [[]]), 53 | ("bv", [float("inf")]), 54 | ("bv", [1] * 100), 55 | ("bm", [[1, 2], [3, 4]]), 56 | ("fv", [-1, -1, 0, 0.1]), 57 | ("fm", np.random.random((10, 10)).astype(np.float32)), 58 | ("dv", np.arange(1000, dtype=np.float64) - 10), 59 | ( 60 | "dm", 61 | np.outer( 62 | np.arange(100, dtype=np.float32), np.arange(111, dtype=np.float32) 63 | ), 64 | ), # upcast ok 65 | ("t", "able"), 66 | # our methods can accept unicode, but always return strings, 67 | # so we don't enforce that these be unicode type. 
68 | ("t", "\u00D6a"), 69 | ("t", "n\u00F9"), 70 | # lists can be written, but tuples are read 71 | ("tv", tuple()), 72 | ("tv", ("foo", "bar")), 73 | ("tv", ("skryyyyy",)), 74 | ("tv", ("\u00D6a", "n\u00F9")), 75 | ("i", -10), 76 | ("iv", (0, 1, 2)), 77 | ("iv", tuple()), 78 | ("ivv", ((100,), (10, 40))), 79 | ("ipv", ((1, 2), (3, 4))), 80 | ("d", 0.1), 81 | ("d", 1), 82 | ("b", -0.1), 83 | ("b", -10000), 84 | ("bpv", ((0, 1.3), (4.5, 6))), 85 | ("B", True), 86 | ("B", False), 87 | ], 88 | ) 89 | @pytest.mark.parametrize("binary", [True, False]) 90 | def test_read_write_valid(temp_file_1_name, ktype, value, binary): 91 | with io_open(temp_file_1_name, mode="w", header=False) as outp: 92 | outp.write(value, ktype, write_binary=binary) 93 | with io_open(temp_file_1_name, header=False) as inp: 94 | read_value = inp.read(ktype, read_binary=binary) 95 | if ktype in ("bv", "bm", "fv", "fm", "dv", "dm", "b", "d", "bpv"): 96 | assert np.allclose(read_value, value) 97 | else: 98 | assert read_value == value 99 | 100 | 101 | @pytest.mark.parametrize( 102 | "ktype,dtype,value", 103 | [ 104 | ("b", np.float32, 3.14), # upcast ok (if applicable) 105 | ("bpv", np.float32, ((0, 1.2), (3.4, 5), (6, 7.89))), # upcast ok (if app) 106 | ("i", np.int32, 420), 107 | ("iv", np.int32, (1, 1, 2, 3, 5, 8, 13, 21)), 108 | ("ivv", np.int32, ((0, 1), (2, 3), (4, 5))), 109 | ("ipv", np.int32, ((0, 1), (2, 3), (4, 5))), 110 | ("t", str, "foo"), 111 | ("tv", str, ("foo", "bar")), 112 | ], 113 | ) 114 | def test_write_read_numpy_versions(temp_file_1_name, ktype, dtype, value): 115 | npy_value = np.array(value).astype(dtype) 116 | with io_open(temp_file_1_name, mode="w", header=False) as outp: 117 | outp.write(npy_value, ktype) 118 | with io_open(temp_file_1_name, header=False) as inp: 119 | act_value = inp.read(ktype) 120 | if ktype in ("b", "bpv"): 121 | assert np.allclose(value, act_value) 122 | else: 123 | assert value == act_value 124 | 125 | 126 | @pytest.mark.parametrize( 127 | 
"ktype,value", 128 | [ 129 | ("bv", ["a", 2, 3]), 130 | ("bv", "abc"), 131 | ("bv", [[1, 2]]), 132 | ("fv", np.arange(3, dtype=np.float64)), # downcast not ok 133 | ("bm", [["a", 2]]), 134 | ("bm", [0]), 135 | ("fm", np.random.random((10, 1)).astype(np.float64)), 136 | ("t", 1), 137 | ("t", []), 138 | ("t", "was I"), 139 | ("tv", ["a", 1]), 140 | ("tv", ("it's", "me DIO")), 141 | ("tv", "foobar"), 142 | ("i", "zimble"), 143 | ("iv", 1), 144 | ("ivv", [[[1]]]), 145 | ("ipv", ((1, 2), (3,))), 146 | ("d", 1 + 1j), 147 | ("b", "akljdal"), 148 | ("bpv", ((1,), (2, 3))), 149 | ], 150 | ) 151 | @pytest.mark.parametrize("binary", [True, False]) 152 | def test_write_invalid(temp_file_1_name, ktype, value, binary): 153 | outp = io_open(temp_file_1_name, mode="w") 154 | with pytest.raises(Exception): 155 | outp.write(value, ktype, write_binary=binary) 156 | -------------------------------------------------------------------------------- /tests/python/test_eval_commands.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Sean Robertson 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import pydrobert.kaldi.io as kaldi_io 16 | 17 | from pydrobert.kaldi.eval import command_line 18 | 19 | 20 | def test_compute_error_rate(temp_file_1_name, temp_file_2_name, temp_file_3_name): 21 | with kaldi_io.open("ark:" + temp_file_1_name, "tv", "w") as ref_writer: 22 | ref_writer.write("A", ("lorem", "ipsum", "dolor", "sit", "amet")) 23 | ref_writer.write("B", ("consectetur", "adipiscing", "elit")) 24 | with kaldi_io.open("ark:" + temp_file_2_name, "tv", "w") as hyp_writer: 25 | hyp_writer.write("A", ("laura", "ipsum", "dollars", "sit", "down", "amet")) 26 | hyp_writer.write("B", ("consecutive", "elite")) 27 | # A : lorem -> laura, dolor -> dollars, -> down 28 | # B : consectetur -> consecutive, adipiscing -> , elit -> elite 29 | # with insertions = 6 / 8 30 | # without insertions = 5 / 8 31 | ret_code = command_line.compute_error_rate( 32 | ["ark:" + temp_file_1_name, "ark:" + temp_file_2_name, temp_file_3_name] 33 | ) 34 | assert ret_code == 0 35 | with open(temp_file_3_name) as out_file_reader: 36 | out_text = out_file_reader.read() 37 | assert "Error rate: 75.00%" in out_text 38 | ret_code = command_line.compute_error_rate( 39 | [ 40 | "ark:" + temp_file_1_name, 41 | "ark:" + temp_file_2_name, 42 | temp_file_3_name, 43 | "--include-inserts-in-cost=false", 44 | "--report-accuracy=true", 45 | ] 46 | ) 47 | assert ret_code == 0 48 | with open(temp_file_3_name) as out_file_reader: 49 | out_text = out_file_reader.read() 50 | assert "Accuracy: {:.2f}%".format((1 - 5 / 8) * 100) in out_text 51 | -------------------------------------------------------------------------------- /tests/python/test_eval_util.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Sean Robertson 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Pytests for `pydrobert.kaldi.eval.util`""" 16 | 17 | import pydrobert.kaldi.eval as kaldi_eval 18 | 19 | 20 | def test_edit_distance(): 21 | # An example from wikipedia. Thanks Wiki! 22 | ref = "kitten" 23 | hyp = "sitting" 24 | assert kaldi_eval.util.edit_distance(ref, hyp) == 3 25 | _, inserts, deletes, subs, totals = kaldi_eval.util.edit_distance( 26 | ref, hyp, return_tables=True 27 | ) 28 | assert inserts == {"g": 1} 29 | assert deletes == dict() 30 | assert subs == {"k": 1, "e": 1} 31 | assert totals == {"k": 1, "i": 1, "t": 2, "e": 1, "n": 1} 32 | dist, inserts, deletes, subs, totals = kaldi_eval.util.edit_distance( 33 | ref, hyp, insertion_cost=0, substitution_cost=2, return_tables=True 34 | ) 35 | assert dist == 2 36 | assert inserts == {"s": 1, "i": 1, "g": 1} 37 | assert deletes == {"k": 1, "e": 1} 38 | assert subs == dict() 39 | assert totals == {"k": 1, "i": 1, "t": 2, "e": 1, "n": 1} 40 | -------------------------------------------------------------------------------- /tests/python/test_feat_commands.py: -------------------------------------------------------------------------------- 1 | # Copyright 2018 Sean Robertson 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """Pytests for `pydrobert.kaldi.feats.command_line`""" 16 | 17 | import numpy as np 18 | 19 | from pydrobert.kaldi.feat import command_line 20 | from pydrobert.kaldi.io import open as kaldi_open 21 | 22 | 23 | def test_normalize_feat_lens(temp_file_1_name, temp_file_2_name, temp_file_3_name): 24 | feats_a = np.random.random((10, 4)) 25 | feats_b = np.random.random((5, 4)) 26 | feats_c = np.random.random((4, 4)) 27 | with kaldi_open("ark:" + temp_file_1_name, "dm", "w") as feats_in_writer: 28 | feats_in_writer.write("A", feats_a) 29 | feats_in_writer.write("B", feats_b) 30 | feats_in_writer.write("C", feats_c) 31 | with kaldi_open("ark:" + temp_file_2_name, "i", "w") as len_in_writer: 32 | len_in_writer.write("A", 9) 33 | len_in_writer.write("B", 7) 34 | len_in_writer.write("C", 4) 35 | ret_code = command_line.normalize_feat_lens( 36 | [ 37 | "ark:" + temp_file_1_name, 38 | "ark:" + temp_file_2_name, 39 | "ark:" + temp_file_3_name, 40 | "--type=dm", 41 | "--pad-mode=zero", 42 | ] 43 | ) 44 | assert ret_code == 0 45 | with kaldi_open("ark:" + temp_file_3_name, "dm") as feats_out_reader: 46 | out_a = next(feats_out_reader) 47 | out_b = next(feats_out_reader) 48 | out_c = next(feats_out_reader) 49 | assert out_a.shape == (9, 4) 50 | assert np.allclose(out_a, feats_a[:9]) 51 | assert out_b.shape == (7, 4) 52 | assert np.allclose(out_b[:5], feats_b) 53 | assert np.allclose(out_b[5:], 0) 54 | assert out_c.shape == (4, 4) 55 | assert np.allclose(out_c, feats_c) 56 | ret_code = command_line.normalize_feat_lens( 57 | [ 58 | "ark:" 
+ temp_file_1_name, 59 | "ark:" + temp_file_2_name, 60 | "ark:" + temp_file_3_name, 61 | "--type=dm", 62 | "--tolerance=1", 63 | "--strict=true", 64 | ] 65 | ) 66 | assert ret_code == 1 67 | -------------------------------------------------------------------------------- /tests/python/test_io_commands.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Sean Robertson 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | """Pytests for `pydrobert.kaldi.io.command_line`""" 16 | 17 | import os 18 | import pickle 19 | 20 | import numpy as np 21 | import pytest 22 | import pydrobert.kaldi.io.command_line as command_line 23 | 24 | 25 | from pydrobert.kaldi.io.util import infer_kaldi_data_type 26 | from pydrobert.kaldi.io import open as kaldi_open 27 | 28 | 29 | @pytest.mark.parametrize( 30 | "values", 31 | [ 32 | [ 33 | np.array([1, 2, 3], dtype=np.float32), 34 | np.array([4], dtype=np.float32), 35 | np.array([], dtype=np.float32), 36 | ], 37 | [np.random.random((100, 20))], 38 | ["foo", "bar", "baz"], 39 | [("foo", "bar"), ("baz",)], 40 | [], 41 | ], 42 | ) 43 | def test_write_pickle_to_table(values, temp_file_1_name, temp_file_2_name): 44 | if len(values): 45 | kaldi_dtype = infer_kaldi_data_type(values[0]).value 46 | else: 47 | kaldi_dtype = "bm" 48 | with open(temp_file_1_name, "wb") as pickle_file: 49 | for num, value in enumerate(values): 50 | pickle.dump((str(num), value), pickle_file) 51 | ret_code = command_line.write_pickle_to_table( 52 | [temp_file_1_name, "ark:" + temp_file_2_name, "-o", kaldi_dtype] 53 | ) 54 | assert ret_code == 0 55 | kaldi_reader = kaldi_open("ark:" + temp_file_2_name, kaldi_dtype, "r") 56 | num_entries = 0 57 | for key, value in kaldi_reader.items(): 58 | num_entries = int(key) + 1 59 | try: 60 | values[num_entries - 1].dtype 61 | assert np.allclose(value, values[num_entries - 1]) 62 | except AttributeError: 63 | assert value == values[num_entries - 1] 64 | assert num_entries == len(values) 65 | 66 | 67 | @pytest.mark.parametrize( 68 | "values", 69 | [ 70 | [ 71 | np.array([1, 2, 3], dtype=np.float32), 72 | np.array([4], dtype=np.float32), 73 | np.array([], dtype=np.float32), 74 | ], 75 | [np.random.random((100, 20))], 76 | ["foo", "bar", "baz"], 77 | [("foo", "bar"), ("baz",)], 78 | [], 79 | ], 80 | ) 81 | def test_write_table_to_pickle(values, temp_file_1_name, temp_file_2_name): 82 | if len(values): 83 | kaldi_dtype = 
infer_kaldi_data_type(values[0]).value 84 | else: 85 | kaldi_dtype = "bm" 86 | with kaldi_open("ark:" + temp_file_1_name, kaldi_dtype, "w") as writer: 87 | for num, value in enumerate(values): 88 | writer.write(str(num), value) 89 | ret_code = command_line.write_table_to_pickle( 90 | ["ark:" + temp_file_1_name, temp_file_2_name, "-i", kaldi_dtype] 91 | ) 92 | assert ret_code == 0 93 | num_entries = 0 94 | pickle_file = open(temp_file_2_name, "rb") 95 | num_entries = 0 96 | try: 97 | while True: 98 | key, value = pickle.load(pickle_file) 99 | num_entries = int(key) + 1 100 | try: 101 | values[num_entries - 1].dtype 102 | assert np.allclose(value, values[num_entries - 1]) 103 | except AttributeError: 104 | assert value == values[num_entries - 1] 105 | except EOFError: 106 | pass 107 | assert num_entries == len(values) 108 | 109 | 110 | @pytest.mark.pytorch 111 | def test_write_table_to_torch_dir(temp_dir): 112 | import torch 113 | 114 | out_dir = os.path.join(temp_dir, "test_write_table_to_torch_dir") 115 | os.makedirs(out_dir) 116 | fname = os.path.join(out_dir, "table.ark") 117 | a = torch.rand(10, 4) 118 | b = torch.rand(5, 2) 119 | c = torch.rand(5, 100) 120 | with kaldi_open(f"ark:{fname}", "bm", mode="w") as table: 121 | table.write("a", a.numpy()) 122 | table.write("b", b.numpy()) 123 | table.write("c", c.numpy()) 124 | assert not command_line.write_table_to_torch_dir([f"ark:{fname}", out_dir]) 125 | assert torch.allclose(c, torch.load(os.path.join(out_dir, "c.pt"))) 126 | assert torch.allclose(b, torch.load(os.path.join(out_dir, "b.pt"))) 127 | assert torch.allclose(a, torch.load(os.path.join(out_dir, "a.pt"))) 128 | 129 | with open(fname, "w") as table: 130 | table.write("a T\n") 131 | assert not command_line.write_table_to_torch_dir( 132 | [f"ark,t:{fname}", "-i", "B", out_dir] 133 | ) 134 | assert torch.load(os.path.join(out_dir, "a.pt")) 135 | 136 | 137 | @pytest.mark.pytorch 138 | def test_write_torch_dir_to_table(temp_dir): 139 | import torch 140 | 141 
| in_dir = os.path.join(temp_dir, "test_write_torch_dir_to_table") 142 | rwspecifier = "ark:" + os.path.join(in_dir, "table.ark") 143 | os.makedirs(in_dir) 144 | a = torch.rand(5, 4) 145 | b = torch.rand(4, 3) 146 | c = torch.rand(3, 2) 147 | torch.save(a, os.path.join(in_dir, "a.pt")) 148 | torch.save(b, os.path.join(in_dir, "b.pt")) 149 | torch.save(c, os.path.join(in_dir, "c.pt")) 150 | assert not command_line.write_torch_dir_to_table([in_dir, rwspecifier]) 151 | with kaldi_open(rwspecifier, "bm") as table: 152 | keys, vals = zip(*table.items()) 153 | keys = tuple(keys) 154 | vals = tuple(vals) 155 | assert keys == ("a", "b", "c") 156 | assert len(vals) == 3 157 | for dval, tval in zip((a, b, c), vals): 158 | assert torch.allclose(dval, torch.from_numpy(tval)) 159 | -------------------------------------------------------------------------------- /tests/python/test_logging.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Sean Robertson 2 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 

"""Pytests for `pydrobert.kaldi.logging`"""

from io import StringIO
import logging

import numpy as np
import pytest

from builtins import chr

from pydrobert.kaldi import io
from pydrobert.kaldi._internal import VerboseLog as verbose_log  # type: ignore
from pydrobert.kaldi.logging import KaldiLogger
from pydrobert.kaldi.logging import deregister_logger_for_kaldi
from pydrobert.kaldi.logging import register_logger_for_kaldi


@pytest.fixture
def kaldi_logger():
    """Yield a ``KaldiLogger`` registered with Kaldi and writing to a ``StringIO``.

    The logger gets a random 100-letter name. The stream is reachable through
    ``logger.handlers[-1].stream``. On teardown the logger is deregistered and
    all of its handlers are removed.
    """
    logger_name = "".join(chr(x + 97) for x in np.random.choice(26, 100))
    # Temporarily switch the logger class so getLogger builds a KaldiLogger,
    # then restore the previous class right away.
    old_class = logging.getLoggerClass()
    logging.setLoggerClass(KaldiLogger)
    ret_logger = logging.getLogger(logger_name)
    logging.setLoggerClass(old_class)
    s_stream = StringIO()
    ret_logger.addHandler(logging.StreamHandler(s_stream))
    register_logger_for_kaldi(logger_name)
    yield ret_logger
    deregister_logger_for_kaldi(logger_name)
    # Iterate over a copy: removeHandler mutates ``ret_logger.handlers``, and
    # removing while iterating the live list skips every other handler.
    for handler in list(ret_logger.handlers):
        ret_logger.removeHandler(handler)


@pytest.fixture
def registered_regular_logger():
    """Yield a standard logger registered with Kaldi and writing to a ``StringIO``.

    The logger gets a random 100-letter name. The stream is reachable through
    ``logger.handlers[-1].stream``. On teardown the logger is deregistered and
    the handler added here is removed.
    """
    logger_name = "".join(chr(x + 97) for x in np.random.choice(26, 100))
    ret_logger = logging.getLogger(logger_name)
    s_stream = StringIO()
    stream_handler = logging.StreamHandler(s_stream)
    ret_logger.addHandler(stream_handler)
    register_logger_for_kaldi(logger_name)
    yield ret_logger
    deregister_logger_for_kaldi(logger_name)
    # removeHandler expects the handler object; passing the underlying stream
    # (as was done before) silently removes nothing.
    ret_logger.removeHandler(stream_handler)


def test_kaldi_logger_basic_write(kaldi_logger):
    """Messages at or above the logger's level are written; lower ones are not."""
    kaldi_logger.setLevel(logging.WARNING)
    s_stream = kaldi_logger.handlers[-1].stream
    assert not s_stream.tell()
    test_string = "I'm a dude playing a dude disguised as another dude"
    kaldi_logger.warning(test_string)
    assert s_stream.tell()
    assert test_string + "\n" == s_stream.getvalue()
    # INFO is below WARNING, so the stream content must stay unchanged
    kaldi_logger.info(test_string)
    assert test_string + "\n" == s_stream.getvalue()

| 72 | def test_callback_delivers_correct_messages(kaldi_logger, registered_regular_logger): 73 | kaldi_logger.setLevel(logging.INFO) 74 | k_stream = kaldi_logger.handlers[-1].stream 75 | registered_regular_logger.setLevel(logging.WARNING) 76 | r_stream = registered_regular_logger.handlers[-1].stream 77 | verbose_log(-1, "everyone gets this") 78 | verbose_log(0, "not r_stream, here") 79 | verbose_log(1, "noone gets this") 80 | assert "everyone gets this\nnot r_stream, here\n" == k_stream.getvalue() 81 | assert "everyone gets this\n" == r_stream.getvalue() 82 | 83 | 84 | def test_do_not_callback_unregistered(kaldi_logger): 85 | kaldi_logger.setLevel(logging.WARNING) 86 | verbose_log(-1, "should see this") 87 | deregister_logger_for_kaldi(kaldi_logger.name) 88 | verbose_log(-1, "should not see this") 89 | register_logger_for_kaldi("bingobangobongo") 90 | verbose_log(-1, "still nothing") 91 | register_logger_for_kaldi(kaldi_logger.name) 92 | verbose_log(-1, "but see this") 93 | s_stream = kaldi_logger.handlers[-1].stream 94 | assert "should see this\nbut see this\n" == s_stream.getvalue() 95 | 96 | 97 | def elicit_warning(filename, threaded=False): 98 | # helper to elicit a natural warning from kaldi 99 | writer = io.open("ark,t:{}".format(filename), "bv", "w") 100 | writer.write("zz", [float("inf")]) 101 | writer.close() 102 | reader = io.open("ark,t{}:{}".format(",bg" if threaded else "", filename), "bv") 103 | next(reader) 104 | reader.close() 105 | 106 | 107 | @pytest.mark.parametrize("threaded", [False]) 108 | def test_elicit_kaldi_warning(kaldi_logger, temp_file_1_name, threaded): 109 | s_stream = kaldi_logger.handlers[-1].stream 110 | assert not s_stream.tell() 111 | elicit_warning(temp_file_1_name, threaded) 112 | assert s_stream.tell() 113 | assert "Reading infinite value into vector.\n" == s_stream.getvalue() 114 | 115 | 116 | def test_log_source_is_appropriate(kaldi_logger, temp_file_1_name): 117 | handler = kaldi_logger.handlers[-1] 118 | s_stream = 
handler.stream 119 | handler.setFormatter(logging.Formatter("%(filename)s: %(message)s")) 120 | assert not s_stream.tell() 121 | kaldi_logger.warning("pokeymans") 122 | assert "test_logging.py" in s_stream.getvalue() 123 | assert "kaldi-vector.cc" not in s_stream.getvalue() 124 | s_stream.seek(0) 125 | elicit_warning(temp_file_1_name) 126 | assert "kaldi-vector.cc" in s_stream.getvalue() 127 | assert "__init__.py" not in s_stream.getvalue() 128 | 129 | 130 | def test_python_error_doesnt_segfault(registered_regular_logger, temp_file_1_name): 131 | def _raise_exception(*args, **kwargs): 132 | raise Exception() 133 | 134 | registered_regular_logger.makeRecord = _raise_exception 135 | with pytest.raises(Exception): 136 | registered_regular_logger.warning("foo") 137 | with pytest.raises(Exception): 138 | elicit_warning(temp_file_1_name) 139 | -------------------------------------------------------------------------------- /tests/python/test_metadata.py: -------------------------------------------------------------------------------- 1 | """Test package metadata""" 2 | 3 | import pydrobert.kaldi 4 | 5 | 6 | def test_version(): 7 | assert pydrobert.kaldi.__version__ != "inplace" 8 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # tox (https://tox.readthedocs.io/) is a tool for running tests 2 | # in multiple virtualenvs. This configuration file will run the 3 | # test suite on all supported python versions. To use it, "pip install tox" 4 | # and then run "tox" from this directory. 
5 | 6 | [tox] 7 | envlist = 8 | 3.13 9 | numpy1 10 | 3.12 11 | 3.11 12 | 3.10 13 | 3.9 14 | 15 | [testenv] 16 | deps = 17 | pytest 18 | torch >=1.8 19 | numpy1: numpy <2 20 | 21 | commands = 22 | write-table-to-pickle --help 23 | write-pickle-to-table --help 24 | compute-error-rate --help 25 | normalize-feat-lens --help 26 | write-table-to-torch-dir --help 27 | write-torch-dir-to-table --help 28 | pytest 29 | 30 | [gh] 31 | python = 32 | 3.13 = 3.13 33 | 3.12 = 3.12, numpy1 34 | 3.11 = 3.11 35 | 3.10 = 3.10 36 | 3.9 = 3.9 37 | --------------------------------------------------------------------------------