├── .clang-format ├── .flake8 ├── .github └── workflows │ ├── build-doc.yml │ ├── macos-cpu-wheels.yml │ ├── publish_to_pypi.yml │ ├── run-tests-macos-cpu.yml │ ├── run-tests-ubuntu-cpu.yml │ ├── run-tests-ubuntu-cuda.yml │ ├── run-tests-windows-cpu.yml │ ├── run-tests-windows-cuda.yml │ ├── style_check.yml │ ├── test-wheels.yml │ ├── ubuntu-arm64-cpu-wheels.yml │ ├── ubuntu-cpu-wheels.yml │ ├── ubuntu-cuda-wheels.yml │ └── windows-x64-cpu-wheels.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CMakeLists.txt ├── LICENSE ├── MANIFEST.in ├── README.md ├── cmake ├── Modules │ ├── FetchContent.cmake │ ├── FetchContent │ │ └── CMakeLists.cmake.in │ └── README.md ├── __init__.py ├── cmake_extension.py ├── googletest.cmake ├── kaldifeatConfig.cmake.in ├── kaldifeatConfigVersion.cmake.in ├── pybind11.cmake └── torch.cmake ├── doc ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── _static │ └── .gitkeep │ ├── conf.py │ ├── images │ ├── README.md │ ├── cuda_ge_10.1-orange.svg │ ├── os-green.svg │ ├── python_ge_3.6-blue.svg │ └── pytorch_ge_1.5.0-green.svg │ ├── index.rst │ ├── installation │ ├── faq.rst │ ├── from_source.rst │ ├── from_wheels.rst │ └── index.rst │ ├── intro.rst │ └── usage │ ├── code │ ├── compute-fbank-feats-help.txt │ ├── fbank_options-1.txt │ └── test_fbank_options.py │ ├── fbank.rst │ ├── fbank_options.rst │ ├── index.rst │ └── online_fbank.rst ├── get_version.py ├── kaldifeat ├── CMakeLists.txt ├── csrc │ ├── CMakeLists.txt │ ├── CPPLINT.cfg │ ├── feature-common-inl.h │ ├── feature-common.h │ ├── feature-fbank.cc │ ├── feature-fbank.h │ ├── feature-functions.cc │ ├── feature-functions.h │ ├── feature-mfcc.cc │ ├── feature-mfcc.h │ ├── feature-plp.cc │ ├── feature-plp.h │ ├── feature-spectrogram.cc │ ├── feature-spectrogram.h │ ├── feature-window-test.cc │ ├── feature-window.cc │ ├── feature-window.h │ ├── generate-whisper-melbank-v3.py │ ├── generate-whisper-melbank.py │ ├── log.h │ ├── matrix-functions.cc │ ├── matrix-functions.h │ ├── 
mel-computations.cc │ ├── mel-computations.h │ ├── online-feature-itf.h │ ├── online-feature-test.cc │ ├── online-feature.cc │ ├── online-feature.h │ ├── pitch-functions.h │ ├── test_kaldifeat.cc │ ├── whisper-fbank.cc │ ├── whisper-fbank.h │ ├── whisper-mel-bank.h │ └── whisper-v3-mel-bank.h └── python │ ├── CMakeLists.txt │ ├── csrc │ ├── CMakeLists.txt │ ├── CPPLINT.cfg │ ├── feature-fbank.cc │ ├── feature-fbank.h │ ├── feature-mfcc.cc │ ├── feature-mfcc.h │ ├── feature-plp.cc │ ├── feature-plp.h │ ├── feature-spectrogram.cc │ ├── feature-spectrogram.h │ ├── feature-window.cc │ ├── feature-window.h │ ├── kaldifeat.cc │ ├── kaldifeat.h │ ├── mel-computations.cc │ ├── mel-computations.h │ ├── online-feature.cc │ ├── online-feature.h │ ├── utils.cc │ ├── utils.h │ ├── whisper-fbank.cc │ └── whisper-fbank.h │ ├── kaldifeat │ ├── __init__.py │ ├── fbank.py │ ├── mfcc.py │ ├── offline_feature.py │ ├── online_feature.py │ ├── plp.py │ ├── spectrogram.py │ ├── torch_version.py.in │ └── whisper_fbank.py │ └── tests │ ├── CMakeLists.txt │ ├── Makefile │ ├── __init__.py │ ├── test_data │ ├── run.sh │ ├── test-40-no-snip-edges.txt │ ├── test-40.txt │ ├── test-htk.txt │ ├── test-mfcc-no-snip-edges.txt │ ├── test-mfcc.txt │ ├── test-plp-htk-10-ceps.txt │ ├── test-plp-no-snip-edges.txt │ ├── test-plp.txt │ ├── test-spectrogram-no-snip-edges.txt │ ├── test-spectrogram.txt │ ├── test-with-energy.txt │ ├── test.scp │ ├── test.txt │ ├── test.wav │ └── test2.wav │ ├── test_fbank.py │ ├── test_fbank_options.py │ ├── test_frame_extraction_options.py │ ├── test_mel_bank_options.py │ ├── test_mfcc.py │ ├── test_mfcc_options.py │ ├── test_plp.py │ ├── test_plp_options.py │ ├── test_spectrogram.py │ ├── test_spectrogram_options.py │ ├── test_whisper_fbank.py │ ├── test_whisper_v3_fbank.py │ └── utils.py ├── requirements.txt ├── scripts ├── build_conda.sh ├── build_conda_cpu.sh ├── check_style_cpplint.sh ├── conda-cpu │ ├── cpuonly │ │ └── meta.yaml │ └── kaldifeat │ │ └── meta.yaml ├── 
conda │ └── kaldifeat │ │ ├── build.sh │ │ └── meta.yaml ├── github_actions │ ├── build-ubuntu-cpu-arm64.sh │ ├── build-ubuntu-cpu.sh │ ├── build-ubuntu-cuda.sh │ ├── generate_build_matrix.py │ ├── install_cuda.sh │ ├── install_cudnn.sh │ ├── install_torch.sh │ └── run-nightly-build.py └── utils.sh └── setup.py /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: Google 3 | --- 4 | Language: Cpp 5 | Cpp11BracedListStyle: true 6 | Standard: Cpp11 7 | DerivePointerAlignment: false 8 | PointerAlignment: Right 9 | --- 10 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 80 3 | 4 | exclude = 5 | .git, 6 | doc, 7 | build, 8 | build_release, 9 | cmake/cmake_extension.py, 10 | kaldifeat/python/kaldifeat/__init__.py 11 | 12 | ignore = 13 | E402 14 | -------------------------------------------------------------------------------- /.github/workflows/build-doc.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Xiaomi Corp. (author: Fangjun Kuang) 2 | 3 | # See ../../LICENSE for clarification regarding multiple authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | # refer to https://github.com/actions/starter-workflows/pull/47/files 18 | 19 | # You can access it at https://csukuangfj.github.io/kaldifeat 20 | name: Generate doc 21 | on: 22 | push: 23 | branches: 24 | - master 25 | - doc 26 | workflow_dispatch: 27 | 28 | jobs: 29 | build-doc: 30 | runs-on: ${{ matrix.os }} 31 | strategy: 32 | fail-fast: false 33 | matrix: 34 | os: [ubuntu-latest] 35 | python-version: [3.8] 36 | steps: 37 | # refer to https://github.com/actions/checkout 38 | - uses: actions/checkout@v4 39 | with: 40 | fetch-depth: 0 41 | 42 | - name: Setup Python ${{ matrix.python-version }} 43 | uses: actions/setup-python@v2 44 | with: 45 | python-version: ${{ matrix.python-version }} 46 | 47 | - name: Display Python version 48 | run: python -c "import sys; print(sys.version)" 49 | 50 | - name: Update wheels 51 | shell: bash 52 | run: | 53 | export KALDIFEAT_DIR=$PWD 54 | ls -lh $KALDIFEAT_DIR 55 | 56 | export GIT_LFS_SKIP_SMUDGE=1 57 | export GIT_CLONE_PROTECTION_ACTIVE=false 58 | git clone https://huggingface.co/csukuangfj/kaldifeat huggingface 59 | cd huggingface 60 | 61 | ./run.sh 62 | 63 | - name: Build doc 64 | shell: bash 65 | run: | 66 | cd doc 67 | git status 68 | python3 -m pip install -r ./requirements.txt 69 | make html 70 | cp source/cpu.html build/html/ 71 | cp source/cuda.html build/html/ 72 | cp source/cpu-cn.html build/html/ 73 | cp source/cuda-cn.html build/html/ 74 | touch build/html/.nojekyll 75 | 76 | - name: Deploy 77 | uses: peaceiris/actions-gh-pages@v3 78 | with: 79 | github_token: ${{ secrets.GITHUB_TOKEN }} 80 | publish_dir: ./doc/build/html 81 | publish_branch: gh-pages 82 | -------------------------------------------------------------------------------- /.github/workflows/macos-cpu-wheels.yml: -------------------------------------------------------------------------------- 1 | name: build-wheels-cpu-macos 2 | 3 | on: 4 | push: 5 | branches: 6 | - fix-wheel-2 7 | tags: 8 | - '*' 9 | workflow_dispatch: 10 | 11 | 
concurrency: 12 | group: build-wheels-cpu-macos-${{ github.ref }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | generate_build_matrix: 17 | # see https://github.com/pytorch/pytorch/pull/50633 18 | runs-on: ubuntu-latest 19 | outputs: 20 | matrix: ${{ steps.set-matrix.outputs.matrix }} 21 | steps: 22 | - uses: actions/checkout@v4 23 | with: 24 | fetch-depth: 0 25 | - name: Generating build matrix 26 | id: set-matrix 27 | run: | 28 | # outputting for debugging purposes 29 | # python ./scripts/github_actions/generate_build_matrix.py --for-macos 30 | # MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-macos) 31 | 32 | python ./scripts/github_actions/generate_build_matrix.py --for-macos --test-only-latest-torch 33 | MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-macos --test-only-latest-torch) 34 | echo "::set-output name=matrix::${MATRIX}" 35 | 36 | build_wheels_macos_cpu: 37 | needs: generate_build_matrix 38 | name: ${{ matrix.torch }} ${{ matrix.python-version }} 39 | runs-on: macos-14 40 | strategy: 41 | fail-fast: false 42 | matrix: 43 | ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }} 44 | 45 | steps: 46 | - uses: actions/checkout@v4 47 | with: 48 | fetch-depth: 0 49 | 50 | - name: Setup Python 51 | uses: actions/setup-python@v5 52 | with: 53 | python-version: ${{ matrix.python-version }} 54 | 55 | - name: Install dependencies 56 | shell: bash 57 | run: | 58 | pip install -q torch==${{ matrix.torch}} cmake numpy wheel>=0.40.0 twine setuptools 59 | 60 | - name: Build wheel 61 | shell: bash 62 | run: | 63 | python3 setup.py bdist_wheel 64 | mkdir wheelhouse 65 | cp -v dist/* wheelhouse 66 | 67 | - name: Display wheels (before fix) 68 | shell: bash 69 | run: | 70 | ls -lh ./wheelhouse/ 71 | 72 | - name: Fix wheel platform tag 73 | run: | 74 | # See https://github.com/glencoesoftware/zeroc-ice-py-macos-x86_64/pull/3/files 75 | # See: 76 | # * https://github.com/pypa/wheel/issues/406 77 | python -m wheel 
tags \ 78 | --platform-tag=macosx_11_0_arm64 \ 79 | --remove wheelhouse/*.whl 80 | 81 | - name: Display wheels (after fix) 82 | shell: bash 83 | run: | 84 | ls -lh ./wheelhouse/ 85 | 86 | - name: Upload Wheel 87 | uses: actions/upload-artifact@v4 88 | with: 89 | name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-macos-latest-cpu 90 | path: wheelhouse/*.whl 91 | 92 | # https://huggingface.co/docs/hub/spaces-github-actions 93 | - name: Publish to huggingface 94 | if: github.repository_owner == 'csukuangfj' 95 | env: 96 | HF_TOKEN: ${{ secrets.HF_TOKEN }} 97 | uses: nick-fields/retry@v2 98 | with: 99 | max_attempts: 20 100 | timeout_seconds: 200 101 | shell: bash 102 | command: | 103 | git config --global user.email "csukuangfj@gmail.com" 104 | git config --global user.name "Fangjun Kuang" 105 | 106 | rm -rf huggingface 107 | export GIT_LFS_SKIP_SMUDGE=1 108 | 109 | git clone https://huggingface.co/csukuangfj/kaldifeat huggingface 110 | cd huggingface 111 | git pull 112 | 113 | d=cpu/1.25.5.dev20241029/macos 114 | mkdir -p $d 115 | cp -v ../wheelhouse/*.whl ./$d 116 | git status 117 | git lfs track "*.whl" 118 | git add . 119 | git commit -m "upload macos wheel for torch ${{ matrix.torch }} python ${{ matrix.python-version }}" 120 | git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kaldifeat main 121 | -------------------------------------------------------------------------------- /.github/workflows/publish_to_pypi.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang) 2 | 3 | # See ../../LICENSE for clarification regarding multiple authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | name: Publish to PyPI 18 | 19 | on: 20 | push: 21 | tags: 22 | - '*' 23 | workflow_dispatch: 24 | 25 | jobs: 26 | pypi: 27 | runs-on: ubuntu-latest 28 | steps: 29 | - uses: actions/checkout@v4 30 | with: 31 | fetch-depth: 0 32 | 33 | - name: Setup Python 34 | uses: actions/setup-python@v2 35 | with: 36 | python-version: 3.8 37 | 38 | - name: Install Python dependencies 39 | shell: bash 40 | run: | 41 | python3 -m pip install --upgrade pip 42 | python3 -m pip install wheel twine setuptools 43 | python3 -m pip install torch==1.10.0+cpu -f https://download.pytorch.org/whl/torch_stable.html 44 | 45 | - name: Build 46 | shell: bash 47 | run: | 48 | python3 setup.py sdist 49 | ls -l dist/* 50 | 51 | - name: Publish wheels to PyPI 52 | env: 53 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 54 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 55 | run: | 56 | twine upload dist/kaldifeat-*.tar.gz 57 | -------------------------------------------------------------------------------- /.github/workflows/run-tests-macos-cpu.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang) 2 | 3 | # See ../../LICENSE for clarification regarding multiple authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | name: Run tests macos cpu 18 | 19 | on: 20 | push: 21 | branches: 22 | - master 23 | 24 | pull_request: 25 | branches: 26 | - master 27 | 28 | jobs: 29 | generate_build_matrix: 30 | # see https://github.com/pytorch/pytorch/pull/50633 31 | runs-on: ubuntu-latest 32 | outputs: 33 | matrix: ${{ steps.set-matrix.outputs.matrix }} 34 | steps: 35 | - uses: actions/checkout@v4 36 | with: 37 | fetch-depth: 0 38 | - name: Generating build matrix 39 | id: set-matrix 40 | run: | 41 | # outputting for debugging purposes 42 | python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch 43 | MATRIX=$(python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch) 44 | echo "::set-output name=matrix::${MATRIX}" 45 | 46 | run_tests_macos_cpu: 47 | needs: generate_build_matrix 48 | runs-on: macos-latest 49 | strategy: 50 | fail-fast: false 51 | matrix: 52 | ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }} 53 | 54 | steps: 55 | - uses: actions/checkout@v4 56 | with: 57 | fetch-depth: 0 58 | 59 | - name: Setup Python 60 | uses: actions/setup-python@v2 61 | with: 62 | python-version: ${{ matrix.python-version }} 63 | 64 | - name: Install PyTorch ${{ matrix.torch }} 65 | shell: bash 66 | run: | 67 | python3 -m pip install -qq --upgrade pip 68 | python3 -m pip install -qq wheel twine typing_extensions soundfile numpy 69 | python3 -m pip install -qq torch==${{ matrix.torch }} -f https://download.pytorch.org/whl/torch_stable.html || python3 -m pip install -qq torch==${{ matrix.torch }} -f 
https://download.pytorch.org/whl/torch/ 70 | 71 | python3 -c "import torch; print('torch version:', torch.__version__)" 72 | 73 | - name: Build 74 | shell: bash 75 | run: | 76 | mkdir build_release 77 | cd build_release 78 | cmake -DCMAKE_CXX_STANDARD=17 .. 79 | make VERBOSE=1 -j3 80 | 81 | - name: Run tests 82 | shell: bash 83 | run: | 84 | cd build_release 85 | ctest --output-on-failure 86 | -------------------------------------------------------------------------------- /.github/workflows/run-tests-ubuntu-cpu.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang) 2 | 3 | # See ../../LICENSE for clarification regarding multiple authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | name: Run tests ubuntu cpu 18 | 19 | on: 20 | push: 21 | branches: 22 | - master 23 | 24 | pull_request: 25 | branches: 26 | - master 27 | 28 | jobs: 29 | generate_build_matrix: 30 | # see https://github.com/pytorch/pytorch/pull/50633 31 | runs-on: ubuntu-latest 32 | outputs: 33 | matrix: ${{ steps.set-matrix.outputs.matrix }} 34 | steps: 35 | - uses: actions/checkout@v4 36 | with: 37 | fetch-depth: 0 38 | - name: Generating build matrix 39 | id: set-matrix 40 | run: | 41 | # outputting for debugging purposes 42 | python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch 43 | MATRIX=$(python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch) 44 | echo "::set-output name=matrix::${MATRIX}" 45 | 46 | run_tests_ubuntu_cpu: 47 | needs: generate_build_matrix 48 | runs-on: ubuntu-18.04 49 | strategy: 50 | fail-fast: false 51 | matrix: 52 | ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }} 53 | 54 | steps: 55 | - uses: actions/checkout@v4 56 | with: 57 | fetch-depth: 0 58 | 59 | - name: Setup Python 60 | uses: actions/setup-python@v2 61 | with: 62 | python-version: ${{ matrix.python-version }} 63 | 64 | - name: Install PyTorch ${{ matrix.torch }} 65 | shell: bash 66 | run: | 67 | sudo apt-get update 68 | sudo apt-get install -y libsndfile1-dev libsndfile1 ffmpeg 69 | python3 -m pip install --upgrade pip 70 | python3 -m pip install wheel twine typing_extensions soundfile 71 | python3 -m pip install bs4 requests tqdm numpy 72 | python3 -m pip install -qq torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/torch_stable.html || python3 -m pip install -qq torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/torch/ 73 | 74 | python3 -c "import torch; print('torch version:', torch.__version__)" 75 | 76 | - name: Build 77 | shell: bash 78 | run: | 79 | mkdir build_release 80 | cd build_release 81 | cmake -DCMAKE_CXX_STANDARD=17 .. 
82 | make VERBOSE=1 -j3 83 | 84 | - name: Run tests 85 | shell: bash 86 | run: | 87 | cd build_release 88 | ctest --output-on-failure 89 | -------------------------------------------------------------------------------- /.github/workflows/run-tests-ubuntu-cuda.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang) 2 | 3 | # See ../../LICENSE for clarification regarding multiple authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | name: Run tests ubuntu cuda 18 | 19 | on: 20 | push: 21 | branches: 22 | - master 23 | 24 | pull_request: 25 | branches: 26 | - master 27 | 28 | jobs: 29 | generate_build_matrix: 30 | # see https://github.com/pytorch/pytorch/pull/50633 31 | runs-on: ubuntu-latest 32 | outputs: 33 | matrix: ${{ steps.set-matrix.outputs.matrix }} 34 | steps: 35 | - uses: actions/checkout@v4 36 | with: 37 | fetch-depth: 0 38 | - name: Generating build matrix 39 | id: set-matrix 40 | run: | 41 | # outputting for debugging purposes 42 | python scripts/github_actions/generate_build_matrix.py --enable-cuda --test-only-latest-torch 43 | MATRIX=$(python scripts/github_actions/generate_build_matrix.py --enable-cuda --test-only-latest-torch) 44 | echo "::set-output name=matrix::${MATRIX}" 45 | 46 | run_tests_ubuntu_cuda: 47 | needs: generate_build_matrix 48 | runs-on: ubuntu-latest 49 | strategy: 50 | fail-fast: false 51 | matrix: 52 | ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }} 53 | 54 | steps: 55 | - uses: actions/checkout@v4 56 | with: 57 | fetch-depth: 0 58 | 59 | - name: Setup Python 60 | uses: actions/setup-python@v2 61 | with: 62 | python-version: ${{ matrix.python-version }} 63 | 64 | - name: Install CUDA Toolkit ${{ matrix.cuda }} 65 | shell: bash 66 | env: 67 | cuda: ${{ matrix.cuda }} 68 | run: | 69 | source ./scripts/github_actions/install_cuda.sh 70 | echo "CUDA_HOME=${CUDA_HOME}" >> $GITHUB_ENV 71 | echo "${CUDA_HOME}/bin" >> $GITHUB_PATH 72 | echo "LD_LIBRARY_PATH=${CUDA_HOME}/lib:${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}" >> $GITHUB_ENV 73 | 74 | - name: Display NVCC version 75 | run: | 76 | which nvcc 77 | nvcc --version 78 | 79 | - name: Install PyTorch ${{ matrix.torch }} 80 | env: 81 | cuda: ${{ matrix.cuda }} 82 | torch: ${{ matrix.torch }} 83 | shell: bash 84 | run: | 85 | sudo apt-get update 86 | sudo apt-get install -y libsndfile1-dev libsndfile1 ffmpeg 87 | python3 -m pip install --upgrade pip 88 | python3 -m pip install wheel twine 
typing_extensions soundfile 89 | python3 -m pip install bs4 requests tqdm numpy 90 | 91 | ./scripts/github_actions/install_torch.sh 92 | python3 -c "import torch; print('torch version:', torch.__version__)" 93 | 94 | - name: Download cudnn 8.0 95 | env: 96 | cuda: ${{ matrix.cuda }} 97 | run: | 98 | ./scripts/github_actions/install_cudnn.sh 99 | 100 | - name: Build 101 | shell: bash 102 | run: | 103 | mkdir build_release 104 | cd build_release 105 | cmake -DCMAKE_CXX_STANDARD=17 .. 106 | make VERBOSE=1 -j3 107 | 108 | - name: Run tests 109 | shell: bash 110 | run: | 111 | cd build_release 112 | ctest --output-on-failure 113 | -------------------------------------------------------------------------------- /.github/workflows/run-tests-windows-cpu.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang) 2 | 3 | # See ../../LICENSE for clarification regarding multiple authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | name: Run tests windows cpu 18 | 19 | on: 20 | push: 21 | branches: 22 | - master 23 | 24 | pull_request: 25 | branches: 26 | - master 27 | 28 | jobs: 29 | generate_build_matrix: 30 | # see https://github.com/pytorch/pytorch/pull/50633 31 | runs-on: ubuntu-latest 32 | outputs: 33 | matrix: ${{ steps.set-matrix.outputs.matrix }} 34 | steps: 35 | - uses: actions/checkout@v4 36 | with: 37 | fetch-depth: 0 38 | - name: Generating build matrix 39 | id: set-matrix 40 | run: | 41 | # outputting for debugging purposes 42 | python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch 43 | MATRIX=$(python scripts/github_actions/generate_build_matrix.py --test-only-latest-torch) 44 | echo "::set-output name=matrix::${MATRIX}" 45 | 46 | run_tests_windows_cpu: 47 | # see https://github.com/actions/virtual-environments/blob/win19/20210525.0/images/win/Windows2019-Readme.md 48 | needs: generate_build_matrix 49 | runs-on: windows-latest 50 | strategy: 51 | fail-fast: false 52 | matrix: 53 | ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }} 54 | 55 | steps: 56 | - uses: actions/checkout@v4 57 | with: 58 | fetch-depth: 0 59 | 60 | # see https://github.com/microsoft/setup-msbuild 61 | - name: Add msbuild to PATH 62 | uses: microsoft/setup-msbuild@v1.0.2 63 | 64 | - name: Setup Python ${{ matrix.python-version }} 65 | uses: actions/setup-python@v2 66 | with: 67 | python-version: ${{ matrix.python-version }} 68 | 69 | - name: Display Python version 70 | run: python -c "import sys; print(sys.version)" 71 | 72 | - name: Install PyTorch ${{ matrix.torch }} 73 | run: | 74 | pip3 install -qq torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/torch_stable.html || pip3 install -qq torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/torch/ 75 | pip3 install -qq wheel twine dataclasses numpy typing_extensions soundfile 76 | 77 | - name: Display CMake version 78 | run: | 79 | cmake --version 80 | cmake --help 81 | 82 | - 
name: Configure CMake 83 | shell: bash 84 | run: | 85 | mkdir build_release 86 | cd build_release 87 | cmake -DCMAKE_BUILD_TYPE=$BUILD_TYPE .. 88 | ls -lh 89 | 90 | - name: Build kaldifeat 91 | run: | 92 | cd build_release 93 | cmake --build -DCMAKE_CXX_STANDARD=17 . --target _kaldifeat --config Release 94 | 95 | - name: Display generated files 96 | shell: bash 97 | run: | 98 | cd build_release 99 | ls -lh lib/*/* 100 | 101 | - name: Build wheel 102 | shell: bash 103 | run: | 104 | python3 setup.py bdist_wheel 105 | ls -lh dist/ 106 | pip install ./dist/*.whl 107 | python3 -c "import kaldifeat; print(kaldifeat.__version__)" 108 | 109 | - name: Upload Wheel 110 | uses: actions/upload-artifact@v4 111 | with: 112 | name: python-${{ matrix.python-version }}-${{ matrix.os }}-cpu 113 | path: dist/*.whl 114 | 115 | - name: Build tests 116 | shell: bash 117 | run: | 118 | cd build_release 119 | cmake -DCMAKE_CXX_STANDARD=17 --build . --target ALL_BUILD --config Release 120 | ls -lh bin/*/* 121 | ctest -C Release --verbose --output-on-failure 122 | -------------------------------------------------------------------------------- /.github/workflows/style_check.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com) 2 | 3 | # See ../../LICENSE for clarification regarding multiple authors 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | name: style_check 18 | 19 | on: 20 | push: 21 | branches: 22 | - master 23 | pull_request: 24 | branches: 25 | - master 26 | 27 | jobs: 28 | style_check: 29 | runs-on: ${{ matrix.os }} 30 | strategy: 31 | matrix: 32 | os: [ubuntu-latest, macos-latest] 33 | python-version: ["3.8"] 34 | fail-fast: false 35 | 36 | steps: 37 | - uses: actions/checkout@v4 38 | with: 39 | fetch-depth: 0 40 | 41 | - name: Setup Python ${{ matrix.python-version }} 42 | uses: actions/setup-python@v1 43 | with: 44 | python-version: ${{ matrix.python-version }} 45 | 46 | - name: Install Python dependencies 47 | run: | 48 | python3 -m pip install --upgrade pip black==21.6b0 flake8==3.9.2 click==8.0.4 49 | # See https://github.com/psf/black/issues/2964 50 | # The version of click should be selected from 8.0.0, 8.0.1, 8.0.2, 8.0.3, and 8.0.4 51 | 52 | - name: Run flake8 53 | shell: bash 54 | working-directory: ${{github.workspace}} 55 | run: | 56 | # stop the build if there are Python syntax errors or undefined names 57 | flake8 . --count --show-source --statistics 58 | flake8 . 59 | 60 | - name: Run black 61 | shell: bash 62 | working-directory: ${{github.workspace}} 63 | run: | 64 | black --check --diff . 
65 | -------------------------------------------------------------------------------- /.github/workflows/test-wheels.yml: -------------------------------------------------------------------------------- 1 | name: Test pre-compiled wheels 2 | on: 3 | workflow_dispatch: 4 | inputs: 5 | torch_version: 6 | description: "torch version, e.g., 2.0.1" 7 | required: true 8 | kaldifeat_version: 9 | description: "kaldifeat version, e.g., 1.25.0.dev20230726" 10 | required: true 11 | 12 | jobs: 13 | Test_pre_compiled_wheels: 14 | name: ${{ matrix.os }} ${{ github.event.inputs.torch_version }} ${{ github.event.inputs.kaldifeat_version }} ${{ matrix.python-version }} 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | os: [ubuntu-latest, windows-latest, macos-latest] 20 | python-version: ["3.8", "3.9", "3.10"] 21 | steps: 22 | # refer to https://github.com/actions/checkout 23 | - uses: actions/checkout@v4 24 | with: 25 | fetch-depth: 0 26 | 27 | - name: Setup Python ${{ matrix.python-version }} 28 | uses: actions/setup-python@v2 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | 32 | - name: Display Python version 33 | run: python -c "import sys; print(sys.version)" 34 | 35 | - name: Install dependencies 36 | shell: bash 37 | run: | 38 | pip install numpy 39 | 40 | - name: Install torch 41 | if: startsWith(matrix.os, 'macos') 42 | shell: bash 43 | run: | 44 | pip install torch==${{ github.event.inputs.torch_version }} 45 | 46 | - name: Install torch 47 | if: startsWith(matrix.os, 'ubuntu') || startsWith(matrix.os, 'windows') 48 | shell: bash 49 | run: | 50 | pip install torch==${{ github.event.inputs.torch_version }}+cpu -f https://download.pytorch.org/whl/torch_stable.html || pip install torch==${{ github.event.inputs.torch_version }}+cpu -f https://download.pytorch.org/whl/torch/ 51 | 52 | - name: Install kaldifeat 53 | shell: bash 54 | run: | 55 | pip install kaldifeat==${{ github.event.inputs.kaldifeat_version }}+cpu.torch${{ 
github.event.inputs.torch_version }} -f https://csukuangfj.github.io/kaldifeat/cpu.html 56 | 57 | - name: Run tests 58 | shell: bash 59 | run: | 60 | cd kaldifeat/python/tests 61 | 62 | python3 -c "import kaldifeat; print(kaldifeat.__file__)" 63 | python3 -c "import kaldifeat; print(kaldifeat.__version__)" 64 | 65 | python3 ./test_fbank_options.py 66 | 67 | python3 ./test_mfcc_options.py 68 | -------------------------------------------------------------------------------- /.github/workflows/windows-x64-cpu-wheels.yml: -------------------------------------------------------------------------------- 1 | name: build-wheels-cpu-win64 2 | 3 | on: 4 | push: 5 | branches: 6 | - fix-wheel-2 7 | tags: 8 | - '*' 9 | workflow_dispatch: 10 | 11 | concurrency: 12 | group: build-wheels-cpu-win64-${{ github.ref }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | generate_build_matrix: 17 | # see https://github.com/pytorch/pytorch/pull/50633 18 | runs-on: ubuntu-latest 19 | outputs: 20 | matrix: ${{ steps.set-matrix.outputs.matrix }} 21 | steps: 22 | - uses: actions/checkout@v4 23 | with: 24 | fetch-depth: 0 25 | - name: Generating build matrix 26 | id: set-matrix 27 | run: | 28 | # outputting for debugging purposes 29 | # python ./scripts/github_actions/generate_build_matrix.py --for-windows 30 | # MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-windows) 31 | 32 | python ./scripts/github_actions/generate_build_matrix.py --for-windows --test-only-latest-torch 33 | MATRIX=$(python ./scripts/github_actions/generate_build_matrix.py --for-windows --test-only-latest-torch) 34 | echo "::set-output name=matrix::${MATRIX}" 35 | 36 | build_wheels_win64_cpu: 37 | needs: generate_build_matrix 38 | name: ${{ matrix.torch }} ${{ matrix.python-version }} 39 | runs-on: windows-latest 40 | strategy: 41 | fail-fast: false 42 | matrix: 43 | ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }} 44 | 45 | steps: 46 | - uses: actions/checkout@v4 47 | with: 48 | 
fetch-depth: 0 49 | 50 | # see https://cibuildwheel.readthedocs.io/en/stable/changelog/ 51 | # for a list of versions 52 | - name: Build wheels 53 | uses: pypa/cibuildwheel@v2.22.0 54 | env: 55 | CIBW_BEFORE_BUILD: pip install torch==${{ matrix.torch}}+cpu -f https://download.pytorch.org/whl/torch_stable.html cmake numpy || pip install torch==${{ matrix.torch}}+cpu -f https://download.pytorch.org/whl/torch/ cmake numpy 56 | CIBW_BUILD: ${{ matrix.python-version }}-win_amd64 57 | CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: "" 58 | CIBW_BUILD_VERBOSITY: 3 59 | 60 | - name: Display wheels 61 | shell: bash 62 | run: | 63 | ls -lh ./wheelhouse/ 64 | 65 | - name: Upload Wheel 66 | uses: actions/upload-artifact@v4 67 | with: 68 | name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-windows-latest-cpu 69 | path: wheelhouse/*.whl 70 | 71 | # https://huggingface.co/docs/hub/spaces-github-actions 72 | - name: Publish to huggingface 73 | if: github.repository_owner == 'csukuangfj' 74 | env: 75 | HF_TOKEN: ${{ secrets.HF_TOKEN }} 76 | uses: nick-fields/retry@v2 77 | with: 78 | max_attempts: 20 79 | timeout_seconds: 200 80 | shell: bash 81 | command: | 82 | git config --global user.email "csukuangfj@gmail.com" 83 | git config --global user.name "Fangjun Kuang" 84 | 85 | rm -rf huggingface 86 | export GIT_LFS_SKIP_SMUDGE=1 87 | 88 | git clone https://huggingface.co/csukuangfj/kaldifeat huggingface 89 | cd huggingface 90 | git pull 91 | 92 | d=cpu/1.25.5.dev20241029/windows 93 | mkdir -p $d 94 | cp -v ../wheelhouse/*.whl ./$d 95 | git status 96 | git lfs track "*.whl" 97 | git add . 
98 | git commit -m "upload windows-cpu wheel for torch ${{ matrix.torch }} python ${{ matrix.python-version }}" 99 | git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kaldifeat main 100 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | build*/ 3 | *.egg-info*/ 4 | dist/ 5 | __pycache__/ 6 | test-1hour.wav 7 | path.sh 8 | torch_version.py 9 | cpu*.html 10 | cuda*.html 11 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 21.6b0 4 | hooks: 5 | - id: black 6 | args: [--line-length=80] 7 | 8 | - repo: https://github.com/PyCQA/flake8 9 | rev: 3.9.2 10 | hooks: 11 | - id: flake8 12 | args: [--max-line-length=80] 13 | 14 | - repo: https://github.com/pycqa/isort 15 | rev: 5.9.2 16 | hooks: 17 | - id: isort 18 | args: [--profile=black] 19 | 20 | - repo: https://github.com/pre-commit/pre-commit-hooks 21 | rev: v4.0.1 22 | hooks: 23 | - id: check-executables-have-shebangs 24 | - id: end-of-file-fixer 25 | - id: mixed-line-ending 26 | - id: trailing-whitespace 27 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 Xiaomi Corporation (author: Fangjun Kuang) 2 | 3 | cmake_minimum_required(VERSION 3.8 FATAL_ERROR) 4 | 5 | project(kaldifeat) 6 | 7 | # remember to change the version in 8 | # scripts/conda/kaldifeat/meta.yaml 9 | # scripts/conda-cpu/kaldifeat/meta.yaml 10 | set(kaldifeat_VERSION "1.25.5") 11 | 12 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") 13 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") 14 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY 
"${CMAKE_BINARY_DIR}/bin") 15 | 16 | set(CMAKE_SKIP_BUILD_RPATH FALSE) 17 | set(BUILD_RPATH_USE_ORIGIN TRUE) 18 | set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) 19 | 20 | 21 | if(NOT APPLE) 22 | set(kaldifeat_rpath_origin "$ORIGIN") 23 | else() 24 | set(kaldifeat_rpath_origin "@loader_path") 25 | endif() 26 | 27 | set(CMAKE_INSTALL_RPATH ${kaldifeat_rpath_origin}) 28 | set(CMAKE_BUILD_RPATH ${kaldifeat_rpath_origin}) 29 | 30 | 31 | if(NOT CMAKE_BUILD_TYPE) 32 | message(STATUS "No CMAKE_BUILD_TYPE given, default to Release") 33 | set(CMAKE_BUILD_TYPE Release) 34 | endif() 35 | 36 | if (NOT CMAKE_CXX_STANDARD) 37 | set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ version to be used.") 38 | endif() 39 | message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}") 40 | set(CMAKE_CXX_EXTENSIONS OFF) 41 | 42 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules) 43 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) 44 | 45 | option(BUILD_SHARED_LIBS "Whether to build shared libraries" ON) 46 | option(kaldifeat_BUILD_TESTS "Whether to build tests or not" OFF) 47 | option(kaldifeat_BUILD_PYMODULE "Whether to build python module or not" ON) 48 | 49 | message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}") 50 | 51 | if(BUILD_SHARED_LIBS AND MSVC) 52 | set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) 53 | endif() 54 | 55 | if(kaldifeat_BUILD_PYMODULE) 56 | include(pybind11) 57 | endif() 58 | # to prevent cmake from trying to link with system installed mkl since we not directly use it 59 | # mkl libraries should be linked with pytorch already 60 | # ref: https://github.com/pytorch/pytorch/blob/master/cmake/public/mkl.cmake 61 | set(CMAKE_DISABLE_FIND_PACKAGE_MKL TRUE) 62 | include(torch) 63 | 64 | if(kaldifeat_BUILD_TESTS) 65 | include(googletest) 66 | enable_testing() 67 | endif() 68 | 69 | 70 | if(WIN32) 71 | # disable various warnings for MSVC 72 | # 4624: destructor was implicitly defined as deleted because a base class destructor is inaccessible 
or deleted 73 | set(disabled_warnings 74 | /wd4624 75 | ) 76 | message(STATUS "Disabled warnings: ${disabled_warnings}") 77 | foreach(w IN LISTS disabled_warnings) 78 | string(APPEND CMAKE_CXX_FLAGS " ${w} ") 79 | endforeach() 80 | endif() 81 | 82 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}) 83 | 84 | message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") 85 | 86 | message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}") 87 | 88 | add_subdirectory(kaldifeat) 89 | 90 | # TORCH_VERSION is defined in cmake/torch.cmake 91 | configure_file( 92 | ${PROJECT_SOURCE_DIR}/kaldifeat/python/kaldifeat/torch_version.py.in 93 | ${PROJECT_SOURCE_DIR}/kaldifeat/python/kaldifeat/torch_version.py @ONLY 94 | ) 95 | 96 | configure_file( 97 | ${PROJECT_SOURCE_DIR}/cmake/kaldifeatConfigVersion.cmake.in 98 | ${PROJECT_BINARY_DIR}/kaldifeatConfigVersion.cmake 99 | @ONLY 100 | ) 101 | 102 | configure_file( 103 | ${PROJECT_SOURCE_DIR}/cmake/kaldifeatConfig.cmake.in 104 | ${PROJECT_BINARY_DIR}/kaldifeatConfig.cmake 105 | @ONLY 106 | ) 107 | 108 | install(FILES 109 | ${PROJECT_BINARY_DIR}/kaldifeatConfigVersion.cmake 110 | ${PROJECT_BINARY_DIR}/kaldifeatConfig.cmake 111 | DESTINATION share/cmake/kaldifeat 112 | ) 113 | 114 | install(FILES 115 | ${PROJECT_SOURCE_DIR}/kaldifeat/python/kaldifeat/torch_version.py 116 | DESTINATION ./ 117 | ) 118 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | include CMakeLists.txt 4 | exclude pyproject.toml 5 | recursive-include kaldifeat *.* 6 | recursive-include cmake *.* 7 | -------------------------------------------------------------------------------- /cmake/Modules/FetchContent/CMakeLists.cmake.in: -------------------------------------------------------------------------------- 1 | # Distributed under the OSI-approved BSD 3-Clause License. 
See accompanying 2 | # file Copyright.txt or https://cmake.org/licensing for details. 3 | 4 | cmake_minimum_required(VERSION ${CMAKE_VERSION}) 5 | 6 | # We name the project and the target for the ExternalProject_Add() call 7 | # to something that will highlight to the user what we are working on if 8 | # something goes wrong and an error message is produced. 9 | 10 | project(${contentName}-populate NONE) 11 | 12 | include(ExternalProject) 13 | ExternalProject_Add(${contentName}-populate 14 | ${ARG_EXTRA} 15 | SOURCE_DIR "${ARG_SOURCE_DIR}" 16 | BINARY_DIR "${ARG_BINARY_DIR}" 17 | CONFIGURE_COMMAND "" 18 | BUILD_COMMAND "" 19 | INSTALL_COMMAND "" 20 | TEST_COMMAND "" 21 | ) 22 | -------------------------------------------------------------------------------- /cmake/Modules/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## FetchContent 3 | 4 | `FetchContent.cmake` and `FetchContent/CMakeLists.cmake.in` 5 | are copied from `cmake/3.11.0/share/cmake-3.11/Modules`. 6 | -------------------------------------------------------------------------------- /cmake/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csukuangfj/kaldifeat/0ecdee6e88ed82d9504f9332fde92e142531ee6e/cmake/__init__.py -------------------------------------------------------------------------------- /cmake/googletest.cmake: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fangjun Kuang (csukuangfj@gmail.com) 2 | # See ../LICENSE for clarification regarding multiple authors 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | function(download_googltest) 17 | if(CMAKE_VERSION VERSION_LESS 3.11) 18 | # FetchContent is available since 3.11, 19 | # we've copied it to ${CMAKE_SOURCE_DIR}/cmake/Modules 20 | # so that it can be used in lower CMake versions. 21 | message(STATUS "Use FetchContent provided by kaldifeat") 22 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules) 23 | endif() 24 | 25 | include(FetchContent) 26 | 27 | set(googletest_URL "https://github.com/google/googletest/archive/refs/tags/v1.13.0.tar.gz") 28 | set(googletest_URL2 "https://huggingface.co/csukuangfj/k2-cmake-deps/resolve/main/googletest-1.13.0.tar.gz") 29 | set(googletest_HASH "SHA256=ad7fdba11ea011c1d925b3289cf4af2c66a352e18d4c7264392fead75e919363") 30 | 31 | # If you don't have access to the Internet, 32 | # please pre-download googletest 33 | set(possible_file_locations 34 | $ENV{HOME}/Downloads/googletest-1.13.0.tar.gz 35 | ${PROJECT_SOURCE_DIR}/googletest-1.13.0.tar.gz 36 | ${PROJECT_BINARY_DIR}/googletest-1.13.0.tar.gz 37 | /tmp/googletest-1.13.0.tar.gz 38 | /star-fj/fangjun/download/github/googletest-1.13.0.tar.gz 39 | ) 40 | 41 | foreach(f IN LISTS possible_file_locations) 42 | if(EXISTS ${f}) 43 | set(googletest_URL "${f}") 44 | file(TO_CMAKE_PATH "${googletest_URL}" googletest_URL) 45 | set(googletest_URL2) 46 | break() 47 | endif() 48 | endforeach() 49 | 50 | set(BUILD_GMOCK ON CACHE BOOL "" FORCE) 51 | set(INSTALL_GTEST OFF CACHE BOOL "" FORCE) 52 | set(gtest_disable_pthreads ON CACHE BOOL "" FORCE) 53 | set(gtest_force_shared_crt ON CACHE 
BOOL "" FORCE) 54 | 55 | FetchContent_Declare(googletest 56 | URL 57 | ${googletest_URL} 58 | ${googletest_URL2} 59 | URL_HASH ${googletest_HASH} 60 | ) 61 | 62 | FetchContent_GetProperties(googletest) 63 | if(NOT googletest_POPULATED) 64 | message(STATUS "Downloading googletest from ${googletest_URL}") 65 | FetchContent_Populate(googletest) 66 | endif() 67 | message(STATUS "googletest is downloaded to ${googletest_SOURCE_DIR}") 68 | message(STATUS "googletest's binary dir is ${googletest_BINARY_DIR}") 69 | 70 | if(APPLE) 71 | set(CMAKE_MACOSX_RPATH ON) # to solve the following warning on macOS 72 | endif() 73 | #[==[ 74 | -- Generating done 75 | Policy CMP0042 is not set: MACOSX_RPATH is enabled by default. Run "cmake 76 | --help-policy CMP0042" for policy details. Use the cmake_policy command to 77 | set the policy and suppress this warning. 78 | 79 | MACOSX_RPATH is not specified for the following targets: 80 | 81 | gmock 82 | gmock_main 83 | gtest 84 | gtest_main 85 | 86 | This warning is for project developers. Use -Wno-dev to suppress it. 
87 | ]==] 88 | 89 | add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL) 90 | 91 | target_include_directories(gtest 92 | INTERFACE 93 | ${googletest_SOURCE_DIR}/googletest/include 94 | ${googletest_SOURCE_DIR}/googlemock/include 95 | ) 96 | endfunction() 97 | 98 | download_googltest() 99 | -------------------------------------------------------------------------------- /cmake/kaldifeatConfig.cmake.in: -------------------------------------------------------------------------------- 1 | # Findkaldifeat 2 | # ------------- 3 | # 4 | # Finds the kaldifeat library 5 | # 6 | # This will define the following variables: 7 | # 8 | # KALDIFEAT_FOUND -- True if the system has the kaldifeat library 9 | # KALDIFEAT_INCLUDE_DIRS -- The include directories for kaldifeat 10 | # KALDIFEAT_LIBRARIES -- Libraries to link against 11 | # KALDIFEAT_CXX_FLAGS -- Additional (required) compiler flags 12 | # KALDIFEAT_TORCH_VERSION_MAJOR -- The major version of PyTorch used to compile kaldifeat 13 | # KALDIFEAT_TORCH_VERSION_MINOR -- The minor version of PyTorch used to compile kaldifeat 14 | # KALDIFEAT_VERSION -- The version of kaldifeat 15 | # 16 | # and the following imported targets: 17 | # 18 | # kaldifeat_core 19 | 20 | # This file is modified from pytorch/cmake/TorchConfig.cmake.in 21 | 22 | set(KALDIFEAT_CXX_FLAGS "@CMAKE_CXX_FLAGS@") 23 | set(KALDIFEAT_TORCH_VERSION_MAJOR @KALDIFEAT_TORCH_VERSION_MAJOR@) 24 | set(KALDIFEAT_TORCH_VERSION_MINOR @KALDIFEAT_TORCH_VERSION_MINOR@) 25 | set(KALDIFEAT_VERSION @kaldifeat_VERSION@) 26 | 27 | if(DEFINED ENV{KALDIFEAT_INSTALL_PREFIX}) 28 | set(KALDIFEAT_INSTALL_PREFIX $ENV{KALDIFEAT_INSTALL_PREFIX}) 29 | else() 30 | # Assume we are in /share/cmake/kaldifeat/kaldifeatConfig.cmake 31 | get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) 32 | get_filename_component(KALDIFEAT_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE) 33 | endif() 34 | 35 | set(KALDIFEAT_INCLUDE_DIRS 
${KALDIFEAT_INSTALL_PREFIX}/include) 36 | 37 | set(KALDIFEAT_LIBRARIES kaldifeat_core) 38 | 39 | foreach(lib IN LISTS KALDIFEAT_LIBRARIES) 40 | find_library(location_${lib} ${lib} 41 | PATHS 42 | "${KALDIFEAT_INSTALL_PREFIX}/lib" 43 | "${KALDIFEAT_INSTALL_PREFIX}/lib64" 44 | ) 45 | 46 | if(NOT MSVC) 47 | add_library(${lib} SHARED IMPORTED) 48 | else() 49 | add_library(${lib} STATIC IMPORTED) 50 | endif() 51 | 52 | set_target_properties(${lib} PROPERTIES 53 | INTERFACE_INCLUDE_DIRECTORIES "${KALDIFEAT_INCLUDE_DIRS}" 54 | IMPORTED_LOCATION "${location_${lib}}" 55 | CXX_STANDARD 14 56 | ) 57 | 58 | set_property(TARGET ${lib} PROPERTY INTERFACE_COMPILE_OPTIONS @CMAKE_CXX_FLAGS@) 59 | endforeach() 60 | 61 | include(FindPackageHandleStandardArgs) 62 | 63 | find_package_handle_standard_args(kaldifeat DEFAULT_MSG 64 | location_kaldifeat_core 65 | ) 66 | -------------------------------------------------------------------------------- /cmake/kaldifeatConfigVersion.cmake.in: -------------------------------------------------------------------------------- 1 | # This file is modified from pytorch/cmake/TorchConfigVersion.cmake.in 2 | set(PACKAGE_VERSION "@kaldifeat_VERSION@") 3 | 4 | # Check whether the requested PACKAGE_FIND_VERSION is compatible 5 | if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}") 6 | set(PACKAGE_VERSION_COMPATIBLE FALSE) 7 | else() 8 | set(PACKAGE_VERSION_COMPATIBLE TRUE) 9 | if("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}") 10 | set(PACKAGE_VERSION_EXACT TRUE) 11 | endif() 12 | endif() 13 | -------------------------------------------------------------------------------- /cmake/pybind11.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Fangjun Kuang (csukuangfj@gmail.com) 2 | # See ../LICENSE for clarification regarding multiple authors 3 | 4 | function(download_pybind11) 5 | if(CMAKE_VERSION VERSION_LESS 3.11) 6 | list(APPEND CMAKE_MODULE_PATH 
${CMAKE_SOURCE_DIR}/cmake/Modules) 7 | endif() 8 | 9 | include(FetchContent) 10 | 11 | set(pybind11_URL "https://github.com/pybind/pybind11/archive/refs/tags/v2.12.0.tar.gz") 12 | set(pybind11_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/pybind11-2.12.0.tar.gz") 13 | set(pybind11_HASH "SHA256=bf8f242abd1abcd375d516a7067490fb71abd79519a282d22b6e4d19282185a7") 14 | 15 | # If you don't have access to the Internet, 16 | # please pre-download pybind11 17 | set(possible_file_locations 18 | $ENV{HOME}/Downloads/pybind11-2.12.0.tar.gz 19 | ${CMAKE_SOURCE_DIR}/pybind11-2.12.0.tar.gz 20 | ${CMAKE_BINARY_DIR}/pybind11-2.12.0.tar.gz 21 | /tmp/pybind11-2.12.0.tar.gz 22 | /star-fj/fangjun/download/github/pybind11-2.12.0.tar.gz 23 | ) 24 | 25 | foreach(f IN LISTS possible_file_locations) 26 | if(EXISTS ${f}) 27 | set(pybind11_URL "${f}") 28 | file(TO_CMAKE_PATH "${pybind11_URL}" pybind11_URL) 29 | set(pybind11_URL2) 30 | break() 31 | endif() 32 | endforeach() 33 | 34 | FetchContent_Declare(pybind11 35 | URL 36 | ${pybind11_URL} 37 | ${pybind11_URL2} 38 | URL_HASH ${pybind11_HASH} 39 | ) 40 | 41 | FetchContent_GetProperties(pybind11) 42 | if(NOT pybind11_POPULATED) 43 | message(STATUS "Downloading pybind11 from ${pybind11_URL}") 44 | FetchContent_Populate(pybind11) 45 | endif() 46 | message(STATUS "pybind11 is downloaded to ${pybind11_SOURCE_DIR}") 47 | add_subdirectory(${pybind11_SOURCE_DIR} ${pybind11_BINARY_DIR} EXCLUDE_FROM_ALL) 48 | endfunction() 49 | 50 | download_pybind11() 51 | -------------------------------------------------------------------------------- /cmake/torch.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 Fangjun Kuang (csukuangfj@gmail.com) 2 | # See ../LICENSE for clarification regarding multiple authors 3 | 4 | # PYTHON_EXECUTABLE is set by cmake/pybind11.cmake 5 | message(STATUS "Python executable: ${PYTHON_EXECUTABLE}") 6 | execute_process( 7 | COMMAND 
"${PYTHON_EXECUTABLE}" -c "import os; import torch; print(os.path.dirname(torch.__file__))" 8 | OUTPUT_STRIP_TRAILING_WHITESPACE 9 | OUTPUT_VARIABLE TORCH_DIR 10 | ) 11 | message(STATUS "TORCH_DIR: ${TORCH_DIR}") 12 | 13 | list(APPEND CMAKE_PREFIX_PATH "${TORCH_DIR}") 14 | find_package(Torch REQUIRED) 15 | 16 | # set the global CMAKE_CXX_FLAGS so that 17 | # kaldifeat uses the same ABI flag as PyTorch 18 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") 19 | 20 | execute_process( 21 | COMMAND "${PYTHON_EXECUTABLE}" -c "import torch; print(torch.__version__)" 22 | OUTPUT_STRIP_TRAILING_WHITESPACE 23 | OUTPUT_VARIABLE TORCH_VERSION 24 | ) 25 | 26 | message(STATUS "PyTorch version: ${TORCH_VERSION}") 27 | 28 | execute_process( 29 | COMMAND "${PYTHON_EXECUTABLE}" -c "import torch; print(torch.__version__.split('.')[0])" 30 | OUTPUT_STRIP_TRAILING_WHITESPACE 31 | OUTPUT_VARIABLE KALDIFEAT_TORCH_VERSION_MAJOR 32 | ) 33 | 34 | execute_process( 35 | COMMAND "${PYTHON_EXECUTABLE}" -c "import torch; print(torch.__version__.split('.')[1])" 36 | OUTPUT_STRIP_TRAILING_WHITESPACE 37 | OUTPUT_VARIABLE KALDIFEAT_TORCH_VERSION_MINOR 38 | ) 39 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | dataclasses 2 | recommonmark 3 | sphinx<7.0 4 | sphinx-autodoc-typehints 5 | sphinx_rtd_theme 6 | sphinxcontrib-bibtex 7 | -------------------------------------------------------------------------------- /doc/source/_static/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csukuangfj/kaldifeat/0ecdee6e88ed82d9504f9332fde92e142531ee6e/doc/source/_static/.gitkeep -------------------------------------------------------------------------------- /doc/source/images/README.md: 
-------------------------------------------------------------------------------- 1 | ## File description 2 | 3 | is used to create the following files: 4 | 5 | - ./os.svg 6 | - ./python_ge_3.6-blue.svg 7 | - ./cuda_ge_10.1-orange.svg 8 | - ./pytorch_ge_1.5.0-green.svg 9 | -------------------------------------------------------------------------------- /doc/source/images/cuda_ge_10.1-orange.svg: -------------------------------------------------------------------------------- 1 | cuda: >= 10.1cuda>= 10.1 2 | -------------------------------------------------------------------------------- /doc/source/images/os-green.svg: -------------------------------------------------------------------------------- 1 | os: Linux | macOS | WindowsosLinux | macOS | Windows 2 | -------------------------------------------------------------------------------- /doc/source/images/python_ge_3.6-blue.svg: -------------------------------------------------------------------------------- 1 | python: >= 3.6python>= 3.6 2 | -------------------------------------------------------------------------------- /doc/source/images/pytorch_ge_1.5.0-green.svg: -------------------------------------------------------------------------------- 1 | pytorch: >= 1.5.0pytorch>= 1.5.0 2 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. kaldifeat documentation master file, created by 2 | sphinx-quickstart on Fri Jul 16 20:15:27 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | kaldifeat 7 | ========= 8 | 9 | 10 | .. 
toctree:: 11 | :maxdepth: 2 12 | :caption: Contents 13 | 14 | intro 15 | installation/index 16 | usage/index 17 | -------------------------------------------------------------------------------- /doc/source/installation/faq.rst: -------------------------------------------------------------------------------- 1 | FAQs 2 | ==== 3 | 4 | How to install a CUDA version of kaldifeat from source 5 | ------------------------------------------------------ 6 | 7 | You need to first install a CUDA version of `PyTorch`_ and then install `kaldifeat`_. 8 | 9 | .. note:: 10 | 11 | You can use a CUDA version of `kaldifeat`_ on machines with no GPUs. 12 | 13 | How to install a CPU version of kaldifeat from source 14 | ----------------------------------------------------- 15 | 16 | You need to first install a CPU version of `PyTorch`_ and then install `kaldifeat`_. 17 | 18 | How to fix `Caffe2: Cannot find cuDNN library` 19 | ---------------------------------------------- 20 | 21 | .. code-block:: 22 | 23 | Your installed Caffe2 version uses cuDNN but I cannot find the cuDNN 24 | libraries. Please set the proper cuDNN prefixes and / or install cuDNN. 25 | 26 | You will have such an error when you want to install a CUDA version of `kaldifeat`_ 27 | by ``pip install kaldifeat`` or from source. 28 | 29 | You need to first install cuDNN. Assume you have installed cuDNN to the 30 | path ``/path/to/cudnn``. You can fix the error by using ``one`` of the following 31 | commands. 32 | 33 | (1) Fix for installation using ``pip install`` 34 | 35 | .. code-block:: bash 36 | 37 | export KALDIFEAT_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DCUDNN_LIBRARY_PATH=/path/to/cudnn/lib/libcudnn.so -DCUDNN_INCLUDE_PATH=/path/to/cudnn/include" 38 | pip install --verbose kaldifeat 39 | 40 | (2) Fix for installation from source 41 | 42 | .. 
code-block:: bash 43 | 44 | mkdir /some/path 45 | git clone https://github.com/csukuangfj/kaldifeat.git 46 | cd kaldifeat 47 | export KALDIFEAT_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=Release -DCUDNN_LIBRARY_PATH=/path/to/cudnn/lib/libcudnn.so -DCUDNN_INCLUDE_PATH=/path/to/cudnn/include" 48 | python setup.py install 49 | -------------------------------------------------------------------------------- /doc/source/installation/from_source.rst: -------------------------------------------------------------------------------- 1 | .. _from source: 2 | 3 | Install kaldifeat from source 4 | ============================= 5 | 6 | You have to install ``cmake`` and `PyTorch`_ first. 7 | 8 | - ``cmake`` 3.11 is known to work. Other CMake versions may also work. 9 | - `PyTorch`_ >= 1.5.0 is known to work. Other PyTorch versions may also work. 10 | - Python >= 3.6 11 | - A compiler that supports C++ 14 12 | 13 | 14 | The commands to install `kaldifeat`_ from source are: 15 | 16 | .. code-block:: bash 17 | 18 | git clone https://github.com/csukuangfj/kaldifeat 19 | cd kaldifeat 20 | python3 setup.py install 21 | 22 | To test that you have installed `kaldifeat`_ successfully, please run: 23 | 24 | .. code-block:: bash 25 | 26 | python3 -c "import kaldifeat; print(kaldifeat.__version__)" 27 | 28 | It should print the version, e.g., ``1.0``. 29 | 30 | .. _from PyPI: 31 | 32 | Install kaldifeat from PyPI 33 | --------------------------- 34 | 35 | The command to install `kaldifeat`_ from PyPI is: 36 | 37 | .. code-block:: bash 38 | 39 | pip install --verbose kaldifeat 40 | 41 | To test that you have installed `kaldifeat`_ successfully, please run: 42 | 43 | .. code-block:: bash 44 | 45 | python3 -c "import kaldifeat; print(kaldifeat.__version__)" 46 | 47 | It should print the version, e.g., ``1.0``. 
48 | -------------------------------------------------------------------------------- /doc/source/installation/index.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | .. toctree:: 5 | :maxdepth: 3 6 | 7 | ./from_wheels.rst 8 | ./from_source.rst 9 | ./faq.rst 10 | 11 | 12 | -------------------------------------------------------------------------------- /doc/source/intro.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | `kaldifeat`_ implements 5 | speech feature extraction algorithms **compatible** with `Kaldi`_ using `PyTorch`_, 6 | supporting CUDA as well as autograd. 7 | 8 | `kaldifeat`_ has the following features: 9 | 10 | - Fully compatible with `Kaldi`_ 11 | 12 | .. note:: 13 | 14 | The underlying C++ code is copied & modified from `Kaldi`_ directly. 15 | It is rewritten with `PyTorch` C++ APIs. 16 | 17 | - Provide not only ``C++ APIs`` but also ``Python APIs`` 18 | 19 | .. note:: 20 | 21 | You can access `kaldifeat`_ from ``Python``. 22 | 23 | - Support autograd 24 | - Support ``CUDA`` and ``CPU`` 25 | 26 | .. note:: 27 | 28 | You can use CUDA for feature extraction. 29 | 30 | - Support ``online`` (i.e., ``streaming``) and ``offline`` (i.e., ``non-streaming``) 31 | feature extraction 32 | - Support chunk-based processing 33 | 34 | .. note:: 35 | 36 | This is especially useful if you want to process audios of several 37 | hours long, which may cause OOM if you send them for computation at once. 38 | With chunk-based processing, you can process audios of arbitrary length. 39 | 40 | - Support batch processing 41 | 42 | .. note:: 43 | 44 | With `kaldifeat`_ you can extract features for a batch of audios 45 | 46 | 47 | .. 
see https://sublime-and-sphinx-guide.readthedocs.io/en/latest/tables.html 48 | 49 | Currently implemented speech features and their counterparts in `Kaldi`_ are 50 | listed in the following table. 51 | 52 | .. list-table:: Supported speech features 53 | :widths: 50 50 54 | :header-rows: 1 55 | 56 | * - Supported speech features 57 | - Counterpart in `Kaldi`_ 58 | * - `kaldifeat.Fbank`_ 59 | - `compute-fbank-feats`_ 60 | * - `kaldifeat.Mfcc`_ 61 | - `compute-mfcc-feats`_ 62 | * - `kaldifeat.Plp`_ 63 | - `compute-plp-feats`_ 64 | * - `kaldifeat.Spectrogram`_ 65 | - `compute-spectrogram-feats`_ 66 | * - `kaldifeat.OnlineFbank`_ 67 | - `kaldi::OnlineFbank`_ 68 | * - `kaldifeat.OnlineMfcc`_ 69 | - `kaldi::OnlineMfcc`_ 70 | * - `kaldifeat.OnlinePlp`_ 71 | - `kaldi::OnlinePlp`_ 72 | 73 | Each feature computer needs an option. The following table lists the options 74 | for each computer and the corresponding options in `Kaldi`_. 75 | 76 | .. hint:: 77 | 78 | Note that we reuse the parameter names from `Kaldi`_. 79 | 80 | Also, both online feature computers and offline feature computers share the 81 | same option. 82 | 83 | .. list-table:: Feature computer options 84 | :widths: 50 50 85 | :header-rows: 1 86 | 87 | * - Options in `kaldifeat`_ 88 | - Corresponding options in `Kaldi`_ 89 | * - `kaldifeat.FbankOptions`_ 90 | - `kaldi::FbankOptions`_ 91 | * - `kaldifeat.MfccOptions`_ 92 | - `kaldi::MfccOptions`_ 93 | * - `kaldifeat.PlpOptions`_ 94 | - `kaldi::PlpOptions`_ 95 | * - `kaldifeat.SpectrogramOptions`_ 96 | - `kaldi::SpectrogramOptions`_ 97 | * - `kaldifeat.FrameExtractionOptions`_ 98 | - `kaldi::FrameExtractionOptions`_ 99 | * - `kaldifeat.MelBanksOptions`_ 100 | - `kaldi::MelBanksOptions`_ 101 | 102 | Read more to learn how to install `kaldifeat`_ and how to use each feature 103 | computer. 
104 | -------------------------------------------------------------------------------- /doc/source/usage/code/fbank_options-1.txt: -------------------------------------------------------------------------------- 1 | $ python3 2 | Python 3.8.0 (default, Oct 28 2019, 16:14:01) 3 | [GCC 8.3.0] on linux 4 | Type "help", "copyright", "credits" or "license" for more information. 5 | >>> import kaldifeat 6 | >>> opts = kaldifeat.FbankOptions() 7 | >>> print(opts) 8 | frame_opts: 9 | samp_freq: 16000 10 | frame_shift_ms: 10 11 | frame_length_ms: 25 12 | dither: 1 13 | preemph_coeff: 0.97 14 | remove_dc_offset: 1 15 | window_type: povey 16 | round_to_power_of_two: 1 17 | blackman_coeff: 0.42 18 | snip_edges: 1 19 | max_feature_vectors: -1 20 | 21 | 22 | mel_opts: 23 | num_bins: 23 24 | low_freq: 20 25 | high_freq: 0 26 | vtln_low: 100 27 | vtln_high: -500 28 | debug_mel: 0 29 | htk_mode: 0 30 | 31 | use_energy: 0 32 | energy_floor: 0 33 | raw_energy: 1 34 | htk_compat: 0 35 | use_log_fbank: 1 36 | use_power: 1 37 | device: cpu 38 | 39 | >>> print(opts.dither) 40 | Traceback (most recent call last): 41 | File "", line 1, in 42 | AttributeError: '_kaldifeat.FbankOptions' object has no attribute 'dither' 43 | >>> 44 | >>> print(opts.frame_opts.dither) 45 | 1.0 46 | >>> opts.frame_opts.dither = 0 # disable dither 47 | >>> print(opts.frame_opts.dither) 48 | 0.0 49 | >>> import torch 50 | >>> print(opts.device) 51 | cpu 52 | >>> opts.device = 'cuda:0' 53 | >>> print(opts.device) 54 | cuda:0 55 | >>> opts.device = torch.device('cuda', 1) 56 | >>> print(opts.device) 57 | cuda:1 58 | >>> opts.device = 'cpu' 59 | >>> print(opts.device) 60 | cpu 61 | >>> print(opts.mel_opts.num_bins) 62 | 23 63 | >>> opts.mel_opts.num_bins = 80 64 | >>> print(opts.mel_opts.num_bins) 65 | 80 66 | -------------------------------------------------------------------------------- /doc/source/usage/code/test_fbank_options.py: -------------------------------------------------------------------------------- 1 
| ../../../../kaldifeat/python/tests/test_fbank_options.py -------------------------------------------------------------------------------- /doc/source/usage/fbank.rst: -------------------------------------------------------------------------------- 1 | kaldifeat.Fbank 2 | =============== 3 | 4 | -------------------------------------------------------------------------------- /doc/source/usage/fbank_options.rst: -------------------------------------------------------------------------------- 1 | kaldifeat.FbankOptions 2 | ====================== 3 | 4 | If you want to construct an instance of `kaldifeat.Fbank`_ or 5 | `kaldifeat.OnlineFbank`_, you have to provide an instance of 6 | `kaldifeat.FbankOptions`_. 7 | 8 | The following code shows how to construct an instance of `kaldifeat.FbankOptions`_. 9 | 10 | .. literalinclude:: ./code/fbank_options-1.txt 11 | :caption: Usage of `kaldifeat.FbankOptions`_ 12 | :emphasize-lines: 6,8,22,37 13 | 14 | Note that we reuse the same option names as `compute-fbank-feats`_ from `Kaldi`_: 15 | 16 | .. code-block:: bash 17 | 18 | $ compute-fbank-feats --help 19 | 20 | 21 | .. literalinclude:: ./code/compute-fbank-feats-help.txt 22 | :caption: Output of ``compute-fbank-feats --help`` 23 | 24 | Please refer to the output of ``compute-fbank-feats --help`` for the meaning 25 | of each field of `kaldifeat.FbankOptions`_. 26 | 27 | One thing worth noting is that `kaldifeat.FbankOptions`_ has a field ``device``, 28 | which is an instance of ``torch.device``. You can assign it either a string, e.g., 29 | ``"cpu"`` or ``"cuda:0"``, or an instance of ``torch.device``, e.g., ``torch.device("cpu")`` or 30 | ``torch.device("cuda", 1)``. 31 | 32 | .. hint:: 33 | 34 | You can use this field to control whether the feature computer 35 | constructed from it performs computation on CPU or CUDA. 36 | 37 | .. caution:: 38 | 39 | If you use a CUDA device, make sure that you have installed a CUDA version 40 | of `PyTorch`_. 
41 | 42 | Example usage 43 | ------------- 44 | 45 | The following code from 46 | ``_ 47 | demonstrates the usage of `kaldifeat.FbankOptions`_: 48 | 49 | .. literalinclude:: ./code/test_fbank_options.py 50 | :caption: Example usage of `kaldifeat.FbankOptions`_ 51 | :language: python 52 | -------------------------------------------------------------------------------- /doc/source/usage/index.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | This section describes how to use feature computers in `kaldifeat`_. 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | fbank_options 10 | fbank 11 | online_fbank 12 | -------------------------------------------------------------------------------- /doc/source/usage/online_fbank.rst: -------------------------------------------------------------------------------- 1 | kaldifeat.OnlineFbank 2 | ===================== 3 | 4 | -------------------------------------------------------------------------------- /get_version.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import datetime 4 | import os 5 | import platform 6 | import re 7 | import shutil 8 | 9 | import torch 10 | 11 | 12 | def is_macos(): 13 | return platform.system() == "Darwin" 14 | 15 | 16 | def is_windows(): 17 | return platform.system() == "Windows" 18 | 19 | 20 | def with_cuda(): 21 | if shutil.which("nvcc") is None: 22 | return False 23 | 24 | if is_macos(): 25 | return False 26 | 27 | return True 28 | 29 | 30 | def get_pytorch_version(): 31 | # if it is 1.7.1+cuda101, then strip +cuda101 32 | return torch.__version__.split("+")[0] 33 | 34 | 35 | def get_cuda_version(): 36 | from torch.utils import collect_env 37 | 38 | running_cuda_version = collect_env.get_running_cuda_version(collect_env.run) 39 | cuda_version = torch.version.cuda 40 | if running_cuda_version is not None and cuda_version is not None: 41 | assert cuda_version in 
running_cuda_version, ( 42 | f"PyTorch is built with CUDA version: {cuda_version}.\n" 43 | f"The current running CUDA version is: {running_cuda_version}" 44 | ) 45 | return cuda_version 46 | 47 | 48 | def is_for_pypi(): 49 | ans = os.environ.get("KALDIFEAT_IS_FOR_PYPI", None) 50 | return ans is not None 51 | 52 | 53 | def is_stable(): 54 | ans = os.environ.get("KALDIFEAT_IS_STABLE", None) 55 | return ans is not None 56 | 57 | 58 | def is_for_conda(): 59 | ans = os.environ.get("KALDIFEAT_IS_FOR_CONDA", None) 60 | return ans is not None 61 | 62 | 63 | def get_package_version(): 64 | # Set a default CUDA version here so that `pip install kaldifeat` 65 | # uses the default CUDA version. 66 | # 67 | default_cuda_version = "10.1" # CUDA 10.1 68 | 69 | if with_cuda(): 70 | cuda_version = get_cuda_version() 71 | if is_for_pypi() and default_cuda_version == cuda_version: 72 | cuda_version = "" 73 | pytorch_version = "" 74 | local_version = "" 75 | else: 76 | cuda_version = f"+cuda{cuda_version}" 77 | pytorch_version = get_pytorch_version() 78 | local_version = f"{cuda_version}.torch{pytorch_version}" 79 | else: 80 | pytorch_version = get_pytorch_version() 81 | local_version = f"+cpu.torch{pytorch_version}" 82 | 83 | if is_for_conda(): 84 | local_version = "" 85 | 86 | if is_for_pypi() and is_macos(): 87 | local_version = "" 88 | 89 | with open("CMakeLists.txt") as f: 90 | content = f.read() 91 | 92 | latest_version = re.search(r"set\(kaldifeat_VERSION (.*)\)", content).group( 93 | 1 94 | ) 95 | latest_version = latest_version.strip('"') 96 | 97 | if not is_stable(): 98 | dt = datetime.datetime.utcnow() 99 | package_version = f"{latest_version}.dev{dt.year}{dt.month:02d}{dt.day:02d}{local_version}" 100 | else: 101 | package_version = f"{latest_version}" 102 | return package_version 103 | 104 | 105 | if __name__ == "__main__": 106 | print(get_package_version()) 107 | -------------------------------------------------------------------------------- /kaldifeat/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | add_subdirectory(csrc) 2 | if(kaldifeat_BUILD_PYMODULE) 3 | add_subdirectory(python) 4 | endif() 5 | -------------------------------------------------------------------------------- /kaldifeat/csrc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 Xiaomi Corporation (author: Fangjun Kuang) 2 | 3 | set(kaldifeat_srcs 4 | feature-fbank.cc 5 | feature-functions.cc 6 | feature-mfcc.cc 7 | feature-plp.cc 8 | feature-spectrogram.cc 9 | feature-window.cc 10 | matrix-functions.cc 11 | mel-computations.cc 12 | online-feature.cc 13 | whisper-fbank.cc 14 | ) 15 | 16 | add_library(kaldifeat_core ${kaldifeat_srcs}) 17 | target_link_libraries(kaldifeat_core PUBLIC ${TORCH_LIBRARIES}) 18 | 19 | target_compile_definitions(kaldifeat_core PUBLIC KALDIFEAT_TORCH_VERSION_MAJOR=${KALDIFEAT_TORCH_VERSION_MAJOR}) 20 | target_compile_definitions(kaldifeat_core PUBLIC KALDIFEAT_TORCH_VERSION_MINOR=${KALDIFEAT_TORCH_VERSION_MINOR}) 21 | 22 | if(APPLE) 23 | execute_process( 24 | COMMAND "${PYTHON_EXECUTABLE}" -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())" 25 | OUTPUT_STRIP_TRAILING_WHITESPACE 26 | OUTPUT_VARIABLE PYTHON_SITE_PACKAGE_DIR 27 | ) 28 | message(STATUS "PYTHON_SITE_PACKAGE_DIR: ${PYTHON_SITE_PACKAGE_DIR}") 29 | target_link_libraries(kaldifeat_core PUBLIC "-L ${PYTHON_SITE_PACKAGE_DIR}/../..") 30 | endif() 31 | 32 | add_executable(test_kaldifeat test_kaldifeat.cc) 33 | target_link_libraries(test_kaldifeat PRIVATE kaldifeat_core) 34 | 35 | function(kaldifeat_add_test source) 36 | get_filename_component(name ${source} NAME_WE) 37 | add_executable(${name} "${source}") 38 | target_link_libraries(${name} 39 | PRIVATE 40 | kaldifeat_core 41 | gtest 42 | gtest_main 43 | ) 44 | 45 | # NOTE: We set the working directory here so that 46 | # it works also on windows. 
The reason is that 47 | # the required DLLs are inside ${TORCH_DIR}/lib 48 | # and they can be found by the exe if the current 49 | # working directory is ${TORCH_DIR}\lib 50 | add_test(NAME "Test.${name}" 51 | COMMAND 52 | $ 53 | WORKING_DIRECTORY ${TORCH_DIR}/lib 54 | ) 55 | endfunction() 56 | 57 | if(kaldifeat_BUILD_TESTS) 58 | # please sort the source files alphabetically 59 | set(test_srcs 60 | feature-window-test.cc 61 | online-feature-test.cc 62 | ) 63 | 64 | foreach(source IN LISTS test_srcs) 65 | kaldifeat_add_test(${source}) 66 | endforeach() 67 | endif() 68 | 69 | file(MAKE_DIRECTORY 70 | DESTINATION 71 | ${PROJECT_BINARY_DIR}/include/kaldifeat/csrc 72 | ) 73 | 74 | file(GLOB_RECURSE all_headers *.h) 75 | message(STATUS "All headers: ${all_headers}") 76 | 77 | file(COPY 78 | ${all_headers} 79 | DESTINATION 80 | ${PROJECT_BINARY_DIR}/include/kaldifeat/csrc 81 | ) 82 | if(BUILD_SHARED_LIBS AND WIN32) 83 | install(TARGETS kaldifeat_core 84 | DESTINATION ../ 85 | ) 86 | endif() 87 | install(TARGETS kaldifeat_core 88 | DESTINATION ${CMAKE_INSTALL_LIBDIR} 89 | ) 90 | 91 | install(FILES ${all_headers} 92 | DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/kaldifeat/csrc 93 | ) 94 | -------------------------------------------------------------------------------- /kaldifeat/csrc/CPPLINT.cfg: -------------------------------------------------------------------------------- 1 | exclude_files=whisper-mel-bank.h,whisper-v3-mel-bank.h 2 | -------------------------------------------------------------------------------- /kaldifeat/csrc/feature-common-inl.h: -------------------------------------------------------------------------------- 1 | // kaldifeat/csrc/feature-common-inl.h 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | // This file is copied/modified from kaldi/src/feat/feature-common-inl.h 6 | 7 | #ifndef KALDIFEAT_CSRC_FEATURE_COMMON_INL_H_ 8 | #define KALDIFEAT_CSRC_FEATURE_COMMON_INL_H_ 9 | 10 | #include 
"kaldifeat/csrc/feature-window.h" 11 | 12 | namespace kaldifeat { 13 | 14 | template 15 | torch::Tensor OfflineFeatureTpl::ComputeFeatures(const torch::Tensor &wave, 16 | float vtln_warp) { 17 | const FrameExtractionOptions &frame_opts = computer_.GetFrameOptions(); 18 | 19 | torch::Tensor strided_input; 20 | if (wave.dim() == 1) { 21 | strided_input = GetStrided(wave, frame_opts); 22 | } else { 23 | KALDIFEAT_ASSERT(wave.dim() == 2); 24 | KALDIFEAT_ASSERT(wave.size(1) == frame_opts.WindowSize()); 25 | strided_input = wave; 26 | } 27 | 28 | if (frame_opts.dither != 0.0f) { 29 | strided_input = Dither(strided_input, frame_opts.dither); 30 | } 31 | 32 | if (frame_opts.remove_dc_offset) { 33 | torch::Tensor row_means = strided_input.mean(1).unsqueeze(1); 34 | strided_input = strided_input - row_means; 35 | } 36 | 37 | bool use_raw_log_energy = computer_.NeedRawLogEnergy(); 38 | torch::Tensor log_energy_pre_window; 39 | 40 | // torch.finfo(torch.float32).eps 41 | constexpr float kEps = 1.1920928955078125e-07f; 42 | 43 | if (use_raw_log_energy) { 44 | // it is true iff use_energy==true and raw_energy==true 45 | log_energy_pre_window = 46 | torch::clamp_min(strided_input.pow(2).sum(1), kEps).log(); 47 | } 48 | 49 | if (frame_opts.preemph_coeff != 0.0f) { 50 | strided_input = Preemphasize(frame_opts.preemph_coeff, strided_input); 51 | } 52 | 53 | strided_input = feature_window_function_.Apply(strided_input); 54 | 55 | int32_t padding = frame_opts.PaddedWindowSize() - strided_input.size(1); 56 | 57 | if (padding > 0) { 58 | #ifdef __ANDROID__ 59 | auto padding_value = torch::zeros( 60 | {strided_input.size(0), padding}, 61 | torch::dtype(torch::kFloat).device(strided_input.device())); 62 | strided_input = torch::cat({strided_input, padding_value}, 1); 63 | #else 64 | strided_input = torch::nn::functional::pad( 65 | strided_input, torch::nn::functional::PadFuncOptions({0, padding}) 66 | .mode(torch::kConstant) 67 | .value(0)); 68 | #endif 69 | } 70 | 71 | return 
computer_.Compute(log_energy_pre_window, vtln_warp, strided_input); 72 | } 73 | 74 | } // namespace kaldifeat 75 | 76 | #endif // KALDIFEAT_CSRC_FEATURE_COMMON_INL_H_ 77 | -------------------------------------------------------------------------------- /kaldifeat/csrc/feature-common.h: -------------------------------------------------------------------------------- 1 | // kaldifeat/csrc/feature-common.h 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | // This file is copied/modified from kaldi/src/feat/feature-common.h 6 | 7 | #ifndef KALDIFEAT_CSRC_FEATURE_COMMON_H_ 8 | #define KALDIFEAT_CSRC_FEATURE_COMMON_H_ 9 | 10 | #include "kaldifeat/csrc/feature-functions.h" 11 | #include "kaldifeat/csrc/feature-window.h" 12 | // See "The torch.fft module in PyTorch 1.7" 13 | // https://github.com/pytorch/pytorch/wiki/The-torch.fft-module-in-PyTorch-1.7 14 | #if KALDIFEAT_TORCH_VERSION_MAJOR > 1 || \ 15 | (KALDIFEAT_TORCH_VERSION_MAJOR == 1 && KALDIFEAT_TORCH_VERSION_MINOR > 6) 16 | #include "torch/fft.h" 17 | #define KALDIFEAT_HAS_FFT_NAMESPACE 18 | // It uses torch::fft::rfft 19 | // Its input shape is [x, N], output shape is [x, N/2 + 1] 20 | // which is a complex tensor 21 | #else 22 | #include "ATen/Functions.h" 23 | // It uses torch::rfft 24 | // Its input shape is [x, N], output shape is [x, N/2 + 1, 2] 25 | // which contains the real part [..., 0], and imaginary part [..., 1] 26 | #endif 27 | 28 | namespace kaldifeat { 29 | 30 | template 31 | class OfflineFeatureTpl { 32 | public: 33 | using Options = typename F::Options; 34 | 35 | // Note: feature_window_function_ is the windowing function, which is initialized 36 | // using the options class, that we cache at this level. 37 | explicit OfflineFeatureTpl(const Options &opts) 38 | : computer_(opts), 39 | feature_window_function_(computer_.GetFrameOptions(), opts.device) {} 40 | 41 | /** 42 | Computes the features for one file (one sequence of features). 
43 | This is the newer interface where you specify the sample frequency 44 | of the input waveform. 45 | @param [in] wave The input waveform. It can be either 1-D or 2-D. 46 | If it is a 1-D tensor, we assume it contains 47 | samples of a mono channel sound file. 48 | If it is a 2-D tensor, we assume each row 49 | is a frame of size opts.WindowSize(). 50 | @param [in] sample_freq The sampling frequency with which 51 | 'wave' was sampled. 52 | if sample_freq is higher than the frequency 53 | specified in the config, we will downsample 54 | the waveform, but if lower, it's an error. 55 | @param [in] vtln_warp The VTLN warping factor (will normally 56 | be 1.0) 57 | @param [out] output The matrix of features, where the row-index 58 | is the frame index. 59 | */ 60 | torch::Tensor ComputeFeatures(const torch::Tensor &wave, float vtln_warp); 61 | 62 | int32_t Dim() const { return computer_.Dim(); } 63 | const Options &GetOptions() const { return computer_.GetOptions(); } 64 | 65 | const FrameExtractionOptions &GetFrameOptions() const { 66 | return GetOptions().frame_opts; 67 | } 68 | 69 | // Copy constructor. 
70 | OfflineFeatureTpl(const OfflineFeatureTpl &) = delete; 71 | OfflineFeatureTpl &operator=(const OfflineFeatureTpl &) = delete; 72 | 73 | private: 74 | F computer_; 75 | FeatureWindowFunction feature_window_function_; 76 | }; 77 | 78 | } // namespace kaldifeat 79 | 80 | #include "kaldifeat/csrc/feature-common-inl.h" 81 | 82 | #endif // KALDIFEAT_CSRC_FEATURE_COMMON_H_ 83 | -------------------------------------------------------------------------------- /kaldifeat/csrc/feature-fbank.cc: -------------------------------------------------------------------------------- 1 | // kaldifeat/csrc/feature-fbank.cc 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | // This file is copied/modified from kaldi/src/feat/feature-fbank.cc 6 | 7 | #include "kaldifeat/csrc/feature-fbank.h" 8 | 9 | #include 10 | 11 | namespace kaldifeat { 12 | 13 | std::ostream &operator<<(std::ostream &os, const FbankOptions &opts) { 14 | os << opts.ToString(); 15 | return os; 16 | } 17 | 18 | FbankComputer::FbankComputer(const FbankOptions &opts) : opts_(opts) { 19 | if (opts.energy_floor > 0.0f) log_energy_floor_ = logf(opts.energy_floor); 20 | 21 | // We'll definitely need the filterbanks info for VTLN warping factor 1.0. 22 | // [note: this call caches it.] 
23 | GetMelBanks(1.0f); 24 | } 25 | 26 | FbankComputer::~FbankComputer() { 27 | for (auto iter = mel_banks_.begin(); iter != mel_banks_.end(); ++iter) 28 | delete iter->second; 29 | } 30 | 31 | const MelBanks *FbankComputer::GetMelBanks(float vtln_warp) { 32 | MelBanks *this_mel_banks = nullptr; 33 | 34 | // std::map::iterator iter = mel_banks_.find(vtln_warp); 35 | auto iter = mel_banks_.find(vtln_warp); 36 | if (iter == mel_banks_.end()) { 37 | this_mel_banks = 38 | new MelBanks(opts_.mel_opts, opts_.frame_opts, vtln_warp, opts_.device); 39 | mel_banks_[vtln_warp] = this_mel_banks; 40 | } else { 41 | this_mel_banks = iter->second; 42 | } 43 | return this_mel_banks; 44 | } 45 | 46 | // ans.shape [signal_frame.size(0), this->Dim()] 47 | torch::Tensor FbankComputer::Compute(torch::Tensor signal_raw_log_energy, 48 | float vtln_warp, 49 | const torch::Tensor &signal_frame) { 50 | const MelBanks &mel_banks = *(GetMelBanks(vtln_warp)); 51 | 52 | KALDIFEAT_ASSERT(signal_frame.dim() == 2); 53 | 54 | KALDIFEAT_ASSERT(signal_frame.size(1) == opts_.frame_opts.PaddedWindowSize()); 55 | 56 | // torch.finfo(torch.float32).eps 57 | constexpr float kEps = 1.1920928955078125e-07f; 58 | 59 | // Compute energy after window function (not the raw one). 
60 | if (opts_.use_energy && !opts_.raw_energy) { 61 | signal_raw_log_energy = 62 | torch::clamp_min(signal_frame.pow(2).sum(1), kEps).log(); 63 | } 64 | 65 | // note spectrum is in magnitude, not power, because of `abs()` 66 | #if defined(KALDIFEAT_HAS_FFT_NAMESPACE) 67 | // signal_frame shape: [x, 512] 68 | // spectrum shape [x, 257] 69 | torch::Tensor spectrum = torch::fft::rfft(signal_frame).abs(); 70 | #else 71 | // signal_frame shape [x, 512] 72 | // real_imag shape [x, 257, 2], 73 | // where [..., 0] is the real part 74 | // [..., 1] is the imaginary part 75 | torch::Tensor real_imag = torch::rfft(signal_frame, 1); 76 | torch::Tensor real = real_imag.index({"...", 0}); 77 | torch::Tensor imag = real_imag.index({"...", 1}); 78 | torch::Tensor spectrum = (real.square() + imag.square()).sqrt(); 79 | #endif 80 | 81 | // remove the last column, i.e., the highest fft bin 82 | spectrum = spectrum.index( 83 | {"...", torch::indexing::Slice(0, -1, torch::indexing::None)}); 84 | 85 | // Use power instead of magnitude if requested. 86 | if (opts_.use_power) { 87 | spectrum = spectrum.pow(2); 88 | } 89 | 90 | torch::Tensor mel_energies = mel_banks.Compute(spectrum); 91 | if (opts_.use_log_fbank) { 92 | // Avoid log of zero (which should be prevented anyway by dithering). 93 | mel_energies = torch::clamp_min(mel_energies, kEps).log(); 94 | } 95 | 96 | // if use_energy is true, then we get an extra bin. That is, 97 | // if num_mel_bins is 23, the feature will contain 24 bins. 98 | // 99 | // if htk_compat is false, then the 0th bin is the log energy 100 | // if htk_compat is true, then the last bin is the log energy 101 | 102 | // Copy energy as first value (or the last, if htk_compat == true). 
103 | if (opts_.use_energy) { 104 | if (opts_.energy_floor > 0.0f) { 105 | signal_raw_log_energy = 106 | torch::clamp_min(signal_raw_log_energy, log_energy_floor_); 107 | } 108 | 109 | signal_raw_log_energy.unsqueeze_(1); 110 | 111 | if (opts_.htk_compat) { 112 | mel_energies = torch::cat({mel_energies, signal_raw_log_energy}, 1); 113 | } else { 114 | mel_energies = torch::cat({signal_raw_log_energy, mel_energies}, 1); 115 | } 116 | } 117 | 118 | return mel_energies; 119 | } 120 | 121 | } // namespace kaldifeat 122 | -------------------------------------------------------------------------------- /kaldifeat/csrc/feature-fbank.h: -------------------------------------------------------------------------------- 1 | // kaldifeat/csrc/feature-fbank.h 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | // This file is copied/modified from kaldi/src/feat/feature-fbank.h 6 | 7 | #ifndef KALDIFEAT_CSRC_FEATURE_FBANK_H_ 8 | #define KALDIFEAT_CSRC_FEATURE_FBANK_H_ 9 | 10 | #include 11 | #include 12 | 13 | #include "kaldifeat/csrc/feature-common.h" 14 | #include "kaldifeat/csrc/feature-window.h" 15 | #include "kaldifeat/csrc/mel-computations.h" 16 | 17 | namespace kaldifeat { 18 | 19 | struct FbankOptions { 20 | FrameExtractionOptions frame_opts; 21 | MelBanksOptions mel_opts; 22 | // append an extra dimension with energy to the filter banks 23 | bool use_energy = false; 24 | float energy_floor = 0.0f; // active iff use_energy==true 25 | 26 | // If true, compute log_energy before preemphasis and windowing 27 | // If false, compute log_energy after preemphasis and windowing 28 | bool raw_energy = true; // active iff use_energy==true 29 | 30 | // If true, put energy last (if using energy) 31 | // If false, put energy first 32 | bool htk_compat = false; // active iff use_energy==true 33 | 34 | // if true (default), produce log-filterbank, else linear 35 | bool use_log_fbank = true; 36 | 37 | // if true (default), use power in filterbank 38 | // 
analysis, else magnitude. 39 | bool use_power = true; 40 | 41 | torch::Device device{"cpu"}; 42 | 43 | FbankOptions() { mel_opts.num_bins = 23; } 44 | 45 | std::string ToString() const { 46 | std::ostringstream os; 47 | os << "FbankOptions("; 48 | 49 | os << "frame_opts=" << frame_opts.ToString() << ", "; 50 | os << "mel_opts=" << mel_opts.ToString() << ", "; 51 | 52 | os << "use_energy=" << (use_energy ? "True" : "False") << ", "; 53 | os << "energy_floor=" << energy_floor << ", "; 54 | os << "raw_energy=" << (raw_energy ? "True" : "False") << ", "; 55 | os << "htk_compat=" << (htk_compat ? "True" : "False") << ", "; 56 | os << "use_log_fbank=" << (use_log_fbank ? "True" : "False") << ", "; 57 | os << "use_power=" << (use_power ? "True" : "False") << ", "; 58 | os << "device=\"" << device << "\")"; 59 | return os.str(); 60 | } 61 | }; 62 | 63 | std::ostream &operator<<(std::ostream &os, const FbankOptions &opts); 64 | 65 | class FbankComputer { 66 | public: 67 | using Options = FbankOptions; 68 | 69 | explicit FbankComputer(const FbankOptions &opts); 70 | ~FbankComputer(); 71 | 72 | FbankComputer &operator=(const FbankComputer &) = delete; 73 | FbankComputer(const FbankComputer &) = delete; 74 | 75 | int32_t Dim() const { 76 | return opts_.mel_opts.num_bins + (opts_.use_energy ? 1 : 0); 77 | } 78 | 79 | // if true, compute log_energy_pre_window but after dithering and dc removal 80 | bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; } 81 | 82 | const FrameExtractionOptions &GetFrameOptions() const { 83 | return opts_.frame_opts; 84 | } 85 | 86 | const FbankOptions &GetOptions() const { return opts_; } 87 | 88 | // signal_raw_log_energy is log_energy_pre_window, which is not empty 89 | // iff NeedRawLogEnergy() returns true. 
90 | torch::Tensor Compute(torch::Tensor signal_raw_log_energy, float vtln_warp, 91 | const torch::Tensor &signal_frame); 92 | 93 | private: 94 | const MelBanks *GetMelBanks(float vtln_warp); 95 | 96 | FbankOptions opts_; 97 | float log_energy_floor_; 98 | std::map mel_banks_; // float is VTLN coefficient. 99 | }; 100 | 101 | using Fbank = OfflineFeatureTpl; 102 | 103 | } // namespace kaldifeat 104 | 105 | #endif // KALDIFEAT_CSRC_FEATURE_FBANK_H_ 106 | -------------------------------------------------------------------------------- /kaldifeat/csrc/feature-functions.cc: -------------------------------------------------------------------------------- 1 | // kaldifeat/csrc/feature-functions.cc 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | // This file is copied/modified from kaldi/src/feat/feature-functions.cc 6 | 7 | #include "kaldifeat/csrc/feature-functions.h" 8 | 9 | #include 10 | 11 | namespace kaldifeat { 12 | 13 | void InitIdftBases(int32_t n_bases, int32_t dimension, torch::Tensor *mat_out) { 14 | float angle = M_PI / (dimension - 1); 15 | float scale = 1.0f / (2 * (dimension - 1)); 16 | 17 | *mat_out = torch::empty({n_bases, dimension}, torch::kFloat); 18 | float *data = mat_out->data_ptr(); 19 | 20 | int32_t stride = mat_out->stride(0); 21 | 22 | for (int32_t i = 0; i < n_bases; ++i) { 23 | float *this_row = data + i * stride; 24 | this_row[0] = scale; 25 | for (int32_t j = 1; j < dimension - 1; ++j) { 26 | this_row[j] = 2 * scale * std::cos(angle * i * j); 27 | } 28 | 29 | this_row[dimension - 1] = scale * std::cos(angle * i * (dimension - 1)); 30 | } 31 | } 32 | 33 | } // namespace kaldifeat 34 | -------------------------------------------------------------------------------- /kaldifeat/csrc/feature-functions.h: -------------------------------------------------------------------------------- 1 | // kaldifeat/csrc/feature-functions.h 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | 
// This file is copied/modified from kaldi/src/feat/feature-functions.h 6 | 7 | #ifndef KALDIFEAT_CSRC_FEATURE_FUNCTIONS_H_ 8 | #define KALDIFEAT_CSRC_FEATURE_FUNCTIONS_H_ 9 | 10 | #include "torch/script.h" 11 | 12 | namespace kaldifeat { 13 | 14 | void InitIdftBases(int32_t n_bases, int32_t dimension, torch::Tensor *mat_out); 15 | 16 | } 17 | 18 | #endif // KALDIFEAT_CSRC_FEATURE_FUNCTIONS_H_ 19 | -------------------------------------------------------------------------------- /kaldifeat/csrc/feature-mfcc.h: -------------------------------------------------------------------------------- 1 | // kaldifeat/csrc/feature-mfcc.h 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | // This file is copied/modified from kaldi/src/feat/feature-mfcc.h 6 | 7 | #ifndef KALDIFEAT_CSRC_FEATURE_MFCC_H_ 8 | #define KALDIFEAT_CSRC_FEATURE_MFCC_H_ 9 | 10 | #include 11 | #include 12 | 13 | #include "kaldifeat/csrc/feature-common.h" 14 | #include "kaldifeat/csrc/feature-window.h" 15 | #include "kaldifeat/csrc/mel-computations.h" 16 | #include "torch/script.h" 17 | 18 | namespace kaldifeat { 19 | 20 | /// MfccOptions contains basic options for computing MFCC features. 21 | // (this class is copied from kaldi) 22 | struct MfccOptions { 23 | FrameExtractionOptions frame_opts; 24 | MelBanksOptions mel_opts; 25 | 26 | // Number of cepstra in MFCC computation (including C0) 27 | int32_t num_ceps = 13; 28 | 29 | // Use energy (not C0) in MFCC computation 30 | bool use_energy = true; 31 | 32 | // Floor on energy (absolute, not relative) in MFCC 33 | // computation. Only makes a difference if use_energy=true; 34 | // only necessary if dither=0.0. 
35 | // Suggested values: 0.1 or 1.0 36 | float energy_floor = 0.0; 37 | 38 | // If true, compute energy before preemphasis and windowing 39 | bool raw_energy = true; 40 | 41 | // Constant that controls scaling of MFCCs 42 | float cepstral_lifter = 22.0; 43 | 44 | // If true, put energy or C0 last and use a factor of 45 | // sqrt(2) on C0. 46 | // Warning: not sufficient to get HTK compatible features 47 | // (need to change other parameters) 48 | bool htk_compat = false; 49 | 50 | torch::Device device{"cpu"}; 51 | 52 | MfccOptions() { mel_opts.num_bins = 23; } 53 | 54 | std::string ToString() const { 55 | std::ostringstream os; 56 | os << "MfccOptions("; 57 | os << "frame_opts=" << frame_opts.ToString() << ", "; 58 | os << "mel_opts=" << mel_opts.ToString() << ", "; 59 | 60 | os << "num_ceps=" << num_ceps << ", "; 61 | os << "use_energy=" << (use_energy ? "True" : "False") << ", "; 62 | os << "energy_floor=" << energy_floor << ", "; 63 | os << "raw_energy=" << (raw_energy ? "True" : "False") << ", "; 64 | os << "cepstral_lifter=" << cepstral_lifter << ", "; 65 | os << "htk_compat=" << (htk_compat ? 
"True" : "False") << ", "; 66 | os << "device=\"" << device << "\")"; 67 | 68 | return os.str(); 69 | } 70 | }; 71 | 72 | std::ostream &operator<<(std::ostream &os, const MfccOptions &opts); 73 | 74 | class MfccComputer { 75 | public: 76 | using Options = MfccOptions; 77 | 78 | explicit MfccComputer(const MfccOptions &opts); 79 | ~MfccComputer(); 80 | 81 | MfccComputer &operator=(const MfccComputer &) = delete; 82 | MfccComputer(const MfccComputer &) = delete; 83 | 84 | int32_t Dim() const { return opts_.num_ceps; } 85 | 86 | bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; } 87 | 88 | const FrameExtractionOptions &GetFrameOptions() const { 89 | return opts_.frame_opts; 90 | } 91 | 92 | const MfccOptions &GetOptions() const { return opts_; } 93 | 94 | // signal_raw_log_energy is log_energy_pre_window, which is not empty 95 | // iff NeedRawLogEnergy() returns true. 96 | torch::Tensor Compute(torch::Tensor signal_raw_log_energy, float vtln_warp, 97 | const torch::Tensor &signal_frame); 98 | 99 | private: 100 | const MelBanks *GetMelBanks(float vtln_warp); 101 | 102 | MfccOptions opts_; 103 | torch::Tensor lifter_coeffs_; // 1-D tensor 104 | 105 | // Note we save a transposed version of dct_matrix_ 106 | // dct_matrix_.rows is num_mel_bins 107 | // dct_matrix_.cols is num_ceps 108 | torch::Tensor dct_matrix_; // matrix we right-multiply by to perform DCT. 109 | float log_energy_floor_; 110 | std::map mel_banks_; // float is VTLN coefficient. 
111 | }; 112 | 113 | using Mfcc = OfflineFeatureTpl; 114 | 115 | } // namespace kaldifeat 116 | 117 | #endif // KALDIFEAT_CSRC_FEATURE_MFCC_H_ 118 | -------------------------------------------------------------------------------- /kaldifeat/csrc/feature-spectrogram.cc: -------------------------------------------------------------------------------- 1 | // kaldifeat/csrc/feature-spectrogram.cc 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | // This file is copied/modified from kaldi/src/feat/feature-spectrogram.cc 6 | 7 | #include "kaldifeat/csrc/feature-spectrogram.h" 8 | 9 | namespace kaldifeat { 10 | 11 | std::ostream &operator<<(std::ostream &os, const SpectrogramOptions &opts) { 12 | os << opts.ToString(); 13 | return os; 14 | } 15 | 16 | SpectrogramComputer::SpectrogramComputer(const SpectrogramOptions &opts) 17 | : opts_(opts) { 18 | if (opts.energy_floor > 0.0) log_energy_floor_ = logf(opts.energy_floor); 19 | } 20 | 21 | // ans.shape [signal_frame.size(0), this->Dim()] 22 | torch::Tensor SpectrogramComputer::Compute(torch::Tensor signal_raw_log_energy, 23 | float vtln_warp, 24 | const torch::Tensor &signal_frame) { 25 | KALDIFEAT_ASSERT(signal_frame.dim() == 2); 26 | 27 | KALDIFEAT_ASSERT(signal_frame.size(1) == opts_.frame_opts.PaddedWindowSize()); 28 | 29 | // torch.finfo(torch.float32).eps 30 | constexpr float kEps = 1.1920928955078125e-07f; 31 | 32 | // Compute energy after window function (not the raw one). 
33 | if (!opts_.raw_energy) { 34 | signal_raw_log_energy = 35 | torch::clamp_min(signal_frame.pow(2).sum(1), kEps).log(); 36 | } 37 | 38 | // note spectrum is in magnitude, not power, because of `abs()` 39 | #if defined(KALDIFEAT_HAS_FFT_NAMESPACE) 40 | // signal_frame shape: [x, 512] 41 | // spectrum shape [x, 257] 42 | torch::Tensor spectrum = torch::fft::rfft(signal_frame).abs(); 43 | #else 44 | // signal_frame shape [x, 512] 45 | // real_imag shape [x, 257, 2], 46 | // where [..., 0] is the real part 47 | // [..., 1] is the imaginary part 48 | torch::Tensor real_imag = torch::rfft(signal_frame, 1); 49 | torch::Tensor real = real_imag.index({"...", 0}); 50 | torch::Tensor imag = real_imag.index({"...", 1}); 51 | torch::Tensor spectrum = (real.square() + imag.square()).sqrt(); 52 | #endif 53 | 54 | if (opts_.return_raw_fft) { 55 | KALDIFEAT_ERR << "return raw fft is not supported yet"; 56 | } 57 | 58 | // compute power spectrum 59 | spectrum = spectrum.pow(2); 60 | 61 | // NOTE: take the log 62 | spectrum = torch::clamp_min(spectrum, kEps).log(); 63 | 64 | if (opts_.energy_floor > 0.0f) { 65 | signal_raw_log_energy = 66 | torch::clamp_min(signal_raw_log_energy, log_energy_floor_); 67 | } 68 | 69 | // The zeroth spectrogram component is always set to the signal energy, 70 | // instead of the square of the constant component of the signal. 
71 | // 72 | // spectrum[:,0] = signal_raw_log_energy 73 | spectrum.index({"...", 0}) = signal_raw_log_energy; 74 | 75 | return spectrum; 76 | } 77 | 78 | } // namespace kaldifeat 79 | -------------------------------------------------------------------------------- /kaldifeat/csrc/feature-spectrogram.h: -------------------------------------------------------------------------------- 1 | // kaldifeat/csrc/feature-spectrogram.h 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | // This file is copied/modified from kaldi/src/feat/feature-spectrogram.h 6 | 7 | #ifndef KALDIFEAT_CSRC_FEATURE_SPECTROGRAM_H_ 8 | #define KALDIFEAT_CSRC_FEATURE_SPECTROGRAM_H_ 9 | 10 | #include 11 | 12 | #include "kaldifeat/csrc/feature-common.h" 13 | #include "kaldifeat/csrc/feature-window.h" 14 | #include "torch/script.h" 15 | 16 | namespace kaldifeat { 17 | 18 | struct SpectrogramOptions { 19 | FrameExtractionOptions frame_opts; 20 | 21 | // Floor on energy (absolute, not relative) in Spectrogram 22 | // computation. Caution: this floor is applied to the 23 | // zeroth component, representing the total signal energy. 24 | // The floor on the individual spectrogram elements is fixed at 25 | // std::numeric_limits::epsilon() 26 | float energy_floor = 0.0; 27 | 28 | // If true, compute energy before preemphasis and windowing 29 | bool raw_energy = true; 30 | 31 | // If true, return raw FFT complex numbers instead of log magnitudes 32 | // Not implemented yet 33 | bool return_raw_fft = false; 34 | 35 | torch::Device device{"cpu"}; 36 | 37 | std::string ToString() const { 38 | std::ostringstream os; 39 | os << "SpectrogramOptions("; 40 | os << "frame_opts=" << frame_opts.ToString() << ", "; 41 | os << "energy_floor=" << energy_floor << ", "; 42 | os << "raw_energy=" << (raw_energy ? "True" : "False") << ", "; 43 | os << "return_raw_fft=" << (return_raw_fft ? 
"True" : "False") << ", "; 44 | os << "device=\"" << device << "\")"; 45 | return os.str(); 46 | } 47 | }; 48 | 49 | std::ostream &operator<<(std::ostream &os, const SpectrogramOptions &opts); 50 | 51 | class SpectrogramComputer { 52 | public: 53 | using Options = SpectrogramOptions; 54 | 55 | explicit SpectrogramComputer(const SpectrogramOptions &opts); 56 | 57 | ~SpectrogramComputer() = default; 58 | 59 | const FrameExtractionOptions &GetFrameOptions() const { 60 | return opts_.frame_opts; 61 | } 62 | 63 | const SpectrogramOptions &GetOptions() const { return opts_; } 64 | 65 | int32_t Dim() const { 66 | if (opts_.return_raw_fft) { 67 | return opts_.frame_opts.PaddedWindowSize(); 68 | } else { 69 | return opts_.frame_opts.PaddedWindowSize() / 2 + 1; 70 | } 71 | } 72 | 73 | bool NeedRawLogEnergy() const { return opts_.raw_energy; } 74 | 75 | // signal_raw_log_energy is log_energy_pre_window, which is not empty 76 | // iff NeedRawLogEnergy() returns true. 77 | // 78 | // vtln_warp is ignored by this function, it's only 79 | // needed for interface compatibility. 
80 | torch::Tensor Compute(torch::Tensor signal_raw_log_energy, float vtln_warp, 81 | const torch::Tensor &signal_frame); 82 | 83 | private: 84 | SpectrogramOptions opts_; 85 | float log_energy_floor_; 86 | }; 87 | 88 | using Spectrogram = OfflineFeatureTpl; 89 | 90 | } // namespace kaldifeat 91 | 92 | #endif // KALDIFEAT_CSRC_FEATURE_SPECTROGRAM_H_ 93 | -------------------------------------------------------------------------------- /kaldifeat/csrc/feature-window-test.cc: -------------------------------------------------------------------------------- 1 | // kaldifeat/csrc/feature-window-test.cc 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #include "kaldifeat/csrc/feature-window.h" 6 | 7 | #include "gtest/gtest.h" 8 | 9 | namespace kaldifeat { 10 | 11 | TEST(FeatureWindow, NumFrames) { 12 | FrameExtractionOptions opts; 13 | opts.samp_freq = 1000; 14 | opts.frame_length_ms = 4; 15 | opts.frame_shift_ms = 2; 16 | 17 | int32_t frame_length = opts.samp_freq / 1000 * opts.frame_length_ms; 18 | int32_t frame_shift = opts.samp_freq / 1000 * opts.frame_shift_ms; 19 | 20 | for (int32_t num_samples = 10; num_samples < 1000; ++num_samples) { 21 | opts.snip_edges = true; 22 | int32_t num_frames = NumFrames(num_samples, opts); 23 | int32_t expected_num_frames = 24 | (num_samples - frame_length) / frame_shift + 1; 25 | ASSERT_EQ(num_frames, expected_num_frames); 26 | 27 | opts.snip_edges = false; 28 | num_frames = NumFrames(num_samples, opts); 29 | expected_num_frames = (num_samples + frame_shift / 2) / frame_shift; 30 | ASSERT_EQ(num_frames, expected_num_frames); 31 | } 32 | } 33 | 34 | TEST(FeatureWindow, FirstSampleOfFrame) { 35 | FrameExtractionOptions opts; 36 | opts.samp_freq = 1000; 37 | opts.frame_length_ms = 4; 38 | opts.frame_shift_ms = 2; 39 | 40 | // samples: [a, b, c, d, e, f] 41 | int32_t num_samples = 6; 42 | opts.snip_edges = true; 43 | ASSERT_EQ(NumFrames(num_samples, opts), 2); 44 | EXPECT_EQ(FirstSampleOfFrame(0, opts), 
0); 45 | EXPECT_EQ(FirstSampleOfFrame(1, opts), 2); 46 | 47 | // now for snip edges if false 48 | opts.snip_edges = false; 49 | ASSERT_EQ(NumFrames(num_samples, opts), 3); 50 | EXPECT_EQ(FirstSampleOfFrame(0, opts), -1); 51 | EXPECT_EQ(FirstSampleOfFrame(1, opts), 1); 52 | EXPECT_EQ(FirstSampleOfFrame(2, opts), 3); 53 | } 54 | 55 | TEST(FeatureWindow, GetStrided) { 56 | FrameExtractionOptions opts; 57 | opts.samp_freq = 1000; 58 | opts.frame_length_ms = 4; 59 | opts.frame_shift_ms = 2; 60 | 61 | // [0 1 2 3 4 5] 62 | torch::Tensor samples = torch::arange(0, 6).to(torch::kFloat); 63 | opts.snip_edges = true; 64 | auto frames = GetStrided(samples, opts); 65 | // 0 1 2 3 66 | // 2 3 4 5 67 | std::vector v = {0, 1, 2, 3, 2, 3, 4, 5}; 68 | torch::Tensor expected = 69 | torch::from_blob(v.data(), {int64_t(v.size())}, torch::kFloat32); 70 | EXPECT_TRUE(frames.flatten().allclose(expected)); 71 | 72 | // 0 0 1 2 73 | // 1 2 3 4 74 | // 3 4 5 5 75 | opts.snip_edges = false; 76 | frames = GetStrided(samples, opts); 77 | v = {0, 0, 1, 2, 1, 2, 3, 4, 3, 4, 5, 5}; 78 | expected = torch::from_blob(v.data(), {int64_t(v.size())}, torch::kFloat32); 79 | EXPECT_TRUE(frames.flatten().allclose(expected)); 80 | } 81 | 82 | } // namespace kaldifeat 83 | -------------------------------------------------------------------------------- /kaldifeat/csrc/generate-whisper-melbank-v3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | import librosa 6 | import numpy as np 7 | 8 | 9 | def main(): 10 | m = librosa.filters.mel(sr=16000, n_fft=400, n_mels=128) 11 | assert m.shape == (128, 201) 12 | s = "// Auto-generated. 
Do NOT edit!\n\n" 13 | s += "// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)\n\n" 14 | s += "\n" 15 | s += "#ifndef KALDIFEAT_CSRC_WHISPER_V3_MEL_BANK_H_\n" 16 | s += "#define KALDIFEAT_CSRC_WHISPER_V3_MEL_BANK_H_\n" 17 | s += "namespace kaldifeat {\n\n" 18 | s += f"constexpr int32_t kWhisperV3MelRows = {m.shape[0]};\n" 19 | s += f"constexpr int32_t kWhisperV3MelCols = {m.shape[1]};\n" 20 | s += "\n" 21 | s += "constexpr float kWhisperV3MelArray[] = {\n" 22 | sep = "" 23 | for i, f in enumerate(m.reshape(-1).tolist()): 24 | s += f"{sep}{f:.8f}" 25 | sep = ", " 26 | if i and i % 7 == 0: 27 | s += ",\n" 28 | sep = "" 29 | 30 | s += "};\n\n" 31 | s += "} // namespace kaldifeat\n\n" 32 | s += "#endif // KALDIFEAT_CSRC_WHISPER_V3_MEL_BANK_H_\n" 33 | 34 | with open("whisper-v3-mel-bank.h", "w") as f: 35 | f.write(s) 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /kaldifeat/csrc/generate-whisper-melbank.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | import librosa 6 | import numpy as np 7 | 8 | 9 | def main(): 10 | m = librosa.filters.mel(sr=16000, n_fft=400, n_mels=80) 11 | assert m.shape == (80, 201) 12 | s = "// Auto-generated. 
Do NOT edit!\n\n" 13 | s += "// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)\n\n" 14 | s += "\n" 15 | s += "#ifndef KALDIFEAT_CSRC_WHISPER_MEL_BANK_H_\n" 16 | s += "#define KALDIFEAT_CSRC_WHISPER_MEL_BANK_H_\n" 17 | s += "namespace kaldifeat {\n\n" 18 | s += f"constexpr int32_t kWhisperMelRows = {m.shape[0]};\n" 19 | s += f"constexpr int32_t kWhisperMelCols = {m.shape[1]};\n" 20 | s += "\n" 21 | s += "constexpr float kWhisperMelArray[] = {\n" 22 | sep = "" 23 | for i, f in enumerate(m.reshape(-1).tolist()): 24 | s += f"{sep}{f:.8f}" 25 | sep = ", " 26 | if i and i % 7 == 0: 27 | s += ",\n" 28 | sep = "" 29 | 30 | s += "};\n\n" 31 | s += "} // namespace kaldifeat\n\n" 32 | s += "#endif // KALDIFEAT_CSRC_WHISPER_MEL_BANK_H_\n" 33 | 34 | with open("whisper-mel-bank.h", "w") as f: 35 | f.write(s) 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /kaldifeat/csrc/log.h: -------------------------------------------------------------------------------- 1 | // kaldifeat/csrc/log.h 2 | // 3 | // Copyright (c) 2020 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #ifndef KALDIFEAT_CSRC_LOG_H_ 6 | #define KALDIFEAT_CSRC_LOG_H_ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace kaldifeat { 14 | 15 | enum class LogLevel { 16 | kInfo = 0, 17 | kWarn = 1, 18 | kError = 2, // abort the program 19 | }; 20 | 21 | class Logger { 22 | public: 23 | Logger(const char *filename, const char *func_name, uint32_t line_num, 24 | LogLevel level) 25 | : filename_(filename), 26 | func_name_(func_name), 27 | line_num_(line_num), 28 | level_(level) { 29 | os_ << filename << ":" << func_name << ":" << line_num << "\n"; 30 | switch (level_) { 31 | case LogLevel::kInfo: 32 | os_ << "[I] "; 33 | break; 34 | case LogLevel::kWarn: 35 | os_ << "[W] "; 36 | break; 37 | case LogLevel::kError: 38 | os_ << "[E] "; 39 | break; 40 | } 41 | } 42 | 43 | template 44 | Logger 
&operator<<(const T &val) { 45 | os_ << val; 46 | return *this; 47 | } 48 | 49 | ~Logger() { 50 | std::cerr << os_.str() << "\n"; 51 | if (level_ == LogLevel::kError) abort(); 52 | } 53 | 54 | private: 55 | std::ostringstream os_; 56 | const char *filename_; 57 | const char *func_name_; 58 | uint32_t line_num_; 59 | LogLevel level_; 60 | }; 61 | 62 | class Voidifier { 63 | public: 64 | void operator&(const Logger &)const {} 65 | }; 66 | 67 | #if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || \ 68 | defined(__PRETTY_FUNCTION__) 69 | // for clang and GCC 70 | #define KALDIFEAT_FUNC __PRETTY_FUNCTION__ 71 | #else 72 | // for other compilers 73 | #define KALDIFEAT_FUNC __func__ 74 | #endif 75 | 76 | #define KALDIFEAT_LOG \ 77 | kaldifeat::Logger(__FILE__, KALDIFEAT_FUNC, __LINE__, \ 78 | kaldifeat::LogLevel::kInfo) 79 | 80 | #define KALDIFEAT_WARN \ 81 | kaldifeat::Logger(__FILE__, KALDIFEAT_FUNC, __LINE__, \ 82 | kaldifeat::LogLevel::kWarn) 83 | 84 | #define KALDIFEAT_ERR \ 85 | kaldifeat::Logger(__FILE__, KALDIFEAT_FUNC, __LINE__, \ 86 | kaldifeat::LogLevel::kError) 87 | 88 | #define KALDIFEAT_ASSERT(x) \ 89 | (x) ? 
(void)0 \ 90 | : kaldifeat::Voidifier() & KALDIFEAT_ERR << "Check failed!\n" \ 91 | << "x: " << #x 92 | 93 | } // namespace kaldifeat 94 | 95 | #endif // KALDIFEAT_CSRC_LOG_H_ 96 | -------------------------------------------------------------------------------- /kaldifeat/csrc/matrix-functions.cc: -------------------------------------------------------------------------------- 1 | // kaldifeat/csrc/matrix-functions.cc 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | // This file is copied/modified from kaldi/src/matrix/matrix-functions.cc 6 | 7 | #include "kaldifeat/csrc/matrix-functions.h" 8 | 9 | #include 10 | 11 | #include "kaldifeat/csrc/log.h" 12 | 13 | namespace kaldifeat { 14 | 15 | void ComputeDctMatrix(torch::Tensor *mat) { 16 | KALDIFEAT_ASSERT(mat->dim() == 2); 17 | 18 | int32_t num_rows = mat->size(0); 19 | int32_t num_cols = mat->size(1); 20 | 21 | KALDIFEAT_ASSERT(num_rows == num_cols); 22 | KALDIFEAT_ASSERT(num_rows > 0); 23 | 24 | int32_t stride = mat->stride(0); 25 | 26 | // normalizer for X_0 27 | float normalizer = std::sqrt(1.0f / num_cols); 28 | 29 | // mat[0, :] = normalizer 30 | mat->index({0, "..."}) = normalizer; 31 | 32 | // normalizer for other elements 33 | normalizer = std::sqrt(2.0f / num_cols); 34 | 35 | float *data = mat->data_ptr(); 36 | for (int32_t r = 1; r < num_rows; ++r) { 37 | float *this_row = data + r * stride; 38 | for (int32_t c = 0; c < num_cols; ++c) { 39 | float v = std::cos(static_cast(M_PI) / num_cols * (c + 0.5) * r); 40 | this_row[c] = normalizer * v; 41 | } 42 | } 43 | } 44 | 45 | } // namespace kaldifeat 46 | -------------------------------------------------------------------------------- /kaldifeat/csrc/matrix-functions.h: -------------------------------------------------------------------------------- 1 | // kaldifeat/csrc/matrix-functions.h 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | // This file is copied/modified from 
kaldi/src/matrix/matrix-functions.h 6 | 7 | #ifndef KALDIFEAT_CSRC_MATRIX_FUNCTIONS_H_ 8 | #define KALDIFEAT_CSRC_MATRIX_FUNCTIONS_H_ 9 | 10 | #include "torch/script.h" 11 | 12 | namespace kaldifeat { 13 | 14 | /// ComputeDctMatrix computes a matrix corresponding to the DCT, such that 15 | /// M * v equals the DCT of vector v. M must be square at input. 16 | /// This is the type = II DCT with normalization, corresponding to the 17 | /// following equations, where x is the signal and X is the DCT: 18 | /// X_0 = sqrt(1/N) \sum_{n = 0}^{N-1} x_n 19 | /// X_k = sqrt(2/N) \sum_{n = 0}^{N-1} x_n cos( \pi/N (n + 1/2) k ) 20 | /// See also 21 | /// https://docs.scipy.org/doc/scipy/reference/generated/scipy.fftpack.dct.html 22 | void ComputeDctMatrix(torch::Tensor *M); 23 | 24 | } // namespace kaldifeat 25 | 26 | #endif // KALDIFEAT_CSRC_MATRIX_FUNCTIONS_H_ 27 | -------------------------------------------------------------------------------- /kaldifeat/csrc/online-feature-itf.h: -------------------------------------------------------------------------------- 1 | // kaldifeat/csrc/online-feature-itf.h 2 | // 3 | // Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | // This file is copied/modified from kaldi/src/itf/online-feature-itf.h 6 | 7 | #ifndef KALDIFEAT_CSRC_ONLINE_FEATURE_ITF_H_ 8 | #define KALDIFEAT_CSRC_ONLINE_FEATURE_ITF_H_ 9 | 10 | #include 11 | #include 12 | 13 | #include "torch/script.h" 14 | 15 | namespace kaldifeat { 16 | 17 | class OnlineFeatureInterface { 18 | public: 19 | virtual ~OnlineFeatureInterface() = default; 20 | 21 | virtual int32_t Dim() const = 0; /// returns the feature dimension. 22 | // 23 | // Returns frame shift in seconds. Helps to estimate duration from frame 24 | // counts. 25 | virtual float FrameShiftInSeconds() const = 0; 26 | 27 | /// Returns the total number of frames, since the start of the utterance, that 28 | /// are now available. 
In an online-decoding context, this will likely 29 | /// increase with time as more data becomes available. 30 | virtual int32_t NumFramesReady() const = 0; 31 | 32 | /// Returns true if this is the last frame. Frame indices are zero-based, so 33 | /// the first frame is zero. IsLastFrame(-1) will return false, unless the 34 | /// file is empty (which is a case that I'm not sure all the code will handle, 35 | /// so be careful). This function may return false for some frame if we 36 | /// haven't yet decided to terminate decoding, but later true if we decide to 37 | /// terminate decoding. This function exists mainly to correctly handle end 38 | /// effects in feature extraction, and is not a mechanism to determine how 39 | /// many frames are in the decodable object (as it used to be, and for 40 | /// backward compatibility, still is, in the Decodable interface). 41 | virtual bool IsLastFrame(int32_t frame) const = 0; 42 | 43 | /// Gets the feature vector for this frame. Before calling this for a given 44 | /// frame, it is assumed that you called NumFramesReady() and it returned a 45 | /// number greater than "frame". Otherwise this call will likely crash with 46 | /// an assert failure. This function is not declared const, in case there is 47 | /// some kind of caching going on, but most of the time it shouldn't modify 48 | /// the class. 49 | /// 50 | /// The returned tensor has shape (1, Dim()). 51 | virtual torch::Tensor GetFrame(int32_t frame) = 0; 52 | 53 | /// This is like GetFrame() but for a collection of frames. There is a 54 | /// default implementation that just gets the frames one by one, but it 55 | /// may be overridden for efficiency by child classes (since sometimes 56 | /// it's more efficient to do things in a batch). 57 | /// 58 | /// Returns one tensor per requested frame, in the order given by `frames`. 
59 | virtual std::vector GetFrames( 60 | const std::vector &frames) { 61 | std::vector features; 62 | features.reserve(frames.size()); 63 | 64 | for (auto i : frames) { 65 | torch::Tensor f = GetFrame(i); 66 | features.push_back(std::move(f)); 67 | } 68 | return features; 69 | #if 0 70 | return torch::cat(features, /*dim*/ 0); 71 | #endif 72 | } 73 | 74 | /// This would be called from the application, when you get more wave data. 75 | /// Note: the sampling_rate is typically only provided so the code can assert 76 | /// that it matches the sampling rate expected in the options. 77 | virtual void AcceptWaveform(float sampling_rate, 78 | const torch::Tensor &waveform) = 0; 79 | 80 | /// InputFinished() tells the class you won't be providing any 81 | /// more waveform. This will help flush out the last few frames 82 | /// of delta or LDA features (it will typically affect the return value 83 | /// of IsLastFrame). 84 | virtual void InputFinished() = 0; 85 | }; 86 | 87 | } // namespace kaldifeat 88 | 89 | #endif // KALDIFEAT_CSRC_ONLINE_FEATURE_ITF_H_ 90 | -------------------------------------------------------------------------------- /kaldifeat/csrc/online-feature-test.cc: -------------------------------------------------------------------------------- 1 | // kaldifeat/csrc/online-feature-test.cc 2 | // 3 | // Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #include "kaldifeat/csrc/online-feature.h" 6 | 7 | #include "gtest/gtest.h" 8 | 9 | namespace kaldifeat { 10 | 11 | TEST(RecyclingVector, TestUnlimited) { 12 | RecyclingVector v(-1); 13 | constexpr int32_t N = 100; 14 | for (int32_t i = 0; i != N; ++i) { 15 | torch::Tensor t = torch::tensor({i, i + 1, i + 2}); 16 | v.PushBack(t); 17 | } 18 | ASSERT_EQ(v.Size(), N); 19 | 20 | for (int32_t i = 0; i != N; ++i) { 21 | torch::Tensor t = v.At(i); 22 | torch::Tensor expected = torch::tensor({i, i + 1, i + 2}); 23 | EXPECT_TRUE(t.equal(expected)); 24 | } 25 | } 26 | 27 | TEST(RecyclingVector, 
Testlimited) { 28 | constexpr int32_t K = 3; 29 | constexpr int32_t N = 10; 30 | RecyclingVector v(K); 31 | for (int32_t i = 0; i != N; ++i) { 32 | torch::Tensor t = torch::tensor({i, i + 1, i + 2}); 33 | v.PushBack(t); 34 | } 35 | 36 | ASSERT_EQ(v.Size(), N); 37 | 38 | for (int32_t i = 0; i < N - K; ++i) { 39 | ASSERT_DEATH(v.At(i), ""); 40 | } 41 | 42 | for (int32_t i = N - K; i != N; ++i) { 43 | torch::Tensor t = v.At(i); 44 | torch::Tensor expected = torch::tensor({i, i + 1, i + 2}); 45 | EXPECT_TRUE(t.equal(expected)); 46 | } 47 | } 48 | 49 | } // namespace kaldifeat 50 | -------------------------------------------------------------------------------- /kaldifeat/csrc/test_kaldifeat.cc: -------------------------------------------------------------------------------- 1 | // kaldifeat/csrc/test_kaldifeat.cc 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #include "torch/all.h" 6 | #include "torch/script.h" 7 | 8 | static void TestPreemph() { 9 | torch::Tensor a = torch::arange(0, 12).reshape({3, 4}).to(torch::kFloat); 10 | 11 | torch::Tensor b = 12 | a.index({"...", torch::indexing::Slice(1, torch::indexing::None, 13 | torch::indexing::None)}); 14 | 15 | torch::Tensor c = a.index({"...", torch::indexing::Slice(0, -1, 1)}); 16 | 17 | a.index({"...", torch::indexing::Slice(1, torch::indexing::None, 18 | torch::indexing::None)}) = 19 | b - 0.97 * c; 20 | 21 | a.index({"...", 0}) *= 0.97; 22 | 23 | std::cout << a << "\n"; 24 | std::cout << b << "\n"; 25 | std::cout << "c: \n" << c << "\n"; 26 | torch::Tensor d = b - 0.97 * c; 27 | std::cout << d << "\n"; 28 | } 29 | 30 | static void TestPad() { 31 | torch::Tensor a = torch::arange(0, 6).reshape({2, 3}).to(torch::kFloat); 32 | torch::Tensor b = torch::nn::functional::pad( 33 | a, torch::nn::functional::PadFuncOptions({0, 3}) 34 | .mode(torch::kConstant) 35 | .value(0)); 36 | std::cout << a << "\n"; 37 | std::cout << b << "\n"; 38 | } 39 | 40 | static void TestGetStrided() { 41 | // 
0 1 2 3 4 5 42 | // 43 | // 44 | // 0 1 2 3 45 | // 2 3 4 5 46 | 47 | torch::Tensor a = torch::arange(0, 6).to(torch::kFloat); 48 | torch::Tensor b = a.as_strided({2, 4}, {2, 1}); 49 | // b = b.clone(); 50 | std::cout << a << "\n"; 51 | std::cout << b << "\n"; 52 | std::cout << b.mean(1).unsqueeze(1) << "\n"; 53 | b = b - b.mean(1).unsqueeze(1); 54 | std::cout << a << "\n"; 55 | std::cout << b << "\n"; 56 | } 57 | 58 | static void TestDither() { 59 | torch::Tensor a = torch::arange(0, 6).reshape({2, 3}).to(torch::kFloat); 60 | torch::Tensor b = torch::arange(0, 6).reshape({2, 3}).to(torch::kFloat) * 0.1; 61 | std::cout << a << "\n"; 62 | std::cout << b << "\n"; 63 | std::cout << (a + b * 2) << "\n"; 64 | } 65 | 66 | static void TestCat() { 67 | torch::Tensor a = torch::arange(0, 6).reshape({2, 3}).to(torch::kFloat); 68 | torch::Tensor b = torch::arange(0, 2).reshape({2, 1}).to(torch::kFloat) * 0.1; 69 | torch::Tensor c = torch::cat({a, b}, 1); 70 | torch::Tensor d = torch::cat({b, a}, 1); 71 | torch::Tensor e = torch::cat({a, a}, 0); 72 | std::cout << a << "\n"; 73 | std::cout << b << "\n"; 74 | std::cout << c << "\n"; 75 | std::cout << d << "\n"; 76 | std::cout << e << "\n"; 77 | } 78 | 79 | int main() { 80 | TestCat(); 81 | return 0; 82 | } 83 | -------------------------------------------------------------------------------- /kaldifeat/csrc/whisper-fbank.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) 3 | * 4 | * See LICENSE for clarification regarding multiple authors 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 
8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | #include "kaldifeat/csrc/whisper-fbank.h" 20 | 21 | #include 22 | #include 23 | 24 | #include "kaldifeat/csrc/mel-computations.h" 25 | #include "kaldifeat/csrc/whisper-mel-bank.h" 26 | #include "kaldifeat/csrc/whisper-v3-mel-bank.h" 27 | 28 | #ifndef M_2PI 29 | #define M_2PI 6.283185307179586476925286766559005 30 | #endif 31 | 32 | namespace kaldifeat { 33 | 34 | WhisperFbankComputer::WhisperFbankComputer(const WhisperFbankOptions &opts) 35 | : opts_(opts) { 36 | if (opts.num_mels == 80) { 37 | mel_banks_ = std::make_unique(kWhisperMelArray, kWhisperMelRows, 38 | kWhisperMelCols, opts.device); 39 | } else if (opts.num_mels == 128) { 40 | mel_banks_ = std::make_unique( 41 | kWhisperV3MelArray, kWhisperV3MelRows, kWhisperV3MelCols, opts.device); 42 | } else { 43 | KALDIFEAT_ERR << "Unsupported num_mels: " << opts.num_mels 44 | << ". 
Support only 80 and 128"; 45 | } 46 | 47 | opts_.frame_opts.samp_freq = 16000; 48 | opts_.frame_opts.frame_shift_ms = 10; 49 | opts_.frame_opts.frame_length_ms = 25; 50 | opts_.frame_opts.dither = 0; 51 | opts_.frame_opts.preemph_coeff = 0; 52 | opts_.frame_opts.remove_dc_offset = false; 53 | opts_.frame_opts.window_type = "hann"; 54 | opts_.frame_opts.round_to_power_of_two = false; 55 | opts_.frame_opts.snip_edges = false; 56 | } 57 | 58 | torch::Tensor WhisperFbankComputer::Compute( 59 | torch::Tensor /*signal_raw_log_energy*/, float /*vtln_warp*/, 60 | const torch::Tensor &signal_frame) { 61 | KALDIFEAT_ASSERT(signal_frame.dim() == 2); 62 | KALDIFEAT_ASSERT(signal_frame.size(1) == opts_.frame_opts.PaddedWindowSize()); 63 | 64 | // note: `power` is the power spectrum (squared magnitude), not the magnitude 65 | #if defined(KALDIFEAT_HAS_FFT_NAMESPACE) 66 | // signal_frame shape: [x, 400] 67 | // power shape [x, 201] 68 | torch::Tensor power = torch::fft::rfft(signal_frame).abs().pow(2); 69 | #else 70 | // signal_frame shape [x, 400] 71 | // real_imag shape [x, 201, 2], 72 | // where [..., 0] is the real part 73 | // [..., 1] is the imaginary part 74 | torch::Tensor real_imag = torch::rfft(signal_frame, 1); 75 | torch::Tensor real = real_imag.index({"...", 0}); 76 | torch::Tensor imag = real_imag.index({"...", 1}); 77 | torch::Tensor power = (real.square() + imag.square()); 78 | #endif 79 | 80 | torch::Tensor mel_energies = mel_banks_->Compute(power); 81 | torch::Tensor log_spec = torch::clamp_min(mel_energies, 1e-10).log10(); 82 | log_spec = torch::maximum(log_spec, log_spec.max() - 8.0); 83 | torch::Tensor mel = (log_spec + 4.0) / 4.0; 84 | 85 | return mel; 86 | } 87 | 88 | } // namespace kaldifeat 89 | -------------------------------------------------------------------------------- /kaldifeat/csrc/whisper-fbank.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) 3 | * 
4 | * See LICENSE for clarification regarding multiple authors 5 | * 6 | * Licensed under the Apache License, Version 2.0 (the "License"); 7 | * you may not use this file except in compliance with the License. 8 | * You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | #ifndef KALDIFEAT_CSRC_WHISPER_FBANK_H_ 20 | #define KALDIFEAT_CSRC_WHISPER_FBANK_H_ 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | #include "kaldifeat/csrc/feature-common.h" 27 | #include "kaldifeat/csrc/feature-window.h" 28 | #include "kaldifeat/csrc/mel-computations.h" 29 | 30 | namespace kaldifeat { 31 | 32 | struct WhisperFbankOptions { 33 | FrameExtractionOptions frame_opts; 34 | // for large v3, please use 128 35 | int32_t num_mels = 80; 36 | 37 | torch::Device device{"cpu"}; 38 | std::string ToString() const { 39 | std::ostringstream os; 40 | os << "WhisperFbankOptions("; 41 | os << "frame_opts=" << frame_opts.ToString() << ", "; 42 | os << "num_mels=" << num_mels << ", "; 43 | os << "device=\"" << device << "\")"; 44 | return os.str(); 45 | } 46 | }; 47 | 48 | class WhisperFbankComputer { 49 | public: 50 | // note: Only frame_opts.device is used. 
All other fields from frame_opts 51 | // are ignored 52 | explicit WhisperFbankComputer(const WhisperFbankOptions &opts = {}); 53 | 54 | int32_t Dim() const { return opts_.num_mels; } 55 | 56 | const FrameExtractionOptions &GetFrameOptions() const { 57 | return opts_.frame_opts; 58 | } 59 | 60 | const WhisperFbankOptions &GetOptions() const { return opts_; } 61 | 62 | torch::Tensor Compute(torch::Tensor /*signal_raw_log_energy*/, 63 | float /*vtln_warp*/, const torch::Tensor &signal_frame); 64 | 65 | // if true, compute log_energy_pre_window but after dithering and dc removal 66 | bool NeedRawLogEnergy() const { return false; } 67 | using Options = WhisperFbankOptions; 68 | 69 | private: 70 | WhisperFbankOptions opts_; 71 | std::unique_ptr mel_banks_; 72 | }; 73 | 74 | using WhisperFbank = OfflineFeatureTpl; 75 | 76 | } // namespace kaldifeat 77 | 78 | #endif // KALDIFEAT_CSRC_WHISPER_FBANK_H_ 79 | -------------------------------------------------------------------------------- /kaldifeat/python/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(csrc) 2 | 3 | if(kaldifeat_BUILD_TESTS) 4 | add_subdirectory(tests) 5 | endif() 6 | -------------------------------------------------------------------------------- /kaldifeat/python/csrc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_definitions(-DTORCH_API_INCLUDE_EXTENSION_H) 2 | pybind11_add_module(_kaldifeat 3 | feature-fbank.cc 4 | feature-mfcc.cc 5 | feature-plp.cc 6 | feature-spectrogram.cc 7 | feature-window.cc 8 | kaldifeat.cc 9 | mel-computations.cc 10 | online-feature.cc 11 | utils.cc 12 | whisper-fbank.cc 13 | ) 14 | 15 | if(APPLE) 16 | execute_process( 17 | COMMAND "${PYTHON_EXECUTABLE}" -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())" 18 | OUTPUT_STRIP_TRAILING_WHITESPACE 19 | OUTPUT_VARIABLE PYTHON_SITE_PACKAGE_DIR 20 | ) 21 | message(STATUS 
"PYTHON_SITE_PACKAGE_DIR: ${PYTHON_SITE_PACKAGE_DIR}") 22 | target_link_libraries(_kaldifeat PRIVATE "-Wl,-rpath,${PYTHON_SITE_PACKAGE_DIR}") 23 | endif() 24 | 25 | if(NOT WIN32) 26 | target_link_libraries(_kaldifeat PRIVATE "-Wl,-rpath,${kaldifeat_rpath_origin}/kaldifeat/${CMAKE_INSTALL_LIBDIR}") 27 | endif() 28 | 29 | target_link_libraries(_kaldifeat PRIVATE kaldifeat_core) 30 | if(UNIX AND NOT APPLE) 31 | target_link_libraries(_kaldifeat PUBLIC ${TORCH_DIR}/lib/libtorch_python.so) 32 | # target_link_libraries(_kaldifeat PUBLIC ${PYTHON_LIBRARY}) 33 | elseif(WIN32) 34 | target_link_libraries(_kaldifeat PUBLIC ${TORCH_DIR}/lib/torch_python.lib) 35 | # target_link_libraries(_kaldifeat PUBLIC ${PYTHON_LIBRARIES}) 36 | endif() 37 | 38 | install(TARGETS _kaldifeat 39 | DESTINATION ../ 40 | ) 41 | -------------------------------------------------------------------------------- /kaldifeat/python/csrc/CPPLINT.cfg: -------------------------------------------------------------------------------- 1 | filter=-runtime/references 2 | -------------------------------------------------------------------------------- /kaldifeat/python/csrc/feature-fbank.cc: -------------------------------------------------------------------------------- 1 | // kaldifeat/python/csrc/feature-fbank.cc 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #include "kaldifeat/python/csrc/feature-fbank.h" 6 | 7 | #include 8 | #include 9 | 10 | #include "kaldifeat/csrc/feature-fbank.h" 11 | #include "kaldifeat/python/csrc/utils.h" 12 | 13 | namespace kaldifeat { 14 | 15 | static void PybindFbankOptions(py::module &m) { 16 | using PyClass = FbankOptions; 17 | py::class_(m, "FbankOptions") 18 | .def(py::init<>()) 19 | .def(py::init([](const MelBanksOptions &mel_opts, 20 | const FrameExtractionOptions &frame_opts = 21 | FrameExtractionOptions(), 22 | bool use_energy = false, float energy_floor = 0.0f, 23 | bool raw_energy = true, bool htk_compat = false, 24 | bool 
use_log_fbank = true, bool use_power = true, 25 | py::object device = 26 | py::str("cpu")) -> std::unique_ptr { 27 | auto opts = std::make_unique(); 28 | opts->frame_opts = frame_opts; 29 | opts->mel_opts = mel_opts; 30 | opts->use_energy = use_energy; 31 | opts->energy_floor = energy_floor; 32 | opts->raw_energy = raw_energy; 33 | opts->htk_compat = htk_compat; 34 | opts->use_log_fbank = use_log_fbank; 35 | opts->use_power = use_power; 36 | 37 | std::string s = static_cast(device); 38 | opts->device = torch::Device(s); 39 | 40 | return opts; 41 | }), 42 | py::arg("mel_opts"), 43 | py::arg("frame_opts") = FrameExtractionOptions(), 44 | py::arg("use_energy") = false, py::arg("energy_floor") = 0.0f, 45 | py::arg("raw_energy") = true, py::arg("htk_compat") = false, 46 | py::arg("use_log_fbank") = true, py::arg("use_power") = true, 47 | py::arg("device") = py::str("cpu")) 48 | .def_readwrite("frame_opts", &PyClass::frame_opts) 49 | .def_readwrite("mel_opts", &PyClass::mel_opts) 50 | .def_readwrite("use_energy", &PyClass::use_energy) 51 | .def_readwrite("energy_floor", &PyClass::energy_floor) 52 | .def_readwrite("raw_energy", &PyClass::raw_energy) 53 | .def_readwrite("htk_compat", &PyClass::htk_compat) 54 | .def_readwrite("use_log_fbank", &PyClass::use_log_fbank) 55 | .def_readwrite("use_power", &PyClass::use_power) 56 | .def_property( 57 | "device", 58 | [](const PyClass &self) -> py::object { 59 | py::object ans = py::module_::import("torch").attr("device"); 60 | return ans(self.device.str()); 61 | }, 62 | [](PyClass &self, py::object obj) -> void { 63 | std::string s = static_cast(obj); 64 | self.device = torch::Device(s); 65 | }) 66 | .def("__str__", 67 | [](const PyClass &self) -> std::string { return self.ToString(); }) 68 | .def("as_dict", 69 | [](const PyClass &self) -> py::dict { return AsDict(self); }) 70 | .def_static( 71 | "from_dict", 72 | [](py::dict dict) -> PyClass { return FbankOptionsFromDict(dict); }) 73 | .def(py::pickle( 74 | [](const PyClass &self) 
-> py::dict { return AsDict(self); }, 75 | [](py::dict dict) -> PyClass { return FbankOptionsFromDict(dict); })); 76 | } 77 | 78 | static void PybindFbank(py::module &m) { 79 | using PyClass = Fbank; 80 | py::class_(m, "Fbank") 81 | .def(py::init(), py::arg("opts")) 82 | .def("dim", &PyClass::Dim) 83 | .def_property_readonly("options", &PyClass::GetOptions) 84 | .def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"), 85 | py::arg("vtln_warp"), py::call_guard()) 86 | .def(py::pickle( 87 | [](const PyClass &self) -> py::dict { 88 | return AsDict(self.GetOptions()); 89 | }, 90 | [](py::dict dict) -> std::unique_ptr { 91 | return std::make_unique(FbankOptionsFromDict(dict)); 92 | })); 93 | } 94 | 95 | void PybindFeatureFbank(py::module &m) { 96 | PybindFbankOptions(m); 97 | PybindFbank(m); 98 | } 99 | 100 | } // namespace kaldifeat 101 | -------------------------------------------------------------------------------- /kaldifeat/python/csrc/feature-fbank.h: -------------------------------------------------------------------------------- 1 | // kaldifeat/python/csrc/feature-fbank.h 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #ifndef KALDIFEAT_PYTHON_CSRC_FEATURE_FBANK_H_ 6 | #define KALDIFEAT_PYTHON_CSRC_FEATURE_FBANK_H_ 7 | 8 | #include "kaldifeat/python/csrc/kaldifeat.h" 9 | 10 | namespace kaldifeat { 11 | 12 | void PybindFeatureFbank(py::module &m); 13 | 14 | } // namespace kaldifeat 15 | 16 | #endif // KALDIFEAT_PYTHON_CSRC_FEATURE_FBANK_H_ 17 | -------------------------------------------------------------------------------- /kaldifeat/python/csrc/feature-mfcc.cc: -------------------------------------------------------------------------------- 1 | // kaldifeat/python/csrc/feature-mfcc.cc 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #include "kaldifeat/python/csrc/feature-mfcc.h" 6 | 7 | #include 8 | #include 9 | 10 | #include "kaldifeat/csrc/feature-mfcc.h" 11 | #include 
"kaldifeat/python/csrc/utils.h" 12 | 13 | namespace kaldifeat { 14 | 15 | void PybindMfccOptions(py::module &m) { 16 | using PyClass = MfccOptions; 17 | py::class_(m, "MfccOptions") 18 | .def(py::init<>()) 19 | .def(py::init([](const MelBanksOptions &mel_opts, 20 | const FrameExtractionOptions &frame_opts = 21 | FrameExtractionOptions(), 22 | int32_t num_ceps = 13, bool use_energy = true, 23 | float energy_floor = 0.0, bool raw_energy = true, 24 | float cepstral_lifter = 22.0, bool htk_compat = false, 25 | py::object device = 26 | py::str("cpu")) -> std::unique_ptr { 27 | auto opts = std::make_unique(); 28 | opts->frame_opts = frame_opts; 29 | opts->mel_opts = mel_opts; 30 | opts->num_ceps = num_ceps; 31 | opts->use_energy = use_energy; 32 | opts->energy_floor = energy_floor; 33 | opts->raw_energy = raw_energy; 34 | opts->cepstral_lifter = cepstral_lifter; 35 | opts->htk_compat = htk_compat; 36 | 37 | std::string s = static_cast(device); 38 | opts->device = torch::Device(s); 39 | 40 | return opts; 41 | }), 42 | py::arg("mel_opts"), 43 | py::arg("frame_opts") = FrameExtractionOptions(), 44 | py::arg("num_ceps") = 13, py::arg("use_energy") = true, 45 | py::arg("energy_floor") = 0.0f, py::arg("raw_energy") = true, 46 | py::arg("cepstral_lifter") = 22.0, py::arg("htk_compat") = false, 47 | py::arg("device") = py::str("cpu")) 48 | .def_readwrite("frame_opts", &PyClass::frame_opts) 49 | .def_readwrite("mel_opts", &PyClass::mel_opts) 50 | .def_readwrite("num_ceps", &PyClass::num_ceps) 51 | .def_readwrite("use_energy", &PyClass::use_energy) 52 | .def_readwrite("energy_floor", &PyClass::energy_floor) 53 | .def_readwrite("raw_energy", &PyClass::raw_energy) 54 | .def_readwrite("cepstral_lifter", &PyClass::cepstral_lifter) 55 | .def_readwrite("htk_compat", &PyClass::htk_compat) 56 | .def_property( 57 | "device", 58 | [](const PyClass &self) -> py::object { 59 | py::object ans = py::module_::import("torch").attr("device"); 60 | return ans(self.device.str()); 61 | }, 62 | 
[](PyClass &self, py::object obj) -> void { 63 | std::string s = static_cast(obj); 64 | self.device = torch::Device(s); 65 | }) 66 | .def("__str__", 67 | [](const PyClass &self) -> std::string { return self.ToString(); }) 68 | .def("as_dict", 69 | [](const PyClass &self) -> py::dict { return AsDict(self); }) 70 | .def_static( 71 | "from_dict", 72 | [](py::dict dict) -> PyClass { return MfccOptionsFromDict(dict); }) 73 | .def(py::pickle( 74 | [](const PyClass &self) -> py::dict { return AsDict(self); }, 75 | [](py::dict dict) -> PyClass { return MfccOptionsFromDict(dict); })); 76 | } 77 | 78 | static void PybindMfcc(py::module &m) { 79 | using PyClass = Mfcc; 80 | py::class_(m, "Mfcc") 81 | .def(py::init(), py::arg("opts")) 82 | .def("dim", &PyClass::Dim) 83 | .def_property_readonly("options", &PyClass::GetOptions) 84 | .def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"), 85 | py::arg("vtln_warp"), py::call_guard()) 86 | .def(py::pickle( 87 | [](const PyClass &self) -> py::dict { 88 | return AsDict(self.GetOptions()); 89 | }, 90 | [](py::dict dict) -> std::unique_ptr { 91 | return std::make_unique(MfccOptionsFromDict(dict)); 92 | })); 93 | } 94 | 95 | void PybindFeatureMfcc(py::module &m) { 96 | PybindMfccOptions(m); 97 | PybindMfcc(m); 98 | } 99 | 100 | } // namespace kaldifeat 101 | -------------------------------------------------------------------------------- /kaldifeat/python/csrc/feature-mfcc.h: -------------------------------------------------------------------------------- 1 | // kaldifeat/python/csrc/feature-mfcc.h 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #ifndef KALDIFEAT_PYTHON_CSRC_FEATURE_MFCC_H_ 6 | #define KALDIFEAT_PYTHON_CSRC_FEATURE_MFCC_H_ 7 | 8 | #include "kaldifeat/python/csrc/kaldifeat.h" 9 | 10 | namespace kaldifeat { 11 | 12 | void PybindFeatureMfcc(py::module &m); 13 | 14 | } // namespace kaldifeat 15 | 16 | #endif // KALDIFEAT_PYTHON_CSRC_FEATURE_MFCC_H_ 17 | 
-------------------------------------------------------------------------------- /kaldifeat/python/csrc/feature-plp.h: -------------------------------------------------------------------------------- 1 | // kaldifeat/python/csrc/feature-plp.h 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #ifndef KALDIFEAT_PYTHON_CSRC_FEATURE_PLP_H_ 6 | #define KALDIFEAT_PYTHON_CSRC_FEATURE_PLP_H_ 7 | 8 | #include "kaldifeat/python/csrc/kaldifeat.h" 9 | 10 | namespace kaldifeat { 11 | 12 | void PybindFeaturePlp(py::module &m); 13 | 14 | } // namespace kaldifeat 15 | 16 | #endif // KALDIFEAT_PYTHON_CSRC_FEATURE_PLP_H_ 17 | -------------------------------------------------------------------------------- /kaldifeat/python/csrc/feature-spectrogram.cc: -------------------------------------------------------------------------------- 1 | // kaldifeat/python/csrc/feature-spectrogram.cc 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #include "kaldifeat/python/csrc/feature-spectrogram.h" 6 | 7 | #include 8 | #include 9 | 10 | #include "kaldifeat/csrc/feature-spectrogram.h" 11 | #include "kaldifeat/python/csrc/utils.h" 12 | 13 | namespace kaldifeat { 14 | 15 | static void PybindSpectrogramOptions(py::module &m) { 16 | using PyClass = SpectrogramOptions; 17 | py::class_(m, "SpectrogramOptions") 18 | .def(py::init([](const FrameExtractionOptions &frame_opts = 19 | FrameExtractionOptions(), 20 | float energy_floor = 0.0, bool raw_energy = true, 21 | bool return_raw_fft = false, 22 | py::object device = py::str( 23 | "cpu")) -> std::unique_ptr { 24 | auto opts = std::make_unique(); 25 | opts->frame_opts = frame_opts; 26 | opts->energy_floor = energy_floor; 27 | opts->raw_energy = raw_energy; 28 | opts->return_raw_fft = return_raw_fft; 29 | 30 | std::string s = static_cast(device); 31 | opts->device = torch::Device(s); 32 | 33 | return opts; 34 | }), 35 | py::arg("frame_opts") = FrameExtractionOptions(), 36 | 
py::arg("energy_floor") = 0.0, py::arg("raw_energy") = true, 37 | py::arg("return_raw_fft") = false, 38 | py::arg("device") = py::str("cpu")) 39 | .def_readwrite("frame_opts", &PyClass::frame_opts) 40 | .def_readwrite("energy_floor", &PyClass::energy_floor) 41 | .def_readwrite("raw_energy", &PyClass::raw_energy) 42 | // .def_readwrite("return_raw_fft", &PyClass::return_raw_fft) // not 43 | // implemented yet 44 | .def_property( 45 | "device", 46 | [](const PyClass &self) -> py::object { 47 | py::object ans = py::module_::import("torch").attr("device"); 48 | return ans(self.device.str()); 49 | }, 50 | [](PyClass &self, py::object obj) -> void { 51 | std::string s = static_cast(obj); 52 | self.device = torch::Device(s); 53 | }) 54 | .def("__str__", 55 | [](const PyClass &self) -> std::string { return self.ToString(); }) 56 | .def("as_dict", 57 | [](const PyClass &self) -> py::dict { return AsDict(self); }) 58 | .def_static("from_dict", 59 | [](py::dict dict) -> PyClass { 60 | return SpectrogramOptionsFromDict(dict); 61 | }) 62 | .def(py::pickle( 63 | [](const PyClass &self) -> py::dict { return AsDict(self); }, 64 | [](py::dict dict) -> PyClass { 65 | return SpectrogramOptionsFromDict(dict); 66 | })); 67 | } 68 | 69 | static void PybindSpectrogram(py::module &m) { 70 | using PyClass = Spectrogram; 71 | py::class_(m, "Spectrogram") 72 | .def(py::init(), py::arg("opts")) 73 | .def("dim", &PyClass::Dim) 74 | .def_property_readonly("options", &PyClass::GetOptions) 75 | .def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"), 76 | py::arg("vtln_warp"), py::call_guard()) 77 | .def(py::pickle( 78 | [](const PyClass &self) -> py::dict { 79 | return AsDict(self.GetOptions()); 80 | }, 81 | [](py::dict dict) -> std::unique_ptr { 82 | return std::make_unique(SpectrogramOptionsFromDict(dict)); 83 | })); 84 | } 85 | 86 | void PybindFeatureSpectrogram(py::module &m) { 87 | PybindSpectrogramOptions(m); 88 | PybindSpectrogram(m); 89 | } 90 | 91 | } // namespace kaldifeat 
92 | -------------------------------------------------------------------------------- /kaldifeat/python/csrc/feature-spectrogram.h: -------------------------------------------------------------------------------- 1 | // kaldifeat/python/csrc/feature-spectrogram.h 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #ifndef KALDIFEAT_PYTHON_CSRC_FEATURE_SPECTROGRAM_H_ 6 | #define KALDIFEAT_PYTHON_CSRC_FEATURE_SPECTROGRAM_H_ 7 | 8 | #include "kaldifeat/python/csrc/kaldifeat.h" 9 | 10 | namespace kaldifeat { 11 | 12 | void PybindFeatureSpectrogram(py::module &m); 13 | 14 | } // namespace kaldifeat 15 | 16 | #endif // KALDIFEAT_PYTHON_CSRC_FEATURE_SPECTROGRAM_H_ 17 | -------------------------------------------------------------------------------- /kaldifeat/python/csrc/feature-window.cc: -------------------------------------------------------------------------------- 1 | // kaldifeat/python/csrc/feature-window.cc 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #include "kaldifeat/python/csrc/feature-window.h" 6 | 7 | #include 8 | #include 9 | 10 | #include "kaldifeat/csrc/feature-window.h" 11 | #include "kaldifeat/python/csrc/utils.h" 12 | 13 | namespace kaldifeat { 14 | 15 | static void PybindFrameExtractionOptions(py::module &m) { 16 | using PyClass = FrameExtractionOptions; 17 | py::class_(m, "FrameExtractionOptions") 18 | .def( 19 | py::init([](float samp_freq = 16000, float frame_shift_ms = 10.0f, 20 | float frame_length_ms = 25.0f, float dither = 1.0f, 21 | float preemph_coeff = 0.97f, bool remove_dc_offset = true, 22 | const std::string &window_type = "povey", 23 | bool round_to_power_of_two = true, 24 | float blackman_coeff = 0.42f, bool snip_edges = true, 25 | int32_t max_feature_vectors = 26 | -1) -> std::unique_ptr { 27 | auto opts = std::make_unique(); 28 | 29 | opts->samp_freq = samp_freq; 30 | opts->frame_shift_ms = frame_shift_ms; 31 | opts->frame_length_ms = frame_length_ms; 32 | 
opts->dither = dither; 33 | opts->preemph_coeff = preemph_coeff; 34 | opts->remove_dc_offset = remove_dc_offset; 35 | opts->window_type = window_type; 36 | opts->round_to_power_of_two = round_to_power_of_two; 37 | opts->blackman_coeff = blackman_coeff; 38 | opts->snip_edges = snip_edges; 39 | opts->max_feature_vectors = max_feature_vectors; 40 | 41 | return opts; 42 | }), 43 | py::arg("samp_freq") = 16000, py::arg("frame_shift_ms") = 10.0f, 44 | py::arg("frame_length_ms") = 25.0f, py::arg("dither") = 1.0f, 45 | py::arg("preemph_coeff") = 0.97f, py::arg("remove_dc_offset") = true, 46 | py::arg("window_type") = "povey", 47 | py::arg("round_to_power_of_two") = true, 48 | py::arg("blackman_coeff") = 0.42f, py::arg("snip_edges") = true, 49 | py::arg("max_feature_vectors") = -1) 50 | .def_readwrite("samp_freq", &PyClass::samp_freq) 51 | .def_readwrite("frame_shift_ms", &PyClass::frame_shift_ms) 52 | .def_readwrite("frame_length_ms", &PyClass::frame_length_ms) 53 | .def_readwrite("dither", &PyClass::dither) 54 | .def_readwrite("preemph_coeff", &PyClass::preemph_coeff) 55 | .def_readwrite("remove_dc_offset", &PyClass::remove_dc_offset) 56 | .def_readwrite("window_type", &PyClass::window_type) 57 | .def_readwrite("round_to_power_of_two", &PyClass::round_to_power_of_two) 58 | .def_readwrite("blackman_coeff", &PyClass::blackman_coeff) 59 | .def_readwrite("snip_edges", &PyClass::snip_edges) 60 | .def_readwrite("max_feature_vectors", &PyClass::max_feature_vectors) 61 | .def("as_dict", 62 | [](const PyClass &self) -> py::dict { return AsDict(self); }) 63 | .def_static("from_dict", 64 | [](py::dict dict) -> PyClass { 65 | return FrameExtractionOptionsFromDict(dict); 66 | }) 67 | #if 0 68 | .def_readwrite("allow_downsample", 69 | &PyClass::allow_downsample) 70 | .def_readwrite("allow_upsample", &PyClass::allow_upsample) 71 | #endif 72 | .def("__str__", 73 | [](const PyClass &self) -> std::string { return self.ToString(); }) 74 | .def(py::pickle( 75 | [](const PyClass &self) -> 
py::dict { return AsDict(self); }, 76 | [](py::dict dict) -> PyClass { 77 | return FrameExtractionOptionsFromDict(dict); 78 | })); 79 | 80 | m.def("num_frames", &NumFrames, py::arg("num_samples"), py::arg("opts"), 81 | py::arg("flush") = true); 82 | 83 | m.def("get_strided", &GetStrided, py::arg("wave"), py::arg("opts")); 84 | } 85 | 86 | void PybindFeatureWindow(py::module &m) { 87 | PybindFrameExtractionOptions(m); 88 | 89 | m.def("num_frames", &NumFrames, py::arg("num_samples"), py::arg("opts"), 90 | py::arg("flush") = true); 91 | 92 | m.def("get_strided", &GetStrided, py::arg("wave"), py::arg("opts")); 93 | } 94 | 95 | } // namespace kaldifeat 96 | -------------------------------------------------------------------------------- /kaldifeat/python/csrc/feature-window.h: -------------------------------------------------------------------------------- 1 | // kaldifeat/python/csrc/feature-window.h 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #ifndef KALDIFEAT_PYTHON_CSRC_FEATURE_WINDOW_H_ 6 | #define KALDIFEAT_PYTHON_CSRC_FEATURE_WINDOW_H_ 7 | 8 | #include "kaldifeat/python/csrc/kaldifeat.h" 9 | 10 | namespace kaldifeat { 11 | 12 | void PybindFeatureWindow(py::module &m); 13 | 14 | } // namespace kaldifeat 15 | 16 | #endif // KALDIFEAT_PYTHON_CSRC_FEATURE_WINDOW_H_ 17 | -------------------------------------------------------------------------------- /kaldifeat/python/csrc/kaldifeat.cc: -------------------------------------------------------------------------------- 1 | // kaldifeat/python/csrc/kaldifeat.cc 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #include "kaldifeat/python/csrc/kaldifeat.h" 6 | 7 | #include "kaldifeat/csrc/feature-fbank.h" 8 | #include "kaldifeat/python/csrc/feature-fbank.h" 9 | #include "kaldifeat/python/csrc/feature-mfcc.h" 10 | #include "kaldifeat/python/csrc/feature-plp.h" 11 | #include "kaldifeat/python/csrc/feature-spectrogram.h" 12 | #include 
"kaldifeat/python/csrc/feature-window.h" 13 | #include "kaldifeat/python/csrc/mel-computations.h" 14 | #include "kaldifeat/python/csrc/online-feature.h" 15 | #include "kaldifeat/python/csrc/whisper-fbank.h" 16 | #include "torch/torch.h" 17 | 18 | namespace kaldifeat { 19 | 20 | PYBIND11_MODULE(_kaldifeat, m) { 21 | m.doc() = "Python wrapper for kaldifeat"; 22 | 23 | PybindFeatureWindow(m); 24 | PybindMelComputations(m); 25 | PybindFeatureFbank(m); 26 | PybindWhisperFbank(&m); 27 | PybindFeatureMfcc(m); 28 | PybindFeaturePlp(m); 29 | PybindFeatureSpectrogram(m); 30 | PybindOnlineFeature(m); 31 | } 32 | 33 | } // namespace kaldifeat 34 | -------------------------------------------------------------------------------- /kaldifeat/python/csrc/kaldifeat.h: -------------------------------------------------------------------------------- 1 | // kaldifeat/python/csrc/kaldifeat.h 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #ifndef KALDIFEAT_PYTHON_CSRC_KALDIFEAT_H_ 6 | #define KALDIFEAT_PYTHON_CSRC_KALDIFEAT_H_ 7 | 8 | #include "pybind11/pybind11.h" 9 | #include "torch/torch.h" 10 | namespace py = pybind11; 11 | 12 | #endif // KALDIFEAT_PYTHON_CSRC_KALDIFEAT_H_ 13 | -------------------------------------------------------------------------------- /kaldifeat/python/csrc/mel-computations.cc: -------------------------------------------------------------------------------- 1 | // kaldifeat/python/csrc/mel-computations.cc 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #include "kaldifeat/python/csrc/mel-computations.h" 6 | 7 | #include 8 | #include 9 | 10 | #include "kaldifeat/csrc/mel-computations.h" 11 | #include "kaldifeat/python/csrc/utils.h" 12 | 13 | namespace kaldifeat { 14 | 15 | static void PybindMelBanksOptions(py::module &m) { 16 | using PyClass = MelBanksOptions; 17 | py::class_(m, "MelBanksOptions") 18 | .def(py::init( 19 | [](int32_t num_bins = 25, float low_freq = 20, 20 | float high_freq 
= 0, float vtln_low = 100, 21 | float vtln_high = -500, 22 | bool debug_mel = false) -> std::unique_ptr { 23 | auto opts = std::make_unique(); 24 | 25 | opts->num_bins = num_bins; 26 | opts->low_freq = low_freq; 27 | opts->high_freq = high_freq; 28 | opts->vtln_low = vtln_low; 29 | opts->vtln_high = vtln_high; 30 | 31 | return opts; 32 | }), 33 | py::arg("num_bins") = 25, py::arg("low_freq") = 20, 34 | py::arg("high_freq") = 0, py::arg("vtln_low") = 100, 35 | py::arg("vtln_high") = -500, py::arg("debug_mel") = false) 36 | .def_readwrite("num_bins", &PyClass::num_bins) 37 | .def_readwrite("low_freq", &PyClass::low_freq) 38 | .def_readwrite("high_freq", &PyClass::high_freq) 39 | .def_readwrite("vtln_low", &PyClass::vtln_low) 40 | .def_readwrite("vtln_high", &PyClass::vtln_high) 41 | .def_readwrite("debug_mel", &PyClass::debug_mel) 42 | .def_readwrite("htk_mode", &PyClass::htk_mode) 43 | .def("__str__", 44 | [](const PyClass &self) -> std::string { return self.ToString(); }) 45 | .def("as_dict", 46 | [](const PyClass &self) -> py::dict { return AsDict(self); }) 47 | .def_static("from_dict", 48 | [](py::dict dict) -> PyClass { 49 | return MelBanksOptionsFromDict(dict); 50 | }) 51 | .def(py::pickle( 52 | [](const PyClass &self) -> py::dict { return AsDict(self); }, 53 | [](py::dict dict) -> PyClass { 54 | return MelBanksOptionsFromDict(dict); 55 | })); 56 | } 57 | 58 | void PybindMelComputations(py::module &m) { PybindMelBanksOptions(m); } 59 | 60 | } // namespace kaldifeat 61 | -------------------------------------------------------------------------------- /kaldifeat/python/csrc/mel-computations.h: -------------------------------------------------------------------------------- 1 | // kaldifeat/python/csrc/mel-computations.h 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #ifndef KALDIFEAT_PYTHON_CSRC_MEL_COMPUTATIONS_H_ 6 | #define KALDIFEAT_PYTHON_CSRC_MEL_COMPUTATIONS_H_ 7 | 8 | #include "kaldifeat/python/csrc/kaldifeat.h" 9 | 
10 | namespace kaldifeat { 11 | 12 | void PybindMelComputations(py::module &m); 13 | 14 | } // namespace kaldifeat 15 | 16 | #endif // KALDIFEAT_PYTHON_CSRC_MEL_COMPUTATIONS_H_ 17 | -------------------------------------------------------------------------------- /kaldifeat/python/csrc/online-feature.cc: -------------------------------------------------------------------------------- 1 | // kaldifeat/python/csrc/online-feature.cc 2 | // 3 | // Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #include "kaldifeat/python/csrc/online-feature.h" 6 | 7 | #include 8 | 9 | #include "kaldifeat/csrc/online-feature.h" 10 | namespace kaldifeat { 11 | 12 | template 13 | void PybindOnlineFeatureTpl(py::module &m, const std::string &class_name, 14 | const std::string &class_help_doc = "") { 15 | using PyClass = OnlineGenericBaseFeature; 16 | using Options = typename C::Options; 17 | py::class_(m, class_name.c_str(), class_help_doc.c_str()) 18 | .def(py::init(), py::arg("opts")) 19 | .def_property_readonly("dim", &PyClass::Dim) 20 | .def_property_readonly("frame_shift_in_seconds", 21 | &PyClass::FrameShiftInSeconds) 22 | .def_property_readonly("num_frames_ready", &PyClass::NumFramesReady) 23 | .def("is_last_frame", &PyClass::IsLastFrame, py::arg("frame")) 24 | .def("get_frame", &PyClass::GetFrame, py::arg("frame")) 25 | .def("get_frames", &PyClass::GetFrames, py::arg("frames"), 26 | py::call_guard()) 27 | .def("accept_waveform", &PyClass::AcceptWaveform, 28 | py::arg("sampling_rate"), py::arg("waveform"), 29 | py::call_guard()) 30 | .def("input_finished", &PyClass::InputFinished); 31 | } 32 | 33 | void PybindOnlineFeature(py::module &m) { 34 | PybindOnlineFeatureTpl(m, "OnlineMfcc"); 35 | PybindOnlineFeatureTpl(m, "OnlineFbank"); 36 | PybindOnlineFeatureTpl(m, "OnlinePlp"); 37 | } 38 | 39 | } // namespace kaldifeat 40 | -------------------------------------------------------------------------------- /kaldifeat/python/csrc/online-feature.h: 
-------------------------------------------------------------------------------- 1 | // kaldifeat/python/csrc/online-feature.h 2 | // 3 | // Copyright (c) 2022 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #ifndef KALDIFEAT_PYTHON_CSRC_ONLINE_FEATURE_H_ 6 | #define KALDIFEAT_PYTHON_CSRC_ONLINE_FEATURE_H_ 7 | 8 | #include "kaldifeat/python/csrc/kaldifeat.h" 9 | 10 | namespace kaldifeat { 11 | 12 | void PybindOnlineFeature(py::module &m); 13 | 14 | } // namespace kaldifeat 15 | 16 | #endif // KALDIFEAT_PYTHON_CSRC_ONLINE_FEATURE_H_ 17 | -------------------------------------------------------------------------------- /kaldifeat/python/csrc/utils.h: -------------------------------------------------------------------------------- 1 | // kaldifeat/python/csrc/utils.h 2 | // 3 | // Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #ifndef KALDIFEAT_PYTHON_CSRC_UTILS_H_ 6 | #define KALDIFEAT_PYTHON_CSRC_UTILS_H_ 7 | 8 | #include "kaldifeat/csrc/feature-fbank.h" 9 | #include "kaldifeat/csrc/feature-mfcc.h" 10 | #include "kaldifeat/csrc/feature-plp.h" 11 | #include "kaldifeat/csrc/feature-spectrogram.h" 12 | #include "kaldifeat/csrc/feature-window.h" 13 | #include "kaldifeat/csrc/mel-computations.h" 14 | #include "kaldifeat/csrc/whisper-fbank.h" 15 | #include "kaldifeat/python/csrc/kaldifeat.h" 16 | 17 | /* 18 | * This file contains code about `from_dict` and 19 | * `as_dict` for various options in kaldifeat. 20 | * 21 | * Regarding `from_dict`, users don't need to provide 22 | * all the fields in the options. If some fields 23 | * are not provided, it just uses the default one. 24 | * 25 | * If the provided dict in `from_dict` is empty, 26 | * all fields use their default values. 
27 | */ 28 | 29 | namespace kaldifeat { 30 | 31 | FrameExtractionOptions FrameExtractionOptionsFromDict(py::dict dict); 32 | py::dict AsDict(const FrameExtractionOptions &opts); 33 | 34 | MelBanksOptions MelBanksOptionsFromDict(py::dict dict); 35 | py::dict AsDict(const MelBanksOptions &opts); 36 | 37 | FbankOptions FbankOptionsFromDict(py::dict dict); 38 | py::dict AsDict(const FbankOptions &opts); 39 | 40 | WhisperFbankOptions WhisperFbankOptionsFromDict(py::dict dict); 41 | py::dict AsDict(const WhisperFbankOptions &opts); 42 | 43 | MfccOptions MfccOptionsFromDict(py::dict dict); 44 | py::dict AsDict(const MfccOptions &opts); 45 | 46 | SpectrogramOptions SpectrogramOptionsFromDict(py::dict dict); 47 | py::dict AsDict(const SpectrogramOptions &opts); 48 | 49 | PlpOptions PlpOptionsFromDict(py::dict dict); 50 | py::dict AsDict(const PlpOptions &opts); 51 | 52 | } // namespace kaldifeat 53 | 54 | #endif // KALDIFEAT_PYTHON_CSRC_UTILS_H_ 55 | -------------------------------------------------------------------------------- /kaldifeat/python/csrc/whisper-fbank.cc: -------------------------------------------------------------------------------- 1 | // kaldifeat/python/csrc/whisper-fbank.cc 2 | // 3 | // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #include "kaldifeat/python/csrc/whisper-fbank.h" 6 | 7 | #include 8 | #include 9 | 10 | #include "kaldifeat/csrc/whisper-fbank.h" 11 | #include "kaldifeat/python/csrc/utils.h" 12 | 13 | namespace kaldifeat { 14 | 15 | static void PybindWhisperFbankOptions(py::module *m) { 16 | using PyClass = WhisperFbankOptions; 17 | py::class_(*m, "WhisperFbankOptions") 18 | .def(py::init<>()) 19 | .def(py::init([](const FrameExtractionOptions &frame_opts = 20 | FrameExtractionOptions(), 21 | int32_t num_mels = 80, 22 | py::object device = py::str( 23 | "cpu")) -> std::unique_ptr { 24 | auto opts = std::make_unique(); 25 | opts->frame_opts = frame_opts; 26 | opts->num_mels = num_mels; 27 | 28 | std::string s = 
static_cast(device); 29 | opts->device = torch::Device(s); 30 | 31 | return opts; 32 | }), 33 | py::arg("frame_opts") = FrameExtractionOptions(), 34 | py::arg("num_mels") = 80, py::arg("device") = py::str("cpu")) 35 | .def_readwrite("frame_opts", &PyClass::frame_opts) 36 | .def_readwrite("num_mels", &PyClass::num_mels) 37 | .def_property( 38 | "device", 39 | [](const PyClass &self) -> py::object { 40 | py::object ans = py::module_::import("torch").attr("device"); 41 | return ans(self.device.str()); 42 | }, 43 | [](PyClass &self, py::object obj) -> void { 44 | std::string s = static_cast(obj); 45 | self.device = torch::Device(s); 46 | }) 47 | .def("__str__", 48 | [](const PyClass &self) -> std::string { return self.ToString(); }) 49 | .def("as_dict", 50 | [](const PyClass &self) -> py::dict { return AsDict(self); }) 51 | .def_static("from_dict", 52 | [](py::dict dict) -> PyClass { 53 | return WhisperFbankOptionsFromDict(dict); 54 | }) 55 | .def(py::pickle( 56 | [](const PyClass &self) -> py::dict { return AsDict(self); }, 57 | [](py::dict dict) -> PyClass { 58 | return WhisperFbankOptionsFromDict(dict); 59 | })); 60 | } 61 | 62 | static void PybindWhisperFbankImpl(py::module *m) { 63 | using PyClass = WhisperFbank; 64 | py::class_(*m, "WhisperFbank") 65 | .def(py::init(), py::arg("opts")) 66 | .def("dim", &PyClass::Dim) 67 | .def_property_readonly("options", &PyClass::GetOptions) 68 | .def("compute_features", &PyClass::ComputeFeatures, py::arg("wave"), 69 | py::arg("vtln_warp"), py::call_guard()) 70 | .def(py::pickle( 71 | [](const PyClass &self) -> py::dict { 72 | return AsDict(self.GetOptions()); 73 | }, 74 | [](py::dict dict) -> std::unique_ptr { 75 | return std::make_unique(WhisperFbankOptionsFromDict(dict)); 76 | })); 77 | } 78 | 79 | void PybindWhisperFbank(py::module *m) { 80 | PybindWhisperFbankOptions(m); 81 | PybindWhisperFbankImpl(m); 82 | } 83 | 84 | } // namespace kaldifeat 85 | 
-------------------------------------------------------------------------------- /kaldifeat/python/csrc/whisper-fbank.h: -------------------------------------------------------------------------------- 1 | // kaldifeat/python/csrc/whisper-fbank.h 2 | // 3 | // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | #ifndef KALDIFEAT_PYTHON_CSRC_WHISPER_FBANK_H_ 6 | #define KALDIFEAT_PYTHON_CSRC_WHISPER_FBANK_H_ 7 | 8 | #include "kaldifeat/python/csrc/kaldifeat.h" 9 | 10 | namespace kaldifeat { 11 | 12 | void PybindWhisperFbank(py::module *m); 13 | 14 | } // namespace kaldifeat 15 | 16 | #endif // KALDIFEAT_PYTHON_CSRC_WHISPER_FBANK_H_ 17 | -------------------------------------------------------------------------------- /kaldifeat/python/kaldifeat/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .torch_version import kaldifeat_torch_version 4 | 5 | if torch.__version__.split("+")[0] != kaldifeat_torch_version.split("+")[0]: 6 | raise ImportError( 7 | f"kaldifeat was built using PyTorch {kaldifeat_torch_version}\n" 8 | f"But you are using PyTorch {torch.__version__} to run it" 9 | ) 10 | 11 | from pathlib import Path as _Path 12 | 13 | from _kaldifeat import ( 14 | FbankOptions, 15 | FrameExtractionOptions, 16 | MelBanksOptions, 17 | MfccOptions, 18 | PlpOptions, 19 | SpectrogramOptions, 20 | WhisperFbankOptions, 21 | num_frames, 22 | ) 23 | 24 | from .fbank import Fbank, OnlineFbank 25 | from .mfcc import Mfcc, OnlineMfcc 26 | from .offline_feature import OfflineFeature 27 | from .online_feature import OnlineFeature 28 | from .plp import OnlinePlp, Plp 29 | from .spectrogram import Spectrogram 30 | from .whisper_fbank import WhisperFbank 31 | 32 | cmake_prefix_path = _Path(__file__).parent / "share" / "cmake" 33 | del _Path 34 | -------------------------------------------------------------------------------- /kaldifeat/python/kaldifeat/fbank.py: 
# --------------------------------------------------------------------------------
# Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)


import _kaldifeat

from .offline_feature import OfflineFeature
from .online_feature import OnlineFeature


class Fbank(OfflineFeature):
    """Offline fbank feature computer backed by ``_kaldifeat.Fbank``."""

    def __init__(self, opts: _kaldifeat.FbankOptions):
        super().__init__(opts)
        # Replace the computer set up by the base class with the real one.
        self.computer = _kaldifeat.Fbank(opts)


class OnlineFbank(OnlineFeature):
    """Online fbank feature computer backed by ``_kaldifeat.OnlineFbank``."""

    def __init__(self, opts: _kaldifeat.FbankOptions):
        super().__init__(opts)
        # Replace the placeholder set by the base class with the real computer.
        self.computer = _kaldifeat.OnlineFbank(opts)

    def __setstate__(self, state):
        # `state` is the options dict produced by ``__getstate__`` of the
        # base class; the computer is rebuilt from the restored options.
        restored_opts = _kaldifeat.FbankOptions.from_dict(state)
        self.opts = restored_opts
        self.computer = _kaldifeat.OnlineFbank(restored_opts)


# --------------------------------------------------------------------------------
# /kaldifeat/python/kaldifeat/mfcc.py:
# --------------------------------------------------------------------------------
# Copyright (c)  2021  Xiaomi Corporation (authors: Fangjun Kuang)


import _kaldifeat

from .offline_feature import OfflineFeature
from .online_feature import OnlineFeature


class Mfcc(OfflineFeature):
    """Offline MFCC feature computer backed by ``_kaldifeat.Mfcc``."""

    def __init__(self, opts: _kaldifeat.MfccOptions):
        super().__init__(opts)
        # Replace the computer set up by the base class with the real one.
        self.computer = _kaldifeat.Mfcc(opts)


class OnlineMfcc(OnlineFeature):
    """Online MFCC feature computer backed by ``_kaldifeat.OnlineMfcc``."""

    def __init__(self, opts: _kaldifeat.MfccOptions):
        super().__init__(opts)
        # Replace the placeholder set by the base class with the real computer.
        self.computer = _kaldifeat.OnlineMfcc(opts)

    def __setstate__(self, state):
        # `state` is the options dict produced by ``__getstate__`` of the
        # base class; the computer is rebuilt from the restored options.
        restored_opts = _kaldifeat.MfccOptions.from_dict(state)
        self.opts = restored_opts
        self.computer = _kaldifeat.OnlineMfcc(restored_opts)


# --------------------------------------------------------------------------------
# /kaldifeat/python/kaldifeat/online_feature.py:
# --------------------------------------------------------------------------------
# Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)

from typing import List
4 | 5 | import torch 6 | 7 | 8 | class OnlineFeature(object): 9 | """Online feature is a base class of other online feature computers, 10 | e.g., OnlineFbank, OnlineMfcc. 11 | 12 | This class has two fields: 13 | 14 | (1) opts. It contains the options for the feature computer. 15 | (2) computer. The actual feature computer. It should be 16 | instantiated by subclasses. 17 | 18 | Caution: 19 | It supports only CPU at present. 20 | """ 21 | 22 | def __init__(self, opts): 23 | assert opts.device.type == "cpu" 24 | 25 | self.opts = opts 26 | 27 | # self.computer is expected to be set by subclasses 28 | self.computer = None 29 | 30 | @property 31 | def num_frames_ready(self) -> int: 32 | """Return the number of ready frames. 33 | 34 | It can be updated by :meth:`accept_waveform`. 35 | 36 | Note: 37 | If you set ``opts.frame_opts.max_feature_vectors``, then 38 | the valid frame indexes are in the range 39 | ``[num_frames_ready - max_feature_vectors, num_frames_ready)`` 40 | 41 | If you leave ``opts.frame_opts.max_feature_vectors`` to its default 42 | value, then the range is ``[0, num_frames_ready)`` 43 | """ 44 | return self.computer.num_frames_ready 45 | 46 | def is_last_frame(self, frame: int) -> bool: 47 | """Return True if the given frame is the last frame.""" 48 | return self.computer.is_last_frame(frame) 49 | 50 | def get_frame(self, frame: int) -> torch.Tensor: 51 | """Get the frame by its index. 52 | Args: 53 | frame: 54 | The frame index. If ``opts.frame_opts.max_feature_vectors`` is 55 | -1, then its valid values are in the range 56 | ``[0, num_frames_ready)``. Otherwise, the range is 57 | ``[num_frames_ready - max_feature_vectors, num_frames_ready)``. 58 | Returns: 59 | Return a 2-D tensor with shape ``(1, feature_dim)`` 60 | """ 61 | return self.computer.get_frame(frame) 62 | 63 | def get_frames(self, frames: List[int]) -> List[torch.Tensor]: 64 | """Get frames at the given frame indexes. 65 | Args: 66 | frames: 67 | Frames whose indexes are in this list are returned. 
68 | Returns: 69 | Return a list of feature frames at the given indexes. 70 | """ 71 | return self.computer.get_frames(frames) 72 | 73 | def accept_waveform( 74 | self, sampling_rate: float, waveform: torch.Tensor 75 | ) -> None: 76 | """Send audio samples to the extractor. 77 | Args: 78 | sampling_rate: 79 | The sampling rate of the given audio samples. It has to be equal 80 | to ``opts.frame_opts.samp_freq``. 81 | waveform: 82 | A 1-D tensor of shape (num_samples,). Its dtype is torch.float32 83 | and has to be on CPU. 84 | """ 85 | self.computer.accept_waveform(sampling_rate, waveform) 86 | 87 | def input_finished(self) -> None: 88 | """Tell the extractor that no more audio samples will be available. 89 | After calling this function, you cannot invoke ``accept_waveform`` 90 | again. 91 | """ 92 | self.computer.input_finished() 93 | 94 | @property 95 | def dim(self) -> int: 96 | """Return the feature dimension of this extractor""" 97 | return self.computer.dim 98 | 99 | @property 100 | def frame_shift_in_seconds(self) -> float: 101 | """Return frame shift in seconds of this extractor""" 102 | return self.computer.frame_shift_in_seconds 103 | 104 | def __getstate__(self): 105 | return self.opts.as_dict() 106 | -------------------------------------------------------------------------------- /kaldifeat/python/kaldifeat/plp.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 2 | 3 | 4 | import _kaldifeat 5 | 6 | from .offline_feature import OfflineFeature 7 | from .online_feature import OnlineFeature 8 | 9 | 10 | class Plp(OfflineFeature): 11 | def __init__(self, opts: _kaldifeat.PlpOptions): 12 | super().__init__(opts) 13 | self.computer = _kaldifeat.Plp(opts) 14 | 15 | 16 | class OnlinePlp(OnlineFeature): 17 | def __init__(self, opts: _kaldifeat.PlpOptions): 18 | super().__init__(opts) 19 | self.computer = _kaldifeat.OnlinePlp(opts) 20 | 21 | def __setstate__(self, 
state): 22 | self.opts = _kaldifeat.PlpOptions.from_dict(state) 23 | self.computer = _kaldifeat.OnlinePlp(self.opts) 24 | -------------------------------------------------------------------------------- /kaldifeat/python/kaldifeat/spectrogram.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 2 | 3 | 4 | import _kaldifeat 5 | 6 | from .offline_feature import OfflineFeature 7 | 8 | 9 | class Spectrogram(OfflineFeature): 10 | def __init__(self, opts: _kaldifeat.SpectrogramOptions): 11 | super().__init__(opts) 12 | self.computer = _kaldifeat.Spectrogram(opts) 13 | -------------------------------------------------------------------------------- /kaldifeat/python/kaldifeat/torch_version.py.in: -------------------------------------------------------------------------------- 1 | # Auto generated by the toplevel CMakeLists.txt. 2 | # 3 | # DO NOT EDIT. 4 | 5 | # The torch version used to build kaldifeat. We will check it against the 6 | # torch version that is used to run kaldifeat. If they are not the same, 7 | # `import kaldifeat` will throw. 
8 | # 9 | # Some example values are: 10 | # - 1.10.0+cu102 11 | # - 1.5.0+cpu 12 | kaldifeat_torch_version = "@TORCH_VERSION@" 13 | -------------------------------------------------------------------------------- /kaldifeat/python/kaldifeat/whisper_fbank.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 2 | 3 | 4 | import _kaldifeat 5 | 6 | from .offline_feature import OfflineFeature 7 | 8 | 9 | class WhisperFbank(OfflineFeature): 10 | def __init__(self, opts: _kaldifeat.WhisperFbankOptions): 11 | super().__init__(opts) 12 | self.computer = _kaldifeat.WhisperFbank(opts) 13 | -------------------------------------------------------------------------------- /kaldifeat/python/tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | function(kaldifeat_add_py_test source) 2 | get_filename_component(name ${source} NAME_WE) 3 | set(name "${name}_py") 4 | 5 | add_test(NAME ${name} 6 | COMMAND 7 | "${PYTHON_EXECUTABLE}" 8 | "${CMAKE_CURRENT_SOURCE_DIR}/${source}" 9 | ) 10 | 11 | get_filename_component(kaldifeat_path ${CMAKE_CURRENT_LIST_DIR} DIRECTORY) 12 | 13 | set_property(TEST ${name} 14 | PROPERTY ENVIRONMENT "PYTHONPATH=${kaldifeat_path}:$<TARGET_FILE_DIR:_kaldifeat>:$ENV{PYTHONPATH}" 15 | ) 16 | endfunction() 17 | 18 | # please sort the files in alphabetic order 19 | set(py_test_files 20 | test_fbank.py 21 | test_fbank_options.py 22 | test_frame_extraction_options.py 23 | test_mel_bank_options.py 24 | test_mfcc.py 25 | test_mfcc_options.py 26 | test_plp.py 27 | test_plp_options.py 28 | test_spectrogram.py 29 | test_spectrogram_options.py 30 | ) 31 | 32 | foreach(source IN LISTS py_test_files) 33 | kaldifeat_add_py_test(${source}) 34 | endforeach() 35 | -------------------------------------------------------------------------------- /kaldifeat/python/tests/Makefile: -------------------------------------------------------------------------------- 
1 | 2 | .PHONY: test 3 | test: 4 | python3 ./test_fbank.py 5 | python3 ./test_fbank_options.py 6 | python3 ./test_frame_extraction_options.py 7 | python3 ./test_mel_bank_options.py 8 | python3 ./test_mfcc.py 9 | python3 ./test_mfcc_options.py 10 | python3 ./test_plp.py 11 | python3 ./test_plp_options.py 12 | python3 ./test_spectrogram.py 13 | python3 ./test_spectrogram_options.py 14 | -------------------------------------------------------------------------------- /kaldifeat/python/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csukuangfj/kaldifeat/0ecdee6e88ed82d9504f9332fde92e142531ee6e/kaldifeat/python/tests/__init__.py -------------------------------------------------------------------------------- /kaldifeat/python/tests/test_data/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | KALDI_ROOT=/root/fangjun/open-source/kaldi 6 | export PATH=${KALDI_ROOT}/src/featbin:$PATH 7 | 8 | if [ ! -f test.wav ]; then 9 | # generate a wav of 1.2 seconds, containing a sine-wave 10 | # swept from 300 Hz to 3300 Hz 11 | sox -n -r 16000 -b 16 test.wav synth 1.2 sine 300-3300 12 | fi 13 | 14 | if [ ! -f test2.wav ]; then 15 | # generate a wav of 0.5 seconds, containing a sine-wave 16 | # swept from 300 Hz to 3300 Hz 17 | sox -n -r 16000 -b 16 test2.wav synth 0.5 sine 300-3300 18 | fi 19 | 20 | echo "1 test.wav" > test.scp 21 | 22 | # We disable dither for testing 23 | 24 | if [ ! -f test.txt ]; then 25 | compute-fbank-feats --dither=0 scp:test.scp ark,t:test.txt 26 | fi 27 | 28 | if [ ! -f test-mfcc.txt ]; then 29 | compute-mfcc-feats --dither=0 scp:test.scp ark,t:test-mfcc.txt 30 | fi 31 | 32 | if [ ! -f test-spectrogram.txt ]; then 33 | compute-spectrogram-feats --dither=0 scp:test.scp ark,t:test-spectrogram.txt 34 | fi 35 | 36 | if [ ! 
-f test-plp.txt ]; then 37 | compute-plp-feats --dither=0 scp:test.scp ark,t:test-plp.txt 38 | fi 39 | 40 | if [ ! -f test-plp-no-snip-edges.txt ]; then 41 | compute-plp-feats --dither=0 --snip-edges=0 scp:test.scp ark,t:test-plp-no-snip-edges.txt 42 | fi 43 | 44 | if [ ! -f test-plp-htk-10-ceps.txt ]; then 45 | compute-plp-feats --dither=0 --htk-compat=1 --num-ceps=10 scp:test.scp ark,t:test-plp-htk-10-ceps.txt 46 | fi 47 | 48 | if [ ! -f test-spectrogram-no-snip-edges.txt ]; then 49 | compute-spectrogram-feats --dither=0 --snip-edges=0 scp:test.scp ark,t:test-spectrogram-no-snip-edges.txt 50 | fi 51 | 52 | if [ ! -f test-mfcc-no-snip-edges.txt ]; then 53 | compute-mfcc-feats --dither=0 --snip-edges=0 scp:test.scp ark,t:test-mfcc-no-snip-edges.txt 54 | fi 55 | 56 | if [ ! -f test-htk.txt ]; then 57 | compute-fbank-feats --dither=0 --use-energy=1 --htk-compat=1 scp:test.scp ark,t:test-htk.txt 58 | fi 59 | 60 | if [ ! -f test-with-energy.txt ]; then 61 | compute-fbank-feats --dither=0 --use-energy=1 scp:test.scp ark,t:test-with-energy.txt 62 | fi 63 | 64 | if [ ! -f test-40.txt ]; then 65 | compute-fbank-feats --dither=0 --num-mel-bins=40 scp:test.scp ark,t:test-40.txt 66 | fi 67 | 68 | if [ ! -f test-40-no-snip-edges.txt ]; then 69 | compute-fbank-feats --dither=0 --num-mel-bins=40 --snip-edges=0 \ 70 | scp:test.scp ark,t:test-40-no-snip-edges.txt 71 | fi 72 | 73 | if [ ! 
-f test-1hour.wav ]; then 74 | # generate a wav of one hour, containing a sine-wave 75 | # swept from 300 Hz to 3300 Hz 76 | sox -n -r 16000 -b 16 test-1hour.wav synth 3600 sine 300-3300 77 | fi 78 | -------------------------------------------------------------------------------- /kaldifeat/python/tests/test_data/test.scp: -------------------------------------------------------------------------------- 1 | 1 test.wav 2 | -------------------------------------------------------------------------------- /kaldifeat/python/tests/test_data/test.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csukuangfj/kaldifeat/0ecdee6e88ed82d9504f9332fde92e142531ee6e/kaldifeat/python/tests/test_data/test.wav -------------------------------------------------------------------------------- /kaldifeat/python/tests/test_data/test2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csukuangfj/kaldifeat/0ecdee6e88ed82d9504f9332fde92e142531ee6e/kaldifeat/python/tests/test_data/test2.wav -------------------------------------------------------------------------------- /kaldifeat/python/tests/test_frame_extraction_options.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | import pickle 6 | 7 | import kaldifeat 8 | 9 | 10 | def test_default(): 11 | opts = kaldifeat.FrameExtractionOptions() 12 | print(opts) 13 | assert opts.samp_freq == 16000 14 | assert opts.frame_shift_ms == 10.0 15 | assert opts.frame_length_ms == 25.0 16 | assert opts.dither == 1.0 17 | assert abs(opts.preemph_coeff - 0.97) < 1e-6 18 | assert opts.remove_dc_offset is True 19 | assert opts.window_type == "povey" 20 | assert opts.round_to_power_of_two is True 21 | assert abs(opts.blackman_coeff - 0.42) < 1e-6 22 | assert opts.snip_edges is True 23 | 
24 | 25 | def test_set_get(): 26 | opts = kaldifeat.FrameExtractionOptions(samp_freq=22150) 27 | assert opts.samp_freq == 22150 28 | 29 | opts.samp_freq = 44100 30 | assert opts.samp_freq == 44100 31 | 32 | opts.frame_shift_ms = 20.5 33 | assert opts.frame_shift_ms == 20.5 34 | 35 | opts.frame_length_ms = 1 36 | assert opts.frame_length_ms == 1 37 | 38 | opts.dither = 0.5 39 | assert opts.dither == 0.5 40 | 41 | opts.preemph_coeff = 0.25 42 | assert opts.preemph_coeff == 0.25 43 | 44 | opts.remove_dc_offset = False 45 | assert opts.remove_dc_offset is False 46 | 47 | opts.window_type = "hanning" 48 | assert opts.window_type == "hanning" 49 | 50 | opts.round_to_power_of_two = False 51 | assert opts.round_to_power_of_two is False 52 | 53 | opts.blackman_coeff = 0.25 54 | assert opts.blackman_coeff == 0.25 55 | 56 | opts.snip_edges = False 57 | assert opts.snip_edges is False 58 | 59 | 60 | def test_from_empty_dict(): 61 | opts = kaldifeat.FrameExtractionOptions.from_dict({}) 62 | opts2 = kaldifeat.FrameExtractionOptions() 63 | 64 | assert str(opts) == str(opts2) 65 | 66 | 67 | def test_from_dict_partial(): 68 | d = {"samp_freq": 10, "frame_shift_ms": 2} 69 | 70 | opts = kaldifeat.FrameExtractionOptions.from_dict(d) 71 | 72 | opts2 = kaldifeat.FrameExtractionOptions() 73 | assert str(opts) != str(opts2) 74 | 75 | opts2.samp_freq = 10 76 | assert str(opts) != str(opts2) 77 | 78 | opts2.frame_shift_ms = 2 79 | assert str(opts) == str(opts2) 80 | 81 | opts2.frame_shift_ms = 3 82 | assert str(opts) != str(opts2) 83 | 84 | 85 | def test_from_dict_full_and_as_dict(): 86 | opts = kaldifeat.FrameExtractionOptions() 87 | opts.samp_freq = 20 88 | opts.frame_length_ms = 100 89 | 90 | d = opts.as_dict() 91 | for key, value in d.items(): 92 | assert value == getattr(opts, key) 93 | 94 | opts2 = kaldifeat.FrameExtractionOptions.from_dict(d) 95 | assert str(opts2) == str(opts) 96 | 97 | d["window_type"] = "hanning" 98 | opts3 = kaldifeat.FrameExtractionOptions.from_dict(d) 99 | 
assert opts3.window_type == "hanning" 100 | 101 | 102 | def test_pickle(): 103 | opts = kaldifeat.FrameExtractionOptions() 104 | opts.samp_freq = 44100 105 | opts.dither = 5.5 106 | data = pickle.dumps(opts) 107 | 108 | opts2 = pickle.loads(data) 109 | assert str(opts) == str(opts2) 110 | 111 | 112 | def main(): 113 | test_default() 114 | test_set_get() 115 | test_from_empty_dict() 116 | test_from_dict_partial() 117 | test_from_dict_full_and_as_dict() 118 | test_pickle() 119 | 120 | 121 | if __name__ == "__main__": 122 | main() 123 | -------------------------------------------------------------------------------- /kaldifeat/python/tests/test_mel_bank_options.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # Copyright (c) 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | import pickle 6 | 7 | import kaldifeat 8 | 9 | 10 | def test_default(): 11 | opts = kaldifeat.MelBanksOptions() 12 | print(opts) 13 | assert opts.num_bins == 25 14 | assert opts.low_freq == 20 15 | assert opts.high_freq == 0 16 | assert opts.vtln_low == 100 17 | assert opts.vtln_high == -500 18 | assert opts.debug_mel is False 19 | assert opts.htk_mode is False 20 | 21 | 22 | def test_set_get(): 23 | opts = kaldifeat.MelBanksOptions(num_bins=100) 24 | assert opts.num_bins == 100 25 | 26 | opts.num_bins = 200 27 | assert opts.num_bins == 200 28 | 29 | opts.low_freq = 22 30 | assert opts.low_freq == 22 31 | 32 | opts.high_freq = 1 33 | assert opts.high_freq == 1 34 | 35 | opts.vtln_low = 101 36 | assert opts.vtln_low == 101 37 | 38 | opts.vtln_high = -100 39 | assert opts.vtln_high == -100 40 | 41 | opts.debug_mel = True 42 | assert opts.debug_mel is True 43 | 44 | opts.htk_mode = True 45 | assert opts.htk_mode is True 46 | 47 | 48 | def test_from_empty_dict(): 49 | opts = kaldifeat.MelBanksOptions.from_dict({}) 50 | opts2 = kaldifeat.MelBanksOptions() 51 | 52 | assert str(opts) == str(opts2) 53 | 54 | 55 | def 
test_from_dict_partial(): 56 | d = {"num_bins": 10, "debug_mel": True} 57 | 58 | opts = kaldifeat.MelBanksOptions.from_dict(d) 59 | 60 | opts2 = kaldifeat.MelBanksOptions() 61 | assert str(opts) != str(opts2) 62 | 63 | opts2.num_bins = 10 64 | assert str(opts) != str(opts2) 65 | 66 | opts2.debug_mel = True 67 | assert str(opts) == str(opts2) 68 | 69 | opts2.debug_mel = False 70 | assert str(opts) != str(opts2) 71 | 72 | 73 | def test_from_dict_full_and_as_dict(): 74 | opts = kaldifeat.MelBanksOptions() 75 | opts.num_bins = 80 76 | opts.vtln_high = 2 77 | 78 | d = opts.as_dict() 79 | for key, value in d.items(): 80 | assert value == getattr(opts, key) 81 | 82 | opts2 = kaldifeat.MelBanksOptions.from_dict(d) 83 | assert str(opts2) == str(opts) 84 | 85 | d["htk_mode"] = True 86 | opts3 = kaldifeat.MelBanksOptions.from_dict(d) 87 | assert opts3.htk_mode is True 88 | 89 | 90 | def test_pickle(): 91 | opts = kaldifeat.MelBanksOptions() 92 | opts.num_bins = 100 93 | opts.low_freq = 22 94 | data = pickle.dumps(opts) 95 | 96 | opts2 = pickle.loads(data) 97 | assert str(opts) == str(opts2) 98 | 99 | 100 | def main(): 101 | test_default() 102 | test_set_get() 103 | test_from_empty_dict() 104 | test_from_dict_partial() 105 | test_from_dict_full_and_as_dict() 106 | test_pickle() 107 | 108 | 109 | if __name__ == "__main__": 110 | main() 111 | -------------------------------------------------------------------------------- /kaldifeat/python/tests/test_mfcc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | import pickle 6 | from pathlib import Path 7 | 8 | import torch 9 | from utils import get_devices, read_ark_txt, read_wave 10 | 11 | import kaldifeat 12 | 13 | cur_dir = Path(__file__).resolve().parent 14 | 15 | 16 | def test_online_mfcc( 17 | opts: kaldifeat.MfccOptions, 18 | wave: torch.Tensor, 19 | cpu_features: torch.Tensor, 20 | ): 21 | 
""" 22 | Args: 23 | opts: 24 | The options to create the online mfcc extractor. 25 | wave: 26 | The input 1-D waveform. 27 | cpu_features: 28 | The ground truth features that are computed offline 29 | """ 30 | online_mfcc = kaldifeat.OnlineMfcc(opts) 31 | 32 | num_processed_frames = 0 33 | i = 0 # current sample index to feed 34 | while not online_mfcc.is_last_frame(num_processed_frames - 1): 35 | while num_processed_frames < online_mfcc.num_frames_ready: 36 | # There are new frames to be processed 37 | frame = online_mfcc.get_frame(num_processed_frames) 38 | assert torch.allclose( 39 | frame.squeeze(0), cpu_features[num_processed_frames], atol=1e-3 40 | ) 41 | num_processed_frames += 1 42 | 43 | # Simulate streaming. Send a random number of audio samples 44 | # to the extractor 45 | num_samples = torch.randint(300, 1000, (1,)).item() 46 | 47 | samples = wave[i : (i + num_samples)] # noqa 48 | i += num_samples 49 | if len(samples) == 0: 50 | online_mfcc.input_finished() 51 | continue 52 | 53 | online_mfcc.accept_waveform(16000, samples) 54 | 55 | assert num_processed_frames == online_mfcc.num_frames_ready 56 | assert num_processed_frames == cpu_features.size(0) 57 | 58 | 59 | def test_mfcc_default(): 60 | print("=====test_mfcc_default=====") 61 | filename = cur_dir / "test_data/test.wav" 62 | wave = read_wave(filename) 63 | gt = read_ark_txt(cur_dir / "test_data/test-mfcc.txt") 64 | 65 | cpu_features = None 66 | for device in get_devices(): 67 | print("device", device) 68 | opts = kaldifeat.MfccOptions() 69 | opts.device = device 70 | opts.frame_opts.dither = 0 71 | mfcc = kaldifeat.Mfcc(opts) 72 | 73 | features = mfcc(wave.to(device)) 74 | if device.type == "cpu": 75 | cpu_features = features 76 | 77 | assert torch.allclose(features.cpu(), gt, atol=1e-1) 78 | 79 | opts = kaldifeat.MfccOptions() 80 | opts.frame_opts.dither = 0 81 | 82 | test_online_mfcc(opts, wave, cpu_features) 83 | 84 | 85 | def test_mfcc_no_snip_edges(): 86 | 
print("=====test_mfcc_no_snip_edges=====") 87 | filename = cur_dir / "test_data/test.wav" 88 | wave = read_wave(filename) 89 | gt = read_ark_txt(cur_dir / "test_data/test-mfcc-no-snip-edges.txt") 90 | 91 | cpu_features = None 92 | for device in get_devices(): 93 | print("device", device) 94 | opts = kaldifeat.MfccOptions() 95 | opts.device = device 96 | opts.frame_opts.dither = 0 97 | opts.frame_opts.snip_edges = False 98 | 99 | mfcc = kaldifeat.Mfcc(opts) 100 | 101 | features = mfcc(wave.to(device)) 102 | if device.type == "cpu": 103 | cpu_features = features 104 | 105 | assert torch.allclose(features.cpu(), gt, rtol=1e-1) 106 | 107 | opts = kaldifeat.MfccOptions() 108 | opts.frame_opts.dither = 0 109 | opts.frame_opts.snip_edges = False 110 | 111 | test_online_mfcc(opts, wave, cpu_features) 112 | 113 | 114 | def test_pickle(): 115 | for device in get_devices(): 116 | opts = kaldifeat.MfccOptions() 117 | opts.device = device 118 | opts.frame_opts.dither = 0 119 | opts.frame_opts.snip_edges = False 120 | 121 | mfcc = kaldifeat.Mfcc(opts) 122 | data = pickle.dumps(mfcc) 123 | mfcc2 = pickle.loads(data) 124 | 125 | assert str(mfcc.opts) == str(mfcc2.opts) 126 | 127 | opts = kaldifeat.MfccOptions() 128 | opts.frame_opts.dither = 0 129 | opts.frame_opts.snip_edges = False 130 | 131 | mfcc = kaldifeat.OnlineMfcc(opts) 132 | data = pickle.dumps(mfcc) 133 | mfcc2 = pickle.loads(data) 134 | 135 | assert str(mfcc.opts) == str(mfcc2.opts) 136 | 137 | 138 | if __name__ == "__main__": 139 | test_mfcc_default() 140 | test_mfcc_no_snip_edges() 141 | test_pickle() 142 | -------------------------------------------------------------------------------- /kaldifeat/python/tests/test_spectrogram.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright 2021 Xiaomi Corporation (authors: Fangjun Kuang) 4 | 5 | import pickle 6 | from pathlib import Path 7 | 8 | from utils import get_devices, read_ark_txt, read_wave 
9 | 10 | import kaldifeat 11 | 12 | cur_dir = Path(__file__).resolve().parent 13 | 14 | 15 | def test_spectrogram_default(): 16 | print("=====test_spectrogram_default=====") 17 | for device in get_devices(): 18 | print("device", device) 19 | opts = kaldifeat.SpectrogramOptions() 20 | opts.device = device 21 | opts.frame_opts.dither = 0 22 | spectrogram = kaldifeat.Spectrogram(opts) 23 | filename = cur_dir / "test_data/test.wav" 24 | wave = read_wave(filename).to(opts.device) 25 | 26 | features = spectrogram(wave) 27 | gt = read_ark_txt(cur_dir / "test_data/test-spectrogram.txt") 28 | 29 | # assert torch.allclose(features.cpu(), gt, atol=1.1) 30 | print(features[1, 145:148], gt[1, 145:148]) # they are different 31 | 32 | 33 | def test_spectrogram_no_snip_edges(): 34 | print("=====test_spectrogram_no_snip_edges=====") 35 | for device in get_devices(): 36 | print("device", device) 37 | opts = kaldifeat.SpectrogramOptions() 38 | opts.device = device 39 | opts.frame_opts.dither = 0 40 | opts.frame_opts.snip_edges = False 41 | spectrogram = kaldifeat.Spectrogram(opts) 42 | filename = cur_dir / "test_data/test.wav" 43 | wave = read_wave(filename).to(opts.device) 44 | 45 | features = spectrogram(wave) 46 | gt = read_ark_txt( 47 | cur_dir / "test_data/test-spectrogram-no-snip-edges.txt" 48 | ) 49 | 50 | # assert torch.allclose(features.cpu(), gt, atol=1.5) 51 | print(features[1, 145:148], gt[1, 145:148]) # they are different 52 | 53 | 54 | def test_pickle(): 55 | for device in get_devices(): 56 | opts = kaldifeat.SpectrogramOptions() 57 | opts.device = device 58 | opts.frame_opts.dither = 0 59 | opts.frame_opts.snip_edges = False 60 | 61 | spec = kaldifeat.Spectrogram(opts) 62 | data = pickle.dumps(spec) 63 | spec2 = pickle.loads(data) 64 | 65 | assert str(spec.opts) == str(spec2.opts) 66 | 67 | 68 | if __name__ == "__main__": 69 | test_spectrogram_default() 70 | test_spectrogram_no_snip_edges() 71 | test_pickle() 72 | 
-------------------------------------------------------------------------------- /kaldifeat/python/tests/test_whisper_fbank.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2023 Xiaomi Corporation (authors: Fangjun Kuang) 3 | 4 | import librosa 5 | import torch 6 | import kaldifeat 7 | 8 | 9 | def get_ground_truth(x): 10 | N_FFT = 400 11 | HOP_LENGTH = 160 12 | 13 | m = librosa.filters.mel(sr=16000, n_fft=400, n_mels=80) 14 | m = torch.from_numpy(m) 15 | # print(m.shape) # [80, 201] 16 | window = torch.hann_window(N_FFT) 17 | stft = torch.stft(x, N_FFT, HOP_LENGTH, window=window, return_complex=True) 18 | # print(stft.shape) # [201, 301] 19 | magnitudes = stft[..., :-1].abs() ** 2 20 | # print(magnitudes.shape) # [201, 300] 21 | 22 | mel_spec = m @ magnitudes 23 | # print(mel_spec.shape) # [80, 300] 24 | 25 | log_spec = torch.clamp(mel_spec, min=1e-10).log10() 26 | log_spec = torch.maximum(log_spec, log_spec.max() - 8.0) 27 | log_spec = (log_spec + 4.0) / 4.0 28 | return log_spec.t() 29 | 30 | 31 | def test_whisper_fbank(): 32 | x = torch.rand(16000 * 3) 33 | 34 | gt = get_ground_truth(x) 35 | print(gt.shape) # [300, 80] 36 | 37 | opts = kaldifeat.WhisperFbankOptions(device="cpu") 38 | whisper_fbank = kaldifeat.WhisperFbank(opts) 39 | y = whisper_fbank(x) # [298, 80] 40 | print(y.shape) # [298, 80] 41 | 42 | # print(gt[:5, :5]) 43 | # print(y[:5, :5]) 44 | 45 | 46 | if __name__ == "__main__": 47 | torch.manual_seed(20231108) 48 | test_whisper_fbank() 49 | -------------------------------------------------------------------------------- /kaldifeat/python/tests/test_whisper_v3_fbank.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2023 Xiaomi Corporation (authors: Fangjun Kuang) 3 | 4 | import librosa 5 | import torch 6 | import kaldifeat 7 | 8 | 9 | def get_ground_truth(x): 10 | N_FFT = 400 11 | HOP_LENGTH = 160 
12 | 13 | m = librosa.filters.mel(sr=16000, n_fft=400, n_mels=128) 14 | m = torch.from_numpy(m) 15 | # print(m.shape) # [128, 201] 16 | window = torch.hann_window(N_FFT) 17 | stft = torch.stft(x, N_FFT, HOP_LENGTH, window=window, return_complex=True) 18 | # print(stft.shape) # [201, 301] 19 | magnitudes = stft[..., :-1].abs() ** 2 20 | # print(magnitudes.shape) # [201, 300] 21 | 22 | mel_spec = m @ magnitudes 23 | # print(mel_spec.shape) # [128, 300] 24 | 25 | log_spec = torch.clamp(mel_spec, min=1e-10).log10() 26 | log_spec = torch.maximum(log_spec, log_spec.max() - 8.0) 27 | log_spec = (log_spec + 4.0) / 4.0 28 | return log_spec.t() 29 | 30 | 31 | def test_whisper_v3_fbank(): 32 | x = torch.rand(16000 * 3) 33 | 34 | gt = get_ground_truth(x) 35 | print(gt.shape) # [300, 128] 36 | 37 | opts = kaldifeat.WhisperFbankOptions(num_mels=128, device="cpu") 38 | print(opts) 39 | whisper_fbank = kaldifeat.WhisperFbank(opts) 40 | y = whisper_fbank(x) # [298, 128] 41 | print(y.shape) # [298, 128] 42 | 43 | print(gt[:5, :5]) 44 | print(y[:5, :5]) 45 | 46 | 47 | if __name__ == "__main__": 48 | torch.manual_seed(20231109) 49 | test_whisper_v3_fbank() 50 | -------------------------------------------------------------------------------- /kaldifeat/python/tests/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 Xiaomi Corporation (authors: Fangjun Kuang) 2 | 3 | from typing import List 4 | 5 | import numpy as np 6 | import soundfile as sf 7 | import torch 8 | 9 | 10 | def read_wave(filename) -> torch.Tensor: 11 | """Read a wave file and return it as a 1-D tensor. 12 | 13 | Note: 14 | You don't need to scale it to [-32768, 32767]. 15 | We use scaling here to follow the approach in Kaldi. 16 | 17 | Args: 18 | filename: 19 | Filename of a sound file. 20 | Returns: 21 | Return a 1-D tensor containing audio samples. 
22 | """ 23 | with sf.SoundFile(filename) as sf_desc: 24 | sampling_rate = sf_desc.samplerate 25 | assert sampling_rate == 16000 26 | data = sf_desc.read(dtype=np.float32, always_2d=False) 27 | data *= 32768 28 | return torch.from_numpy(data) 29 | 30 | 31 | def read_ark_txt(filename) -> torch.Tensor: 32 | # test_data_dir = cur_dir / "test_data" 33 | # filename = test_data_dir / filename 34 | features = [] 35 | with open(filename) as f: 36 | for line in f: 37 | if "[" in line: 38 | continue 39 | line = line.strip("").split() 40 | data = [float(d) for d in line if d != "]"] 41 | features.append(data) 42 | ans = torch.tensor(features) 43 | return ans 44 | 45 | 46 | def get_devices() -> List[torch.device]: 47 | ans = [torch.device("cpu")] 48 | if torch.cuda.is_available(): 49 | ans.append(torch.device("cuda", 0)) 50 | if torch.cuda.device_count() > 1: 51 | ans.append(torch.device("cuda", 1)) 52 | return ans 53 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | -------------------------------------------------------------------------------- /scripts/build_conda.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang) 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | # To use this script, we assume that you have installed cudatoolkit locally. 18 | # That is, `which nvcc` should give the path to nvcc 19 | # 20 | # We also assume that cudnn is installed locally. 21 | # 22 | # The following environment variables are supposed to be set by users 23 | # 24 | # - KALDIFEAT_CUDA_VERSION 25 | # It represents the cuda version. Example: 26 | # 27 | # export KALDIFEAT_CUDA_VERSION=10.1 28 | # 29 | # Defaults to 10.1 if not set. 30 | # 31 | # - KALDIFEAT_TORCH_VERSION 32 | # The PyTorch version. Example: 33 | # 34 | # export KALDIFEAT_TORCH_VERSION=1.7.1 35 | # 36 | # Defaults to 1.7.1 if not set. 37 | # 38 | # - KALDIFEAT_CONDA_TOKEN 39 | # If not set, auto upload to anaconda.org is disabled. 40 | # 41 | # Its value is from https://anaconda.org/kaldifeat/settings/access 42 | # (You need to login as user kaldifeat to see its value) 43 | # 44 | # - KALDIFEAT_BUILD_TYPE 45 | # If not set, defaults to Release. 46 | 47 | set -e 48 | export CONDA_BUILD=1 49 | 50 | cur_dir=$(cd $(dirname $BASH_SOURCE) && pwd) 51 | kaldifeat_dir=$(cd $cur_dir/.. 
&& pwd) 52 | 53 | cd $kaldifeat_dir 54 | 55 | export KALDIFEAT_ROOT_DIR=$kaldifeat_dir 56 | echo "KALDIFEAT_ROOT_DIR: $KALDIFEAT_ROOT_DIR" 57 | 58 | KALDIFEAT_PYTHON_VERSION=$(python -c "import sys; print('.'.join(sys.version.split('.')[:2]))") 59 | 60 | if [ -z $KALDIFEAT_CUDA_VERSION ]; then 61 | echo "env var KALDIFEAT_CUDA_VERSION is not set, defaults to 10.1" 62 | KALDIFEAT_CUDA_VERSION=10.1 63 | fi 64 | 65 | if [ -z $KALDIFEAT_TORCH_VERSION ]; then 66 | echo "env var KALDIFEAT_TORCH_VERSION is not set, defaults to 1.7.1" 67 | KALDIFEAT_TORCH_VERSION=1.7.1 68 | fi 69 | 70 | if [ -z $KALDIFEAT_BUILD_TYPE ]; then 71 | echo "env var KALDIFEAT_BUILD_TYPE is not set, defaults to Release" 72 | KALDIFEAT_BUILD_TYPE=Release 73 | fi 74 | 75 | export KALDIFEAT_IS_FOR_CONDA=1 76 | 77 | # Example value: 3.8 78 | export KALDIFEAT_PYTHON_VERSION 79 | 80 | # Example value: 10.1 81 | export KALDIFEAT_CUDA_VERSION 82 | 83 | # Example value: 1.7.1 84 | export KALDIFEAT_TORCH_VERSION 85 | 86 | export KALDIFEAT_BUILD_TYPE 87 | 88 | if [ ! -z $KALDIFEAT_IS_GITHUB_ACTIONS ]; then 89 | export KALDIFEAT_IS_GITHUB_ACTIONS 90 | conda remove -q pytorch cudatoolkit 91 | conda clean -q -a 92 | else 93 | export KALDIFEAT_IS_GITHUB_ACTIONS=0 94 | fi 95 | 96 | if [ -z $KALDIFEAT_CONDA_TOKEN ]; then 97 | echo "Auto upload to anaconda.org is disabled since KALDIFEAT_CONDA_TOKEN is not set" 98 | conda build --no-test --no-anaconda-upload -c pytorch -c conda-forge ./scripts/conda/kaldifeat 99 | else 100 | conda build --no-test -c pytorch -c conda-forge --token $KALDIFEAT_CONDA_TOKEN ./scripts/conda/kaldifeat 101 | fi 102 | -------------------------------------------------------------------------------- /scripts/build_conda_cpu.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Copyright 2021 Xiaomi Corp. 
# (author: Fangjun Kuang)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build the CPU-only conda package of kaldifeat from the recipe in
# ./scripts/conda-cpu/kaldifeat.
#
# The following environment variables are supposed to be set by users
#
# - KALDIFEAT_TORCH_VERSION
#   The PyTorch version. Example:
#
#     export KALDIFEAT_TORCH_VERSION=1.7.1
#
#   Defaults to 1.7.1 if not set.
#
# - KALDIFEAT_CONDA_TOKEN
#   If not set, auto upload to anaconda.org is disabled.
#
#   Its value is from https://anaconda.org/kaldifeat/settings/access
#   (You need to login as user kaldifeat to see its value)
#
# - KALDIFEAT_BUILD_TYPE
#   If not set, defaults to Release.
#
# - KALDIFEAT_IS_GITHUB_ACTIONS
#   If set to a non-empty value, pytorch is removed from the current conda
#   environment and the conda caches are cleaned before building.

set -e
export CONDA_BUILD=1

# Resolve the repository root relative to this script so that the script can
# be invoked from any directory. All expansions are quoted so that paths
# containing whitespace are handled correctly.
cur_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
kaldifeat_dir=$(cd "$cur_dir/.." && pwd)

cd "$kaldifeat_dir"

export KALDIFEAT_ROOT_DIR="$kaldifeat_dir"
echo "KALDIFEAT_ROOT_DIR: $KALDIFEAT_ROOT_DIR"

# "major.minor" of the python found on PATH, e.g., 3.8
KALDIFEAT_PYTHON_VERSION=$(python -c "import sys; print('.'.join(sys.version.split('.')[:2]))")

# NOTE: the operand of -z/-n must be quoted. Unquoted, an empty variable
# makes the test collapse to `[ -z ]`, which is true only because "-z" is
# then treated as a non-empty string.
if [ -z "$KALDIFEAT_TORCH_VERSION" ]; then
  echo "env var KALDIFEAT_TORCH_VERSION is not set, defaults to 1.7.1"
  KALDIFEAT_TORCH_VERSION=1.7.1
fi

if [ -z "$KALDIFEAT_BUILD_TYPE" ]; then
  echo "env var KALDIFEAT_BUILD_TYPE is not set, defaults to Release"
  KALDIFEAT_BUILD_TYPE=Release
fi

export KALDIFEAT_IS_FOR_CONDA=1

# Example value: 3.8
export KALDIFEAT_PYTHON_VERSION

# Example value: 1.7.1
export KALDIFEAT_TORCH_VERSION

export KALDIFEAT_BUILD_TYPE

if [ -n "$KALDIFEAT_IS_GITHUB_ACTIONS" ]; then
  export KALDIFEAT_IS_GITHUB_ACTIONS
  conda remove -q pytorch
  conda clean -q -a
else
  export KALDIFEAT_IS_GITHUB_ACTIONS=0
fi

if [ -z "$KALDIFEAT_CONDA_TOKEN" ]; then
  echo "Auto upload to anaconda.org is disabled since KALDIFEAT_CONDA_TOKEN is not set"
  conda build --no-test --no-anaconda-upload -c pytorch ./scripts/conda-cpu/kaldifeat
else
  conda build --no-test -c pytorch --token "$KALDIFEAT_CONDA_TOKEN" ./scripts/conda-cpu/kaldifeat
fi
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Usage:
#
# (1) To check files of the last commit
#  ./scripts/check_style_cpplint.sh
#
# (2) To check changed files not committed yet
#  ./scripts/check_style_cpplint.sh 1
#
# (3) To check all files in the project
#  ./scripts/check_style_cpplint.sh 2


cpplint_version="1.5.4"
cur_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
kaldifeat_dir=$(cd "$cur_dir/.." && pwd)

build_dir="$kaldifeat_dir/build"
mkdir -p "$build_dir"

cpplint_src="$build_dir/cpplint-${cpplint_version}/cpplint.py"

# Download and patch cpplint once; later runs reuse the unpacked directory.
if [ ! -d "$build_dir/cpplint-${cpplint_version}" ]; then
  # Stop if we cannot enter the build directory; otherwise the archive would
  # be downloaded, unpacked and removed in whatever directory we happen to
  # be in.
  pushd "$build_dir" || exit 1
  if command -v wget &> /dev/null; then
    wget "https://github.com/cpplint/cpplint/archive/${cpplint_version}.tar.gz"
  elif command -v curl &> /dev/null; then
    curl -O -SL "https://github.com/cpplint/cpplint/archive/${cpplint_version}.tar.gz"
  else
    echo "Please install wget or curl to download cpplint"
    exit 1
  fi
  tar xf "${cpplint_version}.tar.gz"
  rm "${cpplint_version}.tar.gz"

  # cpplint will report the following error for: __host__ __device__ (
  #
  #     Extra space before ( in function call  [whitespace/parens] [4]
  #
  # the following patch disables the above error
  #
  # NOTE(review): the line number 3490 is tied to this cpplint version, and
  # the leading whitespace of the inserted text after `i\` should be
  # confirmed against the upstream script.
  sed -i "3490i\ not Search(r'__host__ __device__\\\s+\\\(', fncall) and" "$cpplint_src"
  popd
fi

# Provides ok(), error() and abort()
source "$kaldifeat_dir/scripts/utils.sh"

# Print "true" if the given file name has a C++/CUDA source extension
# (*.cc, *.h, *.cu); print "false" otherwise.
#
# $1: the file name to test
function is_source_code_file() {
  case "$1" in
    *.cc|*.h|*.cu)
      echo true;;
    *)
      echo false;;
  esac
}

# Run cpplint on a single file and abort the whole script if it reports errors.
#
# $1: the file to check
function check_style() {
  python3 "$cpplint_src" "$1" || abort "$1"
}

# Print, on a single line, the files touched by the last commit.
function check_last_commit() {
  local files
  files=$(git diff HEAD^1 --name-only --diff-filter=ACDMRUXB)
  # Deliberately unquoted: collapses the newline-separated list into one
  # space-separated line.
  echo $files
}

# Print, on a single line, the tracked files that have uncommitted changes.
function check_current_dir() {
  local files
  files=$(git status -s -uno --porcelain | awk '{
  if (NF == 4) {
    # a file has been renamed
    print $NF
  } else {
    print $2
  }}')

  # Deliberately unquoted, see check_last_commit.
  echo $files
}

# Select the files to check according to the mode and run cpplint on every
# existing C++/CUDA source file among them.
#
# $1: 1 -> changed but uncommitted files
#     2 -> all *.h, *.cc and *.cu files below kaldifeat/
#     anything else (including empty) -> files of the last commit
function do_check() {
  local files f need_check

  case "$1" in
    1)
      echo "Check changed files"
      files=$(check_current_dir)
      ;;
    2)
      echo "Check all files"
      files=$(find "$kaldifeat_dir/kaldifeat" -name "*.h" -o -name "*.cc" -o -name "*.cu")
      ;;
    *)
      echo "Check last commit"
      files=$(check_last_commit)
      ;;
  esac

  # $files is a whitespace-separated list, so it is split on purpose here.
  for f in $files; do
    need_check=$(is_source_code_file "$f")
    if $need_check; then
      # Files deleted by a commit are still listed; skip what no longer exists.
      [[ -f "$f" ]] && check_style "$f"
    fi
  done
}

# Entry point.
#
# $1: the check mode, see do_check
function main() {
  do_check "$1"

  ok "Great! Style check passed!"
}

# The file names reported by git are relative to the repository root.
cd "$kaldifeat_dir"

main "$1"
autograd 39 | -------------------------------------------------------------------------------- /scripts/conda/kaldifeat/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | # Copyright 2021 Xiaomi Corp. (author: Fangjun Kuang) 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | set -ex 18 | 19 | CONDA_ENV_DIR=$CONDA_PREFIX 20 | 21 | echo "KALDIFEAT_PYTHON_VERSION: $KALDIFEAT_PYTHON_VERSION" 22 | echo "KALDIFEAT_TORCH_VERSION: $KALDIFEAT_TORCH_VERSION" 23 | echo "KALDIFEAT_CUDA_VERSION: $KALDIFEAT_CUDA_VERSION" 24 | echo "KALDIFEAT_BUILD_TYPE: $KALDIFEAT_BUILD_TYPE" 25 | echo "KALDIFEAT_BUILD_VERSION: $KALDIFEAT_BUILD_VERSION" 26 | python3 --version 27 | 28 | echo "CC is: $CC" 29 | echo "GCC is: $GCC" 30 | echo "which nvcc: $(which nvcc)" 31 | echo "gcc version: $($CC --version)" 32 | echo "nvcc version: $(nvcc --version)" 33 | 34 | export KALDIFEAT_CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${KALDIFEAT_BUILD_TYPE}" 35 | export KALDIFEAT_MAKE_ARGS="-j2" 36 | 37 | python3 setup.py install --single-version-externally-managed --record=record.txt 38 | -------------------------------------------------------------------------------- /scripts/conda/kaldifeat/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: kaldifeat 3 | version: "1.25.5" 4 | 5 | source: 6 | path: "{{ environ.get('KALDIFEAT_ROOT_DIR') }}" 7 
| 8 | build: 9 | number: 0 10 | string: cuda{{ environ.get('KALDIFEAT_CUDA_VERSION') }}_py{{ environ.get('KALDIFEAT_PYTHON_VERSION') }}_torch{{ environ.get('KALDIFEAT_TORCH_VERSION') }} 11 | script_env: 12 | - KALDIFEAT_IS_GITHUB_ACTIONS 13 | - KALDIFEAT_CUDA_VERSION 14 | - KALDIFEAT_TORCH_VERSION 15 | - KALDIFEAT_PYTHON_VERSION 16 | - KALDIFEAT_BUILD_TYPE 17 | - KALDIFEAT_BUILD_VERSION 18 | - KALDIFEAT_IS_FOR_CONDA 19 | 20 | requirements: 21 | host: 22 | - cmake=3.18 23 | - python 24 | - pytorch={{ environ.get('KALDIFEAT_TORCH_VERSION') }} 25 | - cudatoolkit={{ environ.get('KALDIFEAT_CUDA_VERSION') }} 26 | - gcc_linux-64=7 27 | run: 28 | - python 29 | - pytorch={{ environ.get('KALDIFEAT_TORCH_VERSION') }} 30 | - cudatoolkit={{ environ.get('KALDIFEAT_CUDA_VERSION') }} 31 | 32 | about: 33 | home: https://github.com/csukuangfj/kaldifeat 34 | license: Apache V2 35 | license_file: LICENSE 36 | summary: Kaldi-compatible feature extraction with PyTorch 37 | description: | 38 | Kaldi-compatible feature extraction with PyTorch, 39 | supporting CUDA, batch processing, chunk processing, and autograd 40 | -------------------------------------------------------------------------------- /scripts/github_actions/build-ubuntu-cpu-arm64.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | set -ex 4 | 5 | if [ -z $PYTHON_VERSION ]; then 6 | echo "Please set the environment variable PYTHON_VERSION" 7 | echo "Example: export PYTHON_VERSION=3.8" 8 | # Valid values: 3.8, 3.9, 3.10, 3.11 9 | exit 1 10 | fi 11 | 12 | if [ -z $TORCH_VERSION ]; then 13 | echo "Please set the environment variable TORCH_VERSION" 14 | echo "Example: export TORCH_VERSION=1.10.0" 15 | exit 1 16 | fi 17 | 18 | export PATH=$PYTHON_INSTALL_DIR/bin:$PATH 19 | export LD_LIBRARY_PATH=$PYTHON_INSTALL_DIR/lib:$LD_LIBRARY_PATH 20 | ls -lh $PYTHON_INSTALL_DIR/lib/ 21 | 22 | python3 --version 23 | which python3 24 | 25 | python3 -m pip install scikit-build 
26 | python3 -m pip install -U pip cmake 27 | python3 -m pip install wheel twine typing_extensions 28 | python3 -m pip install bs4 requests tqdm auditwheel 29 | 30 | echo "Installing torch" 31 | python3 -m pip install -qq torch==$TORCH_VERSION || \ 32 | python3 -m pip install -qq torch==$TORCH_VERSION -f https://download.pytorch.org/whl/torch_stable.html || \ 33 | python3 -m pip install -qq torch==$TORCH_VERSION -f https://download.pytorch.org/whl/torch/ 34 | 35 | rm -rf ~/.cache/pip 36 | yum clean all 37 | 38 | cd /var/www 39 | 40 | export CMAKE_CUDA_COMPILER_LAUNCHER= 41 | export KALDIFEAT_CMAKE_ARGS=" -DPYTHON_EXECUTABLE=$PYTHON_INSTALL_DIR/bin/python3 " 42 | export KALDIFEAT_MAKE_ARGS=" -j " 43 | 44 | nvcc --version || true 45 | rm -rf /usr/local/cuda* 46 | nvcc --version || true 47 | 48 | if [[ x"$IS_2_28" == x"1" ]]; then 49 | plat=manylinux_2_28_aarch64 50 | else 51 | plat=manylinux_2_17_aarch64 52 | fi 53 | 54 | python3 setup.py bdist_wheel 55 | 56 | auditwheel --verbose repair \ 57 | --exclude libc10.so \ 58 | --exclude libc10_cuda.so \ 59 | --exclude libcuda.so.1 \ 60 | --exclude libcudart.so.${CUDA_VERSION} \ 61 | --exclude libnvToolsExt.so.1 \ 62 | --exclude libnvrtc.so.${CUDA_VERSION} \ 63 | --exclude libtorch.so \ 64 | --exclude libtorch_cpu.so \ 65 | --exclude libtorch_cuda.so \ 66 | --exclude libtorch_python.so \ 67 | \ 68 | --exclude libcudnn.so.8 \ 69 | --exclude libcublas.so.11 \ 70 | --exclude libcublasLt.so.11 \ 71 | --exclude libcudart.so.11.0 \ 72 | --exclude libnvrtc.so.11.2 \ 73 | --exclude libtorch_cuda_cu.so \ 74 | --exclude libtorch_cuda_cpp.so \ 75 | --plat $plat \ 76 | dist/*.whl 77 | 78 | ls -lh /var/www 79 | -------------------------------------------------------------------------------- /scripts/github_actions/build-ubuntu-cpu.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | set -ex 4 | 5 | if [ -z $PYTHON_VERSION ]; then 6 | echo "Please set the environment 
variable PYTHON_VERSION" 7 | echo "Example: export PYTHON_VERSION=3.8" 8 | # Valid values: 3.8, 3.9, 3.10, 3.11 9 | exit 1 10 | fi 11 | 12 | if [ -z $TORCH_VERSION ]; then 13 | echo "Please set the environment variable TORCH_VERSION" 14 | echo "Example: export TORCH_VERSION=1.10.0" 15 | exit 1 16 | fi 17 | 18 | export PATH=$PYTHON_INSTALL_DIR/bin:$PATH 19 | export LD_LIBRARY_PATH=$PYTHON_INSTALL_DIR/lib:$LD_LIBRARY_PATH 20 | ls -lh $PYTHON_INSTALL_DIR/lib/ 21 | 22 | python3 --version 23 | which python3 24 | 25 | python3 -m pip install scikit-build 26 | python3 -m pip install -U pip cmake 27 | python3 -m pip install wheel twine typing_extensions 28 | python3 -m pip install bs4 requests tqdm auditwheel 29 | 30 | echo "Installing torch" 31 | python3 -m pip install -qq torch==$TORCH_VERSION+cpu -f https://download.pytorch.org/whl/torch_stable.html || \ 32 | python3 -m pip install -qq torch==$TORCH_VERSION+cpu -f https://download.pytorch.org/whl/torch/ 33 | 34 | rm -rf ~/.cache/pip 35 | yum clean all 36 | 37 | cd /var/www 38 | 39 | export CMAKE_CUDA_COMPILER_LAUNCHER= 40 | export KALDIFEAT_CMAKE_ARGS=" -DPYTHON_EXECUTABLE=$PYTHON_INSTALL_DIR/bin/python3 " 41 | export KALDIFEAT_MAKE_ARGS=" -j " 42 | 43 | nvcc --version || true 44 | rm -rf /usr/local/cuda* 45 | nvcc --version || true 46 | 47 | python3 setup.py bdist_wheel 48 | if [[ x"$IS_2_28" == x"1" ]]; then 49 | plat=manylinux_2_28_x86_64 50 | else 51 | plat=manylinux_2_17_x86_64 52 | fi 53 | 54 | auditwheel --verbose repair \ 55 | --exclude libc10.so \ 56 | --exclude libc10_cuda.so \ 57 | --exclude libcuda.so.1 \ 58 | --exclude libcudart.so.${CUDA_VERSION} \ 59 | --exclude libnvToolsExt.so.1 \ 60 | --exclude libnvrtc.so.${CUDA_VERSION} \ 61 | --exclude libtorch.so \ 62 | --exclude libtorch_cpu.so \ 63 | --exclude libtorch_cuda.so \ 64 | --exclude libtorch_python.so \ 65 | \ 66 | --exclude libcudnn.so.8 \ 67 | --exclude libcublas.so.11 \ 68 | --exclude libcublasLt.so.11 \ 69 | --exclude libcudart.so.11.0 \ 70 | 
#!/bin/bash
#
# Copyright 2020 Mobvoi Inc. (authors: Fangjun Kuang)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Download the CUDA toolkit installer selected by the variable `cuda`
# (e.g., cuda=11.8), install the toolkit with sudo, and export CUDA_HOME,
# PATH and LD_LIBRARY_PATH for /usr/local/cuda.
#
# NOTE(review): the exported variables only reach the caller if this file is
# sourced rather than executed -- confirm against the workflows that use it.

echo "cuda version: $cuda"

case "$cuda" in
  10.0)
    url=https://developer.nvidia.com/compute/cuda/10.0/Prod/local_installers/cuda_10.0.130_410.48_linux
    ;;
  10.1)
    # WARNING: there are bugs in
    # https://developer.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda_10.1.105_418.39_linux.run
    # with GCC 7. Please use the following version
    url=http://developer.download.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda_10.1.243_418.87.00_linux.run
    ;;
  10.2)
    url=http://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda_10.2.89_440.33.01_linux.run
    ;;
  11.0)
    url=http://developer.download.nvidia.com/compute/cuda/11.0.2/local_installers/cuda_11.0.2_450.51.05_linux.run
    ;;
  11.1)
    # url=https://developer.download.nvidia.com/compute/cuda/11.1.0/local_installers/cuda_11.1.0_455.23.05_linux.run
    url=https://developer.download.nvidia.com/compute/cuda/11.1.1/local_installers/cuda_11.1.1_455.32.00_linux.run
    ;;
  11.3)
    # url=https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda_11.3.0_465.19.01_linux.run
    url=https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.19.01_linux.run
    ;;
  11.5)
    url=https://developer.download.nvidia.com/compute/cuda/11.5.2/local_installers/cuda_11.5.2_495.29.05_linux.run
    ;;
  11.6)
    url=https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda_11.6.2_510.47.03_linux.run
    ;;
  11.7)
    url=https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda_11.7.1_515.65.01_linux.run
    ;;
  11.8)
    url=https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
    ;;
  12.1)
    url=https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run
    ;;
  12.4)
    url=https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run
    ;;
  *)
    echo "Unknown cuda version: $cuda"
    exit 1
    ;;
esac

# Run the given command; if it fails, run it again after 1, 2, 4 and 8
# seconds. Returns the status of the last attempt.
#
# "$@" (instead of $*) re-executes the command with exactly the arguments
# that were passed in, without re-splitting or glob-expanding them.
function retry() {
  "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") || (sleep 4 && "$@") || (sleep 8 && "$@")
}

# -f makes curl return a non-zero status on HTTP errors, so that such
# failures are retried instead of saving the error page as the installer.
if ! retry curl -fLSs -O "$url"; then
  echo "Failed to download $url"
  exit 1
fi

filename=$(basename "$url")
echo "filename: $filename"
chmod +x "./$filename"

ls -lh
ls -lh /usr/local

sudo "./$filename" \
  --silent \
  --toolkit \
  --no-opengl-libs \
  --no-drm \
  --no-man-page

# The installer is large; remove it once the toolkit is installed.
rm -fv "./$filename"

export CUDA_HOME=/usr/local/cuda
export PATH=$CUDA_HOME/bin:$PATH
export LD_LIBRARY_PATH=$CUDA_HOME/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH
ls -lh "$CUDA_HOME"
16 | 17 | case $cuda in 18 | 10.0) 19 | filename=cudnn-10.0-linux-x64-v7.6.5.32.tgz 20 | ;; 21 | 10.1) 22 | filename=cudnn-10.1-linux-x64-v8.0.2.39.tgz 23 | ;; 24 | 10.2) 25 | filename=cudnn-10.2-linux-x64-v8.0.2.39.tgz 26 | ;; 27 | 11.0) 28 | filename=cudnn-11.0-linux-x64-v8.0.5.39.tgz 29 | ;; 30 | 11.1) 31 | filename=cudnn-11.1-linux-x64-v8.0.4.30.tgz 32 | ;; 33 | 11.3) 34 | filename=cudnn-11.3-linux-x64-v8.2.0.53.tgz 35 | ;; 36 | 11.5) 37 | filename=cudnn-11.3-linux-x64-v8.2.0.53.tgz 38 | ;; 39 | 11.6) 40 | filename=cudnn-11.3-linux-x64-v8.2.0.53.tgz 41 | ;; 42 | 11.7) 43 | filename=cudnn-11.3-linux-x64-v8.2.0.53.tgz 44 | ;; 45 | 11.8) 46 | filename=cudnn-11.3-linux-x64-v8.2.0.53.tgz 47 | ;; 48 | 12.1) 49 | filename=cudnn-linux-x86_64-8.9.5.29_cuda12-archive.tar.xz 50 | ;; 51 | 12.4) 52 | filename=cudnn-linux-x86_64-8.9.7.29_cuda12-archive.tar.xz 53 | ;; 54 | *) 55 | echo "Unsupported cuda version: $cuda" 56 | exit 1 57 | ;; 58 | esac 59 | 60 | command -v git-lfs >/dev/null 2>&1 || { echo >&2 "\nPlease install 'git-lfs' first."; exit 2; } 61 | 62 | GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/cudnn 63 | cd cudnn 64 | git lfs pull --include="$filename" 65 | 66 | sudo tar xf ./$filename --strip-components=1 -C /usr/local/cuda 67 | 68 | # save disk space 69 | git lfs prune && cd .. && rm -rf cudnn 70 | 71 | sudo sed -i '59i#define CUDNN_MAJOR 8' /usr/local/cuda/include/cudnn.h 72 | -------------------------------------------------------------------------------- /scripts/github_actions/run-nightly-build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright 2022 Xiaomi Corp. 
# (authors: Fangjun Kuang)

import subprocess
from datetime import datetime, timedelta, timezone


def get_last_commit_date() -> datetime:
    """Return the author date of the most recent commit as an aware UTC datetime.

    Runs ``git log -1 --format=%ad --date=unix`` in the current working
    directory and converts the Unix timestamp it prints.

    Returns:
        A timezone-aware ``datetime`` in UTC.

    Raises:
        OSError: If the ``git`` executable cannot be started.
        subprocess.CalledProcessError: If ``git`` exits with a non-zero
            status, e.g., when not inside a repository.
        ValueError: If the output of ``git`` is not an integer timestamp.
    """
    timestamp = (
        subprocess.check_output(
            [
                "git",
                "log",
                "-1",
                "--format=%ad",
                "--date=unix",
            ]
        )
        .decode("ascii")
        .strip()
    )
    # datetime.utcfromtimestamp() is deprecated since Python 3.12 and yields
    # a naive object; build an aware UTC datetime instead.
    return datetime.fromtimestamp(int(timestamp), tz=timezone.utc)


def main():
    """Print "true" if the last commit is less than one day old, else "false"."""
    last_commit_date_utc = get_last_commit_date()
    # Aware replacement for the deprecated datetime.utcnow(); both operands
    # of the comparison below are aware UTC datetimes.
    now_utc = datetime.now(timezone.utc)
    if last_commit_date_utc + timedelta(days=1) > now_utc:
        print("true")
    else:
        print("false")


if __name__ == "__main__":
    main()
'{get_package_version()}'\n") 26 | 27 | setuptools.setup( 28 | name=package_name, 29 | version=get_package_version(), 30 | author="Fangjun Kuang", 31 | author_email="csukuangfj@gmail.com", 32 | package_dir={package_name: "kaldifeat/python/kaldifeat"}, 33 | packages=[package_name], 34 | url="https://github.com/csukuangfj/kaldifeat", 35 | long_description=read_long_description(), 36 | long_description_content_type="text/markdown", 37 | ext_modules=[cmake_extension("_kaldifeat")], 38 | cmdclass={"build_ext": BuildExtension, "bdist_wheel": bdist_wheel}, 39 | zip_safe=False, 40 | classifiers=[ 41 | "Programming Language :: C++", 42 | "Programming Language :: Python", 43 | "Programming Language :: Python :: 3", 44 | "Programming Language :: Python :: 3.6", 45 | "Programming Language :: Python :: 3.7", 46 | "Programming Language :: Python :: 3.8", 47 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 48 | ], 49 | python_requires=">=3.6.0", 50 | license="Apache licensed, as found in the LICENSE file", 51 | ) 52 | 53 | # remove the line __version__ from kaldifeat/python/kaldifeat/__init__.py 54 | with open("kaldifeat/python/kaldifeat/__init__.py", "r") as f: 55 | lines = f.readlines() 56 | 57 | with open("kaldifeat/python/kaldifeat/__init__.py", "w") as f: 58 | for line in lines: 59 | if "__version__" in line and "torch" not in line: 60 | # skip __version__ = "x.x.x" 61 | continue 62 | f.write(line) 63 | --------------------------------------------------------------------------------