├── .github
    └── workflows
    │   └── gen_whl_to_pypi.yml
├── .gitignore
├── LICENSE
├── README.md
├── cpp_onnx
    ├── CMakeLists.txt
    ├── CMakeSettings.json
    ├── api.md
    ├── images
    │   ├── demo.png
    │   └── threadnum.png
    ├── include
    │   ├── Audio.h
    │   ├── ComDefine.h
    │   ├── Model.h
    │   ├── librapidasrapi.h
    │   ├── webrtc_vad.h
    │   └── win_func.h
    ├── models
    │   ├── readme.md
    │   └── vocab.txt
    ├── readme.md
    ├── src
    │   ├── Audio.cpp
    │   ├── CMakeLists.txt
    │   ├── CommonStruct.h
    │   ├── FeatureExtract.cpp
    │   ├── FeatureExtract.h
    │   ├── FeatureQueue.cpp
    │   ├── FeatureQueue.h
    │   ├── Model.cpp
    │   ├── SpeechWrap.cpp
    │   ├── SpeechWrap.h
    │   ├── Tensor.h
    │   ├── Vocab.cpp
    │   ├── Vocab.h
    │   ├── alignedmem.cpp
    │   ├── alignedmem.h
    │   ├── commonfunc.h
    │   ├── librapidasrapi.cpp
    │   ├── paraformer_onnx.cpp
    │   ├── paraformer_onnx.h
    │   ├── precomp.h
    │   ├── predefine_coe.h
    │   ├── tmp.h
    │   ├── util.cpp
    │   └── util.h
    ├── tester
    │   ├── CMakeLists.txt
    │   └── tester.cpp
    ├── third_party
    │   └── webrtc
    │   │   ├── CMakeLists.txt
    │   │   ├── common_audio
    │   │       ├── signal_processing
    │   │       │   ├── complex_bit_reverse.c
    │   │       │   ├── complex_fft.c
    │   │       │   ├── complex_fft_tables.h
    │   │       │   ├── cross_correlation.c
    │   │       │   ├── division_operations.c
    │   │       │   ├── dot_product_with_scale.cc
    │   │       │   ├── dot_product_with_scale.h
    │   │       │   ├── downsample_fast.c
    │   │       │   ├── energy.c
    │   │       │   ├── get_scaling_square.c
    │   │       │   ├── include
    │   │       │   │   ├── real_fft.h
    │   │       │   │   ├── signal_processing_library.h
    │   │       │   │   └── spl_inl.h
    │   │       │   ├── min_max_operations.c
    │   │       │   ├── resample_48khz.c
    │   │       │   ├── resample_by_2_internal.c
    │   │       │   ├── resample_by_2_internal.h
    │   │       │   ├── resample_fractional.c
    │   │       │   ├── spl_init.c
    │   │       │   ├── spl_inl.c
    │   │       │   ├── spl_sqrt.c
    │   │       │   └── vector_scaling_operations.c
    │   │       ├── third_party
    │   │       │   └── spl_sqrt_floor
    │   │       │   │   ├── spl_sqrt_floor.c
    │   │       │   │   └── spl_sqrt_floor.h
    │   │       └── vad
    │   │       │   ├── include
    │   │       │       └── webrtc_vad.h
    │   │       │   ├── vad_core.c
    │   │       │   ├── vad_core.h
    │   │       │   ├── vad_filterbank.c
    │   │       │   ├── vad_filterbank.h
    │   │       │   ├── vad_gmm.c
    │   │       │   ├── vad_gmm.h
    │   │       │   ├── vad_sp.c
    │   │       │   ├── vad_sp.h
    │   │       │   └── webrtc_vad.c
    │   │   ├── rtc_base
    │   │       ├── checks.cc
    │   │       ├── checks.h
    │   │       ├── compile_assert_c.h
    │   │       ├── numerics
    │   │       │   └── safe_compare.h
    │   │       ├── sanitizer.h
    │   │       ├── system
    │   │       │   ├── arch.h
    │   │       │   └── inline.h
    │   │       └── type_traits.h
    │   │   ├── system_wrappers
    │   │       └── include
    │   │       │   └── cpu_features_wrapper.h
    │   │   └── typedefs.h
    ├── wave
    │   ├── asr_example.wav
    │   ├── long.wav
    │   ├── short.wav
    │   ├── test.pcm.bytes
    │   └── test.pcm.wav
    └── win
    │   ├── bin
    │       ├── x64
    │       │   ├── libfftw3-3.dll
    │       │   ├── libfftw3f-3.dll
    │       │   ├── libfftw3l-3.dll
    │       │   └── onnxruntime.dll
    │       └── x86
    │       │   ├── libfftw3-3.dll
    │       │   ├── libfftw3f-3.dll
    │       │   ├── libfftw3l-3.dll
    │       │   └── onnxruntime.dll
    │   ├── images
    │       └── sample.png
    │   ├── include
    │       ├── cpu_provider_factory.h
    │       ├── fftw3.h
    │       ├── onnxruntime_c_api.h
    │       ├── onnxruntime_cxx_api.h
    │       ├── onnxruntime_cxx_inline.h
    │       ├── onnxruntime_run_options_config_keys.h
    │       ├── onnxruntime_session_options_config_keys.h
    │       ├── provider_options.h
    │       └── tensorrt_provider_factory.h
    │   ├── lib
    │       ├── x64
    │       │   ├── libfftw3-3.def
    │       │   ├── libfftw3-3.exp
    │       │   ├── libfftw3-3.lib
    │       │   ├── libfftw3f-3.def
    │       │   ├── libfftw3f-3.exp
    │       │   ├── libfftw3f-3.lib
    │       │   ├── libfftw3l-3.def
    │       │   ├── libfftw3l-3.exp
    │       │   ├── libfftw3l-3.lib
    │       │   └── onnxruntime.lib
    │       └── x86
    │       │   ├── libfftw3-3.def
    │       │   ├── libfftw3-3.exp
    │       │   ├── libfftw3-3.lib
    │       │   ├── libfftw3f-3.def
    │       │   ├── libfftw3f-3.exp
    │       │   ├── libfftw3f-3.lib
    │       │   ├── libfftw3l-3.def
    │       │   ├── libfftw3l-3.exp
    │       │   ├── libfftw3l-3.lib
    │       │   └── onnxruntime.lib
    │   └── readme.md
└── python
    ├── .gitattributes
    ├── .pre-commit-config.yaml
    ├── README.md
    ├── demo.py
    ├── docs
        └── doc_whl.md
    ├── rapid_paraformer
        ├── __init__.py
        ├── kaldifeat
        │   ├── LICENSE
        │   ├── README.md
        │   ├── __init__.py
        │   ├── feature.py
        │   └── ivector.py
        ├── main.py
        └── utils.py
    ├── requirements.txt
    ├── setup.py
    ├── test_wavs
        ├── 0478_00017.wav
        └── asr_example_zh.wav
    └── tests
        └── test_infer.py


/.github/workflows/gen_whl_to_pypi.yml:
--------------------------------------------------------------------------------
 1 | name: Push rapid_paraformer to pypi
 2 | 
 3 | on:
 4 |   push:
 5 |     # branches: [ main ]
 6 |     # paths:
 7 |     #   - 'python/rapid_paraformer/**'
 8 |     #   - 'python/docs/doc_whl.md'
 9 |     #   - 'python/setup.py'
10 |     #   - '.github/workflows/gen_whl_to_pypi.yml'
11 |     tags:
12 |       - v*
13 | 
14 | # env:
15 | #   RESOURCES_URL: https://github.com/RapidAI/RapidLatexOCR/releases/download/v0.0.0/models.zip
16 | 
17 | jobs:
18 |   # UnitTesting:
19 |   #   runs-on: ubuntu-latest
20 |   #   steps:
21 |   #     - name: Pull latest code
22 |   #       uses: actions/checkout@v3
23 | 
24 |   #     - name: Set up Python 3.7
25 |   #       uses: actions/setup-python@v4
26 |   #       with:
27 |   #         python-version: '3.7'
28 |   #         architecture: 'x64'
29 | 
30 |   #     - name: Display Python version
31 |   #       run: python -c "import sys; print(sys.version)"
32 | 
33 |   #     - name: Download models
34 |   #       run: |
35 |   #         wget $RESOURCES_URL
36 |   #         ZIP_NAME=${RESOURCES_URL##*/}
37 |   #         DIR_NAME=${ZIP_NAME%.*}
38 |   #         unzip $ZIP_NAME
39 | 
40 |   #     - name: Unit testings with rapid_latex_ocr
41 |   #       run: |
42 |   #         pip install -r requirements.txt
43 |   #         pip install pytest
44 |   #         pytest tests/test*.py
45 | 
46 |   GenerateWHL_PushPyPi:
47 |     runs-on: ubuntu-latest
48 | 
49 |     steps:
50 |       - uses: actions/checkout@v3
51 | 
52 |       - name: Set up Python 3.7
53 |         uses: actions/setup-python@v4
54 |         with:
55 |           python-version: '3.7'
56 |           architecture: 'x64'
57 | 
58 |       - name: Run setup.py
59 |         # The tag name is handed to the shell through an environment variable
60 |         # instead of being interpolated into the script text, so a tag that
61 |         # contains shell metacharacters cannot inject commands.
62 |         env:
63 |           REF_NAME: ${{ github.ref_name }}
64 |         run: |
65 |           cd python
66 |           # Upgrade pip first so every later install uses the upgraded pip.
67 |           python -m pip install --upgrade pip
68 |           pip install -r requirements.txt
69 |           pip install wheel get_pypi_latest_version
70 |           python setup.py bdist_wheel "$REF_NAME"
71 | 
72 |       # - name: Publish distribution 📦 to Test PyPI
73 |       #   uses: pypa/gh-action-pypi-publish@v1.5.0
74 |       #   with:
75 |       #     password: ${{ secrets.TEST_PYPI_API_TOKEN }}
76 |       #     repository_url: https://test.pypi.org/legacy/
77 |       #     packages_dir:  dist/
78 | 
79 |       # Uploads the wheel(s) produced by the previous step from python/dist/.
80 |       - name: Publish distribution 📦 to PyPI
81 |         uses: pypa/gh-action-pypi-publish@v1.5.0
82 |         with:
83 |           password: ${{ secrets.PYPI_API_TOKEN }}
84 |           packages_dir: python/dist/
85 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | *.onnx
  2 | *.json
  3 | 
  4 | # Created by .ignore support plugin (hsz.mobi)
  5 | ### Python template
  6 | # Byte-compiled / optimized / DLL files
  7 | __pycache__/
  8 | *.py[cod]
  9 | *$py.class
 10 | .pytest_cache
 11 | 
 12 | # C extensions
 13 | *.so
 14 | 
 15 | # Distribution / packaging
 16 | .Python
 17 | build/
 18 | develop-eggs/
 19 | dist/
 20 | downloads/
 21 | eggs/
 22 | .eggs/
 23 | lib/
 24 | lib64/
 25 | parts/
 26 | sdist/
 27 | var/
 28 | wheels/
 29 | pip-wheel-metadata/
 30 | share/python-wheels/
 31 | *.egg-info/
 32 | .installed.cfg
 33 | *.egg
 34 | MANIFEST
 35 | 
 36 | # PyInstaller
 37 | #  Usually these files are written by a python script from a template
 38 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 39 | # *.manifest
 40 | # *.spec
 41 | *.res
 42 | 
 43 | # Installer logs
 44 | pip-log.txt
 45 | pip-delete-this-directory.txt
 46 | 
 47 | # Unit test / coverage reports
 48 | htmlcov/
 49 | .tox/
 50 | .nox/
 51 | .coverage
 52 | .coverage.*
 53 | .cache
 54 | nosetests.xml
 55 | coverage.xml
 56 | *.cover
 57 | *.py,cover
 58 | .hypothesis/
 59 | .pytest_cache/
 60 | 
 61 | # Translations
 62 | *.mo
 63 | *.pot
 64 | 
 65 | # Django stuff:
 66 | *.log
 67 | local_settings.py
 68 | db.sqlite3
 69 | db.sqlite3-journal
 70 | 
 71 | # Flask stuff:
 72 | instance/
 73 | .webassets-cache
 74 | 
 75 | # Scrapy stuff:
 76 | .scrapy
 77 | 
 78 | # Sphinx documentation
 79 | docs/_build/
 80 | 
 81 | # PyBuilder
 82 | target/
 83 | 
 84 | # Jupyter Notebook
 85 | .ipynb_checkpoints
 86 | 
 87 | # IPython
 88 | profile_default/
 89 | ipython_config.py
 90 | 
 91 | # pyenv
 92 | .python-version
 93 | 
 94 | # pipenv
 95 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 96 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 97 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 98 | #   install all needed dependencies.
 99 | #Pipfile.lock
100 | 
101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
102 | __pypackages__/
103 | 
104 | # Celery stuff
105 | celerybeat-schedule
106 | celerybeat.pid
107 | 
108 | # SageMath parsed files
109 | *.sage.py
110 | 
111 | # Environments
112 | .env
113 | .venv
114 | env/
115 | venv/
116 | ENV/
117 | env.bak/
118 | venv.bak/
119 | 
120 | # Spyder project settings
121 | .spyderproject
122 | .spyproject
123 | 
124 | # Rope project settings
125 | .ropeproject
126 | 
127 | # mkdocs documentation
128 | /site
129 | 
130 | # mypy
131 | .mypy_cache/
132 | .dmypy.json
133 | dmypy.json
134 | 
135 | # Pyre type checker
136 | .pyre/
137 | 
138 | #idea
139 | .vs
140 | .vscode
141 | .idea
142 | /models
143 | 
144 | #models
145 | 
146 | *.ttf
147 | *.ttc
148 | 
149 | 
150 | *.bin
151 | *.mapping
152 | *.xml
153 | 
154 | *.pdiparams
155 | *.pdiparams.info
156 | *.pdmodel
157 | 
158 | .DS_Store


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 RapidAI
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Rapid ASR
 2 | <p align="left">
 3 |     <a href="https://huggingface.co/spaces/SWHL/RapidASRDemo" target="_blank"><img src="https://img.shields.io/badge/%F0%9F%A4%97-Hugging Face Demo-blue"></a>
 4 |     <a href="https://www.modelscope.cn/studios/liekkas/RapidASRDemo/summary" target="_blank"><img src="https://img.shields.io/badge/ModelScope-Demo-blue"></a>
 5 |     <a href=""><img src="https://img.shields.io/badge/OS-Linux%2C%20Win%2C%20Mac-pink.svg"></a>
 6 |     <a href=""><img src="https://img.shields.io/badge/Python->=3.6,<3.13-aff.svg"></a>
 7 |     <a href=""><img src="https://img.shields.io/badge/C++-aff.svg"></a>
 8 | </p>
 9 | 
10 | - 🎉 推出知识星球[RapidAI私享群](https://t.zsxq.com/0duLBZczw)，这里的提问会优先得到回答和支持，也会享受到RapidAI组织后续持续优质的服务。欢迎大家的加入。
11 | - Paraformer模型出自阿里达摩院[Paraformer语音识别-中文-通用-16k-离线-large-pytorch](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary)。
12 | - 本仓库仅对模型做了转换，只采用ONNXRuntime推理引擎。该项目核心代码已经并入[FunASR](https://github.com/alibaba-damo-academy/FunASR)。
13 | - 项目仍会持续更新，欢迎关注。
14 | - QQ群号：645751008
15 | 
16 | #### 📖文档导航
17 | - 语音识别：
18 |     - rapid_paraformer:
19 |         - [rapid_paraformer-Python](./python/README.md)
20 |         - [rapid_C++/C](./cpp_onnx/readme.md)
21 |     - [rapid_wenet](https://github.com/RapidAI/RapidASR/tree/rapid_wenet)
22 |         - [Python](https://github.com/RapidAI/RapidASR/tree/rapid_wenet/python)
23 |         - [C++](https://github.com/RapidAI/RapidASR/tree/rapid_wenet/cpp)
24 |     - [rapid_paddlespeech-Python](https://github.com/RapidAI/RapidASR/tree/rapid_paddlespeech)
25 | - 标点符号
26 |     - [RapidPunc](https://github.com/RapidAI/RapidPunc)
27 | 
28 | #### 📆TODO以及任务认领
29 | - 参见这里：[link](https://github.com/RapidAI/RapidASR/issues/15)
30 | 
31 | #### 🎨整体框架
32 | ```mermaid
33 | flowchart LR
34 | 
35 | A([wav]) --RapidVad--> B([各个小段的音频]) --RapidASR--> C([识别的文本内容]) --RapidPunc--> D([最终识别内容])
36 | ```
37 | 
38 | #### 📣更新日志
39 | <details><summary>详情</summary>
40 | 
41 | - 2023-08-21 v2.0.4 update:
42 |   - 添加whl包支持
43 |   - 更新文档
44 | - 2023-02-25
45 |    - 添加C++版本推理，使用onnxruntime引擎，预/后处理代码来自： [FastASR](https://github.com/chenkui164/FastASR)
46 | - 2023-02-14 v2.0.3 update:
47 |   - 修复librosa读取wav文件错误
48 |   - 修复fbank与torch下fbank提取结果不一致bug
49 | - 2023-02-11 v2.0.2 update:
50 |   - 模型和推理代码解耦（`rapid_paraformer`和`resources`）
51 |   - 支持批量推理（通过`resources/config.yaml`中`batch_size`指定）
52 |   - 增加多种输入方式（`Union[str, np.ndarray, List[str]]`）
53 | - 2023-02-10 v2.0.1 update:
54 |   - 添加对输入音频为噪音或者静音的文件推理结果捕捉。
55 | 
56 | </details>
57 | 


--------------------------------------------------------------------------------
/cpp_onnx/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.10)
 2 | 
 3 | # Example configure argument: -DONNXRUNTIME_DIR=D:\thirdpart\onnxruntime
 4 | project(FastASR)
 5 | # Compile as C++11 and request position-independent code.
 6 | set(CMAKE_CXX_STANDARD 11)
 7 | set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 8 | 
 9 | # Linker search path for onnxruntime; ONNXRUNTIME_DIR is expected to be supplied at configure time.
10 | 
11 | IF(WIN32)
12 | 	# 64-bit Microsoft compiler (CMAKE_CL_64): add <ONNXRUNTIME_DIR>\lib to the link directories.
13 | 	# Any other Windows build: only define _WIN_X86; no onnxruntime link directory is added here.
14 | 	if(CMAKE_CL_64)
15 | 		link_directories(${ONNXRUNTIME_DIR}\\lib)
16 | 	else()
17 | 		add_definitions(-D_WIN_X86)
18 | 	endif()
19 | ELSE()
20 | # Non-Windows: add <ONNXRUNTIME_DIR>/lib to the link directories.
21 | 
22 | link_directories(${ONNXRUNTIME_DIR}/lib)
23 | 
24 | endif()
25 | 
26 | #option(FASTASR_BUILD_PYTHON_MODULE "build python module, using FastASR in Python" OFF)
27 | # Sub-projects, added in this order: bundled WebRTC sources, src, tester.
28 | add_subdirectory("./third_party/webrtc")
29 | add_subdirectory(src)
30 | add_subdirectory(tester)
31 | 


--------------------------------------------------------------------------------
/cpp_onnx/CMakeSettings.json:
--------------------------------------------------------------------------------
 1 | ﻿{
 2 |   "configurations": [
 3 |     {
 4 |       "name": "x64-Debug",
 5 |       "generator": "Ninja",
 6 |       "configurationType": "Debug",
 7 |       "inheritEnvironments": [ "msvc_x64_x64" ],
 8 |       "buildRoot": "${projectDir}\\out\\build\\${name}",
 9 |       "installRoot": "${projectDir}\\out\\install\\${name}",
10 |       "buildCommandArgs": "",
11 |       "ctestCommandArgs": ""
12 |     },
13 |     {
14 |       "name": "x64-Release",
15 |       "generator": "Ninja",
16 |       "configurationType": "RelWithDebInfo",
17 |       "buildRoot": "${projectDir}\\out\\build\\${name}",
18 |       "installRoot": "${projectDir}\\out\\install\\${name}",
19 |       "cmakeCommandArgs": "",
20 |       "buildCommandArgs": "",
21 |       "ctestCommandArgs": "",
22 |       "inheritEnvironments": [ "msvc_x64_x64" ]
23 |     },
24 |     {
25 |       "name": "Linux-GCC-Debug",
26 |       "generator": "Unix Makefiles",
27 |       "configurationType": "Debug",
28 |       "cmakeExecutable": "cmake",
29 |       "remoteCopySourcesExclusionList": [ ".vs", ".git", "out" ],
30 |       "cmakeCommandArgs": "-DONNXRUNTIME_DIR=/data/linux/thirdpart/onnxruntime-linux-x64-1.14.1",
31 |       "buildCommandArgs": "",
32 |       "ctestCommandArgs": "",
33 |       "inheritEnvironments": [ "linux_x64" ],
34 |       "remoteMachineName": "${defaultRemoteMachineName}",
35 |       "remoteCMakeListsRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/src",
36 |       "remoteBuildRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/out/build/${name}",
37 |       "remoteInstallRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/out/install/${name}",
38 |       "remoteCopySources": true,
39 |       "rsyncCommandArgs": "-t --delete",
40 |       "remoteCopyBuildOutput": false,
41 |       "remoteCopySourcesMethod": "rsync"
42 |     }
43 |   ]
44 | }


--------------------------------------------------------------------------------
/cpp_onnx/api.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 参考代码： [tester.cpp](tester/tester.cpp)
 4 | 
 5 | ```
 6 | 初始化程序库
 7 | _RAPIDASRAPI RPASR_HANDLE  RapidAsrInit(const char* szModelDir, int nThread);
 8 | 
 9 | 
10 | 
11 | // If no callback is needed, fnCallback should be NULL.
12 | 识别内存缓冲区，完整的wav文件数据，包括文件头
13 | _RAPIDASRAPI RPASR_RESULT	RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback);
14 | 识别内存缓冲区，只包括采样点数据，不包括wav文件头
15 | _RAPIDASRAPI RPASR_RESULT	RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback);
16 | 识别文件，只包括采样点数据，不包括wav文件头
17 | _RAPIDASRAPI RPASR_RESULT	RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback);
18 | 
19 | 识别音频文件，完整的wav文件数据，包括文件头
20 | _RAPIDASRAPI RPASR_RESULT	RapidAsrRecogFile(RPASR_HANDLE handle, const char* szWavfile, RPASR_MODE Mode, QM_CALLBACK fnCallback);
21 | 
22 | 获取识别后的文本和相关数据
23 | _RAPIDASRAPI const char*	RapidAsrGetResult(RPASR_RESULT Result,int nIndex);
24 | 
25 | 获取结果块个数
26 | _RAPIDASRAPI const int		RapidAsrGetRetNumber(RPASR_RESULT Result);
27 | 
28 | 释放返回的结果块内存
29 | _RAPIDASRAPI void			RapidAsrFreeResult(RPASR_RESULT Result);
30 | 
31 | 
32 | 使用完成后清理程序库
33 | _RAPIDASRAPI void			RapidAsrUninit(RPASR_HANDLE Handle);
34 | 
35 | 获取结果块中的数据所表示的音频长度，单位秒
36 | _RAPIDASRAPI const float	RapidAsrGetRetSnippetTime(RPASR_RESULT Result);
37 | 
38 | 
39 | ```
40 | 


--------------------------------------------------------------------------------
/cpp_onnx/images/demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/images/demo.png


--------------------------------------------------------------------------------
/cpp_onnx/images/threadnum.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/images/threadnum.png


--------------------------------------------------------------------------------
/cpp_onnx/include/Audio.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef AUDIO_H
 3 | #define AUDIO_H
 4 | 
 5 | #include <ComDefine.h>
 6 | #include <queue>
 7 | #include <stdint.h>
 8 | 
 9 | using namespace std;
10 | // AudioFrame: three int fields (start, end, len) with setters/getters. Only declarations are visible here; units and setter rules live in the implementation.
11 | class AudioFrame {
12 |   private:
13 |     int start;
14 |     int end;
15 |     int len;
16 |     // NOTE(review): whether these are sample indices or some other unit is not visible in this header -- confirm.
17 |   public:
18 |     AudioFrame();
19 |     AudioFrame(int len);
20 |     // NOTE(review): the getters below are not const-qualified, so they cannot be called on a const AudioFrame.
21 |     ~AudioFrame();
22 |     int set_start(int val);
23 |     int set_end(int val, int max_len); // presumably bounds the end against max_len -- TODO confirm in the implementation
24 |     int get_start();
25 |     int get_len();
26 |     int disp();
27 | };
28 | // Audio: holds pointers to sample buffers plus a queue of AudioFrame pointers. Declaration only; behaviour is defined in the implementation file.
29 | class Audio {
30 |   private:
31 |     float *speech_data;
32 |     int16_t *speech_buff;
33 |     int speech_len;
34 |     int speech_align_len;
35 |     int16_t sample_rate;
36 |     int offset;
37 |     float align_size;
38 |     int data_type;
39 |     queue<AudioFrame *> frame_queue;
40 |     // NOTE(review): raw-pointer members with a user-declared destructor and no copy constructor/assignment declared -- copying an Audio is presumably unsafe; confirm.
41 |   public:
42 |     Audio(int data_type);
43 |     Audio(int data_type, int size);
44 |     ~Audio();
45 |     void disp();
46 |     bool loadwav(const char* filename);             // takes a file name
47 |     bool loadwav(const char* buf, int nLen);        // takes an in-memory buffer and its length
48 |     bool loadpcmwav(const char* buf, int nFileLen); // takes an in-memory buffer and its length; presumably header-less PCM -- TODO confirm
49 |     bool loadpcmwav(const char* filename);          // takes a file name; presumably header-less PCM -- TODO confirm
50 |     int fetch_chunck(float *&dout, int len);        // dout is a reference to a pointer, so the callee can repoint it
51 |     int fetch(float *&dout, int &len, int &flag);   // dout, len and flag are all passed by reference (output-capable)
52 |     void padding();
53 |     void split();
54 |     float get_time_len();
55 |     // Returns the number of AudioFrame pointers currently in frame_queue.
56 |     int get_queue_size() { return (int)frame_queue.size(); }
57 | };
58 | 
59 | #endif
60 | 


--------------------------------------------------------------------------------
/cpp_onnx/include/ComDefine.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef COMDEFINE_H
 3 | #define COMDEFINE_H
 4 | 
 5 | #define S_BEGIN  0
 6 | #define S_MIDDLE 1
 7 | #define S_END    2
 8 | #define S_ALL    3
 9 | #define S_ERR    4
10 | 
11 | #endif
12 | 


--------------------------------------------------------------------------------
/cpp_onnx/include/Model.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef MODEL_H
 3 | #define MODEL_H
 4 | 
 5 | #include <string>
 6 | // Model: abstract interface; every operation below is pure virtual and must be provided by a concrete subclass.
 7 | class Model {
 8 |   public:
 9 |     virtual ~Model(){}; // virtual destructor, so deleting through a Model* runs the subclass destructor
10 |     virtual void reset() = 0;
11 |     virtual std::string forward_chunk(float *din, int len, int flag) = 0; // returns a std::string; meaning of flag is not visible in this header
12 |     virtual std::string forward(float *din, int len, int flag) = 0;       // same parameter list as forward_chunk
13 |     virtual std::string rescoring() = 0;
14 | };
15 | // Factory declared here, defined elsewhere: returns a Model*. nThread defaults to 0 (what 0 means is not visible here).
16 | Model *create_model(const char *path,int nThread=0);
17 | #endif
18 | 


--------------------------------------------------------------------------------
/cpp_onnx/include/librapidasrapi.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | 
 4 | #ifdef WIN32
 5 | 
 6 | 
 7 | #ifdef _RPASR_API_EXPORT
 8 | 
 9 | #define  _RAPIDASRAPI __declspec(dllexport)
10 | #else
11 | #define  _RAPIDASRAPI __declspec(dllimport)
12 | #endif
13 | 	
14 | 
15 | #else
16 | #define _RAPIDASRAPI  
17 | #endif
18 | 
19 | 
20 | 
21 | 
22 | 
23 | #ifndef _WIN32
24 | 
25 | #define RPASR_CALLBCK_PREFIX __attribute__((__stdcall__))
26 | 
27 | #else
28 | #define RPASR_CALLBCK_PREFIX __stdcall
29 | #endif
30 | 	
31 | 
32 | #ifdef __cplusplus 
33 | 
34 | extern "C" {
35 | #endif
36 | 
37 | typedef void* RPASR_HANDLE;
38 | 
39 | typedef void* RPASR_RESULT;
40 | 
41 | typedef unsigned char RPASR_BOOL;
42 | 
43 | #define RPASR_TRUE 1
44 | #define RPASR_FALSE 0
45 | #define QM_DEFAULT_THREAD_NUM  4
46 | 
47 | 
48 | typedef enum
49 | {
50 |  RASR_NONE=-1,
51 |  RASRM_CTC_GREEDY_SEARCH=0,
52 |  RASRM_CTC_RPEFIX_BEAM_SEARCH = 1,
53 |  RASRM_ATTENSION_RESCORING = 2,
54 |  
55 | }RPASR_MODE;
56 | 
57 | typedef enum {
58 | 
59 | 	RPASR_MODEL_PADDLE = 0,
60 | 	RPASR_MODEL_PADDLE_2 = 1,
61 | 	RPASR_MODEL_K2 = 2,
62 | 	RPASR_MODEL_PARAFORMER = 3,
63 | 
64 | }RPASR_MODEL_TYPE;
65 | 
66 | 
67 | typedef void (* QM_CALLBACK)(int nCurStep, int nTotal); // nTotal: total steps; nCurStep: Current Step.
68 | 	
69 | 	// APIs for qmasr
 70 | // Initializes the library. Takes the model directory and a thread count; returns an RPASR_HANDLE.
 71 | _RAPIDASRAPI RPASR_HANDLE  RapidAsrInit(const char* szModelDir, int nThread);
 72 | 
 73 | // RapidAsrRecogBuffer: recognizes an in-memory buffer holding a complete WAV file, header included.
 74 | // RapidAsrRecogPCMBuffer: recognizes an in-memory buffer holding sample data only, without a WAV header.
 75 | // If no callback is needed, fnCallback should be NULL.
 76 | _RAPIDASRAPI RPASR_RESULT	RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback);
 77 | _RAPIDASRAPI RPASR_RESULT	RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback);
 78 | // Recognizes a file holding sample data only, without a WAV header.
 79 | _RAPIDASRAPI RPASR_RESULT	RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback);
 80 | // Recognizes an audio file holding a complete WAV file, header included.
 81 | _RAPIDASRAPI RPASR_RESULT	RapidAsrRecogFile(RPASR_HANDLE handle, const char* szWavfile, RPASR_MODE Mode, QM_CALLBACK fnCallback);
 82 | // Returns the recognized text and related data. NOTE(review): nIndex presumably selects one result block -- confirm.
 83 | _RAPIDASRAPI const char*	RapidAsrGetResult(RPASR_RESULT Result,int nIndex);
 84 | // Returns the number of result blocks.
 85 | _RAPIDASRAPI const int		RapidAsrGetRetNumber(RPASR_RESULT Result);
 86 | _RAPIDASRAPI void			RapidAsrFreeResult(RPASR_RESULT Result); // Frees the memory of a returned result block.
 87 | 
 88 | // Cleans up the library once the caller has finished using it.
 89 | _RAPIDASRAPI void			RapidAsrUninit(RPASR_HANDLE Handle);
 90 | // Returns the audio length, in seconds, represented by the data in the result block.
 91 | _RAPIDASRAPI const float	RapidAsrGetRetSnippetTime(RPASR_RESULT Result);
92 | 
93 | #ifdef __cplusplus 
94 | 
95 | }
96 | #endif


--------------------------------------------------------------------------------
/cpp_onnx/include/webrtc_vad.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | /*
12 |  * This header file includes the VAD API calls. Specific function calls are
13 |  * given below.
14 |  */
15 | 
16 | #ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
17 | #define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_
18 | 
19 | #include <stddef.h>
20 | #include <stdint.h>
21 | 
22 | typedef struct WebRtcVadInst VadInst;
23 | 
24 | #ifdef __cplusplus
25 | extern "C" {
26 | #endif
27 | 
28 | // Creates an instance to the VAD structure.
29 | VadInst* WebRtcVad_Create(void);
30 | 
31 | // Frees the dynamic memory of a specified VAD instance.
32 | //
33 | // - handle [i] : Pointer to VAD instance that should be freed.
34 | void WebRtcVad_Free(VadInst* handle);
35 | 
36 | // Initializes a VAD instance.
37 | //
38 | // - handle [i/o] : Instance that should be initialized.
39 | //
40 | // returns        : 0 - (OK),
41 | //                 -1 - (null pointer or Default mode could not be set).
42 | int WebRtcVad_Init(VadInst* handle);
43 | 
44 | // Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
45 | // restrictive in reporting speech. Put in other words the probability of being
46 | // speech when the VAD returns 1 is increased with increasing mode. As a
47 | // consequence also the missed detection rate goes up.
48 | //
49 | // - handle [i/o] : VAD instance.
50 | // - mode   [i]   : Aggressiveness mode (0, 1, 2, or 3).
51 | //
52 | // returns        : 0 - (OK),
53 | //                 -1 - (null pointer, mode could not be set or the VAD instance
54 | //                       has not been initialized).
55 | int WebRtcVad_set_mode(VadInst* handle, int mode);
56 | 
57 | // Calculates a VAD decision for the |audio_frame|. For valid sampling rates and
58 | // frame lengths, see the description of WebRtcVad_ValidRateAndFrameLength().
59 | //
60 | // - handle       [i/o] : VAD Instance. Needs to be initialized by
61 | //                        WebRtcVad_Init() before call.
62 | // - fs           [i]   : Sampling frequency (Hz): 8000, 16000, or 32000
63 | // - audio_frame  [i]   : Audio frame buffer.
64 | // - frame_length [i]   : Length of audio frame buffer in number of samples.
65 | //
66 | // returns              : 1 - (Active Voice),
67 | //                        0 - (Non-active Voice),
68 | //                       -1 - (Error)
69 | int WebRtcVad_Process(VadInst* handle,
70 |                       int fs,
71 |                       const int16_t* audio_frame,
72 |                       size_t frame_length);
73 | 
74 | // Checks for valid combinations of |rate| and |frame_length|. We support 10,
75 | // 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz.
76 | //
77 | // - rate         [i] : Sampling frequency (Hz).
78 | // - frame_length [i] : Speech frame buffer length in number of samples.
79 | //
80 | // returns            : 0 - (valid combination), -1 - (invalid combination)
81 | int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length);
82 | 
83 | #ifdef __cplusplus
84 | }
85 | #endif
86 | 
87 | #endif  // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
88 | 


--------------------------------------------------------------------------------
/cpp_onnx/include/win_func.h:
--------------------------------------------------------------------------------
 1 | #include <time.h>
 2 | #ifdef WIN32
 3 | #include <windows.h>
 4 | #else
 5 | #include <sys/time.h>
 6 | #endif
 7 | #ifdef WIN32
 8 | /*
 9 |  * Minimal Windows stand-in for POSIX gettimeofday().
10 |  *
11 |  * tp  - receives the current time: tv_sec is built from the local wall-clock
12 |  *       time via mktime(); tv_usec comes from the millisecond field, so the
13 |  *       effective resolution is 1 ms.
14 |  * tzp - ignored.
15 |  *
16 |  * Returns 0.
17 |  *
18 |  * Declared `static inline` because the definition lives in a header: with
19 |  * external linkage, including this file from more than one translation unit
20 |  * produces duplicate-symbol errors at link time.
21 |  */
22 | static inline int gettimeofday(struct timeval* tp, void* tzp)
23 | {
24 | 	time_t clock;
25 | 	struct tm tm;
26 | 	SYSTEMTIME wtm;
27 | 
28 | 	(void)tzp; /* unused; kept so the signature matches the POSIX call */
29 | 
30 | 	GetLocalTime(&wtm);
31 | 	tm.tm_year = wtm.wYear - 1900; /* struct tm counts years from 1900 */
32 | 	tm.tm_mon = wtm.wMonth - 1;    /* struct tm months are 0-based */
33 | 	tm.tm_mday = wtm.wDay;
34 | 	tm.tm_hour = wtm.wHour;
35 | 	tm.tm_min = wtm.wMinute;
36 | 	tm.tm_sec = wtm.wSecond;
37 | 	tm.tm_isdst = -1;              /* let mktime() decide whether DST applies */
38 | 
39 | 	clock = mktime(&tm);
40 | 	tp->tv_sec = (long)clock;      /* explicit narrowing: Windows timeval uses long */
41 | 	tp->tv_usec = wtm.wMilliseconds * 1000;
42 | 	return (0);
43 | }
44 | #endif


--------------------------------------------------------------------------------
/cpp_onnx/models/readme.md:
--------------------------------------------------------------------------------
1 | Place model.onnx here!
2 | 


--------------------------------------------------------------------------------
/cpp_onnx/readme.md:
--------------------------------------------------------------------------------
  1 | 
  2 | ## 特别鸣谢
  3 | 
  4 | 本程序中的预处理及后处理代码，来自于：https://github.com/chenkui164/FastASR
  5 | 
  6 | 
  7 | ## 线程数与性能关系
  8 | 
  9 | 测试环境Rocky Linux 8，仅测试cpp版本结果（未测python版本），@acely 
 10 | 
 11 | 简述：
 12 | 在3台配置不同的机器上分别编译并测试，在fftw和onnxruntime版本都相同的前提下，识别同一个30分钟的音频文件，分别测试不同onnx线程数量的表现。
 13 | 
 14 | ![线程数关系](images/threadnum.png "Windows ASR")
 15 | 
 16 | 目前可以总结出大致规律：
 17 | 
 18 | - 并非onnx线程数越多越好
 19 | - 2线程比1线程提升显著，线程再多则提升较小
 20 | - 线程数等于CPU物理核心数时效率最好
 21 | 
 22 | 实操建议：
 23 | - 大部分场景用3-4线程性价比最高
 24 | - 低配机器用2线程合适
 25 | 
 26 | 
 27 | 
 28 | ## API
 29 | [API文档](api.md)
 30 | 
 31 | ##  演示
 32 | 
 33 | ![Windows演示](images/demo.png "Windows ASR")
 34 | 
 35 | ## 注意
 36 | 本程序只支持 采样率16000hz, 位深16bit的 **单声道** 音频。
 37 | 
 38 | ## 快速使用
 39 | 
 40 | ### Windows
 41 | 
 42 |  安装Vs2022 打开cpp_onnx目录下的cmake工程，直接 build即可。 本仓库已经准备好所有相关依赖库。
 43 | 
 44 |  Windows下已经预置fftw3、onnxruntime及openblas库
 45 | 
 46 | 
 47 | ### Linux
 48 | See the bottom of this page: Building Guidance
 49 | 
 50 | 
 51 | ###  运行程序
 52 | 
 53 | tester  /path/to/models/dir /path/to/wave/file
 54 | 
 55 |  例如： tester /data/models  /data/test.wav
 56 | 
 57 | /data/models 需要包括如下两个文件： model.onnx 和vocab.txt
 58 | 
 59 | 
 60 | ## 支持平台
 61 | - Windows
 62 | - Linux/Unix
 63 | 
 64 | ## 依赖
 65 | - fftw3
 66 | - onnxruntime
 67 | 
 68 | ## 导出onnx格式模型文件
 69 | 安装 modelscope与FunASR，依赖：torch，torchaudio，安装过程[详细参考文档](https://github.com/alibaba-damo-academy/FunASR/wiki)
 70 | ```shell
 71 | pip install "modelscope[audio_asr]" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
 72 | git clone https://github.com/alibaba/FunASR.git && cd FunASR
 73 | pip install --editable ./
 74 | ```
 75 | 导出onnx模型，[详见](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/export)，参考示例，从modelscope中模型导出：
 76 | 
 77 | ```
 78 | python -m funasr.export.export_model 'damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch' "./export" true
 79 | ```
 80 | 
 81 | ## Building Guidance for Linux/Unix
 82 | 
 83 | ```
 84 | git clone https://github.com/RapidAI/RapidASR.git
 85 | cd RapidASR/cpp_onnx/
 86 | mkdir build
 87 | cd build
 88 | # download an appropriate onnxruntime from https://github.com/microsoft/onnxruntime/releases/tag/v1.14.0
 89 | # here we get a copy of onnxruntime for linux 64
 90 | wget https://github.com/microsoft/onnxruntime/releases/download/v1.14.0/onnxruntime-linux-x64-1.14.0.tgz && tar -xzf onnxruntime-linux-x64-1.14.0.tgz
 91 | # ls
 92 | # onnxruntime-linux-x64-1.14.0  onnxruntime-linux-x64-1.14.0.tgz
 93 | 
 94 | #install fftw3-dev
 95 | apt install libfftw3-dev
 96 | 
 97 | # build
 98 |  cmake  -DCMAKE_BUILD_TYPE=release .. -DONNXRUNTIME_DIR=$(pwd)/onnxruntime-linux-x64-1.14.0
 99 |  make
100 | 
101 |  # then in the subfolder tester of the current directory, you will see a program named tester
102 | 
103 | ```
104 | 
105 | ### The structure of a qualified onnxruntime package.
106 | ```
107 | onnxruntime_xxx
108 | ├───include
109 | └───lib
110 | ```
111 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Builds the rapidasr library from every .cpp file in this directory (plus paraformer/*.cpp if any exist).
 2 | file(GLOB files1 "*.cpp")
 3 | file(GLOB files4 "paraformer/*.cpp")
 4 | # NOTE(review): files2 and files3 are not set anywhere in this file, so they add nothing here.
 5 | set(files ${files1} ${files2} ${files3} ${files4})
 6 | 
 7 | # message("${files}")
 8 | 
 9 | add_library(rapidasr ${files})
10 | 
11 | if(WIN32)
12 |         # Windows: take fftw3 from the win/ tree, choosing the x64 or x86 libraries by CMAKE_CL_64.
13 |         set(EXTRA_LIBS libfftw3f-3 webrtcvad)
14 |         if(CMAKE_CL_64)
15 |             target_link_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64)
16 |         else()
17 |             target_link_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x86)
18 |         endif()
19 |         target_include_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include )
20 |         # PUBLIC: the definition applies to rapidasr and to every target that links against it.
21 |         target_compile_definitions(rapidasr PUBLIC -D_RPASR_API_EXPORT)
22 | else()
23 |     # Other platforms: search a few fixed fftw/openblas prefixes and the system directories.
24 |     set(EXTRA_LIBS fftw3f webrtcvad pthread)
25 |     target_include_directories(rapidasr PUBLIC "/usr/local/opt/fftw/include")
26 |     target_link_directories(rapidasr PUBLIC "/usr/local/opt/fftw/lib")
27 | 
28 |     target_include_directories(rapidasr PUBLIC "/usr/local/opt/openblas/include")
29 |     target_link_directories(rapidasr PUBLIC "/usr/local/opt/openblas/lib")
30 | 
31 |     target_include_directories(rapidasr PUBLIC "/usr/include")
32 |     target_link_directories(rapidasr PUBLIC "/usr/lib64")
33 |     # FFTW3F_INCLUDE_DIR, FFTW3F_LIBRARY_DIR and ONNXRUNTIME_DIR are not set in this file; they must come from the caller.
34 |     target_include_directories(rapidasr PUBLIC  ${FFTW3F_INCLUDE_DIR})
35 |     target_link_directories(rapidasr PUBLIC ${FFTW3F_LIBRARY_DIR})
36 |     include_directories(${ONNXRUNTIME_DIR}/include)    
37 | endif()
38 | 
39 | include_directories(${CMAKE_SOURCE_DIR}/include)
40 | target_link_libraries(rapidasr PUBLIC onnxruntime ${EXTRA_LIBS})
41 | 
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/CommonStruct.h:
--------------------------------------------------------------------------------
1 | 
2 | #ifndef COMMONSTRUCT_H
3 | #define COMMONSTRUCT_H
4 | 
5 | 
6 | #endif
7 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/FeatureExtract.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef FEATUREEXTRACT_H
 3 | #define FEATUREEXTRACT_H
 4 | 
 5 | #include <fftw3.h>
 6 | #include <stdint.h>
 7 | 
 8 | #include "FeatureQueue.h"
 9 | #include "SpeechWrap.h"
10 | #include "Tensor.h"
11 | 
12 | class FeatureExtract { // audio-to-feature front end; the method bodies are not part of this header
13 |   private:
14 |     SpeechWrap speech;
15 |     FeatureQueue fqueue;
16 |     int mode; // NOTE(review): presumably the value given to the constructor; its meaning is not visible here
17 |     // FFTW single-precision state: real input buffer, complex output buffer, and the plan.
18 |     float *fft_input;
19 |     fftwf_complex *fft_out;
20 |     fftwf_plan p;
21 |     // NOTE(review): judging by the names, these set up FFTW, compute a mel spectrum and apply global CMVN - confirm in the implementation.
22 |     void fftw_init();
23 |     void melspect(float *din, float *dout);
24 |     void global_cmvn(float *din);
25 |     // NOTE(review): fetch() takes the pointer by reference, presumably to hand a tensor to the caller - confirm who frees it.
26 |   public:
27 |     FeatureExtract(int mode);
28 |     ~FeatureExtract();
29 |     int size();
30 |     int status();
31 |     void reset();
32 |     void insert(float *din, int len, int flag);
33 |     bool fetch(Tensor<float> *&dout);
34 | };
35 | 
36 | #endif
37 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/FeatureQueue.cpp:
--------------------------------------------------------------------------------
 1 | #include "precomp.h"
 2 | // Number of float values in one feature frame (one row of every tensor here).
 3 | static const int kFrameDim = 80;
 4 | 
 5 | // Starts with an empty queue and a staging tensor of 67 frames.
 6 | FeatureQueue::FeatureQueue()
 7 | {
 8 |     window_size = 67;
 9 |     buff_idx = 0;
10 |     buff = new Tensor<float>(window_size, kFrameDim);
11 | }
12 | 
13 | // Frees the staging tensor and every finished tensor that was never popped.
14 | // Tensors already returned by pop() belong to the caller and are not touched.
15 | FeatureQueue::~FeatureQueue()
16 | {
17 |     delete buff;
18 |     while (!feature_queue.empty()) {
19 |         delete feature_queue.front();
20 |         feature_queue.pop();
21 |     }
22 | }
23 | 
24 | // Replaces the staging tensor with an empty one of `size` frames. Tensors
25 | // already in the queue are kept.
26 | void FeatureQueue::reinit(int size)
27 | {
28 |     delete buff;
29 |     buff = new Tensor<float>(size, kFrameDim);
30 |     buff_idx = 0;
31 |     window_size = size;
32 | }
33 | 
34 | // Discards the frames collected in the staging tensor; queued tensors are kept.
35 | void FeatureQueue::reset()
36 | {
37 |     buff_idx = 0;
38 | }
39 | 
40 | // Appends one frame (kFrameDim floats read from din) to the staging tensor.
41 | //  - flag == S_END: the frames collected so far are copied into a tensor of
42 | //    exactly that many rows, which is queued; staging restarts empty.
43 | //  - staging tensor full: it is queued as-is and a fresh staging tensor is
44 | //    started with a copy of its last 3 frames, so consecutive windows overlap.
45 | void FeatureQueue::push(float *din, int flag)
46 | {
47 |     memcpy(buff->buff + buff_idx * kFrameDim, din, kFrameDim * sizeof(float));
48 |     buff_idx++;
49 | 
50 |     if (flag == S_END) {
51 |         Tensor<float> *tail = new Tensor<float>(buff_idx, kFrameDim);
52 |         memcpy(tail->buff, buff->buff, buff_idx * kFrameDim * sizeof(float));
53 |         feature_queue.push(tail);
54 |         buff_idx = 0;
55 |     } else if (buff_idx == window_size) {
56 |         const int overlap = 3;
57 |         Tensor<float> *next = new Tensor<float>(window_size, kFrameDim);
58 |         memcpy(next->buff, buff->buff + (window_size - overlap) * kFrameDim,
59 |                overlap * kFrameDim * sizeof(float));
60 |         feature_queue.push(buff);
61 |         buff = next;
62 |         buff_idx = overlap;
63 |     }
64 | }
65 | 
66 | // Removes and returns the oldest queued tensor; the caller becomes its owner.
67 | // Returns nullptr when the queue is empty.
68 | Tensor<float> *FeatureQueue::pop()
69 | {
70 |     if (feature_queue.empty()) {
71 |         return nullptr;
72 |     }
73 |     Tensor<float> *oldest = feature_queue.front();
74 |     feature_queue.pop();
75 |     return oldest;
76 | }
77 | 
78 | // Number of finished tensors waiting to be popped.
79 | int FeatureQueue::size()
80 | {
81 |     return feature_queue.size();
82 | }
83 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/FeatureQueue.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef FEATUREQUEUE_H
 3 | #define FEATUREQUEUE_H
 4 | 
 5 | #include "Tensor.h"
 6 | #include <queue>
 7 | #include <stdint.h>
 8 | using namespace std;
 9 | 
10 | 
11 | class FeatureQueue { // collects 80-value feature frames into tensors and queues the finished ones
12 |   private:
13 |     queue<Tensor<float> *> feature_queue; // finished tensors waiting for pop()
14 |     Tensor<float> *buff;                  // tensor currently being filled by push()
15 |     int buff_idx;                         // number of frames already stored in buff
16 |     int window_size;                      // frames per full tensor: 67 after construction, changed by reinit()
17 |     // The methods below are implemented in FeatureQueue.cpp.
18 |   public:
19 |     FeatureQueue();
20 |     ~FeatureQueue();
21 |     void reinit(int size);                // replaces buff with an empty tensor of `size` frames
22 |     void reset();                         // sets buff_idx back to 0; the queue itself is not emptied
23 |     void push(float *din, int flag);      // copies 80 floats from din; flag == S_END closes the current tensor
24 |     Tensor<float> *pop();                 // removes and returns the oldest queued tensor
25 |     int size();                           // number of queued tensors
26 | };
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/Model.cpp:
--------------------------------------------------------------------------------
 1 | #include "precomp.h"
 2 | 
 3 | Model *create_model(const char *path,int nThread)
 4 | {
 5 |     // Factory for the Model interface. The paraformer ONNX implementation is
 6 |     // the only backend created here; path and nThread are forwarded to its
 7 |     // constructor unchanged. The caller owns the returned object.
 8 |     Model *instance = new paraformer::ModelImp(path, nThread);
 9 | 
10 |     return instance;
11 | }
12 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/SpeechWrap.cpp:
--------------------------------------------------------------------------------
 1 | #include "precomp.h"
 2 | 
 3 | // Starts with an empty cache and no input attached. Every counter is
 4 | // initialised so that size() is defined even before the first load().
 5 | SpeechWrap::SpeechWrap()
 6 |     : cache_size(0), in(nullptr), in_size(0), total_size(0), next_cache_size(0)
 7 | {
 8 | }
 9 | 
10 | SpeechWrap::~SpeechWrap()
11 | {
12 | }
13 | 
14 | // Drops the cached samples. The attached input and total_size are left as they are.
15 | void SpeechWrap::reset()
16 | {
17 |     cache_size = 0;
18 | }
19 | 
20 | // Attaches a new input buffer of len samples. Only the pointer is stored, so
21 | // din must stay valid for as long as this object is indexed or updated. The
22 | // logical sequence becomes the cached samples followed by din[0..len).
23 | void SpeechWrap::load(float *din, int len)
24 | {
25 |     in = din;
26 |     in_size = len;
27 |     total_size = cache_size + in_size;
28 | }
29 | 
30 | // Length of the logical sequence as computed by the last load().
31 | int SpeechWrap::size()
32 | {
33 |     return total_size;
34 | }
35 | 
36 | // Makes the logical samples [offset, size()) the new cache, so they precede
37 | // the next input buffer. offset is clamped to [0, size()]; if more samples
38 | // remain than the cache can hold, only the newest ones are kept.
39 | void SpeechWrap::update(int offset)
40 | {
41 |     const int capacity = (int)(sizeof(cache) / sizeof(cache[0]));
42 |     if (offset > total_size) {
43 |         offset = total_size;
44 |     }
45 |     if (total_size - offset > capacity) {
46 |         offset = total_size - capacity;
47 |     }
48 |     if (offset < 0) {
49 |         offset = 0;
50 |     }
51 | 
52 |     // Samples may come from the old cache, from the input buffer, or both.
53 |     // Copying forwards is safe in place: slot k is written only after logical
54 |     // index offset + k (>= k) has been read, and operator[] keeps using the
55 |     // old cache_size until the loop is done.
56 |     const int remain = total_size - offset;
57 |     for (int k = 0; k < remain; k++) {
58 |         cache[k] = (*this)[offset + k];
59 |     }
60 |     cache_size = remain;
61 | }
62 | 
63 | // Logical indexing: the first cache_size indices address the cache, the rest
64 | // address the attached input buffer. No bounds checking is done.
65 | float &SpeechWrap::operator[](int i)
66 | {
67 |     return i < cache_size ? cache[i] : in[i - cache_size];
68 | }
69 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/SpeechWrap.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef SPEECHWRAP_H
 3 | #define SPEECHWRAP_H
 4 | 
 5 | #include <stdint.h>
 6 | 
 7 | class SpeechWrap { // presents cached leftover samples followed by the current input buffer as one indexable sequence
 8 |   private:
 9 |     float cache[400];    // samples carried over from earlier input by update()
10 |     int cache_size;      // number of valid entries in cache
11 |     float *in;           // input buffer set by load(); only the pointer is stored
12 |     int in_size;         // length of in, in samples
13 |     int total_size;      // cache_size + in_size, computed in load()
14 |     int next_cache_size; // not referenced anywhere in SpeechWrap.cpp
15 |     // The methods below are implemented in SpeechWrap.cpp.
16 |   public:
17 |     SpeechWrap();
18 |     ~SpeechWrap();
19 |     void load(float *din, int len); // attaches din as the current input
20 |     void update(int offset);        // keeps the samples from logical index offset onwards as the new cache
21 |     void reset();                   // empties the cache
22 |     int size();                     // returns total_size
23 |     float &operator[](int i);       // indexes the cache first, then the input buffer
24 | };
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/Tensor.h:
--------------------------------------------------------------------------------
  1 | #ifndef TENSOR_H
  2 | #define TENSOR_H
  3 | 
  4 | #include "alignedmem.h"
  5 | 
  6 | using namespace std;
  7 | 
  8 | // Owning array of T with up to four dimensions, stored contiguously in a
  9 | // 32-byte-aligned buffer.
 10 | //   size[]    - the four extents; constructors taking fewer arguments set the
 11 | //               leading extents to 1.
 12 | //   buff_size - number of elements in use (the product of size[] after
 13 | //               construction or resize()).
 14 | // The buffer is held through a raw pointer and no copy constructor or copy
 15 | // assignment is declared, so a Tensor must not be copied by value (both copies
 16 | // would free the same buffer). Pass tensors by pointer or reference.
 17 | template <typename T> class Tensor {
 18 |   private:
 19 |     void alloc_buff();
 20 |     void free_buff();
 21 |     bool reserve(int count);
 22 |     int mem_size; // capacity of buff, in elements
 23 | 
 24 |   public:
 25 |     T *buff;
 26 |     int size[4];
 27 |     int buff_size;
 28 |     Tensor(Tensor<T> *in);
 29 |     Tensor(int a);
 30 |     Tensor(int a, int b);
 31 |     Tensor(int a, int b, int c);
 32 |     Tensor(int a, int b, int c, int d);
 33 |     ~Tensor();
 34 |     void zeros();
 35 |     void shape();
 36 |     void disp();
 37 |     void dump(const char *mode);
 38 |     void concat(Tensor<T> *din, int dim);
 39 |     void resize(int a, int b, int c, int d);
 40 |     void add(float coe, Tensor<T> *in);
 41 |     void add(Tensor<T> *in);
 42 |     void add(Tensor<T> *in1, Tensor<T> *in2);
 43 |     void reload(Tensor<T> *in);
 44 | };
 45 | 
 46 | // The constructors below allocate an uninitialised buffer of the given shape.
 47 | template <typename T> Tensor<T>::Tensor(int a) : size{1, 1, 1, a}
 48 | {
 49 |     alloc_buff();
 50 | }
 51 | 
 52 | template <typename T> Tensor<T>::Tensor(int a, int b) : size{1, 1, a, b}
 53 | {
 54 |     alloc_buff();
 55 | }
 56 | 
 57 | template <typename T> Tensor<T>::Tensor(int a, int b, int c) : size{1, a, b, c}
 58 | {
 59 |     alloc_buff();
 60 | }
 61 | 
 62 | template <typename T>
 63 | Tensor<T>::Tensor(int a, int b, int c, int d) : size{a, b, c, d}
 64 | {
 65 |     alloc_buff();
 66 | }
 67 | 
 68 | // Creates a tensor with the same shape as *in and copies its elements.
 69 | template <typename T> Tensor<T>::Tensor(Tensor<T> *in)
 70 | {
 71 |     memcpy(size, in->size, 4 * sizeof(int));
 72 |     alloc_buff();
 73 |     memcpy(buff, in->buff, in->buff_size * sizeof(T));
 74 | }
 75 | 
 76 | template <typename T> Tensor<T>::~Tensor()
 77 | {
 78 |     free_buff();
 79 | }
 80 | 
 81 | // Allocates room for size[0]*size[1]*size[2]*size[3] elements. If the
 82 | // allocation fails buff is null and the recorded capacity is 0.
 83 | template <typename T> void Tensor<T>::alloc_buff()
 84 | {
 85 |     buff_size = size[0] * size[1] * size[2] * size[3];
 86 |     buff = (T *)aligned_malloc(32, buff_size * sizeof(T));
 87 |     mem_size = (buff != nullptr) ? buff_size : 0;
 88 | }
 89 | 
 90 | template <typename T> void Tensor<T>::free_buff()
 91 | {
 92 |     // A failed allocation leaves buff null; there is nothing to release then.
 93 |     if (buff != nullptr) {
 94 |         aligned_free(buff);
 95 |         buff = nullptr;
 96 |     }
 97 |     mem_size = 0;
 98 | }
 99 | 
100 | // Makes sure the buffer can hold at least `count` elements, moving the
101 | // elements currently in use into a larger buffer when necessary.
102 | // Returns false (leaving the tensor untouched) if the allocation fails.
103 | template <typename T> bool Tensor<T>::reserve(int count)
104 | {
105 |     if (count <= mem_size) {
106 |         return true;
107 |     }
108 |     T *grown = (T *)aligned_malloc(32, count * sizeof(T));
109 |     if (grown == nullptr) {
110 |         return false;
111 |     }
112 |     if (buff != nullptr) {
113 |         // buff_size is public; never copy more than was really allocated.
114 |         const int used = buff_size < mem_size ? buff_size : mem_size;
115 |         if (used > 0) {
116 |             memcpy(grown, buff, used * sizeof(T));
117 |         }
118 |         aligned_free(buff);
119 |     }
120 |     buff = grown;
121 |     mem_size = count;
122 |     return true;
123 | }
124 | 
125 | template <typename T> void Tensor<T>::zeros()
126 | {
127 |     memset(buff, 0, buff_size * sizeof(T));
128 | }
129 | 
130 | // Prints the four extents to stdout.
131 | template <typename T> void Tensor<T>::shape()
132 | {
133 |     printf("(%d,%d,%d,%d)\n", size[0], size[1], size[2], size[3]);
134 | }
135 | 
136 | // Appends the elements of *din after the elements in use and adds din's extent
137 | // along `dim` to this tensor's. The buffer is grown first when it is too
138 | // small. The data is appended flat; assuming the usual last-dimension-fastest
139 | // layout, that equals a concatenation along `dim` only when every dimension
140 | // before `dim` has extent 1. If growing fails the tensor is left unchanged.
141 | template <typename T> void Tensor<T>::concat(Tensor<T> *din, int dim)
142 | {
143 |     const int joined = buff_size + din->buff_size;
144 |     if (!reserve(joined)) {
145 |         return;
146 |     }
147 |     memcpy(buff + buff_size, din->buff, din->buff_size * sizeof(T));
148 |     buff_size = joined;
149 |     size[dim] += din->size[dim];
150 | }
151 | 
152 | // Changes the shape to (a, b, c, d). Existing elements are kept in place; when
153 | // the new shape needs more elements than are allocated the buffer is grown and
154 | // the added elements are uninitialised. If growing fails the shape is kept.
155 | template <typename T> void Tensor<T>::resize(int a, int b, int c, int d)
156 | {
157 |     const int count = a * b * c * d;
158 |     if (!reserve(count)) {
159 |         return;
160 |     }
161 |     size[0] = a;
162 |     size[1] = b;
163 |     size[2] = c;
164 |     size[3] = d;
165 |     buff_size = count;
166 | }
167 | 
168 | // this[i] += coe * in[i] over the elements in use; *in must be at least as large.
169 | template <typename T> void Tensor<T>::add(float coe, Tensor<T> *in)
170 | {
171 |     for (int i = 0; i < buff_size; i++) {
172 |         buff[i] = buff[i] + coe * in->buff[i];
173 |     }
174 | }
175 | 
176 | // this[i] += in[i] over the elements in use; *in must be at least as large.
177 | template <typename T> void Tensor<T>::add(Tensor<T> *in)
178 | {
179 |     for (int i = 0; i < buff_size; i++) {
180 |         buff[i] = buff[i] + in->buff[i];
181 |     }
182 | }
183 | 
184 | // this[i] += in1[i] + in2[i] over the elements in use.
185 | template <typename T> void Tensor<T>::add(Tensor<T> *in1, Tensor<T> *in2)
186 | {
187 |     for (int i = 0; i < buff_size; i++) {
188 |         buff[i] = buff[i] + in1->buff[i] + in2->buff[i];
189 |     }
190 | }
191 | 
192 | // Copies all elements of *in to the start of this buffer, growing the buffer
193 | // first if *in holds more elements than fit. Shape and buff_size are unchanged.
194 | template <typename T> void Tensor<T>::reload(Tensor<T> *in)
195 | {
196 |     if (!reserve(in->buff_size)) {
197 |         return;
198 |     }
199 |     memcpy(buff, in->buff, in->buff_size * sizeof(T));
200 | }
201 | 
202 | // Prints every element in use to stdout, separated by spaces.
203 | template <typename T> void Tensor<T>::disp()
204 | {
205 |     for (int i = 0; i < buff_size; i++) {
206 |         cout << buff[i] << " ";
207 |     }
208 |     cout << endl;
209 | }
210 | 
211 | // Writes the raw bytes of the elements in use to "tmp.bin", opened with the
212 | // given fopen mode. Does nothing if the file cannot be opened.
213 | template <typename T> void Tensor<T>::dump(const char *mode)
214 | {
215 |     FILE *fp = fopen("tmp.bin", mode);
216 |     if (fp == nullptr) {
217 |         return;
218 |     }
219 |     fwrite(buff, 1, buff_size * sizeof(T), fp);
220 |     fclose(fp);
221 | }
222 | #endif
223 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/Vocab.cpp:
--------------------------------------------------------------------------------
  1 | #include "Vocab.h"
  2 | 
  3 | #include <fstream>
  4 | #include <iostream>
  5 | #include <list>
  6 | #include <sstream>
  7 | #include <string>
  8 | 
  9 | using namespace std;
 10 | 
 11 | // Loads the vocabulary: entry i is line i (0-based) of the file, without the
 12 | // trailing newline. If the file cannot be opened the vocabulary stays empty.
 13 | Vocab::Vocab(const char *filename)
 14 | {
 15 |     ifstream in(filename);
 16 |     string line;
 17 |     while (getline(in, line)) {
 18 |         vocab.push_back(line);
 19 |     }
 20 | }
 21 | 
 22 | Vocab::~Vocab()
 23 | {
 24 | }
 25 | 
 26 | // Concatenates the vocabulary entries selected by `in`, in order. Ids outside
 27 | // the vocabulary (for example when the vocabulary file failed to load) are skipped.
 28 | string Vocab::vector2string(vector<int> in)
 29 | {
 30 |     stringstream ss;
 31 |     for (auto it = in.begin(); it != in.end(); it++) {
 32 |         if (*it < 0 || *it >= (int)vocab.size()) {
 33 |             continue;
 34 |         }
 35 |         ss << vocab[*it];
 36 |     }
 37 |     return ss.str();
 38 | }
 39 | 
 40 | // Decodes the first three bytes of str as one 3-byte UTF-8 sequence and returns
 41 | // its code point, or 0 if they do not form such a sequence. Shorter strings are
 42 | // safe: the terminating '\0' fails the bit tests before any byte past it is read.
 43 | int str2int(string str)
 44 | {
 45 |     const char *ch_array = str.c_str();
 46 |     if (((ch_array[0] & 0xf0) != 0xe0) || ((ch_array[1] & 0xc0) != 0x80) ||
 47 |         ((ch_array[2] & 0xc0) != 0x80))
 48 |         return 0;
 49 | 
 50 |     int val = ((ch_array[0] & 0x0f) << 12) | ((ch_array[1] & 0x3f) << 6) |
 51 |               (ch_array[2] & 0x3f);
 52 |     return val;
 53 | }
 54 | 
 55 | // True when ch is exactly one 3-byte UTF-8 character in U+4E00..U+9FFF
 56 | // (19968..40959), the CJK Unified Ideographs block.
 57 | bool Vocab::isChinese(string ch)
 58 | {
 59 |     if (ch.size() != 3) {
 60 |         return false;
 61 |     }
 62 | 
 63 |     int unicode = str2int(ch);
 64 |     return unicode >= 19968 && unicode <= 40959;
 65 | }
 66 | 
 67 | // Upper-cases the first character of word if it is an ASCII lower-case letter.
 68 | // Anything else (digit, punctuation, non-ASCII byte, empty string) is left alone.
 69 | static void upper_first(string &word)
 70 | {
 71 |     if (!word.empty() && word[0] >= 'a' && word[0] <= 'z') {
 72 |         word[0] = word[0] - 32;
 73 |     }
 74 | }
 75 | 
 76 | // Turns a sequence of token ids into display text.
 77 | //  1. Ids outside the vocabulary and the tokens <s>, </s>, <unk> are skipped.
 78 | //  2. Tokens containing "@@" are word pieces: their last two characters are
 79 | //     removed and they are joined with the following tokens until one without
 80 | //     "@@" completes the word. A word still incomplete when the input ends
 81 | //     is emitted as it stands.
 82 | //  3. Chinese characters are appended as they are. Other words get their first
 83 | //     letter upper-cased when they follow a Chinese character (or start the
 84 | //     text) or when they are a single letter; a space separates two
 85 | //     consecutive non-Chinese words unless both are single characters.
 86 | string Vocab::vector2stringV2(vector<int> in)
 87 | {
 88 |     string text;                 // decoded output
 89 |     string pending;              // word pieces collected so far
 90 |     bool prev_english = false;   // was the last emitted word non-Chinese?
 91 |     size_t prev_english_len = 0; // length in bytes of that word
 92 | 
 93 |     // Step 3 for one complete word.
 94 |     auto emit = [&](string word) {
 95 |         if (word.empty()) {
 96 |             return;
 97 |         }
 98 |         if (isChinese(word)) {
 99 |             text += word;
100 |             prev_english = false;
101 |             return;
102 |         }
103 |         if (!prev_english) {
104 |             upper_first(word);
105 |         } else {
106 |             if (word.size() == 1) {
107 |                 upper_first(word);
108 |             }
109 |             if (prev_english_len > 1 || word.size() > 1) {
110 |                 text += " ";
111 |             }
112 |         }
113 |         text += word;
114 |         prev_english_len = word.size();
115 |         prev_english = true;
116 |     };
117 | 
118 |     for (auto it = in.begin(); it != in.end(); ++it) {
119 |         if (*it < 0 || *it >= (int)vocab.size()) {
120 |             continue;
121 |         }
122 |         string word = vocab[*it];
123 | 
124 |         // step 1: special tokens produce no text
125 |         if (word == "<s>" || word == "</s>" || word == "<unk>") {
126 |             continue;
127 |         }
128 | 
129 |         // step 2: collect word pieces until the word is complete
130 |         if (word.find("@@") != string::npos) {
131 |             pending += word.erase(word.length() - 2);
132 |             continue;
133 |         }
134 |         if (!pending.empty()) {
135 |             word = pending + word;
136 |             pending.clear();
137 |         }
138 | 
139 |         emit(word);
140 |     }
141 | 
142 |     // Do not lose a word whose final piece never arrived.
143 |     emit(pending);
144 | 
145 |     return text;
146 | }
147 | 
148 | // Number of entries loaded from the vocabulary file.
149 | int Vocab::size()
150 | {
151 |     return vocab.size();
152 | }
153 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/Vocab.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef VOCAB_H
 3 | #define VOCAB_H
 4 | 
 5 | #include <stdint.h>
 6 | #include <string>
 7 | #include <vector>
 8 | using namespace std;
 9 | 
10 | class Vocab { // token table loaded from a text file, plus id-sequence to text conversion
11 |   private:
12 |     vector<string> vocab;      // one entry per line of the file given to the constructor
13 |     bool isChinese(string ch); // true for a single 3-byte UTF-8 character with code point 19968..40959
14 |     bool isEnglish(string ch); // NOTE(review): declared here, but Vocab.cpp contains no definition for it
15 |     // The methods below are implemented in Vocab.cpp.
16 |   public:
17 |     Vocab(const char *filename);
18 |     ~Vocab();
19 |     int size();                             // number of vocabulary entries
20 |     string vector2string(vector<int> in);   // plain concatenation of the selected entries
21 |     string vector2stringV2(vector<int> in); // concatenation with special tokens dropped, "@@" pieces joined, spacing and capitalisation applied
22 | };
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/alignedmem.cpp:
--------------------------------------------------------------------------------
 1 | #include "precomp.h"
 2 | // Allocates required_bytes bytes at an address that is a multiple of
 3 | // alignment. alignment must be a power of two, because the address is rounded
 4 | // with a bit mask. Returns NULL if alignment is 0, if the request would
 5 | // overflow size_t, or if malloc fails. Release the block with aligned_free(),
 6 | // never with free().
 7 | void *aligned_malloc(size_t alignment, size_t required_bytes)
 8 | {
 9 |     if (alignment == 0) {
10 |         return NULL;
11 |     }
12 |     // Extra room for the worst-case alignment shift plus one pointer slot that
13 |     // records where the underlying malloc block starts.
14 |     const size_t offset = alignment - 1 + sizeof(void *);
15 |     if (required_bytes > (size_t)-1 - offset) {
16 |         return NULL;
17 |     }
18 | 
19 |     void *raw = malloc(required_bytes + offset);
20 |     if (raw == NULL) {
21 |         return NULL;
22 |     }
23 |     void **aligned = (void **)(((size_t)raw + offset) & ~(alignment - 1));
24 |     aligned[-1] = raw; // read back by aligned_free()
25 |     return aligned;
26 | }
27 | 
28 | // Releases a block obtained from aligned_malloc(). Passing NULL is allowed and
29 | // does nothing, matching free().
30 | void aligned_free(void *p)
31 | {
32 |     if (p == NULL) {
33 |         return;
34 |     }
35 |     free(((void **)p)[-1]);
36 | }
37 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/alignedmem.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef ALIGNEDMEM_H
 3 | #define ALIGNEDMEM_H
 4 | 
 5 | 
 6 | 
 7 | extern void *aligned_malloc(size_t alignment, size_t required_bytes);
 8 | extern void aligned_free(void *p);
 9 | 
10 | #endif
11 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/commonfunc.h:
--------------------------------------------------------------------------------
 1 | #pragma once 
 2 | 
 3 | 
 4 | typedef struct // result object handed out by the RapidAsrRecog* functions
 5 | {
 6 |     std::string msg;       // recognised text; the API appends the text of each audio segment in turn
 7 |     float  snippet_time;   // value of Audio::get_time_len() for the input; NOTE(review): unit not visible here
 8 | }RPASR_RECOG_RESULT;
 9 | 
10 | 
11 | #ifdef _WIN32
12 | #include <codecvt>
13 | 
14 | 
15 | 
16 | inline std::wstring string2wstring(const std::string& str, const std::string& locale) // converts str to wide characters using the codecvt facet of the named locale
17 | {
18 |     typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> F;
19 |     std::wstring_convert<F> strCnv(new F(locale)); // the converter takes ownership of the facet and deletes it
20 |     return strCnv.from_bytes(str); // throws std::range_error if str cannot be converted
21 | }
22 | 
23 | inline std::wstring  strToWstr(std::string str) { // wide-character copy of str; an empty input gives an empty result
24 |     if (str.length() == 0)
25 |         return L"";
26 |     return  string2wstring(str, "zh-CN");
27 |     // NOTE(review): the locale is fixed to "zh-CN"; text in another encoding may convert wrongly or throw - confirm.
28 | }
29 | 
30 | #endif
31 | 
32 | 
33 | 
34 | inline void getInputName(Ort::Session* session, string& inputName,int nIndex=0) { // stores the name of input nIndex of the session's model in inputName
35 |     size_t numInputNodes = session->GetInputCount();
36 |     if (numInputNodes > 0) { // only guards against a model without inputs; nIndex itself is not range-checked
37 |         Ort::AllocatorWithDefaultOptions allocator;
38 |         {
39 |             auto t = session->GetInputNameAllocated(nIndex, allocator);
40 |             inputName = t.get();
41 |             // t owns the name only inside this scope, so it is copied into inputName before t goes away.
42 |         }
43 |     }
44 | }
45 | 
46 | inline void getOutputName(Ort::Session* session, string& outputName, int nIndex = 0) { // stores the name of output nIndex of the session's model in outputName
47 |     size_t numOutputNodes = session->GetOutputCount();
48 |     if (numOutputNodes > 0) { // only guards against a model without outputs; nIndex itself is not range-checked
49 |         Ort::AllocatorWithDefaultOptions allocator;
50 |         {
51 |             auto t = session->GetOutputNameAllocated(nIndex, allocator);
52 |             outputName = t.get();
53 |             // t owns the name only inside this scope, so it is copied into outputName before t goes away.
54 |         }
55 |     }
56 | }


--------------------------------------------------------------------------------
/cpp_onnx/src/librapidasrapi.cpp:
--------------------------------------------------------------------------------
  1 | #include "precomp.h"
  2 | // Shared body of the RapidAsrRecog* entry points. Splits the already loaded
  3 | // audio into segments, runs the model on each segment and concatenates the
  4 | // text. fnCallback, when non-null, is called after every segment with the
  5 | // number of segments done and the queue size reported before decoding began.
  6 | // Returns a heap-allocated result that the caller releases with
  7 | // RapidAsrFreeResult(), or nullptr if recognition threw an exception.
  8 | static RPASR_RESULT RecogLoadedAudio(Model* pRecogObj, Audio& audio, QM_CALLBACK fnCallback)
  9 | {
 10 | 	audio.split();
 11 | 
 12 | 	float* buff;
 13 | 	int len;
 14 | 	int flag = 0;
 15 | 	int nStep = 0;
 16 | 	int nTotal = audio.get_queue_size();
 17 | 	RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT;
 18 | 	pResult->snippet_time = audio.get_time_len();
 19 | 	try {
 20 | 		while (audio.fetch(buff, len, flag) > 0) {
 21 | 			pRecogObj->reset();
 22 | 			pResult->msg += pRecogObj->forward(buff, len, flag);
 23 | 			nStep++;
 24 | 			if (fnCallback)
 25 | 				fnCallback(nStep, nTotal);
 26 | 		}
 27 | 	}
 28 | 	catch (...) {
 29 | 		// C++ exceptions must not escape through the extern "C" API.
 30 | 		delete pResult;
 31 | 		return nullptr;
 32 | 	}
 33 | 	return pResult;
 34 | }
 35 | 
 36 | #ifdef __cplusplus 
 37 | 
 38 | //  void __attribute__ ((visibility ("default"))) fun();
 39 | extern "C" {
 40 | #endif
 41 | 
 42 | 	// Creates a recognizer from the files in szModelDir. Returns nullptr if the
 43 | 	// model cannot be created. Release the handle with RapidAsrUninit().
 44 | 	_RAPIDASRAPI RPASR_HANDLE  RapidAsrInit(const char* szModelDir, int nThreadNum)
 45 | 	{
 46 | 		try {
 47 | 			return create_model(szModelDir, nThreadNum);
 48 | 		}
 49 | 		catch (...) {
 50 | 			// Keep constructor failures from crossing the C boundary.
 51 | 			return nullptr;
 52 | 		}
 53 | 	}
 54 | 
 55 | 	// Recognises audio held in memory, loaded with Audio::loadwav(buffer, length).
 56 | 	// Mode is not used. Returns nullptr for a null handle.
 57 | 	// NOTE(review): the outcome of the load is not checked here - confirm whether it can fail.
 58 | 	_RAPIDASRAPI RPASR_RESULT RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback)
 59 | 	{
 60 | 		Model* pRecogObj = (Model*)handle;
 61 | 		if (!pRecogObj)
 62 | 			return nullptr;
 63 | 
 64 | 		Audio audio(1);
 65 | 		audio.loadwav(szBuf, nLen);
 66 | 		return RecogLoadedAudio(pRecogObj, audio, fnCallback);
 67 | 	}
 68 | 
 69 | 	// Same as RapidAsrRecogBuffer, but the buffer is loaded with Audio::loadpcmwav().
 70 | 	_RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback)
 71 | 	{
 72 | 		Model* pRecogObj = (Model*)handle;
 73 | 		if (!pRecogObj)
 74 | 			return nullptr;
 75 | 
 76 | 		Audio audio(1);
 77 | 		audio.loadpcmwav(szBuf, nLen);
 78 | 		return RecogLoadedAudio(pRecogObj, audio, fnCallback);
 79 | 	}
 80 | 
 81 | 	// Recognises the file szFileName, loaded with Audio::loadpcmwav(file name).
 82 | 	// Mode is not used. Returns nullptr for a null handle.
 83 | 	// NOTE(review): the outcome of the load is not checked here - confirm whether it can fail.
 84 | 	_RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback)
 85 | 	{
 86 | 		Model* pRecogObj = (Model*)handle;
 87 | 		if (!pRecogObj)
 88 | 			return nullptr;
 89 | 
 90 | 		Audio audio(1);
 91 | 		audio.loadpcmwav(szFileName);
 92 | 		return RecogLoadedAudio(pRecogObj, audio, fnCallback);
 93 | 	}
 94 | 
 95 | 	// Recognises the wav file szWavfile. Mode is not used. Returns nullptr for a
 96 | 	// null handle or when the file cannot be loaded.
 97 | 	_RAPIDASRAPI RPASR_RESULT RapidAsrRecogFile(RPASR_HANDLE handle, const char* szWavfile, RPASR_MODE Mode, QM_CALLBACK fnCallback)
 98 | 	{
 99 | 		Model* pRecogObj = (Model*)handle;
100 | 		if (!pRecogObj)
101 | 			return nullptr;
102 | 
103 | 		Audio audio(1);
104 | 		if (!audio.loadwav(szWavfile))
105 | 			return nullptr;
106 | 		return RecogLoadedAudio(pRecogObj, audio, fnCallback);
107 | 	}
108 | 
109 | 	// Number of texts held by a result: 1 for any non-null result, otherwise 0.
110 | 	_RAPIDASRAPI const int RapidAsrGetRetNumber(RPASR_RESULT Result)
111 | 	{
112 | 		if (!Result)
113 | 			return 0;
114 | 
115 | 		return 1;
116 | 	}
117 | 
118 | 	// The snippet_time stored in the result, or 0 for a null result.
119 | 	_RAPIDASRAPI const float RapidAsrGetRetSnippetTime(RPASR_RESULT Result)
120 | 	{
121 | 		if (!Result)
122 | 			return 0.0f;
123 | 
124 | 		return ((RPASR_RECOG_RESULT*)Result)->snippet_time;
125 | 	}
126 | 
127 | 	// The recognised text, or nullptr for a null result. nIndex is ignored. The
128 | 	// pointer refers to memory inside the result and is valid only until
129 | 	// RapidAsrFreeResult() is called on it.
130 | 	_RAPIDASRAPI const char* RapidAsrGetResult(RPASR_RESULT Result,int nIndex)
131 | 	{
132 | 		RPASR_RECOG_RESULT * pResult = (RPASR_RECOG_RESULT*)Result;
133 | 		if(!pResult)
134 | 			return nullptr;
135 | 
136 | 		return pResult->msg.c_str();
137 | 	}
138 | 
139 | 	// Destroys a result returned by one of the RapidAsrRecog* functions. A null
140 | 	// result is accepted.
141 | 	_RAPIDASRAPI void RapidAsrFreeResult(RPASR_RESULT Result)
142 | 	{
143 | 		if (Result)
144 | 		{
145 | 			delete (RPASR_RECOG_RESULT*)Result;
146 | 		}
147 | 	}
148 | 
149 | 	// Destroys a recognizer created by RapidAsrInit(). A null handle is accepted.
150 | 	_RAPIDASRAPI void RapidAsrUninit(RPASR_HANDLE handle)
151 | 	{
152 | 		Model* pRecogObj = (Model*)handle;
153 | 		if (!pRecogObj)
154 | 			return;
155 | 
156 | 		delete pRecogObj;
157 | 	}
158 | 
159 | #ifdef __cplusplus 
160 | 
161 | }
162 | #endif
163 | 
164 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/paraformer_onnx.cpp:
--------------------------------------------------------------------------------
  1 | #include "precomp.h"
  2 | 
  3 | using namespace std;
  4 | using namespace paraformer;
  5 | 
  6 | ModelImp::ModelImp(const char* path,int nNumThread)
  7 | {
  8 |     string model_path = pathAppend(path, "model.onnx");
  9 |     string vocab_path = pathAppend(path, "vocab.txt");
 10 |     fe = new FeatureExtract(3);
 11 |     // Intra-op threads parallelise the work inside one operator. Inter-op threads are only used in
 12 |     // parallel execution mode, so setting them alone leaves nNumThread without effect by default.
 13 |     sessionOptions.SetIntraOpNumThreads(nNumThread);
 14 |     sessionOptions.SetInterOpNumThreads(nNumThread);
 15 |     sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
 16 | #ifdef _WIN32
 17 |     wstring wstrPath = strToWstr(model_path);
 18 |     m_session = new Ort::Session(env, wstrPath.c_str(), sessionOptions);
 19 | #else
 20 |     m_session = new Ort::Session(env, model_path.c_str(), sessionOptions);
 21 | #endif
 22 |     // Remember the names of the first two inputs and outputs; the model must have at least two of each.
 23 |     string strName;
 24 |     getInputName(m_session, strName);
 25 |     m_strInputNames.push_back(strName);
 26 |     getInputName(m_session, strName,1);
 27 |     m_strInputNames.push_back(strName);
 28 | 
 29 |     getOutputName(m_session, strName);
 30 |     m_strOutputNames.push_back(strName);
 31 |     getOutputName(m_session, strName,1);
 32 |     m_strOutputNames.push_back(strName);
 33 |     // Run() takes arrays of C strings; these point into the strings stored above, which must stay unchanged.
 34 |     for (auto& item : m_strInputNames)
 35 |         m_szInputNames.push_back(item.c_str());
 36 |     for (auto& item : m_strOutputNames)
 37 |         m_szOutputNames.push_back(item.c_str());
 38 |     vocab = new Vocab(vocab_path.c_str());
 39 | }
 40 | 
 41 | ModelImp::~ModelImp()
 42 | {
 43 |     // Release the owned objects: feature extractor, ONNX Runtime session,
 44 |     // vocabulary. Deleting a null pointer does nothing, so no explicit
 45 |     // null tests are required.
 46 |     delete fe;
 47 | 
 48 |     delete m_session;
 49 |     m_session = nullptr;
 50 | 
 51 |     delete vocab;
 52 | }
 53 | 
 54 | void ModelImp::reset() // resets the owned feature extractor; nothing else is touched
 55 | {
 56 |     fe->reset();
 57 | }
 58 | 
 59 | void ModelImp::apply_lfr(Tensor<float>*& din)
 60 | {
 61 |     // Each output row stacks 7 input frames of 80 values (offsets -3..+3 around frame
 62 |     // row*6, clamped to the valid range), so one row is produced per 6 input frames.
 63 |     const int frames = din->size[2];
 64 |     const int rows = ceil(frames / 6.0);
 65 |     Tensor<float>* stacked = new Tensor<float>(rows, 560);
 66 |     float* dst = stacked->buff;
 67 |     for (int row = 0; row < rows; row++) {
 68 |         for (int tap = -3; tap <= 3; tap++) {
 69 |             int src = row * 6 + tap;
 70 |             if (src < 0)
 71 |                 src = 0;
 72 |             if (src >= frames)
 73 |                 src = frames - 1;
 74 |             memcpy(dst, din->buff + src * 80, sizeof(float) * 80);
 75 |             dst += 80;
 76 |         }
 77 |     }
 78 |     // The input tensor is consumed: it is freed and the caller's pointer now refers to the result.
 79 |     delete din;
 80 |     din = stacked;
 81 | }
 82 | 
 83 | void ModelImp::apply_cmvn(Tensor<float>* din)
 84 | {
 85 |     // Normalises every row in place: out = (in + mean[col]) * var[col].
 86 |     // NOTE(review): the tables are added/multiplied as they are, so they presumably hold the
 87 |     // negated mean and the inverse standard deviation - confirm where the tables are defined.
 88 |     const int rows = din->size[2];
 89 |     const int cols = din->size[3];
 90 |     const float* mean = (const float*)paraformer_cmvn_mean_hex;
 91 |     const float* var = (const float*)paraformer_cmvn_var_hex;
 92 | 
 93 |     float* cell = din->buff;
 94 |     for (int i = 0; i < rows; i++) {
 95 |         for (int j = 0; j < cols; j++, cell++) {
 96 |             *cell = (*cell + mean[j]) * var[j];
 97 |         }
 98 |     }
 99 | }
100 | 
101 | string ModelImp::greedy_search(float * in, int nLen )
102 | {
103 |     // For each of the nLen steps take the index of the largest of its 8404 scores, then decode the ids.
104 |     const int kScoresPerStep = 8404;
105 |     vector<int> best_ids;
106 |     for (int step = 0; step < nLen; step++) {
107 |         float best_score;
108 |         int best_id;
109 |         findmax(in + step * kScoresPerStep, kScoresPerStep, best_score, best_id);
110 |         best_ids.push_back(best_id);
111 |     }
112 |     return vocab->vector2stringV2(best_ids);
113 | }
114 | 
115 | string ModelImp::forward(float* din, int len, int flag)
116 | {
117 | 
118 |     Tensor<float>* in;
119 |     fe->insert(din, len, flag);
120 |     fe->fetch(in);
121 |     apply_lfr(in);
122 |     apply_cmvn(in);
123 |     Ort::RunOptions run_option;
124 | 
125 |     std::array<int64_t, 3> input_shape_{ in->size[0],in->size[2],in->size[3] };
126 |     Ort::Value onnx_feats = Ort::Value::CreateTensor<float>(m_memoryInfo,
127 |         in->buff,
128 |         in->buff_size,
129 |         input_shape_.data(),
130 |         input_shape_.size());
131 | 
132 |     std::vector<int32_t> feats_len{ in->size[2] };
133 |     std::vector<int64_t> feats_len_dim{ 1 };
134 |     Ort::Value onnx_feats_len = Ort::Value::CreateTensor(
135 |         m_memoryInfo,
136 |         feats_len.data(),
137 |         feats_len.size() * sizeof(int32_t),
138 |         feats_len_dim.data(),
139 |         feats_len_dim.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32);
140 |     std::vector<Ort::Value> input_onnx;
141 |     input_onnx.emplace_back(std::move(onnx_feats));
142 |     input_onnx.emplace_back(std::move(onnx_feats_len));
143 | 
144 |     string result;
145 |     try {
146 | 
147 |         auto outputTensor = m_session->Run(run_option, m_szInputNames.data(), input_onnx.data(), m_szInputNames.size(), m_szOutputNames.data(), m_szOutputNames.size());
148 |         std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
149 | 
150 | 
151 |         int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies<int64_t>());
152 |         float* floatData = outputTensor[0].GetTensorMutableData<float>();
153 |         auto encoder_out_lens = outputTensor[1].GetTensorMutableData<int64_t>();
154 |         result = greedy_search(floatData, *encoder_out_lens);
155 |     }
156 |     catch (...)
157 |     {
158 |         result = "";
159 |     }
160 | 
161 | 
162 |     if(in)
163 |         delete in;
164 | 
165 |     return result;
166 | }
167 | 
// Placeholder: chunked decoding is not implemented for this model.
// Ignores all arguments, prints a notice to stdout and returns the fixed
// string "Hello".
string ModelImp::forward_chunk(float* din, int len, int flag)
{

    printf("Not Imp!!!!!!\n");
    return "Hello";
}
174 | 
// Placeholder: rescoring is not implemented for this model.
// Prints a notice to stdout and returns the fixed string "Hello".
string ModelImp::rescoring()
{
    printf("Not Imp!!!!!!\n");
    return "Hello";
}
180 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/paraformer_onnx.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | 
 4 | #ifndef PARAFORMER_MODELIMP_H
 5 | #define PARAFORMER_MODELIMP_H
 6 | 
 7 | 
 8 | 
 9 | 
10 | 
11 | namespace paraformer {
12 | 
13 |     class ModelImp : public Model {
14 |     private:
15 |         FeatureExtract* fe;
16 | 
17 |         Vocab* vocab;
18 | 
19 |         void apply_lfr(Tensor<float>*& din);
20 |         void apply_cmvn(Tensor<float>* din);
21 | 
22 |         
23 |         string greedy_search( float* in, int nLen);
24 | 
25 | #ifdef _WIN_X86
26 |         Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
27 | #else
28 |         Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
29 | #endif
30 | 
31 |         Ort::Session* m_session = nullptr;
32 |         Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "paraformer");
33 |         Ort::SessionOptions sessionOptions = Ort::SessionOptions();
34 | 
35 |         vector<string> m_strInputNames, m_strOutputNames;
36 |         vector<const char*> m_szInputNames;
37 |         vector<const char*> m_szOutputNames;
38 |         //string m_strInputName, m_strInputNameLen;
39 |         //string m_strOutputName, m_strOutputNameLen;
40 | 
41 |     public:
42 |         ModelImp(const char* path, int nNumThread=0);
43 |         ~ModelImp();
44 |         void reset();
45 |         string forward_chunk(float* din, int len, int flag);
46 |         string forward(float* din, int len, int flag);
47 |         string rescoring();
48 | 
49 |     };
50 | 
51 | } // namespace paraformer
52 | #endif
53 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/precomp.h:
--------------------------------------------------------------------------------
 1 | #pragma once 
 2 | // system 
 3 | 
 4 | #include <iostream>
 5 | #include <stdint.h>
 6 | #include <stdlib.h>
 7 | #include <string.h>
 8 | #include <stdio.h>
 9 | #include <deque>
10 | #include <iostream>
11 | #include <list>
12 | #include <locale.h>
13 | #include <vector>
14 | #include <string>
15 | #include <math.h>
16 | #include <numeric>
17 | 
18 | 
19 | #include <cstring>
20 | 
21 | using namespace std;
22 | // third party
23 | 
24 | #include <fftw3.h>
25 | #include "onnxruntime_run_options_config_keys.h"
26 | #include "onnxruntime_cxx_api.h"
27 | 
28 | 
29 | // mine
30 | 
31 | #include "commonfunc.h"
32 | #include <ComDefine.h>
33 | #include "predefine_coe.h"
34 | 
35 | #include <ComDefine.h>
36 | //#include "alignedmem.h"
37 | #include "Vocab.h"
38 | #include "Tensor.h"
39 | #include "util.h"
40 | #include "CommonStruct.h"
41 | #include "FeatureExtract.h"
42 | #include "FeatureQueue.h"
43 | #include "SpeechWrap.h"
44 | #include <Audio.h>
45 | #include "Model.h"
46 | #include "paraformer_onnx.h"
47 | #include "librapidasrapi.h"
48 | 
49 | 
50 | using namespace paraformer;
51 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/tmp.h:
--------------------------------------------------------------------------------
  1 | 
  2 | #ifndef WENETPARAMS_H
  3 | #define WENETPARAMS_H
  4 | // #pragma pack(1)
  5 | 
  6 | #define vocab_size 5538
  7 | 
// Flat float storage for three weight/bias pairs named conv0, conv1 and out0.
// All array sizes are fixed at compile time.
// NOTE(review): presumably the parameters of an encoder input-embedding stage
// (two convolutions followed by a linear projection) - confirm with the code
// that fills this struct.
typedef struct {
    float conv0_weight[512 * 9];
    float conv0_bias[512];

    float conv1_weight[512 * 512 * 9];
    float conv1_bias[512];

    float out0_weight[9728 * 512];
    float out0_bias[512];

} EncEmbedParams;
 19 | 
// Four 512x512 weight matrices with 512-element biases, named q, k, v and out.
// NOTE(review): presumably the query/key/value/output projections of one
// attention layer - confirm.
typedef struct {
    float linear_q_weight[512 * 512];
    float linear_q_bias[512];
    float linear_k_weight[512 * 512];
    float linear_k_bias[512];
    float linear_v_weight[512 * 512];
    float linear_v_bias[512];
    float linear_out_weight[512 * 512];
    float linear_out_bias[512];
} SelfAttnParams;
 30 | 
// A SelfAttnParams block extended with a 512x512 positional weight matrix and
// two 512-element bias vectors (pos_bias_u, pos_bias_v).
// NOTE(review): looks like relative-position attention parameters - confirm.
typedef struct {
    SelfAttnParams linear0;
    float linear_pos_weight[512 * 512];
    float pos_bias_u[512];
    float pos_bias_v[512];

} EncSelfAttnParams;
 38 | 
// Two weight/bias pairs: w1 (512 * 2048 weights, 2048 biases) and
// w2 (2048 * 512 weights, 512 biases).
// NOTE(review): presumably a 512 -> 2048 -> 512 feed-forward block - confirm.
typedef struct {
    float w1_weight[512 * 2048];
    float w1_bias[2048];
    float w2_weight[2048 * 512];
    float w2_bias[512];
} FeedForwardParams;
 45 | 
// A 512-element weight vector and a 512-element bias vector.
// NOTE(review): presumably the scale and shift of a normalisation layer -
// confirm.
typedef struct {
    float weight[512];
    float bias[512];
} NormParams;
 50 | 
// Weights and biases for pointwise_conv1 (1024 * 512), depthwise_conv
// (512 * 15) and pointwise_conv2 (512 * 512), followed by one NormParams.
// NOTE(review): presumably the convolution module of an encoder layer -
// confirm.
typedef struct {
    float pointwise_conv1_weight[1024 * 512];
    float pointwise_conv1_bias[1024];

    float depthwise_conv_weight[512 * 15];
    float depthwise_conv_bias[512];

    float pointwise_conv2_weight[512 * 512];
    float pointwise_conv2_bias[512];
    NormParams norm;
} EncConvParams;
 62 | 
// Parameters of one encoder layer: an attention block, two feed-forward
// blocks (feedforward and feedforward_macaron), a convolution module and five
// NormParams sets.
typedef struct {
    EncSelfAttnParams self_attn;
    FeedForwardParams feedforward;
    FeedForwardParams feedforward_macaron;
    EncConvParams conv_module;
    NormParams norm_ff;
    NormParams norm_mha;
    NormParams norm_macaron;
    NormParams norm_conv;
    NormParams norm_final;
    // float concat_weight[1024 * 512];
    // float concat_bias[512];
} SubEncoderParams;
 76 | 
// Complete encoder parameter set: the embedding block, 12 SubEncoderParams
// layers and a trailing NormParams.
typedef struct {
    EncEmbedParams embed;
    SubEncoderParams sub_encoder[12];
    NormParams after_norm;
} EncoderParams;
 82 | 
// Parameters of one decoder layer: two attention blocks (self_attn and
// src_attn), one feed-forward block and three NormParams sets.
typedef struct {
    SelfAttnParams self_attn;
    SelfAttnParams src_attn;
    FeedForwardParams feedward;
    NormParams norm1;
    NormParams norm2;
    NormParams norm3;
    // float concat_weight1[1024 * 512];
    // float concat_bias1[512];
    // float concat_weight2[1024 * 512];
    // float concat_bias2[512];
} SubDecoderParams;
 95 | 
// Complete decoder parameter set: an embedding table of vocab_size * 512
// floats, 6 SubDecoderParams layers, a trailing NormParams and an output
// layer of vocab_size * 512 weights with vocab_size biases.
typedef struct {
    float embed_weight[vocab_size * 512];
    SubDecoderParams sub_decoder[6];
    NormParams after_norm;
    float output_weight[vocab_size * 512];
    float output_bias[vocab_size];
} DecoderParams;
103 | 
// Top-level parameter layout: the encoder, a 512 * vocab_size weight matrix
// with vocab_size biases named ctc, and the decoder.
// NOTE(review): the member order presumably mirrors the byte layout of a
// serialized parameter file - confirm before reordering anything.
typedef struct {
    EncoderParams encoder;
    float ctc_weight[512 * vocab_size];
    float ctc_bias[vocab_size];
    DecoderParams decoder;
} WenetParams;
110 | 
111 | // #pragma pack()
112 | #endif
113 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/util.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include "precomp.h"
  3 | 
  4 | float *loadparams(const char *filename)
  5 | {
  6 | 
  7 |     FILE *fp;
  8 |     fp = fopen(filename, "rb");
  9 |     fseek(fp, 0, SEEK_END);
 10 |     uint32_t nFileLen = ftell(fp);
 11 |     fseek(fp, 0, SEEK_SET);
 12 | 
 13 |     float *params_addr = (float *)aligned_malloc(32, nFileLen);
 14 |     int n = fread(params_addr, 1, nFileLen, fp);
 15 |     fclose(fp);
 16 | 
 17 |     return params_addr;
 18 | }
 19 | 
 20 | int val_align(int val, int align)
 21 | {
 22 |     float tmp = ceil((float)val / (float)align) * (float)align;
 23 |     return (int)tmp;
 24 | }
 25 | 
 26 | void disp_params(float *din, int size)
 27 | {
 28 |     int i;
 29 |     for (i = 0; i < size; i++) {
 30 |         printf("%f ", din[i]);
 31 |     }
 32 |     printf("\n");
 33 | }
 34 | void SaveDataFile(const char *filename, void *data, uint32_t len)
 35 | {
 36 |     FILE *fp;
 37 |     fp = fopen(filename, "wb+");
 38 |     fwrite(data, 1, len, fp);
 39 |     fclose(fp);
 40 | }
 41 | 
 42 | void basic_norm(Tensor<float> *&din, float norm)
 43 | {
 44 | 
 45 |     int Tmax = din->size[2];
 46 | 
 47 |     int i, j;
 48 |     for (i = 0; i < Tmax; i++) {
 49 |         float sum = 0;
 50 |         for (j = 0; j < 512; j++) {
 51 |             int ii = i * 512 + j;
 52 |             sum += din->buff[ii] * din->buff[ii];
 53 |         }
 54 |         float mean = sqrt(sum / 512 + norm);
 55 |         for (j = 0; j < 512; j++) {
 56 |             int ii = i * 512 + j;
 57 |             din->buff[ii] = din->buff[ii] / mean;
 58 |         }
 59 |     }
 60 | }
 61 | 
 62 | void findmax(float *din, int len, float &max_val, int &max_idx)
 63 | {
 64 |     int i;
 65 |     max_val = -INFINITY;
 66 |     max_idx = -1;
 67 |     for (i = 0; i < len; i++) {
 68 |         if (din[i] > max_val) {
 69 |             max_val = din[i];
 70 |             max_idx = i;
 71 |         }
 72 |     }
 73 | }
 74 | 
 75 | string pathAppend(const string &p1, const string &p2)
 76 | {
 77 | 
 78 |     char sep = '/';
 79 |     string tmp = p1;
 80 | 
 81 | #ifdef _WIN32
 82 |     sep = '\\';
 83 | #endif
 84 | 
 85 |     if (p1[p1.length()-1] != sep) { // Need to add a
 86 |         tmp += sep;               // path separator
 87 |         return (tmp + p2);
 88 |     } else
 89 |         return (p1 + p2);
 90 | }
 91 | 
 92 | void relu(Tensor<float> *din)
 93 | {
 94 |     int i;
 95 |     for (i = 0; i < din->buff_size; i++) {
 96 |         float val = din->buff[i];
 97 |         din->buff[i] = val < 0 ? 0 : val;
 98 |     }
 99 | }
100 | 
101 | void swish(Tensor<float> *din)
102 | {
103 |     int i;
104 |     for (i = 0; i < din->buff_size; i++) {
105 |         float val = din->buff[i];
106 |         din->buff[i] = val / (1 + exp(-val));
107 |     }
108 | }
109 | 
110 | void sigmoid(Tensor<float> *din)
111 | {
112 |     int i;
113 |     for (i = 0; i < din->buff_size; i++) {
114 |         float val = din->buff[i];
115 |         din->buff[i] = 1 / (1 + exp(-val));
116 |     }
117 | }
118 | 
119 | void doubleswish(Tensor<float> *din)
120 | {
121 |     int i;
122 |     for (i = 0; i < din->buff_size; i++) {
123 |         float val = din->buff[i];
124 |         din->buff[i] = val / (1 + exp(-val + 1));
125 |     }
126 | }
127 | 
// Masked softmax, computed in place.
//
// din:  buffer of at least max(mask, len) floats.
// mask: number of leading elements that take part in the softmax; a negative
//       value is treated as 0.
// len:  total number of elements; elements in [mask, len) are set to 0.
//
// The maximum of the active elements is subtracted before exponentiation for
// numerical stability. The exponentials are stored directly in `din`, so no
// temporary buffer (and no allocation that could fail) is needed.
void softmax(float *din, int mask, int len)
{
    int i;
    float sum = 0;
    float max = -INFINITY;

    // A negative mask would otherwise make the zero-fill below start before
    // the beginning of the buffer.
    if (mask < 0)
        mask = 0;

    for (i = 0; i < mask; i++) {
        max = max < din[i] ? din[i] : max;
    }

    for (i = 0; i < mask; i++) {
        din[i] = exp(din[i] - max);
        sum += din[i];
    }
    for (i = 0; i < mask; i++) {
        din[i] = din[i] / sum;
    }

    for (i = mask; i < len; i++) {
        din[i] = 0;
    }
}
151 | 
// Log-softmax over the first `len` elements of `din`, computed in place:
//     din[i] <- din[i] - max - log(sum_j exp(din[j] - max))
//
// Subtracting the maximum first keeps exp() from overflowing for large
// inputs (the direct form exp(x) / sum(exp(x)) turns into inf / inf = NaN
// once an input exceeds roughly 88). The value is mathematically the same;
// the last bits may differ from the direct form. No temporary buffer is
// allocated. Does nothing when len <= 0.
void log_softmax(float *din, int len)
{
    int i;
    if (len <= 0)
        return;

    float max = din[0];
    for (i = 1; i < len; i++) {
        max = max < din[i] ? din[i] : max;
    }

    float sum = 0;
    for (i = 0; i < len; i++) {
        sum += exp(din[i] - max);
    }

    float log_sum = log(sum);
    for (i = 0; i < len; i++) {
        din[i] = din[i] - max - log_sum;
    }
}
166 | 
167 | void glu(Tensor<float> *din, Tensor<float> *dout)
168 | {
169 |     int mm = din->buff_size / 1024;
170 |     int i, j;
171 |     for (i = 0; i < mm; i++) {
172 |         for (j = 0; j < 512; j++) {
173 |             int in_off = i * 1024 + j;
174 |             int out_off = i * 512 + j;
175 |             float a = din->buff[in_off];
176 |             float b = din->buff[in_off + 512];
177 |             dout->buff[out_off] = a / (1 + exp(-b));
178 |         }
179 |     }
180 | }
181 | 


--------------------------------------------------------------------------------
/cpp_onnx/src/util.h:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #ifndef UTIL_H
 4 | #define UTIL_H
 5 | 
 6 | using namespace std;
 7 | 
 8 | extern float *loadparams(const char *filename);
 9 | 
10 | extern void SaveDataFile(const char *filename, void *data, uint32_t len);
11 | extern void relu(Tensor<float> *din);
12 | extern void swish(Tensor<float> *din);
13 | extern void sigmoid(Tensor<float> *din);
14 | extern void doubleswish(Tensor<float> *din);
15 | 
16 | extern void softmax(float *din, int mask, int len);
17 | 
18 | extern void log_softmax(float *din, int len);
19 | extern int val_align(int val, int align);
20 | extern void disp_params(float *din, int size);
21 | 
22 | extern void basic_norm(Tensor<float> *&din, float norm);
23 | 
24 | extern void findmax(float *din, int len, float &max_val, int &max_idx);
25 | 
26 | extern void glu(Tensor<float> *din, Tensor<float> *dout);
27 | 
28 | string pathAppend(const string &p1, const string &p2);
29 | 
30 | #endif
31 | 


--------------------------------------------------------------------------------
/cpp_onnx/tester/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | 
# Build script for the "tester" command-line executable.

# On Windows, add the library search directory that matches the target
# architecture: win/lib/x64 when CMAKE_CL_64 is set, win/lib/x86 otherwise.
if(WIN32)
    if(CMAKE_CL_64)
        link_directories( ${CMAKE_SOURCE_DIR}/win/lib/x64 )
    else()
        link_directories( ${CMAKE_SOURCE_DIR}/win/lib/x86 )
    endif()
endif()

# Libraries the executable is linked against.
set(EXTRA_LIBS rapidasr)


# Header search path: the project's top-level include directory.
include_directories(${CMAKE_SOURCE_DIR}/include)
set(EXECNAME "tester")

# The executable is built from the single source file tester.cpp.
add_executable(${EXECNAME} "tester.cpp")
target_link_libraries(${EXECNAME} PUBLIC ${EXTRA_LIBS})
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/cpp_onnx/tester/tester.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef _WIN32
 3 | #include <sys/time.h>
 4 | #else
 5 | #include <win_func.h>
 6 | #endif
 7 | 
 8 | #include "librapidasrapi.h"
 9 | 
10 | #include <iostream>
11 | 
12 | using namespace std;
13 | 
14 | int main(int argc, char *argv[])
15 | {
16 | 
17 |     if (argc < 2)
18 |     {
19 |         printf("Usage: %s /path/to/model_dir /path/to/wav/file", argv[0]);
20 |         exit(-1);
21 |     }
22 |     struct timeval start, end;
23 |     gettimeofday(&start, NULL);
24 |     int nThreadNum = 4;
25 |     RPASR_HANDLE AsrHanlde=RapidAsrInit(argv[1], nThreadNum);
26 | 
27 |     if (!AsrHanlde)
28 |     {
29 |         printf("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]);
30 |         exit(-1);
31 |     }
32 |     
33 |  
34 | 
35 |     gettimeofday(&end, NULL);
36 |     long seconds = (end.tv_sec - start.tv_sec);
37 |     long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
38 |     printf("Model initialization takes %lfs.\n", (double)modle_init_micros / 1000000);
39 | 
40 | 
41 | 
42 |     gettimeofday(&start, NULL);
43 | 
44 |     RPASR_RESULT Result=RapidAsrRecogFile(AsrHanlde, argv[2], RASR_NONE, NULL);
45 |     gettimeofday(&end, NULL);
46 |     float snippet_time = 0.0f;
47 |     if (Result)
48 |     {
49 |         string msg = RapidAsrGetResult(Result, 0);
50 |         setbuf(stdout, NULL);
51 |         cout << "Result: \"";
52 |         cout << msg << endl;
53 |         cout << "\"." << endl;
54 |         snippet_time = RapidAsrGetRetSnippetTime(Result);
55 |         RapidAsrFreeResult(Result);
56 |     }
57 |     else
58 |     {
59 |         cout <<("no return data!");
60 |     }
61 |   
62 |     printf("Audio length %lfs.\n", (double)snippet_time);
63 | 
64 |     seconds = (end.tv_sec - start.tv_sec);
65 |     long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
66 |     printf("Model inference takes %lfs.\n", (double)taking_micros / 1000000);
67 | 
68 |     printf("Model inference RTF: %04lf.\n", (double)taking_micros/ (snippet_time*1000000));
69 | 
70 |     RapidAsrUninit(AsrHanlde);
71 | 
72 |     return 0;
73 | }
74 | 
75 |     
76 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | 
# Build script for the bundled WebRTC sources, compiled into the library
# target "webrtcvad".

# Select the platform macro for the sources: WEBRTC_WIN on Windows,
# WEBRTC_POSIX everywhere else.
if(WIN32)
    add_definitions(-DWEBRTC_WIN)
else()
    add_definitions(-DWEBRTC_POSIX)
endif()


# Add the parent directory to the header search path.
include_directories("..")

# Collect every .c file below this directory plus rtc_base/checks.cc.
file(GLOB_RECURSE files "*.c" "rtc_base/checks.cc")

# Print the collected file list at configure time.
message("${files}")

add_library(webrtcvad ${files})
17 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/complex_bit_reverse.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  3 |  *
  4 |  *  Use of this source code is governed by a BSD-style license
  5 |  *  that can be found in the LICENSE file in the root of the source
  6 |  *  tree. An additional intellectual property rights grant can be found
  7 |  *  in the file PATENTS.  All contributing project authors may
  8 |  *  be found in the AUTHORS file in the root of the source tree.
  9 |  */
 10 | 
 11 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 12 | 
 13 | /* Tables for data buffer indexes that are bit reversed and thus need to be
 14 |  * swapped. Note that, index_7[{0, 2, 4, ...}] are for the left side of the swap
 15 |  * operations, while index_7[{1, 3, 5, ...}] are for the right side of the
 16 |  * operation. Same for index_8.
 17 |  */
 18 | 
 19 | /* Indexes for the case of stages == 7. */
 20 | static const int16_t index_7[112] = {
 21 |   1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104,
 22 |   12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52,
 23 |   23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98,
 24 |   37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70,
 25 |   51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69,
 26 |   81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125,
 27 |   103, 115, 111, 123
 28 | };
 29 | 
 30 | /* Indexes for the case of stages == 8. */
 31 | static const int16_t index_8[240] = {
 32 |   1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80,
 33 |   11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20,
 34 |   40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184,
 35 |   30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41,
 36 |   148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76,
 37 |   51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62,
 38 |   124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82,
 39 |   75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87,
 40 |   234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101,
 41 |   166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142,
 42 |   115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131,
 43 |   193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201,
 44 |   149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171,
 45 |   213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227,
 46 |   203, 211, 207, 243, 215, 235, 223, 251, 239, 247
 47 | };
 48 | 
/* Reorders complex samples in place into bit-reversed index order.
 *
 * complex_data : interleaved 16-bit values; each pair of consecutive int16_t
 *                values is moved as one 32-bit unit.
 * stages       : base-2 logarithm of the number of 32-bit elements
 *                (the general path processes 1 << stages elements).
 *
 * For stages == 7 and stages == 8 the swap pairs come from the precomputed
 * tables index_7 / index_8; for any other value the bit-reversed index is
 * computed on the fly.
 */
void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) {
  /* For any specific value of stages, we know exactly the indexes that are
   * bit reversed. Currently (Feb. 2012) in WebRTC the only possible values of
   * stages are 7 and 8, so we use tables to save unnecessary iterations and
   * calculations for these two cases.
   */
  if (stages == 7 || stages == 8) {
    int m = 0;
    int length = 112;
    const int16_t* index = index_7;

    if (stages == 8) {
      length = 240;
      index = index_8;
    }

    /* Decimation in time. Swap the elements with bit-reversed indexes. */
    for (m = 0; m < length; m += 2) {
      /* We declare a int32_t* type pointer, to load both the 16-bit real
       * and imaginary elements from complex_data in one instruction, reducing
       * complexity.
       */
      int32_t* complex_data_ptr = (int32_t*)complex_data;
      int32_t temp = 0;

      temp = complex_data_ptr[index[m]];  /* Real and imaginary */
      complex_data_ptr[index[m]] = complex_data_ptr[index[m + 1]];
      complex_data_ptr[index[m + 1]] = temp;
    }
  }
  else {
    int m = 0, mr = 0, l = 0;
    int n = 1 << stages;
    int nn = n - 1;

    /* Decimation in time - re-order data */
    for (m = 1; m <= nn; ++m) {
      int32_t* complex_data_ptr = (int32_t*)complex_data;
      int32_t temp = 0;

      /* Find out indexes that are bit-reversed. */
      l = n;
      do {
        l >>= 1;
      } while (l > nn - mr);
      mr = (mr & (l - 1)) + l;

      /* Each pair is swapped once only: skip when the partner index is not
       * larger than the current one.
       */
      if (mr <= m) {
        continue;
      }

      /* Swap the elements with bit-reversed indexes.
       * This is similar to the loop in the stages == 7 or 8 cases.
       */
      temp = complex_data_ptr[m];  /* Real and imaginary */
      complex_data_ptr[m] = complex_data_ptr[mr];
      complex_data_ptr[mr] = temp;
    }
  }
}
109 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/cross_correlation.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
12 | 
13 | /* C version of WebRtcSpl_CrossCorrelation() for generic platforms. */
/* Generic C implementation of the cross-correlation.
 *
 * For each of the dim_cross_correlation outputs, sums the products of the
 * first dim_seq samples of seq1 and of the current seq2 window, each product
 * being shifted right by right_shifts before accumulation. The seq2 window
 * moves by step_seq2 samples after every output.
 */
void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
                                 const int16_t* seq1,
                                 const int16_t* seq2,
                                 size_t dim_seq,
                                 size_t dim_cross_correlation,
                                 int right_shifts,
                                 int step_seq2) {
  const int16_t* window = seq2;
  size_t lag;

  for (lag = 0; lag < dim_cross_correlation; ++lag, window += step_seq2) {
    int32_t acc = 0;
    size_t k;

    for (k = 0; k < dim_seq; ++k) {
      acc += (seq1[k] * window[k]) >> right_shifts;
    }
    cross_correlation[lag] = acc;
  }
}
31 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/division_operations.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
  3 |  *
  4 |  *  Use of this source code is governed by a BSD-style license
  5 |  *  that can be found in the LICENSE file in the root of the source
  6 |  *  tree. An additional intellectual property rights grant can be found
  7 |  *  in the file PATENTS.  All contributing project authors may
  8 |  *  be found in the AUTHORS file in the root of the source tree.
  9 |  */
 10 | 
 11 | 
 12 | /*
 13 |  * This file contains implementations of the divisions
 14 |  * WebRtcSpl_DivU32U16()
 15 |  * WebRtcSpl_DivW32W16()
 16 |  * WebRtcSpl_DivW32W16ResW16()
 17 |  * WebRtcSpl_DivResultInQ31()
 18 |  * WebRtcSpl_DivW32HiLow()
 19 |  *
 20 |  * The description header can be found in signal_processing_library.h
 21 |  *
 22 |  */
 23 | 
 24 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 25 | #include "webrtc/rtc_base/sanitizer.h"
 26 | 
 27 | uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den)
 28 | {
 29 |     // Guard against division with 0
 30 |     if (den != 0)
 31 |     {
 32 |         return (uint32_t)(num / den);
 33 |     } else
 34 |     {
 35 |         return (uint32_t)0xFFFFFFFF;
 36 |     }
 37 | }
 38 | 
 39 | int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den)
 40 | {
 41 |     // Guard against division with 0
 42 |     if (den != 0)
 43 |     {
 44 |         return (int32_t)(num / den);
 45 |     } else
 46 |     {
 47 |         return (int32_t)0x7FFFFFFF;
 48 |     }
 49 | }
 50 | 
 51 | int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den)
 52 | {
 53 |     // Guard against division with 0
 54 |     if (den != 0)
 55 |     {
 56 |         return (int16_t)(num / den);
 57 |     } else
 58 |     {
 59 |         return (int16_t)0x7FFF;
 60 |     }
 61 | }
 62 | 
 63 | int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den)
 64 | {
 65 |     int32_t L_num = num;
 66 |     int32_t L_den = den;
 67 |     int32_t div = 0;
 68 |     int k = 31;
 69 |     int change_sign = 0;
 70 | 
 71 |     if (num == 0)
 72 |         return 0;
 73 | 
 74 |     if (num < 0)
 75 |     {
 76 |         change_sign++;
 77 |         L_num = -num;
 78 |     }
 79 |     if (den < 0)
 80 |     {
 81 |         change_sign++;
 82 |         L_den = -den;
 83 |     }
 84 |     while (k--)
 85 |     {
 86 |         div <<= 1;
 87 |         L_num <<= 1;
 88 |         if (L_num >= L_den)
 89 |         {
 90 |             L_num -= L_den;
 91 |             div++;
 92 |         }
 93 |     }
 94 |     if (change_sign == 1)
 95 |     {
 96 |         div = -div;
 97 |     }
 98 |     return div;
 99 | }
100 | 
// Divides num by a denominator supplied as two 16-bit parts (den_hi, den_low)
// using a reciprocal approximation instead of a full division.
//
// Outline, following the inline notes below: a first estimate of 1/den is
// obtained from den_hi alone, refined once with
// approx * (2.0 - den * approx), and then multiplied with num, with both
// factors split into high and low 16-bit halves. The product is shifted left
// by 3 before being returned (Q28 to Q31 according to the notes).
//
// The function is excluded from signed-integer-overflow sanitizing; see the
// bug reference on the declaration.
int32_t RTC_NO_SANITIZE("signed-integer-overflow")  // bugs.webrtc.org/5486
WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low)
{
    int16_t approx, tmp_hi, tmp_low, num_hi, num_low;
    int32_t tmpW32;

    approx = (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi);
    // result in Q14 (Note: 3FFFFFFF = 0.5 in Q30)

    // tmpW32 = 1/den = approx * (2.0 - den * approx) (in Q30)
    tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1);
    // tmpW32 = den * approx

    tmpW32 = (int32_t)0x7fffffffL - tmpW32; // result in Q30 (tmpW32 = 2.0-(den*approx))
    // UBSan: 2147483647 - -2 cannot be represented in type 'int'

    // Store tmpW32 in hi and low format
    tmp_hi = (int16_t)(tmpW32 >> 16);
    tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);

    // tmpW32 = 1/den in Q29
    tmpW32 = (tmp_hi * approx + (tmp_low * approx >> 15)) << 1;

    // 1/den in hi and low format
    tmp_hi = (int16_t)(tmpW32 >> 16);
    tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);

    // Store num in hi and low format
    num_hi = (int16_t)(num >> 16);
    num_low = (int16_t)((num - ((int32_t)num_hi << 16)) >> 1);

    // num * (1/den) by 32 bit multiplication (result in Q28)

    tmpW32 = num_hi * tmp_hi + (num_hi * tmp_low >> 15) +
        (num_low * tmp_hi >> 15);

    // Put result in Q31 (convert from Q28)
    tmpW32 = WEBRTC_SPL_LSHIFT_W32(tmpW32, 3);

    return tmpW32;
}
142 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/dot_product_with_scale.cc:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | #include "webrtc/common_audio/signal_processing/dot_product_with_scale.h"
12 | 
13 | #include "webrtc/rtc_base/numerics/safe_conversions.h"
14 | 
15 | int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
16 |                                       const int16_t* vector2,
17 |                                       size_t length,
18 |                                       int scaling) {
19 |   int64_t sum = 0;
20 |   size_t i = 0;
21 | 
22 |   /* Unroll the loop to improve performance. */
23 |   for (i = 0; i + 3 < length; i += 4) {
24 |     sum += (vector1[i + 0] * vector2[i + 0]) >> scaling;
25 |     sum += (vector1[i + 1] * vector2[i + 1]) >> scaling;
26 |     sum += (vector1[i + 2] * vector2[i + 2]) >> scaling;
27 |     sum += (vector1[i + 3] * vector2[i + 3]) >> scaling;
28 |   }
29 |   for (; i < length; i++) {
30 |     sum += (vector1[i] * vector2[i]) >> scaling;
31 |   }
32 | 
33 |   return rtc::saturated_cast<int32_t>(sum);
34 | }
35 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/dot_product_with_scale.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
12 | #define COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
13 | 
14 | #include <stdint.h>
15 | #include <string.h>
16 | 
17 | #ifdef __cplusplus
18 | extern "C" {
19 | #endif
20 | 
21 | // Calculates the dot product between two (int16_t) vectors.
22 | //
23 | // Input:
24 | //      - vector1       : Vector 1
25 | //      - vector2       : Vector 2
26 | //      - length        : Number of samples used in the dot product
27 | //      - scaling       : The number of right bit shifts to apply on each term
28 | //                        during calculation to avoid overflow, i.e., the
29 | //                        output will be in Q(-|scaling|)
30 | //
31 | // Return value         : The dot product in Q(-scaling), saturated to int32_t
32 | int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
33 |                                       const int16_t* vector2,
34 |                                       size_t length,
35 |                                       int scaling);
36 | 
37 | #ifdef __cplusplus
38 | }
39 | #endif  // __cplusplus
40 | #endif  // COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
41 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/downsample_fast.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
12 | 
13 | #include "webrtc/rtc_base/checks.h"
14 | #include "webrtc/rtc_base/sanitizer.h"
15 | 
16 | // TODO(Bjornv): Change the function parameter order to WebRTC code style.
17 | // C version of WebRtcSpl_DownsampleFast() for generic platforms.
18 | int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
19 |                               size_t data_in_length,
20 |                               int16_t* data_out,
21 |                               size_t data_out_length,
22 |                               const int16_t* __restrict coefficients,
23 |                               size_t coefficients_length,
24 |                               int factor,
25 |                               size_t delay) {
26 |   // Filters |data_in| with the FIR |coefficients| and keeps one output per
27 |   // |factor| inputs, beginning at input index |delay|. Returns 0 on success
28 |   // and -1 when a length is zero or |data_in| is shorter than required.
29 |   int16_t* const original_data_out = data_out;
30 |   // One past the last input index used as the newest filter tap.
31 |   const size_t stop = delay + factor * (data_out_length - 1) + 1;
32 |   size_t pos = 0;
33 |   size_t tap = 0;
34 | 
35 |   if (data_out_length == 0 || coefficients_length == 0) return -1;
36 |   if (data_in_length < stop) return -1;
37 | 
38 |   rtc_MsanCheckInitialized(coefficients, sizeof(coefficients[0]),
39 |                            coefficients_length);
40 | 
41 |   for (pos = delay; pos < stop; pos += factor) {
42 |     // Rounding offset: 0.5 in Q12. The accumulator stays in the Q12 domain.
43 |     int32_t acc_q12 = 2048;
44 | 
45 |     for (tap = 0; tap < coefficients_length; tap++) {
46 |       // The index may go negative on purpose: these are auto-regressive
47 |       // filters, and the state for each batch run is stored in the
48 |       // "negative" positions of the vector.
49 |       const int16_t* sample = &data_in[(ptrdiff_t) pos - (ptrdiff_t) tap];
50 |       rtc_MsanCheckInitialized(sample, sizeof(data_in[0]), 1);
51 |       acc_q12 += coefficients[tap] * *sample;
52 |     }
53 | 
54 |     // Q12 -> Q0, then saturate to 16 bits and store.
55 |     *data_out++ = WebRtcSpl_SatW32ToW16(acc_q12 >> 12);
56 |   }
57 | 
58 |   // Exactly |data_out_length| samples must have been produced.
59 | 
60 |   RTC_DCHECK_EQ(original_data_out + data_out_length, data_out);
61 |   rtc_MsanCheckInitialized(original_data_out, sizeof(original_data_out[0]),
62 |                            data_out_length);
63 | 
64 |   return 0;
65 | }
66 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/energy.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | 
12 | /*
13 |  * This file contains the function WebRtcSpl_Energy().
14 |  * The description header can be found in signal_processing_library.h
15 |  *
16 |  */
17 | 
18 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
19 | 
20 | int32_t WebRtcSpl_Energy(int16_t* vector,
21 |                          size_t vector_length,
22 |                          int* scale_factor)
23 | {
24 |     // Returns the sum of the squared samples, each square shifted right by the
25 |     // value WebRtcSpl_GetScalingSquare() returns; that shift is stored in
26 |     // |scale_factor|.
27 |     const int shift =
28 |         WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length);
29 |     int32_t energy = 0;
30 |     size_t k;
31 | 
32 |     for (k = 0; k < vector_length; ++k)
33 |     {
34 |       const int sample = vector[k];
35 |       energy += (sample * sample) >> shift;
36 |     }
37 |     *scale_factor = shift;
38 |     return energy;
39 | }
40 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/get_scaling_square.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | 
12 | /*
13 |  * This file contains the function WebRtcSpl_GetScalingSquare().
14 |  * The description header can be found in signal_processing_library.h
15 |  *
16 |  */
17 | 
18 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
19 | 
20 | int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector,
21 |                                    size_t in_vector_length,
22 |                                    size_t times)
23 | {
24 |     // Computes a right-shift count for squared samples from the bit width of
25 |     // |times| and the spare bits left by the largest squared magnitude.
26 |     int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t)times);
27 |     size_t i;
28 |     // Track magnitudes as int: |-32768| == 32768 does not fit in int16_t and
29 |     // would otherwise drop out of the maximum, giving too small a shift.
30 |     int smax = -1;
31 |     int sabs;
32 |     int16_t t;
33 | 
34 |     for (i = 0; i < in_vector_length; i++)
35 |     {
36 |         sabs = (in_vector[i] > 0 ? in_vector[i] : -in_vector[i]);
37 |         smax = (sabs > smax ? sabs : smax);
38 |     }
39 |     t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax));  // Spare bits of max^2.
40 | 
41 |     if (smax == 0)
42 |     {
43 |         return 0; // Since norm(0) returns 0
44 |     }
45 |     return (t > nbits) ? 0 : nbits - t;
46 | }
47 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/include/real_fft.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
12 | #define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
13 | 
14 | #include <stdint.h>
15 | 
16 | // For ComplexFFT(), the maximum fft order is 10;
17 | // WebRTC APM uses orders of only 7 and 8.
18 | enum { kMaxFFTOrder = 10 };
19 | 
20 | struct RealFFT;
21 | 
22 | #ifdef __cplusplus
23 | extern "C" {
24 | #endif
25 | 
26 | struct RealFFT* WebRtcSpl_CreateRealFFT(int order);  // Spec for FFT length 2^order.
27 | void WebRtcSpl_FreeRealFFT(struct RealFFT* self);  // Presumably releases |self| - confirm in the implementation.
28 | 
29 | // Compute an FFT for a real-valued signal of length of 2^order,
30 | // where 1 < order <= kMaxFFTOrder. Transform length is determined by the
31 | // specification structure, which must be initialized prior to calling the FFT
32 | // function with WebRtcSpl_CreateRealFFT().
33 | // The relationship between the input and output sequences can
34 | // be expressed in terms of the DFT, i.e.:
35 | //     x[n] = (2^(-scalefactor)/N)  . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N)
36 | //     n=0,1,2,...N-1
37 | //     N=2^order.
38 | // The conjugate-symmetric output sequence is represented using a CCS vector,
39 | // which is of length N+2, and is organized as follows:
40 | //     Index:      0  1  2  3  4  5   . . .   N-2       N-1       N       N+1
41 | //     Component:  R0 0  R1 I1 R2 I2  . . .   R[N/2-1]  I[N/2-1]  R[N/2]  0
42 | // where R[n] and I[n], respectively, denote the real and imaginary components
43 | // for FFT bin 'n'. Bins are numbered from 0 to N/2, where N is the FFT length.
44 | // Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to
45 | // the foldover frequency.
46 | //
47 | // Input Arguments:
48 | //   self - pointer to preallocated and initialized FFT specification structure.
49 | //   real_data_in - the input signal. For an ARM Neon platform, it must be
50 | //                  aligned on a 32-byte boundary.
51 | //
52 | // Output Arguments:
53 | //   complex_data_out - the output complex signal with (2^order + 2) 16-bit
54 | //                      elements. For an ARM Neon platform, it must be different
55 | //                      from real_data_in, and aligned on a 32-byte boundary.
56 | //
57 | // Return Value:
58 | //   0  - FFT calculation is successful.
59 | //   -1 - Error with bad arguments (null pointers).
60 | int WebRtcSpl_RealForwardFFT(struct RealFFT* self,
61 |                              const int16_t* real_data_in,
62 |                              int16_t* complex_data_out);
63 | 
64 | // Compute the inverse FFT for a conjugate-symmetric input sequence of length of
65 | // 2^order, where 1 < order <= kMaxFFTOrder. Transform length is determined by
66 | // the specification structure, which must be initialized prior to calling the
67 | // FFT function with WebRtcSpl_CreateRealFFT().
68 | // For a transform of length M, the input sequence is represented using a packed
69 | // CCS vector of length M+2, which is explained in the comments for
70 | // WebRtcSpl_RealForwardFFT() above.
71 | //
72 | // Input Arguments:
73 | //   self - pointer to preallocated and initialized FFT specification structure.
74 | //   complex_data_in - the input complex signal with (2^order + 2) 16-bit
75 | //                     elements. For an ARM Neon platform, it must be aligned on
76 | //                     a 32-byte boundary.
77 | //
78 | // Output Arguments:
79 | //   real_data_out - the output real signal. For an ARM Neon platform, it must
80 | //                   be different from complex_data_in, and aligned on a 32-byte
81 | //                   boundary.
82 | //
83 | // Return Value:
84 | //   0 or a positive number - a value that the elements in the |real_data_out|
85 | //                            should be shifted left with in order to get
86 | //                            correct physical values.
87 | //   -1 - Error with bad arguments (null pointers).
88 | int WebRtcSpl_RealInverseFFT(struct RealFFT* self,
89 |                              const int16_t* complex_data_in,
90 |                              int16_t* real_data_out);
91 | 
92 | #ifdef __cplusplus
93 | }
94 | #endif
95 | 
96 | #endif  // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
97 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/include/spl_inl.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
  3 |  *
  4 |  *  Use of this source code is governed by a BSD-style license
  5 |  *  that can be found in the LICENSE file in the root of the source
  6 |  *  tree. An additional intellectual property rights grant can be found
  7 |  *  in the file PATENTS.  All contributing project authors may
  8 |  *  be found in the AUTHORS file in the root of the source tree.
  9 |  */
 10 | 
 11 | // This header file includes the inline functions in
 12 | // the fix point signal processing library.
 13 | 
 14 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
 15 | #define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
 16 | 
 17 | #include "webrtc/rtc_base/compile_assert_c.h"
 18 | 
 19 | extern const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64];
 20 | 
 21 | // Don't call this directly except in tests!
 22 | static __inline int WebRtcSpl_CountLeadingZeros32_NotBuiltin(uint32_t n) {
 23 |   // Portable leading-zero count. First smear the highest set bit into every
 24 |   // lower position, so n becomes a run of 0 bits followed by a run of 1 bits
 25 |   // with the same number of leading zeros (there are exactly 33 such values).
 26 |   unsigned shift;
 27 |   uint32_t slot;
 28 | 
 29 |   for (shift = 1; shift <= 16; shift <<= 1) n |= n >> shift;
 30 | 
 31 |   // The multiplier was selected (by exhaustive search) so that each of the 33
 32 |   // possible values gives a product whose 6 most significant bits are unique;
 33 |   // those bits index the table that holds the answer.
 34 |   slot = (n * 0x8c0b2891) >> 26;
 35 |   return kWebRtcSpl_CountLeadingZeros32_Table[slot];
 36 | }
 37 | 
 38 | // Don't call this directly except in tests!
 39 | static __inline int WebRtcSpl_CountLeadingZeros64_NotBuiltin(uint64_t n) {
 40 |   const uint32_t upper = (uint32_t)(n >> 32);  // Nonzero: the answer lies there.
 41 |   return upper != 0 ? WebRtcSpl_CountLeadingZeros32_NotBuiltin(upper)
 42 |                     : 32 + WebRtcSpl_CountLeadingZeros32_NotBuiltin((uint32_t)n);
 43 | }
 44 | 
 45 | // Returns the number of leading zero bits in |n|; the builtin path gives 32 for 0.
 46 | static __inline int WebRtcSpl_CountLeadingZeros32(uint32_t n) {
 47 | #ifdef __GNUC__  // GCC-compatible compilers: use the builtin.
 48 |   RTC_COMPILE_ASSERT(sizeof(unsigned int) == sizeof(uint32_t));  // __builtin_clz takes unsigned int.
 49 |   return n == 0 ? 32 : __builtin_clz(n);  // __builtin_clz(0) is undefined, so 0 is special-cased.
 50 | #else  // Other compilers: portable table-based fallback.
 51 |   return WebRtcSpl_CountLeadingZeros32_NotBuiltin(n);
 52 | #endif
 53 | }
 54 | 
 55 | // Returns the number of leading zero bits in |n|; the builtin path gives 64 for 0.
 56 | static __inline int WebRtcSpl_CountLeadingZeros64(uint64_t n) {
 57 | #ifdef __GNUC__  // GCC-compatible compilers: use the builtin.
 58 |   RTC_COMPILE_ASSERT(sizeof(unsigned long long) == sizeof(uint64_t));  // NOLINT
 59 |   return n == 0 ? 64 : __builtin_clzll(n);  // __builtin_clzll(0) is undefined, so 0 is special-cased.
 60 | #else  // Other compilers: portable fallback built on the 32-bit table lookup.
 61 |   return WebRtcSpl_CountLeadingZeros64_NotBuiltin(n);
 62 | #endif
 63 | }
 64 | 
 65 | #ifdef WEBRTC_ARCH_ARM_V7
 66 | #include "webrtc/common_audio/signal_processing/include/spl_inl_armv7.h"
 67 | #else
 68 | 
 69 | #if defined(MIPS32_LE)
 70 | #include "webrtc/common_audio/signal_processing/include/spl_inl_mips.h"
 71 | #endif
 72 | 
 73 | #if !defined(MIPS_DSP_R1_LE)
 74 | static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
 75 |   // Clamp a 32-bit value into the int16_t range [-32768, 32767].
 76 |   if (value32 > 32767) {
 77 |     return 32767;
 78 |   }
 79 |   if (value32 < -32768) {
 80 |     return -32768;
 81 |   }
 82 |   return (int16_t)value32;  // Already representable; the cast is lossless.
 83 | }
 84 | 
 85 | static __inline int32_t WebRtcSpl_AddSatW32(int32_t a, int32_t b) {
 86 |   // Add with wrap-around in the unsigned domain (signed overflow would be
 87 |   // undefined behavior), then detect and repair any overflow afterwards.
 88 |   const int32_t wrapped = (int32_t)((uint32_t)a + (uint32_t)b);
 89 |   const int a_neg = a < 0;
 90 |   const int b_neg = b < 0;
 91 |   const int wrapped_neg = wrapped < 0;
 92 | 
 93 |   // Operands of opposite sign never overflow; operands of equal sign
 94 |   // overflowed exactly when the wrapped sum has the other sign.
 95 |   if (a_neg != b_neg || a_neg == wrapped_neg) return wrapped;
 96 |   return wrapped_neg ? INT32_MAX : INT32_MIN;  // Saturate toward a's sign.
 97 | }
 98 | 
 99 | static __inline int32_t WebRtcSpl_SubSatW32(int32_t a, int32_t b) {
100 |   // Subtract with wrap-around in the unsigned domain (signed overflow would
101 |   // be undefined behavior), then detect and repair any overflow afterwards.
102 |   const int32_t wrapped = (int32_t)((uint32_t)a - (uint32_t)b);
103 |   const int a_neg = a < 0;
104 |   const int b_neg = b < 0;
105 |   const int wrapped_neg = wrapped < 0;
106 | 
107 |   // Operands of equal sign never overflow; operands of opposite sign
108 |   // overflowed exactly when the wrapped difference lost the sign of a.
109 |   if (a_neg == b_neg || a_neg == wrapped_neg) return wrapped;
110 |   return wrapped_neg ? INT32_MAX : INT32_MIN;  // Saturate toward a's sign.
111 | }
112 | 
113 | static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
114 |   return WebRtcSpl_SatW32ToW16((int32_t)a + (int32_t)b);  // Exact 32-bit sum, then clamped to int16_t.
115 | }
116 | 
117 | static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
118 |   return WebRtcSpl_SatW32ToW16((int32_t)var1 - (int32_t)var2);  // Exact 32-bit difference, then clamped to int16_t.
119 | }
120 | #endif  // #if !defined(MIPS_DSP_R1_LE)
121 | 
122 | #if !defined(MIPS32_LE)
123 | static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
124 |   return 32 - WebRtcSpl_CountLeadingZeros32(n);  // Bit width of n: 32 minus its leading zeros.
125 | }
126 | 
127 | // Return the number of steps a can be left-shifted without overflow,
128 | // or 0 if a == 0. Negative values are measured on their bitwise complement.
129 | static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
130 |   return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? ~a : a) - 1;  // -1: the sign bit must stay.
131 | }
132 | 
133 | // Return the number of steps a can be left-shifted without overflow
134 | // (i.e. its count of leading zero bits), or 0 if a == 0.
135 | static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
136 |   return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a);
137 | }
138 | 
139 | // Return the number of steps a can be left-shifted without overflow,
140 | // or 0 if a == 0. Negative values are measured on their bitwise complement.
141 | static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
142 |   const int32_t a32 = a;  // Widened so the 32-bit leading-zero count can be reused.
143 |   return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? ~a32 : a32) - 17;  // 17 = 16 extra bits + sign bit.
144 | }
145 | 
146 | static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
147 |   return (a * b + c);  // Multiply-accumulate: a * b is formed in int, then c is added (no saturation).
148 | }
149 | #endif  // #if !defined(MIPS32_LE)
150 | 
151 | #endif  // WEBRTC_ARCH_ARM_V7
152 | 
153 | #endif  // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
154 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/min_max_operations.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  3 |  *
  4 |  *  Use of this source code is governed by a BSD-style license
  5 |  *  that can be found in the LICENSE file in the root of the source
  6 |  *  tree. An additional intellectual property rights grant can be found
  7 |  *  in the file PATENTS.  All contributing project authors may
  8 |  *  be found in the AUTHORS file in the root of the source tree.
  9 |  */
 10 | 
 11 | /*
 12 |  * This file contains the implementation of functions
 13 |  * WebRtcSpl_MaxAbsValueW16C()
 14 |  * WebRtcSpl_MaxAbsValueW32C()
 15 |  * WebRtcSpl_MaxValueW16C()
 16 |  * WebRtcSpl_MaxValueW32C()
 17 |  * WebRtcSpl_MinValueW16C()
 18 |  * WebRtcSpl_MinValueW32C()
 19 |  * WebRtcSpl_MaxAbsIndexW16()
 20 |  * WebRtcSpl_MaxIndexW16()
 21 |  * WebRtcSpl_MaxIndexW32()
 22 |  * WebRtcSpl_MinIndexW16()
 23 |  * WebRtcSpl_MinIndexW32()
 24 |  *
 25 |  */
 26 | 
 27 | #include <stdlib.h>
 28 | 
 29 | #include "webrtc/rtc_base/checks.h"
 30 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 31 | 
 32 | // TODO(bjorn/kma): Consolidate function pairs (e.g. combine
 33 | //   WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.)
 34 | // TODO(kma): Move the next six functions into min_max_operations_c.c.
 35 | 
 36 | // Maximum absolute value of word16 vector. C version for generic platforms.
 37 | int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length) {
 38 |   // Returns the largest absolute value among the |length| elements of
 39 |   // |vector|, limited to WEBRTC_SPL_WORD16_MAX.
 40 |   const int16_t* cursor = vector;
 41 |   size_t remaining = length;
 42 |   // Magnitudes are tracked as int so that abs(-32768) == 32768 stays
 43 |   // representable until the final clamp.
 44 |   int peak = 0;
 45 | 
 46 |   RTC_DCHECK_GT(length, 0);
 47 | 
 48 |   while (remaining-- > 0) {
 49 |     const int magnitude = abs((int)*cursor++);
 50 |     peak = (magnitude > peak) ? magnitude : peak;
 51 |   }
 52 | 
 53 |   // Guard the case for abs(-32768): 32768 does not fit in the int16_t
 54 |   // return type.
 55 |   return (int16_t)((peak > WEBRTC_SPL_WORD16_MAX) ? WEBRTC_SPL_WORD16_MAX
 56 |                                                   : peak);
 57 | }
 58 | 
 59 | // Maximum absolute value of word32 vector. C version for generic platforms.
 60 | int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length) {
 61 |   // Returns the largest absolute value in |vector|, limited to
 62 |   // WEBRTC_SPL_WORD32_MAX. Magnitudes are kept in uint32_t because the
 63 |   // magnitude of the most negative int32_t (0x80000000) needs 32 bits.
 64 |   uint32_t absolute = 0, maximum = 0;
 65 |   size_t i = 0;
 66 | 
 67 |   RTC_DCHECK_GT(length, 0);
 68 | 
 69 |   for (i = 0; i < length; i++) {
 70 |     // Negate in the unsigned domain: abs() on the most negative value is
 71 |     // undefined behavior, whereas 0u - (uint32_t)v is well defined.
 72 |     absolute = vector[i] < 0 ? 0u - (uint32_t)vector[i] : (uint32_t)vector[i];
 73 |     if (absolute > maximum) maximum = absolute;
 74 |   }
 75 | 
 76 |   maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
 77 | 
 78 |   return (int32_t)maximum;
 79 | }
 80 | 
 81 | // Maximum value of word16 vector. C version for generic platforms.
 82 | int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length) {
 83 |   // Returns the largest of the |length| elements. The running maximum is
 84 |   // seeded with WEBRTC_SPL_WORD16_MIN, so any element can replace it.
 85 |   int16_t best = WEBRTC_SPL_WORD16_MIN;
 86 |   size_t k;
 87 | 
 88 |   RTC_DCHECK_GT(length, 0);
 89 |   for (k = 0; k < length; ++k) {
 90 |     best = (vector[k] > best) ? vector[k] : best;
 91 |   }
 92 |   return best;
 93 | }
 94 | 
 95 | // Maximum value of word32 vector. C version for generic platforms.
 96 | int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length) {
 97 |   // Scans all |length| elements and returns the largest one. The search
 98 |   // starts from WEBRTC_SPL_WORD32_MIN, so any element can replace it.
 99 |   size_t left = length;
100 |   int32_t largest = WEBRTC_SPL_WORD32_MIN;
101 | 
102 |   RTC_DCHECK_GT(length, 0);
103 |   for (; left > 0; --left, ++vector) {
104 |     if (largest < *vector) largest = *vector;
105 |   }
106 |   return largest;
107 | }
108 | 
109 | // Minimum value of word16 vector. C version for generic platforms.
110 | int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length) {
111 |   // Returns the smallest of the |length| elements. The running minimum is
112 |   // seeded with WEBRTC_SPL_WORD16_MAX, so any element can lower it.
113 |   int16_t lowest = WEBRTC_SPL_WORD16_MAX;
114 |   size_t k = 0;
115 |   RTC_DCHECK_GT(length, 0);
116 |   while (k < length) {
117 |     const int16_t candidate = vector[k++];
118 |     lowest = (candidate < lowest) ? candidate : lowest;
119 |   }
120 |   return lowest;
121 | }
122 | 
123 | // Minimum value of word32 vector. C version for generic platforms.
124 | int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length) {
125 |   // Returns the smallest element of |vector| (|length| entries), starting
126 |   // the search from WEBRTC_SPL_WORD32_MAX.
127 |   int32_t smallest = WEBRTC_SPL_WORD32_MAX;
128 |   size_t idx;
129 | 
130 |   RTC_DCHECK_GT(length, 0);
131 |   for (idx = 0; idx != length; ++idx) {
132 |     if (smallest > vector[idx]) smallest = vector[idx];
133 |   }
134 |   return smallest;
135 | }
136 | 
137 | // Index of maximum absolute value in a word16 vector.
138 | size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length) {
139 |   // Returns the index of the first element with the largest absolute value
140 |   // (0 if every element is zero). Magnitudes use type int to accommodate
141 |   // the value of abs(-32768).
142 |   size_t pos = 0;
143 |   size_t best_pos = 0;
144 |   int best_magnitude = 0;
145 | 
146 |   RTC_DCHECK_GT(length, 0);
147 | 
148 |   for (pos = 0; pos < length; ++pos) {
149 |     const int magnitude = abs((int)vector[pos]);
150 |     if (magnitude <= best_magnitude) continue;  // Ties keep the earlier index.
151 |     best_magnitude = magnitude;
152 |     best_pos = pos;
153 |   }
154 | 
155 |   return best_pos;
156 | }
157 | 
158 | // Index of maximum value in a word16 vector.
159 | size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length) {
160 |   // Returns the index of the first occurrence of the largest element.
161 |   size_t best_pos = 0;
162 |   int16_t best = WEBRTC_SPL_WORD16_MIN;
163 |   size_t pos;
164 | 
165 |   RTC_DCHECK_GT(length, 0);
166 | 
167 |   for (pos = 0; pos < length; ++pos) {
168 |     if (vector[pos] <= best) continue;  // Only a strictly larger value wins.
169 |     best = vector[pos];
170 |     best_pos = pos;
171 |   }
172 |   return best_pos;
173 | }
174 | 
175 | // Index of maximum value in a word32 vector.
176 | size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length) {
177 |   // Position of the maximum of |vector|; on ties the lowest index is kept
178 |   // because only a strictly larger value updates the result.
179 |   size_t winner = 0;
180 |   int32_t top = WEBRTC_SPL_WORD32_MIN;
181 |   size_t k = 0;
182 | 
183 |   RTC_DCHECK_GT(length, 0);
184 | 
185 |   while (k < length) {
186 |     if (top < vector[k]) { top = vector[k]; winner = k; }
187 |     ++k;
188 |   }
189 |   return winner;
190 | }
191 | 
192 | // Index of minimum value in a word16 vector.
193 | size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length) {
194 |   // Returns the index of the first occurrence of the smallest element.
195 |   size_t lowest_pos = 0;
196 |   int16_t lowest = WEBRTC_SPL_WORD16_MAX;
197 |   size_t pos;
198 | 
199 |   RTC_DCHECK_GT(length, 0);
200 |   for (pos = 0; pos < length; ++pos) {
201 |     const int16_t value = vector[pos];
202 |     if (value >= lowest) continue;  // Only a strictly smaller value wins.
203 |     lowest = value;
204 |     lowest_pos = pos;
205 |   }
206 |   return lowest_pos;
207 | }
208 | 
209 | // Index of minimum value in a word32 vector.
210 | size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) {
211 |   // Position of the minimum of |vector|; on ties the lowest index is kept
212 |   // because only a strictly smaller value updates the result.
213 |   size_t winner = 0;
214 |   int32_t bottom = WEBRTC_SPL_WORD32_MAX;
215 |   size_t k = 0;
216 | 
217 |   RTC_DCHECK_GT(length, 0);
218 | 
219 |   while (k < length) {
220 |     if (bottom > vector[k]) { bottom = vector[k]; winner = k; }
221 |     ++k;
222 |   }
223 |   return winner;
224 | }
225 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/resample_48khz.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
  3 |  *
  4 |  *  Use of this source code is governed by a BSD-style license
  5 |  *  that can be found in the LICENSE file in the root of the source
  6 |  *  tree. An additional intellectual property rights grant can be found
  7 |  *  in the file PATENTS.  All contributing project authors may
  8 |  *  be found in the AUTHORS file in the root of the source tree.
  9 |  */
 10 | 
 11 | 
 12 | /*
 13 |  * This file contains resampling functions between 48 kHz and nb/wb.
 14 |  * The description header can be found in signal_processing_library.h
 15 |  *
 16 |  */
 17 | 
 18 | #include <string.h>
 19 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 20 | #include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
 21 | 
 22 | ////////////////////////////
 23 | ///// 48 kHz -> 16 kHz /////
 24 | ////////////////////////////
 25 | 
 26 | // 48 -> 16 resampler: 480 input samples -> 160 output samples. |tmpmem| is scratch shared by all stages (offsets up to 495 are used).
 27 | void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out,
 28 |                                     WebRtcSpl_State48khzTo16khz* state, int32_t* tmpmem)
 29 | {
 30 |     ///// 48 --> 48(LP) /////
 31 |     // int16_t  in[480]
 32 |     // int32_t out[480]
 33 |     ///// output is written starting at tmpmem + 16
 34 |     WebRtcSpl_LPBy2ShortToInt(in, 480, tmpmem + 16, state->S_48_48);
 35 | 
 36 |     ///// 48 --> 32 /////
 37 |     // int32_t  in[480]
 38 |     // int32_t out[320]
 39 |     ///// runs over the same buffer: input from tmpmem + 8, output to tmpmem
 40 |     // copy state to and from input array
 41 |     memcpy(tmpmem + 8, state->S_48_32, 8 * sizeof(int32_t));  // saved state goes right before the new input
 42 |     memcpy(state->S_48_32, tmpmem + 488, 8 * sizeof(int32_t));  // keep the last 8 input values for the next call
 43 |     WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 160);
 44 | 
 45 |     ///// 32 --> 16 /////
 46 |     // int32_t  in[320]
 47 |     // int16_t out[160]
 48 |     ///// reads tmpmem, writes the final samples to |out|
 49 |     WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16);
 50 | }
 51 | 
 52 | // initialize state of 48 -> 16 resampler (zeroes the memory of every stage)
 53 | void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state)
 54 | {
 55 |     memset(state->S_48_48, 0, 16 * sizeof(int32_t));  // 48 --> 48(LP) stage
 56 |     memset(state->S_48_32, 0, 8 * sizeof(int32_t));  // 48 --> 32 stage
 57 |     memset(state->S_32_16, 0, 8 * sizeof(int32_t));  // 32 --> 16 stage
 58 | }
 59 | 
 60 | ////////////////////////////
 61 | ///// 16 kHz -> 48 kHz /////
 62 | ////////////////////////////
 63 | 
 64 | // 16 -> 48 resampler: 160 input samples -> 480 output samples. |tmpmem| is scratch shared by all stages (offsets up to 335 are used).
 65 | void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out,
 66 |                                     WebRtcSpl_State16khzTo48khz* state, int32_t* tmpmem)
 67 | {
 68 |     ///// 16 --> 32 /////
 69 |     // int16_t  in[160]
 70 |     // int32_t out[320]
 71 |     ///// output is written starting at tmpmem + 16
 72 |     WebRtcSpl_UpBy2ShortToInt(in, 160, tmpmem + 16, state->S_16_32);
 73 | 
 74 |     ///// 32 --> 24 /////
 75 |     // int32_t  in[320]
 76 |     // int32_t out[240]
 77 |     // copy state to and from input array
 78 |     ///// runs over the same buffer: input from tmpmem + 8, output to tmpmem
 79 |     memcpy(tmpmem + 8, state->S_32_24, 8 * sizeof(int32_t));  // saved state goes right before the new input
 80 |     memcpy(state->S_32_24, tmpmem + 328, 8 * sizeof(int32_t));  // keep the last 8 input values for the next call
 81 |     WebRtcSpl_Resample32khzTo24khz(tmpmem + 8, tmpmem, 80);
 82 | 
 83 |     ///// 24 --> 48 /////
 84 |     // int32_t  in[240]
 85 |     // int16_t out[480]
 86 |     ///// reads tmpmem, writes the final samples to |out|
 87 |     WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
 88 | }
 89 | 
 90 | // initialize state of 16 -> 48 resampler (zeroes the memory of every stage)
 91 | void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state)
 92 | {
 93 |     memset(state->S_16_32, 0, 8 * sizeof(int32_t));  // 16 --> 32 stage
 94 |     memset(state->S_32_24, 0, 8 * sizeof(int32_t));  // 32 --> 24 stage
 95 |     memset(state->S_24_48, 0, 8 * sizeof(int32_t));  // 24 --> 48 stage
 96 | }
 97 | 
 98 | ////////////////////////////
 99 | ///// 48 kHz ->  8 kHz /////
100 | ////////////////////////////
101 | 
102 | // 48 -> 8 resampler: 480 input samples -> 80 output samples. |tmpmem| is scratch shared by all stages (offsets up to 495 are used).
103 | void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out,
104 |                                    WebRtcSpl_State48khzTo8khz* state, int32_t* tmpmem)
105 | {
106 |     ///// 48 --> 24 /////
107 |     // int16_t  in[480]
108 |     // int32_t out[240]
109 |     ///// output is written starting at tmpmem + 256
110 |     WebRtcSpl_DownBy2ShortToInt(in, 480, tmpmem + 256, state->S_48_24);
111 | 
112 |     ///// 24 --> 24(LP) /////
113 |     // int32_t  in[240]
114 |     // int32_t out[240]
115 |     ///// reads from tmpmem + 256, writes starting at tmpmem + 16
116 |     WebRtcSpl_LPBy2IntToInt(tmpmem + 256, 240, tmpmem + 16, state->S_24_24);
117 | 
118 |     ///// 24 --> 16 /////
119 |     // int32_t  in[240]
120 |     // int32_t out[160]
121 |     ///// runs over the same buffer: input from tmpmem + 8, output to tmpmem
122 |     // copy state to and from input array
123 |     memcpy(tmpmem + 8, state->S_24_16, 8 * sizeof(int32_t));  // saved state goes right before the new input
124 |     memcpy(state->S_24_16, tmpmem + 248, 8 * sizeof(int32_t));  // keep the last 8 input values for the next call
125 |     WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 80);
126 | 
127 |     ///// 16 --> 8 /////
128 |     // int32_t  in[160]
129 |     // int16_t out[80]
130 |     ///// reads tmpmem, writes the final samples to |out|
131 |     WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8);
132 | }
133 | 
134 | // initialize state of 48 -> 8 resampler (zeroes the memory of every stage)
135 | void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state)
136 | {
137 |     memset(state->S_48_24, 0, 8 * sizeof(int32_t));  // 48 --> 24 stage
138 |     memset(state->S_24_24, 0, 16 * sizeof(int32_t));  // 24 --> 24(LP) stage
139 |     memset(state->S_24_16, 0, 8 * sizeof(int32_t));  // 24 --> 16 stage
140 |     memset(state->S_16_8, 0, 8 * sizeof(int32_t));  // 16 --> 8 stage
141 | }
142 | 
143 | ////////////////////////////
144 | /////  8 kHz -> 48 kHz /////
145 | ////////////////////////////
146 | 
147 | // 8 -> 48 resampler: turns one block of 80 samples in |in| into 480 samples in |out| through the four stages below. |tmpmem| is int32_t scratch; per the sizes noted at each stage, the stages address tmpmem[0] .. tmpmem[423].
148 | void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out,
149 |                                    WebRtcSpl_State8khzTo48khz* state, int32_t* tmpmem)
150 | {
151 |     ///// 8 --> 16 /////
152 |     // int16_t  in[80]
153 |     // int32_t out[160]
154 |     /////
155 |     WebRtcSpl_UpBy2ShortToInt(in, 80, tmpmem + 264, state->S_8_16);  // output starts at tmpmem[264]
156 | 
157 |     ///// 16 --> 12 /////
158 |     // int32_t  in[160]
159 |     // int32_t out[120]
160 |     /////
161 |     // copy state to and from input array: tmpmem[256..263] receives the 8 words saved by the previous call, and tmpmem[416..423] (the last 8 of the 160 words written at tmpmem + 264) are saved for the next call
162 |     memcpy(tmpmem + 256, state->S_16_12, 8 * sizeof(int32_t));
163 |     memcpy(state->S_16_12, tmpmem + 416, 8 * sizeof(int32_t));
164 |     WebRtcSpl_Resample32khzTo24khz(tmpmem + 256, tmpmem + 240, 40);  // reads from tmpmem + 256 (saved state followed by data), writes from tmpmem[240]
165 | 
166 |     ///// 12 --> 24 /////
167 |     // int32_t  in[120]
168 |     // int32_t out[240]
169 |     /////
170 |     WebRtcSpl_UpBy2IntToInt(tmpmem + 240, 120, tmpmem, state->S_12_24);  // 32-bit output starts at tmpmem[0]
171 | 
172 |     ///// 24 --> 48 /////
173 |     // int32_t  in[240]
174 |     // int16_t out[480]
175 |     /////
176 |     WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);  // final 16-bit samples go straight to |out|
177 | }
178 | 
179 | // Clears every stage memory of the 8 -> 48 kHz resampler state.
180 | void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state)
181 | {
182 |     memset(state->S_24_48, 0, sizeof(int32_t[8]));  // 24 -> 48 stage
183 |     memset(state->S_12_24, 0, sizeof(int32_t[8]));  // 12 -> 24 stage
184 |     memset(state->S_16_12, 0, sizeof(int32_t[8]));  // 16 -> 12 stage
185 |     memset(state->S_8_16, 0, sizeof(int32_t[8]));   // 8 -> 16 stage
186 | }
187 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/resample_by_2_internal.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | /*
12 |  * This header file contains some internal resampling functions.
13 |  *
14 |  */
15 | 
16 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_
17 | #define COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_
18 | 
19 | #include <stdint.h>
20 | 
21 | /*******************************************************************
22 |  * resample_by_2_fast.c
23 |  * Functions for internal use in the other resample functions
24 |  ******************************************************************/
25 | void WebRtcSpl_DownBy2IntToShort(int32_t* in,  // the only helper here whose input pointer is not const-qualified
26 |                                  int32_t len,  // WebRtcSpl_Resample48khzTo8khz passes 160 for int32_t in[160] -> int16_t out[80]
27 |                                  int16_t* out,
28 |                                  int32_t* state);  // callers keep an 8-word state for this stage
29 | 
30 | void WebRtcSpl_DownBy2ShortToInt(const int16_t* in,
31 |                                  int32_t len,  // WebRtcSpl_Resample48khzTo8khz passes 480 for int16_t in[480] -> int32_t out[240]
32 |                                  int32_t* out,
33 |                                  int32_t* state);
34 | 
35 | void WebRtcSpl_UpBy2ShortToInt(const int16_t* in,
36 |                                int32_t len,  // WebRtcSpl_Resample8khzTo48khz passes 80 for int16_t in[80] -> int32_t out[160]
37 |                                int32_t* out,
38 |                                int32_t* state);
39 | 
40 | void WebRtcSpl_UpBy2IntToInt(const int32_t* in,
41 |                              int32_t len,  // WebRtcSpl_Resample8khzTo48khz passes 120 for int32_t in[120] -> int32_t out[240]
42 |                              int32_t* out,
43 |                              int32_t* state);
44 | 
45 | void WebRtcSpl_UpBy2IntToShort(const int32_t* in,
46 |                                int32_t len,  // callers pass 240 for int32_t in[240] -> int16_t out[480]
47 |                                int16_t* out,
48 |                                int32_t* state);
49 | 
50 | void WebRtcSpl_LPBy2ShortToInt(const int16_t* in,  // same parameter layout as WebRtcSpl_LPBy2IntToInt, with 16-bit input
51 |                                int32_t len,
52 |                                int32_t* out,
53 |                                int32_t* state);
54 | 
55 | void WebRtcSpl_LPBy2IntToInt(const int32_t* in,
56 |                              int32_t len,  // WebRtcSpl_Resample48khzTo8khz passes 240 for int32_t in[240] -> int32_t out[240]
57 |                              int32_t* out,
58 |                              int32_t* state);  // the 48 -> 8 reset routine clears 16 words for this stage's state
59 | 
60 | #endif  // COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_
61 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/spl_init.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  3 |  *
  4 |  *  Use of this source code is governed by a BSD-style license
  5 |  *  that can be found in the LICENSE file in the root of the source
  6 |  *  tree. An additional intellectual property rights grant can be found
  7 |  *  in the file PATENTS.  All contributing project authors may
  8 |  *  be found in the AUTHORS file in the root of the source tree.
  9 |  */
 10 | 
 11 | /* The global function contained in this file initializes SPL function
 12 |  * pointers: Neon or MIPS variants when enabled at build time, generic C otherwise.
 13 |  *
 14 |  * Some code came from common/rtcd.c in the WebM project.
 15 |  */
 16 | 
 17 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 18 | #include "webrtc/system_wrappers/include/cpu_features_wrapper.h"
 19 | 
 20 | /* Define the global SPL function pointers. They carry no initializer here; one of the InitPointersTo*() routines in this file assigns them when WebRtcSpl_Init() runs. */
 21 | MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
 22 | MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
 23 | MaxValueW16 WebRtcSpl_MaxValueW16;
 24 | MaxValueW32 WebRtcSpl_MaxValueW32;
 25 | MinValueW16 WebRtcSpl_MinValueW16;
 26 | MinValueW32 WebRtcSpl_MinValueW32;
 27 | CrossCorrelation WebRtcSpl_CrossCorrelation;
 28 | DownsampleFast WebRtcSpl_DownsampleFast;
 29 | ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
 30 | 
 31 | #if (!defined(WEBRTC_HAS_NEON)) && !defined(MIPS32_LE)  /* built only when neither optimized variant is selected */
 32 | /* Initialize function pointers to the generic C version: every one of the nine pointers is bound to its *C implementation. */
 33 | static void InitPointersToC(void) {
 34 |   WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C;
 35 |   WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
 36 |   WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C;
 37 |   WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C;
 38 |   WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C;
 39 |   WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C;
 40 |   WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC;
 41 |   WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC;
 42 |   WebRtcSpl_ScaleAndAddVectorsWithRound =
 43 |       WebRtcSpl_ScaleAndAddVectorsWithRoundC;
 44 | }
 45 | #endif  /* !WEBRTC_HAS_NEON && !MIPS32_LE */
 46 | 
 47 | #if defined(WEBRTC_HAS_NEON)
 48 | /* Initialize function pointers to the Neon version. Eight pointers get a *Neon routine; the ninth keeps the generic C routine. */
 49 | static void InitPointersToNeon(void) {
 50 |   WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon;
 51 |   WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon;
 52 |   WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon;
 53 |   WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon;
 54 |   WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon;
 55 |   WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon;
 56 |   WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationNeon;
 57 |   WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon;
 58 |   WebRtcSpl_ScaleAndAddVectorsWithRound =  /* no Neon routine is bound for this pointer */
 59 |       WebRtcSpl_ScaleAndAddVectorsWithRoundC;
 60 | }
 61 | #endif  /* WEBRTC_HAS_NEON */
 62 | 
 63 | #if defined(MIPS32_LE)
 64 | /* Initialize function pointers to the MIPS version. Two of the pointers depend on MIPS_DSP_R1_LE and fall back to the C routines without it. */
 65 | static void InitPointersToMIPS(void) {
 66 |   WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips;
 67 |   WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips;
 68 |   WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips;
 69 |   WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips;
 70 |   WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips;
 71 |   WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelation_mips;
 72 |   WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips;
 73 | #if defined(MIPS_DSP_R1_LE)  /* MIPS routines for the remaining two pointers */
 74 |   WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips;
 75 |   WebRtcSpl_ScaleAndAddVectorsWithRound =
 76 |       WebRtcSpl_ScaleAndAddVectorsWithRound_mips;
 77 | #else  /* generic C routines for the remaining two pointers */
 78 |   WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
 79 |   WebRtcSpl_ScaleAndAddVectorsWithRound =
 80 |       WebRtcSpl_ScaleAndAddVectorsWithRoundC;
 81 | #endif  /* MIPS_DSP_R1_LE */
 82 | }
 83 | #endif  /* MIPS32_LE */
 84 | 
 85 | static void InitFunctionPointers(void) {  /* compile-time dispatch: Neon first, then MIPS, otherwise the generic C set */
 86 | #if defined(WEBRTC_HAS_NEON)
 87 |   InitPointersToNeon();
 88 | #elif defined(MIPS32_LE)
 89 |   InitPointersToMIPS();
 90 | #else
 91 |   InitPointersToC();
 92 | #endif  /* WEBRTC_HAS_NEON */
 93 | }
 94 | 
 95 | #if defined(WEBRTC_POSIX)
 96 | #include <pthread.h>
 97 | /* POSIX variant: hands |func| to pthread_once() together with a function-local static control object. */
 98 | static void once(void (*func)(void)) {
 99 |   static pthread_once_t lock = PTHREAD_ONCE_INIT;
100 |   pthread_once(&lock, func);
101 | }
102 | 
103 | #elif defined(_WIN32)
104 | #include <windows.h>
105 | /* Windows variant: callers are serialized on a static CRITICAL_SECTION and |func| runs only while |done| is still 0. */
106 | static void once(void (*func)(void)) {
107 |   /* Didn't use InitializeCriticalSection() since there's no race-free context
108 |    * in which to execute it.
109 |    *
110 |    * TODO(kma): Change to different implementation (e.g.
111 |    * InterlockedCompareExchangePointer) to avoid issues similar to
112 |    * http://code.google.com/p/webm/issues/detail?id=467.
113 |    */
114 |   static CRITICAL_SECTION lock = {(void *)((size_t)-1), -1, 0, 0, 0, 0};  /* hand-written static initializer, see the comment above */
115 |   static int done = 0;  /* set to 1 after the first call to |func| returns */
116 | 
117 |   EnterCriticalSection(&lock);
118 |   if (!done) {
119 |     func();
120 |     done = 1;
121 |   }
122 |   LeaveCriticalSection(&lock);
123 | }
124 | 
125 | /* There's no fallback version as an #else block here to ensure thread safety.
126 |  * In case of neither pthread for WEBRTC_POSIX nor _WIN32 is present, build
127 |  * system should pick it up (once() is then left undefined for WebRtcSpl_Init()).
128 |  */
129 | #endif  /* WEBRTC_POSIX */
130 | 
131 | void WebRtcSpl_Init(void) {  /* public entry point: binds the SPL function pointers */
132 |   once(InitFunctionPointers);  /* routed through once() so the binding is guarded by the platform helper */
133 | }
134 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/spl_inl.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | #include <stdint.h>
12 | 
13 | #include "webrtc/common_audio/signal_processing/include/spl_inl.h"
14 | 
15 | // Table used by WebRtcSpl_CountLeadingZeros32_NotBuiltin. For each uint32_t n
16 | // that's a sequence of 0 bits followed by a sequence of 1 bits, the entry at
17 | // index (n * 0x8c0b2891) >> 26 in this table gives the number of zero bits in
18 | // n. Assuming the product is taken in 32-bit arithmetic, >> 26 leaves a 6-bit index, hence 64 entries.
19 | const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64] = {
20 |     32, 8,  17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18,
21 |     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0,  26, 25, 24,
22 |     4,  11, 23, 31, 3,  7,  10, 16, 22, 30, -1, -1, 2,  6,  13, 9,
23 |     -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1,  27, 5,  12,
24 | };
25 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/spl_sqrt.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
  3 |  *
  4 |  *  Use of this source code is governed by a BSD-style license
  5 |  *  that can be found in the LICENSE file in the root of the source
  6 |  *  tree. An additional intellectual property rights grant can be found
  7 |  *  in the file PATENTS.  All contributing project authors may
  8 |  *  be found in the AUTHORS file in the root of the source tree.
  9 |  */
 10 | 
 11 | 
 12 | /*
 13 |  * This file contains the function WebRtcSpl_Sqrt().
 14 |  * The description header can be found in signal_processing_library.h
 15 |  *
 16 |  */
 17 | 
 18 | #include "webrtc/rtc_base/checks.h"
 19 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 20 | 
 21 | int32_t WebRtcSpl_SqrtLocal(int32_t in);  // prototype for the helper defined right below (it is not declared static)
 22 | // Evaluates the polynomial t from the block comment inside the body in fixed-point arithmetic and returns it with a rounding bit added.
 23 | int32_t WebRtcSpl_SqrtLocal(int32_t in)
 24 | {
 25 | 
 26 |     int16_t x_half, t16;
 27 |     int32_t A, B, x2;
 28 | 
 29 |     /* The following block performs:
 30 |      y=in/2
 31 |      x=y-2^30
 32 |      x_half=x/2^31
 33 |      t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
 34 |          + 0.875*((x_half)^5)
 35 |      */
 36 | 
 37 |     B = in / 2;
 38 | 
 39 |     B = B - ((int32_t)0x40000000); // B = in/2 - 1/2
 40 |     x_half = (int16_t)(B >> 16);  // x_half = x/2 = (in-1)/2
 41 |     B = B + ((int32_t)0x40000000); // B = 1 + x/2
 42 |     B = B + ((int32_t)0x40000000); // Add 0.5 twice (since 1.0 does not exist in Q31)
 43 | 
 44 |     x2 = ((int32_t)x_half) * ((int32_t)x_half) * 2; // x2 = (x/2)^2
 45 |     A = -x2; // A = -(x/2)^2
 46 |     B = B + (A >> 1); // B = 1 + x/2 - 0.5*(x/2)^2
 47 | 
 48 |     A >>= 16;
 49 |     A = A * A * 2; // A = (x/2)^4
 50 |     t16 = (int16_t)(A >> 16);
 51 |     B += -20480 * t16 * 2;  // B = B - 0.625*A
 52 |     // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4
 53 | 
 54 |     A = x_half * t16 * 2;  // A = (x/2)^5
 55 |     t16 = (int16_t)(A >> 16);
 56 |     B += 28672 * t16 * 2;  // B = B + 0.875*A
 57 |     // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + 0.875*(x/2)^5
 58 | 
 59 |     t16 = (int16_t)(x2 >> 16);
 60 |     A = x_half * t16 * 2;  // A = (x/2)^3
 61 | 
 62 |     B = B + (A >> 1); // B = B + 0.5*A
 63 |     // After this, B = 1 + x/2 - 0.5*(x/2)^2 + 0.5*(x/2)^3 - 0.625*(x/2)^4 + 0.875*(x/2)^5
 64 | 
 65 |     B = B + ((int32_t)32768); // Round off bit
 66 | 
 67 |     return B;
 68 | }
 69 | 
 70 | int32_t WebRtcSpl_Sqrt(int32_t value)  // fixed-point square root of abs(value); returns 0 for an input of 0
 71 | {
 72 |     /*
 73 |      Algorithm:
 74 | 
 75 |      Six term Taylor Series is used here to compute the square root of a number
 76 |      y^0.5 = (1+x)^0.5 where x = y-1
 77 |      = 1+(x/2)-0.5*((x/2)^2)+0.5*((x/2)^3)-0.625*((x/2)^4)+0.875*((x/2)^5)
 78 |      0.5 <= y < 1
 79 | 
 80 |      Example of how the algorithm works, with ut=sqrt(in), and
 81 |      with in=73632 and ut=271 (even shift value case):
 82 | 
 83 |      in=73632
 84 |      y= in/131072
 85 |      x=y-1
 86 |      t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
 87 |      ut=t*(1/sqrt(2))*512
 88 | 
 89 |      or:
 90 | 
 91 |      in=73632
 92 |      in2=73632*2^14
 93 |      y= in2/2^31
 94 |      x=y-1
 95 |      t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
 96 |      ut=t*(1/sqrt(2))
 97 |      ut2=ut*2^9
 98 | 
 99 |      which gives:
100 | 
101 |      in  = 73632
102 |      in2 = 1206386688
103 |      y   = 0.56176757812500
104 |      x   = -0.43823242187500
105 |      t   = 0.74973506527313
106 |      ut  = 0.53014274874797
107 |      ut2 = 2.714330873589594e+002
108 | 
109 |      or:
110 | 
111 |      in=73632
112 |      in2=73632*2^14
113 |      y=in2/2
114 |      x=y-2^30
115 |      x_half=x/2^31
116 |      t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
117 |          + 0.875*((x_half)^5)
118 |      ut=t*(1/sqrt(2))
119 |      ut2=ut*2^9
120 | 
121 |      which gives:
122 | 
123 |      in  = 73632
124 |      in2 = 1206386688
125 |      y   = 603193344
126 |      x   = -470548480
127 |      x_half =  -0.21911621093750
128 |      t   = 0.74973506527313
129 |      ut  = 0.53014274874797
130 |      ut2 = 2.714330873589594e+002
131 | 
132 |      */
133 | 
134 |     int16_t x_norm, nshift, t16, sh;
135 |     int32_t A;
136 | 
137 |     int16_t k_sqrt_2 = 23170; // 1/sqrt2 (==5a82); 23170 / 32768 is about 0.7071
138 | 
139 |     A = value;
140 | 
141 |     // The convention in this function is to calculate sqrt(abs(A)). Negate the
142 |     // input if it is negative.
143 |     if (A < 0) {
144 |         if (A == WEBRTC_SPL_WORD32_MIN) {
145 |             // This number cannot be held in an int32_t after negating.
146 |             // Map it to the maximum positive value.
147 |             A = WEBRTC_SPL_WORD32_MAX;
148 |         } else {
149 |             A = -A;
150 |         }
151 |     } else if (A == 0) {
152 |         return 0;  // sqrt(0) = 0
153 |     }
154 | 
155 |     sh = WebRtcSpl_NormW32(A); // # shifts to normalize A
156 |     A = WEBRTC_SPL_LSHIFT_W32(A, sh); // Normalize A
157 |     if (A < (WEBRTC_SPL_WORD32_MAX - 32767))
158 |     {
159 |         A = A + ((int32_t)32768); // Round off bit
160 |     } else
161 |     {
162 |         A = WEBRTC_SPL_WORD32_MAX;  // adding the rounding bit would overflow, so clamp instead
163 |     }
164 | 
165 |     x_norm = (int16_t)(A >> 16);  // x_norm = AH
166 | 
167 |     nshift = (sh / 2);  // the result is de-normalized by half of the normalization shift
168 |     RTC_DCHECK_GE(nshift, 0);
169 | 
170 |     A = (int32_t)WEBRTC_SPL_LSHIFT_W32((int32_t)x_norm, 16);
171 |     A = WEBRTC_SPL_ABS_W32(A); // A = abs(x_norm<<16)
172 |     A = WebRtcSpl_SqrtLocal(A); // A = sqrt(A)
173 | 
174 |     if (2 * nshift == sh) {
175 |         // Even shift value case
176 | 
177 |         t16 = (int16_t)(A >> 16);  // t16 = AH
178 | 
179 |         A = k_sqrt_2 * t16 * 2;  // A = 1/sqrt(2)*t16
180 |         A = A + ((int32_t)32768); // Round off
181 |         A = A & ((int32_t)0x7fff0000); // Round off
182 | 
183 |         A >>= 15;  // A = A>>15
184 | 
185 |     } else
186 |     {
187 |         A >>= 16;  // A = A>>16
188 |     }
189 | 
190 |     A = A & ((int32_t)0x0000ffff);  // keep only the low 16 bits
191 |     A >>= nshift;  // De-normalize the result.
192 | 
193 |     return A;
194 | }
195 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/vector_scaling_operations.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  3 |  *
  4 |  *  Use of this source code is governed by a BSD-style license
  5 |  *  that can be found in the LICENSE file in the root of the source
  6 |  *  tree. An additional intellectual property rights grant can be found
  7 |  *  in the file PATENTS.  All contributing project authors may
  8 |  *  be found in the AUTHORS file in the root of the source tree.
  9 |  */
 10 | 
 11 | 
 12 | /*
 13 |  * This file contains implementations of the functions
 14 |  * WebRtcSpl_VectorBitShiftW16()
 15 |  * WebRtcSpl_VectorBitShiftW32()
 16 |  * WebRtcSpl_VectorBitShiftW32ToW16()
 17 |  * WebRtcSpl_ScaleVector()
 18 |  * WebRtcSpl_ScaleVectorWithSat()
 19 |  * WebRtcSpl_ScaleAndAddVectors()
 20 |  * WebRtcSpl_ScaleAndAddVectorsWithRoundC()
 21 |  */
 22 | 
 23 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 24 | 
 25 | void WebRtcSpl_VectorBitShiftW16(int16_t *res, size_t length,
 26 |                                  const int16_t *in, int16_t right_shifts)
 27 | {
 28 |     // Writes |length| shifted copies of the elements of |in| to |res|.
 29 |     // A positive count right-shifts each element; zero or a negative count
 30 |     // multiplies each element by 2^(-right_shifts) instead.
 31 |     size_t idx;
 32 | 
 33 |     if (right_shifts <= 0) {
 34 |         for (idx = 0; idx < length; ++idx) {
 35 |             res[idx] = in[idx] * (1 << (-right_shifts));
 36 |         }
 37 |         return;
 38 |     }
 39 | 
 40 |     for (idx = 0; idx < length; ++idx) {
 41 |         res[idx] = in[idx] >> right_shifts;
 42 |     }
 43 | }
 44 | 
 45 | void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector,
 46 |                                  size_t vector_length,
 47 |                                  const int32_t *in_vector,
 48 |                                  int16_t right_shifts)
 49 | {
 50 |     // Copies |vector_length| elements from |in_vector| to |out_vector|,
 51 |     // shifting each one right when |right_shifts| is positive and left by
 52 |     // -right_shifts bits otherwise (a count of zero is a plain copy).
 53 |     size_t pos;
 54 | 
 55 |     if (right_shifts <= 0) {
 56 |         for (pos = 0; pos < vector_length; ++pos) {
 57 |             out_vector[pos] = in_vector[pos] << (-right_shifts);
 58 |         }
 59 |         return;
 60 |     }
 61 | 
 62 |     for (pos = 0; pos < vector_length; ++pos) {
 63 |         out_vector[pos] = in_vector[pos] >> right_shifts;
 64 |     }
 65 | }
 66 | 
 67 | void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length,
 68 |                                       const int32_t* in, int right_shifts) {
 69 |   // Shifts each 32-bit input (right for a non-negative count, left by
 70 |   // -right_shifts otherwise) and narrows it via WebRtcSpl_SatW32ToW16().
 71 |   size_t n;
 72 | 
 73 |   if (right_shifts < 0) {
 74 |     const int left_shifts = -right_shifts;
 75 |     for (n = 0; n < length; ++n) {
 76 |       out[n] = WebRtcSpl_SatW32ToW16(in[n] << left_shifts);
 77 |     }
 78 |     return;
 79 |   }
 80 | 
 81 |   for (n = 0; n < length; ++n) {
 82 |     out[n] = WebRtcSpl_SatW32ToW16(in[n] >> right_shifts);
 83 |   }
 84 | }
 85 | 
 86 | void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector,
 87 |                            int16_t gain, size_t in_vector_length,
 88 |                            int16_t right_shifts)
 89 | {
 90 |     // Element-wise scaling:
 91 |     //   out_vector[k] = (int16_t)((in_vector[k] * gain) >> right_shifts)
 92 |     // for k = 0 .. in_vector_length - 1. The product is formed in int before
 93 |     // the shift and narrowed with a plain cast (no saturation helper is
 94 |     // applied here).
 95 |     size_t k;
 96 | 
 97 |     for (k = 0; k < in_vector_length; ++k)
 98 |     {
 99 |         const int scaled = (in_vector[k] * gain) >> right_shifts;
100 |         out_vector[k] = (int16_t)scaled;
101 |     }
102 | }
103 | 
104 | void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector,
105 |                                  int16_t gain, size_t in_vector_length,
106 |                                  int16_t right_shifts)
107 | {
108 |     // Element-wise scaling:
109 |     //   out_vector[k] = WebRtcSpl_SatW32ToW16((in_vector[k] * gain) >> right_shifts)
110 |     // for k = 0 .. in_vector_length - 1; the 16-bit narrowing is delegated to
111 |     // WebRtcSpl_SatW32ToW16() instead of a plain cast.
112 |     size_t k = 0;
113 | 
114 |     while (k < in_vector_length) {
115 |         const int32_t scaled = (in_vector[k] * gain) >> right_shifts;
116 |         out_vector[k] = WebRtcSpl_SatW32ToW16(scaled);
117 |         ++k;
118 |     }
119 | }
120 | 
121 | void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1,
122 |                                   const int16_t *in2, int16_t gain2, int shift2,
123 |                                   int16_t *out, size_t vector_length)
124 | {
125 |     // For every index k below |vector_length|:
126 |     //   out[k] = (int16_t)((gain1 * in1[k]) >> shift1) +
127 |     //            (int16_t)((gain2 * in2[k]) >> shift2)
128 |     // Each scaled term is narrowed to 16 bits on its own before the two are
129 |     // added; the sum is narrowed again when it is stored in |out|.
130 |     size_t k;
131 | 
132 |     for (k = 0; k < vector_length; ++k)
133 |     {
134 |         const int16_t first = (int16_t)((gain1 * in1[k]) >> shift1);
135 |         const int16_t second = (int16_t)((gain2 * in2[k]) >> shift2);
136 | 
137 |         // Both operands promote to int for the addition.
138 |         out[k] = first + second;
139 |     }
140 | }
141 | 
142 | // C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms. Stores (in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale + round) >> right_shifts in out_vector[i]; returns 0 on success and -1 for a NULL pointer, length == 0 or right_shifts < 0.
143 | int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
144 |                                            int16_t in_vector1_scale,
145 |                                            const int16_t* in_vector2,
146 |                                            int16_t in_vector2_scale,
147 |                                            int right_shifts,
148 |                                            int16_t* out_vector,
149 |                                            size_t length) {
150 |   size_t i;
151 |   int round_value;  // assigned only after validation: shifting by a negative count is undefined
152 | 
153 |   if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
154 |       length == 0 || right_shifts < 0) {
155 |     return -1;  // invalid argument; out_vector is left untouched
156 |   }
157 |   round_value = (1 << right_shifts) >> 1;  // half of 2^right_shifts (0 when right_shifts is 0)
158 |   for (i = 0; i < length; i++) {
159 |     out_vector[i] = (int16_t)((
160 |         in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale +
161 |         round_value) >> right_shifts);
162 |   }
163 | 
164 |   return 0;
165 | }
166 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Written by Wilco Dijkstra, 1996. The following email exchange establishes the
 3 |  * license.
 4 |  *
 5 |  * From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
 6 |  * Date: Fri, Jun 24, 2011 at 3:20 AM
 7 |  * Subject: Re: sqrt routine
 8 |  * To: Kevin Ma <kma@google.com>
 9 |  * Hi Kevin,
10 |  * Thanks for asking. Those routines are public domain (originally posted to
11 |  * comp.sys.arm a long time ago), so you can use them freely for any purpose.
12 |  * Cheers,
13 |  * Wilco
14 |  *
15 |  * ----- Original Message -----
16 |  * From: "Kevin Ma" <kma@google.com>
17 |  * To: <Wilco.Dijkstra@ntlworld.com>
18 |  * Sent: Thursday, June 23, 2011 11:44 PM
19 |  * Subject: Fwd: sqrt routine
20 |  * Hi Wilco,
21 |  * I saw your sqrt routine from several web sites, including
22 |  * http://www.finesse.demon.co.uk/steven/sqrt.html.
23 |  * Just wonder if there's any copyright information with your Successive
24 |  * approximation routines, or if I can freely use it for any purpose.
25 |  * Thanks.
26 |  * Kevin
27 |  */
28 | 
29 | // Minor modifications in code style for WebRTC, 2012.
30 | 
31 | #include "webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h"
32 | 
33 | /*
34 |  * Algorithm:
35 |  * Successive approximation of the equation (root + delta) ^ 2 = N
36 |  * until delta < 1. If delta < 1 we have the integer part of SQRT (N).
37 |  * Use delta = 2^i for i = 15 .. 0.
38 |  *
39 |  * Output precision is 16 bits. Note for large input values (close to
40 |  * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
41 |  * contains the MSB information (a non-sign value). Do with caution
42 |  * if you need to cast the output to int16_t type.
43 |  *
44 |  * If the input value is negative, it returns 0.
45 |  */
46 | 
47 | #define WEBRTC_SPL_SQRT_ITER(N)                 \
48 |   try1 = root + (1 << (N));                     \
49 |   if (value >= try1 << (N))                     \
50 |   {                                             \
51 |     value -= try1 << (N);                       \
52 |     root |= 2 << (N);                           \
53 |   }  /* one approximation step for bit N; expands in place and uses the locals value, root and try1 */
54 | 
55 | int32_t WebRtcSpl_SqrtFloor(int32_t value)
56 | {
57 |   int32_t root = 0, try1;  // root is built up at twice its final scale (each step ORs in 2 << N)
58 |   // Unrolled loop: one step per result bit, from bit 15 down to bit 0.
59 |   WEBRTC_SPL_SQRT_ITER (15);
60 |   WEBRTC_SPL_SQRT_ITER (14);
61 |   WEBRTC_SPL_SQRT_ITER (13);
62 |   WEBRTC_SPL_SQRT_ITER (12);
63 |   WEBRTC_SPL_SQRT_ITER (11);
64 |   WEBRTC_SPL_SQRT_ITER (10);
65 |   WEBRTC_SPL_SQRT_ITER ( 9);
66 |   WEBRTC_SPL_SQRT_ITER ( 8);
67 |   WEBRTC_SPL_SQRT_ITER ( 7);
68 |   WEBRTC_SPL_SQRT_ITER ( 6);
69 |   WEBRTC_SPL_SQRT_ITER ( 5);
70 |   WEBRTC_SPL_SQRT_ITER ( 4);
71 |   WEBRTC_SPL_SQRT_ITER ( 3);
72 |   WEBRTC_SPL_SQRT_ITER ( 2);
73 |   WEBRTC_SPL_SQRT_ITER ( 1);
74 |   WEBRTC_SPL_SQRT_ITER ( 0);
75 | 
76 |   return root >> 1;  // drop the factor of two carried by root
77 | }
78 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | #include <stdint.h>
12 | // NOTE(review): this header has no include guard; it only declares one function, so repeated inclusion is harmless.
13 | //
14 | // WebRtcSpl_SqrtFloor(...)
15 | //
16 | // Returns the square root of the input value |value|. The precision of this
17 | // function is rounding down integer precision, i.e., sqrt(8) gives 2 as answer.
18 | // If |value| is a negative number then 0 is returned.
19 | //
20 | // Algorithm:
21 | //
22 | // An iterative 4 cycle/bit routine
23 | //
24 | // Input:
25 | //      - value     : Value to calculate sqrt of
26 | //
27 | // Return value     : Result of the sqrt calculation
28 | //
29 | int32_t WebRtcSpl_SqrtFloor(int32_t value);
30 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/vad/include/webrtc_vad.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | /*
12 |  * This header file includes the VAD API calls. Specific function calls are
13 |  * given below.
14 |  */
15 | 
16 | #ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
17 | #define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_
18 | 
19 | #include <stddef.h>
20 | #include <stdint.h>
21 | 
22 | typedef struct WebRtcVadInst VadInst;  // opaque handle: the struct is only forward-declared in this header
23 | 
24 | #ifdef __cplusplus
25 | extern "C" {
26 | #endif
27 | 
28 | // Creates an instance to the VAD structure.
29 | VadInst* WebRtcVad_Create(void);
30 | 
31 | // Frees the dynamic memory of a specified VAD instance.
32 | //
33 | // - handle [i] : Pointer to VAD instance that should be freed.
34 | void WebRtcVad_Free(VadInst* handle);
35 | 
36 | // Initializes a VAD instance.
37 | //
38 | // - handle [i/o] : Instance that should be initialized.
39 | //
40 | // returns        : 0 - (OK),
41 | //                 -1 - (null pointer or Default mode could not be set).
42 | int WebRtcVad_Init(VadInst* handle);
43 | 
44 | // Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
45 | // restrictive in reporting speech. Put in other words the probability of being
46 | // speech when the VAD returns 1 is increased with increasing mode. As a
47 | // consequence also the missed detection rate goes up.
48 | //
49 | // - handle [i/o] : VAD instance.
50 | // - mode   [i]   : Aggressiveness mode (0, 1, 2, or 3).
51 | //
52 | // returns        : 0 - (OK),
53 | //                 -1 - (null pointer, mode could not be set or the VAD instance
54 | //                       has not been initialized).
55 | int WebRtcVad_set_mode(VadInst* handle, int mode);
56 | 
57 | // Calculates a VAD decision for the |audio_frame|. For valid sampling rates and
58 | // frame lengths, see the description of WebRtcVad_ValidRateAndFrameLength().
59 | //
60 | // - handle       [i/o] : VAD Instance. Needs to be initialized by
61 | //                        WebRtcVad_Init() before call.
62 | // - fs           [i]   : Sampling frequency (Hz): 8000, 16000, or 32000
63 | // - audio_frame  [i]   : Audio frame buffer.
64 | // - frame_length [i]   : Length of audio frame buffer in number of samples.
65 | //
66 | // returns              : 1 - (Active Voice),
67 | //                        0 - (Non-active Voice),
68 | //                       -1 - (Error)
69 | int WebRtcVad_Process(VadInst* handle,
70 |                       int fs,
71 |                       const int16_t* audio_frame,
72 |                       size_t frame_length);
73 | 
74 | // Checks for valid combinations of |rate| and |frame_length|. We support 10,
75 | // 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz.
76 | // NOTE(review): vad_core.h keeps a 48 kHz -> 8 kHz resampler state in the VAD instance, so 48000 Hz may be accepted as well - confirm against the implementation.
77 | // - rate         [i] : Sampling frequency (Hz).
78 | // - frame_length [i] : Speech frame buffer length in number of samples.
79 | //
80 | // returns            : 0 - (valid combination), -1 - (invalid combination)
81 | int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length);
82 | 
83 | #ifdef __cplusplus
84 | }
85 | #endif
86 | 
87 | #endif  // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
88 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/vad/vad_core.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  3 |  *
  4 |  *  Use of this source code is governed by a BSD-style license
  5 |  *  that can be found in the LICENSE file in the root of the source
  6 |  *  tree. An additional intellectual property rights grant can be found
  7 |  *  in the file PATENTS.  All contributing project authors may
  8 |  *  be found in the AUTHORS file in the root of the source tree.
  9 |  */
 10 | 
 11 | /*
 12 |  * This header file includes the descriptions of the core VAD calls.
 13 |  */
 14 | 
 15 | #ifndef COMMON_AUDIO_VAD_VAD_CORE_H_
 16 | #define COMMON_AUDIO_VAD_VAD_CORE_H_
 17 | 
 18 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 19 | 
 20 | enum { kNumChannels = 6 };   // Number of frequency bands (named channels).
 21 | enum { kNumGaussians = 2 };  // Number of Gaussians per channel in the GMM.
 22 | enum { kTableSize = kNumChannels * kNumGaussians };  // One entry per pair.
 23 | enum { kMinEnergy = 10 };  // Minimum energy required to trigger audio signal.
 24 | // State of one VAD instance; webrtc_vad.c casts VadInst handles to this type.
 25 | typedef struct VadInstT_ {
 26 |   int vad;
 27 |   int32_t downsampling_filter_states[4];
 28 |   WebRtcSpl_State48khzTo8khz state_48_to_8;
 29 |   int16_t noise_means[kTableSize];
 30 |   int16_t speech_means[kTableSize];
 31 |   int16_t noise_stds[kTableSize];
 32 |   int16_t speech_stds[kTableSize];
 33 |   // TODO(bjornv): Change to |frame_count|.
 34 |   int32_t frame_counter;
 35 |   int16_t over_hang;  // Over Hang
 36 |   int16_t num_of_speech;
 37 |   // TODO(bjornv): Change to |age_vector|.
 38 |   int16_t index_vector[16 * kNumChannels];  // Age of each low value.
 39 |   int16_t low_value_vector[16 * kNumChannels];  // 16 smallest values/channel.
 40 |   // TODO(bjornv): Change to |median|.
 41 |   int16_t mean_value[kNumChannels];  // Smoothed minimum per channel.
 42 |   int16_t upper_state[5];
 43 |   int16_t lower_state[5];
 44 |   int16_t hp_filter_state[4];
 45 |   int16_t over_hang_max_1[3];
 46 |   int16_t over_hang_max_2[3];
 47 |   int16_t individual[3];
 48 |   int16_t total[3];
 49 |   // Zeroed by WebRtcVad_Create(); webrtc_vad.c requires it to equal kInitCheck.
 50 |   int init_flag;
 51 | } VadInstT;
 52 | 
 53 | // Initializes the core VAD component. The default aggressiveness mode is
 54 | // controlled by |kDefaultMode| in vad_core.c.
 55 | //
 56 | // - self [i/o] : Instance that should be initialized
 57 | //
 58 | // returns      : 0 (OK), -1 (null pointer passed in, or the default mode
 59 | //                can't be set)
 60 | int WebRtcVad_InitCore(VadInstT* self);
 61 | 
 62 | /****************************************************************************
 63 |  * WebRtcVad_set_mode_core(...)
 64 |  *
 65 |  * This function changes the VAD settings
 66 |  *
 67 |  * Input:
 68 |  *      - inst      : VAD instance
 69 |  *      - mode      : Aggressiveness degree
 70 |  *                    0 (High quality) - 3 (Highly aggressive)
 71 |  *
 72 |  * Output:
 73 |  *      - inst      : Changed  instance
 74 |  *
 75 |  * Return value     :  0 - Ok
 76 |  *                    -1 - Error
 77 |  */
 78 | 
 79 | int WebRtcVad_set_mode_core(VadInstT* self, int mode);
 80 | 
 81 | /****************************************************************************
 82 |  * WebRtcVad_CalcVad48khz(...)
 83 |  * WebRtcVad_CalcVad32khz(...)
 84 |  * WebRtcVad_CalcVad16khz(...)
 85 |  * WebRtcVad_CalcVad8khz(...)
 86 |  *
 87 |  * Calculate probability for active speech and make VAD decision.
 88 |  *
 89 |  * Input:
 90 |  *      - inst          : VAD instance
 91 |  *      - speech_frame  : Input speech frame
 92 |  *      - frame_length  : Number of input samples
 93 |  *
 94 |  * Output:
 95 |  *      - inst          : Updated filter states etc.
 96 |  *
 97 |  * Return value         : VAD decision
 98 |  *                        0 - No active speech
 99 |  *                        1-6 - Active speech
100 |  */
101 | int WebRtcVad_CalcVad48khz(VadInstT* inst,
102 |                            const int16_t* speech_frame,
103 |                            size_t frame_length);
104 | int WebRtcVad_CalcVad32khz(VadInstT* inst,
105 |                            const int16_t* speech_frame,
106 |                            size_t frame_length);
107 | int WebRtcVad_CalcVad16khz(VadInstT* inst,
108 |                            const int16_t* speech_frame,
109 |                            size_t frame_length);
110 | int WebRtcVad_CalcVad8khz(VadInstT* inst,
111 |                           const int16_t* speech_frame,
112 |                           size_t frame_length);
113 | 
114 | #endif  // COMMON_AUDIO_VAD_VAD_CORE_H_
115 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/vad/vad_filterbank.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | /*
12 |  * This file includes feature calculating functionality used in vad_core.c.
13 |  */
14 | 
15 | #ifndef COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
16 | #define COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
17 | 
18 | #include "webrtc/common_audio/vad/vad_core.h"
19 | 
20 | // Takes |data_length| samples of |data_in| and calculates the logarithm of the
21 | // energy of each of the |kNumChannels| = 6 frequency bands used by the VAD:
22 | //        80 Hz - 250 Hz
23 | //        250 Hz - 500 Hz
24 | //        500 Hz - 1000 Hz
25 | //        1000 Hz - 2000 Hz
26 | //        2000 Hz - 3000 Hz
27 | //        3000 Hz - 4000 Hz
28 | //
29 | // The values are given in Q4 and written to |features|. Further, an approximate
30 | // overall energy is returned. The return value is used in
31 | // WebRtcVad_GmmProbability() as a signal indicator, hence it is arbitrary above
32 | // the threshold |kMinEnergy|.
33 | //
34 | // - self         [i/o] : State information of the VAD.
35 | // - data_in      [i]   : Input audio data, for feature extraction.
36 | // - data_length  [i]   : Audio data size, in number of samples.
37 | // - features     [o]   : 10 * log10(energy in each frequency band), Q4.
38 | // - returns            : Total energy of the signal (NOTE! This value is not
39 | //                        exact. It is only used in a comparison.)
40 | int16_t WebRtcVad_CalculateFeatures(VadInstT* self,
41 |                                     const int16_t* data_in,
42 |                                     size_t data_length,
43 |                                     int16_t* features);
44 | 
45 | #endif  // COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
46 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/vad/vad_gmm.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | #include "webrtc/common_audio/vad/vad_gmm.h"
12 | 
13 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
14 | 
15 | static const int32_t kCompVar = 22005;
16 | static const int16_t kLog2Exp = 5909;  // log2(exp(1)) in Q12.
17 | 
18 | // For a normal distribution, the probability of |input| is calculated and
19 | // returned (in Q20). The formula for normal distributed probability is
20 | //
21 | // 1 / s * exp(-(x - m)^2 / (2 * s^2))
22 | //
23 | // where the parameters are given in the following Q domains:
24 | // m = |mean| (Q7)
25 | // s = |std| (Q7)
26 | // x = |input| (Q4)
27 | // In addition to the probability we output |delta| (in Q11) used when updating
28 | // the noise/speech model.
29 | int32_t WebRtcVad_GaussianProbability(int16_t input,
30 |                                       int16_t mean,
31 |                                       int16_t std,
32 |                                       int16_t* delta) {
33 |   int16_t tmp16, inv_std, inv_std2, exp_value = 0;
34 |   int32_t tmp32;
35 |   // |exp_value| keeps its initial 0 unless the exponent test below passes.
36 |   // Calculate |inv_std| = 1 / s, in Q10.
37 |   // 131072 = 1 in Q17, and (|std| >> 1) is for rounding instead of truncation.
38 |   // Q-domain: Q17 / Q7 = Q10.
39 |   tmp32 = (int32_t) 131072 + (int32_t) (std >> 1);
40 |   inv_std = (int16_t) WebRtcSpl_DivW32W16(tmp32, std);
41 | 
42 |   // Calculate |inv_std2| = 1 / s^2, in Q14.
43 |   tmp16 = (inv_std >> 2);  // Q10 -> Q8.
44 |   // Q-domain: (Q8 * Q8) >> 2 = Q14.
45 |   inv_std2 = (int16_t)((tmp16 * tmp16) >> 2);
46 |   // TODO(bjornv): Investigate if changing to
47 |   // inv_std2 = (int16_t)((inv_std * inv_std) >> 6);
48 |   // gives better accuracy.
49 |   // NOTE(review): "<<" on a negative |input| is undefined in ISO C - confirm.
50 |   tmp16 = (input << 3);  // Q4 -> Q7
51 |   tmp16 = tmp16 - mean;  // Q7 - Q7 = Q7
52 | 
53 |   // To be used later, when updating noise/speech model.
54 |   // |delta| = (x - m) / s^2, in Q11.
55 |   // Q-domain: (Q14 * Q7) >> 10 = Q11.
56 |   *delta = (int16_t)((inv_std2 * tmp16) >> 10);
57 | 
58 |   // Calculate the exponent |tmp32| = (x - m)^2 / (2 * s^2), in Q10. Replacing
59 |   // division by two with one shift.
60 |   // Q-domain: (Q11 * Q7) >> 8 = Q10.
61 |   tmp32 = (*delta * tmp16) >> 9;
62 | 
63 |   // If the exponent is small enough to give a non-zero probability we calculate
64 |   // |exp_value| ~= exp(-(x - m)^2 / (2 * s^2))
65 |   //             ~= exp2(-log2(exp(1)) * |tmp32|).
66 |   if (tmp32 < kCompVar) {
67 |     // Calculate |tmp16| = log2(exp(1)) * |tmp32|, in Q10.
68 |     // Q-domain: (Q12 * Q10) >> 12 = Q10.
69 |     tmp16 = (int16_t)((kLog2Exp * tmp32) >> 12);
70 |     tmp16 = -tmp16;  // Negate to form the exp2() argument.
71 |     exp_value = (0x0400 | (tmp16 & 0x03FF));  // 1.0 (Q10) | low 10 bits.
72 |     tmp16 ^= 0xFFFF;  // Flip the low 16 bits.
73 |     tmp16 >>= 10;  // Drop the 10 fractional bits.
74 |     tmp16 += 1;  // |tmp16| is now the right-shift count used below.
75 |     // Get |exp_value| = exp(-|tmp32|) in Q10.
76 |     exp_value >>= tmp16;
77 |   }
78 | 
79 |   // Calculate and return (1 / s) * exp(-(x - m)^2 / (2 * s^2)), in Q20.
80 |   // Q-domain: Q10 * Q10 = Q20.
81 |   return inv_std * exp_value;
82 | }
83 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/vad/vad_gmm.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | // Gaussian probability calculations internally used in vad_core.c.
12 | 
13 | #ifndef COMMON_AUDIO_VAD_VAD_GMM_H_
14 | #define COMMON_AUDIO_VAD_VAD_GMM_H_
15 | 
16 | #include <stdint.h>
17 | 
18 | // Calculates the probability for |input|, given that |input| comes from a
19 | // normal distribution with mean and standard deviation (|mean|, |std|).
20 | //
21 | // Inputs:
22 | //      - input         : input sample in Q4.
23 | //      - mean          : mean input in the statistical model, Q7.
24 | //      - std           : standard deviation, Q7.
25 | //
26 | // Output:
27 | //
28 | //      - delta         : input used when updating the model, Q11.
29 | //                        |delta| = (|input| - |mean|) / |std|^2.
30 | //
31 | // Return:
32 | //   (probability for |input|) =
33 | //    1 / |std| * exp(-(|input| - |mean|)^2 / (2 * |std|^2));
34 | int32_t WebRtcVad_GaussianProbability(int16_t input,
35 |                                       int16_t mean,
36 |                                       int16_t std,
37 |                                       int16_t* delta);
38 | 
39 | #endif  // COMMON_AUDIO_VAD_VAD_GMM_H_
40 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/vad/vad_sp.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  3 |  *
  4 |  *  Use of this source code is governed by a BSD-style license
  5 |  *  that can be found in the LICENSE file in the root of the source
  6 |  *  tree. An additional intellectual property rights grant can be found
  7 |  *  in the file PATENTS.  All contributing project authors may
  8 |  *  be found in the AUTHORS file in the root of the source tree.
  9 |  */
 10 | 
 11 | #include "webrtc/common_audio/vad/vad_sp.h"
 12 | 
 13 | #include "webrtc/rtc_base/checks.h"
 14 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 15 | #include "webrtc/common_audio/vad/vad_core.h"
 16 | 
 17 | // Allpass filter coefficients, upper and lower, in Q13.
 18 | // Upper: 0.64, Lower: 0.17.
 19 | static const int16_t kAllPassCoefsQ13[2] = { 5243, 1392 };  // Q13.
 20 | static const int16_t kSmoothingDown = 6553;  // 0.2 in Q15.
 21 | static const int16_t kSmoothingUp = 32439;  // 0.99 in Q15.
 22 | 
 23 | // TODO(bjornv): Move this function to vad_filterbank.c.
 24 | // Downsamples by two: each input pair feeds an upper and a lower all-pass.
 25 | void WebRtcVad_Downsampling(const int16_t* signal_in,
 26 |                             int16_t* signal_out,
 27 |                             int32_t* filter_state,
 28 |                             size_t in_length) {
 29 |   // Two input samples are consumed for every output sample.
 30 |   const size_t out_length = in_length >> 1;
 31 |   int32_t upper_state = filter_state[0];  // Filter states are in Q0.
 32 |   int32_t lower_state = filter_state[1];
 33 |   int16_t upper_out = 0;
 34 |   int16_t lower_out = 0;
 35 |   size_t k;
 36 | 
 37 |   for (k = 0; k < out_length; ++k) {
 38 |     const size_t even = 2 * k;
 39 |     const size_t odd = even + 1;
 40 |     // Upper all-pass branch (coefficient in Q13) takes the first sample.
 41 |     upper_out = (int16_t) ((upper_state >> 1) +
 42 |         ((kAllPassCoefsQ13[0] * signal_in[even]) >> 14));
 43 |     signal_out[k] = upper_out;
 44 |     upper_state = (int32_t) signal_in[even] - ((kAllPassCoefsQ13[0] * upper_out) >> 12);
 45 |     // Lower all-pass branch takes the second sample; its output is added.
 46 |     lower_out = (int16_t) ((lower_state >> 1) +
 47 |         ((kAllPassCoefsQ13[1] * signal_in[odd]) >> 14));
 48 |     signal_out[k] += lower_out;
 49 |     lower_state = (int32_t) signal_in[odd] - ((kAllPassCoefsQ13[1] * lower_out) >> 12);
 50 |   }
 51 |   filter_state[0] = upper_state;  // Persist both states for the next call.
 52 |   filter_state[1] = lower_state;
 53 | }
 54 | 
 55 | // Inserts |feature_value| into |low_value_vector|, if it is one of the 16
 56 | // smallest values of the last 100 frames. Then smooths the median of the five
 57 | // smallest values into |mean_value| and returns that smoothed value.
 58 | int16_t WebRtcVad_FindMinimum(VadInstT* self,
 59 |                               int16_t feature_value,
 60 |                               int channel) {
 61 |   int i = 0, j = 0;
 62 |   int position = -1;  // Stays -1 when |feature_value| is not inserted.
 63 |   // Offset to beginning of the 16 minimum values in memory.
 64 |   const int offset = (channel << 4);
 65 |   int16_t current_median = 1600;  // Used as is while |frame_counter| is 0.
 66 |   int16_t alpha = 0;
 67 |   int32_t tmp32 = 0;
 68 |   // Pointer to memory for the 16 minimum values and the age of each value of
 69 |   // the |channel|.
 70 |   int16_t* age = &self->index_vector[offset];
 71 |   int16_t* smallest_values = &self->low_value_vector[offset];
 72 | 
 73 |   RTC_DCHECK_LT(channel, kNumChannels);
 74 | 
 75 |   // Each value in |smallest_values| is getting 1 loop older. Update |age|, and
 76 |   // remove old values.
 77 |   for (i = 0; i < 16; i++) {
 78 |     if (age[i] != 100) {
 79 |       age[i]++;
 80 |     } else {
 81 |       // Too old value. Remove from memory and shift larger values downwards.
 82 |       for (j = i; j < 15; j++) {
 83 |         smallest_values[j] = smallest_values[j + 1];
 84 |         age[j] = age[j + 1];
 85 |       }
 86 |       age[15] = 101;  // Placeholder age for the vacated last slot.
 87 |       smallest_values[15] = 10000;  // Placeholder value for that slot.
 88 |     }
 89 |   }
 90 | 
 91 |   // Check if |feature_value| is smaller than any of the values in
 92 |   // |smallest_values|. If so, find the |position| where to insert the new value
 93 |   // (|feature_value|).
 94 |   if (feature_value < smallest_values[7]) {
 95 |     if (feature_value < smallest_values[3]) {
 96 |       if (feature_value < smallest_values[1]) {
 97 |         if (feature_value < smallest_values[0]) {
 98 |           position = 0;
 99 |         } else {
100 |           position = 1;
101 |         }
102 |       } else if (feature_value < smallest_values[2]) {
103 |         position = 2;
104 |       } else {
105 |         position = 3;
106 |       }
107 |     } else if (feature_value < smallest_values[5]) {
108 |       if (feature_value < smallest_values[4]) {
109 |         position = 4;
110 |       } else {
111 |         position = 5;
112 |       }
113 |     } else if (feature_value < smallest_values[6]) {
114 |       position = 6;
115 |     } else {
116 |       position = 7;
117 |     }
118 |   } else if (feature_value < smallest_values[15]) {
119 |     if (feature_value < smallest_values[11]) {
120 |       if (feature_value < smallest_values[9]) {
121 |         if (feature_value < smallest_values[8]) {
122 |           position = 8;
123 |         } else {
124 |           position = 9;
125 |         }
126 |       } else if (feature_value < smallest_values[10]) {
127 |         position = 10;
128 |       } else {
129 |         position = 11;
130 |       }
131 |     } else if (feature_value < smallest_values[13]) {
132 |       if (feature_value < smallest_values[12]) {
133 |         position = 12;
134 |       } else {
135 |         position = 13;
136 |       }
137 |     } else if (feature_value < smallest_values[14]) {
138 |       position = 14;
139 |     } else {
140 |       position = 15;
141 |     }
142 |   }
143 | 
144 |   // If we have detected a new small value, insert it at the correct position
145 |   // and shift larger values up.
146 |   if (position > -1) {
147 |     for (i = 15; i > position; i--) {
148 |       smallest_values[i] = smallest_values[i - 1];
149 |       age[i] = age[i - 1];
150 |     }
151 |     smallest_values[position] = feature_value;
152 |     age[position] = 1;  // A freshly inserted value starts at age 1.
153 |   }
154 | 
155 |   // Get |current_median|.
156 |   if (self->frame_counter > 2) {
157 |     current_median = smallest_values[2];  // Third smallest value.
158 |   } else if (self->frame_counter > 0) {
159 |     current_median = smallest_values[0];  // Few frames so far: the smallest.
160 |   }
161 | 
162 |   // Smooth the median value.
163 |   if (self->frame_counter > 0) {
164 |     if (current_median < self->mean_value[channel]) {
165 |       alpha = kSmoothingDown;  // 0.2 in Q15.
166 |     } else {
167 |       alpha = kSmoothingUp;  // 0.99 in Q15.
168 |     }
169 |   }
170 |   tmp32 = (alpha + 1) * self->mean_value[channel];  // Weight on old value.
171 |   tmp32 += (WEBRTC_SPL_WORD16_MAX - alpha) * current_median;
172 |   tmp32 += 16384;  // Rounding offset (0.5 in Q15) for the shift below.
173 |   self->mean_value[channel] = (int16_t) (tmp32 >> 15);
174 | 
175 |   return self->mean_value[channel];
176 | }
177 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/vad/vad_sp.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | // This file includes specific signal processing tools used in vad_core.c.
12 | 
13 | #ifndef COMMON_AUDIO_VAD_VAD_SP_H_
14 | #define COMMON_AUDIO_VAD_VAD_SP_H_
15 | 
16 | #include "webrtc/common_audio/vad/vad_core.h"
17 | 
18 | // Downsamples the signal by a factor 2, eg. 32->16 or 16->8.
19 | //
20 | // Inputs:
21 | //      - signal_in     : Input signal.
22 | //      - in_length     : Length of input signal in samples.
23 | //
24 | // Input & Output:
25 | //      - filter_state  : Current filter states of the two all-pass filters. The
26 | //                        |filter_state| is updated after all samples have been
27 | //                        processed.
28 | //
29 | // Output:
30 | //      - signal_out    : Downsampled signal (of length |in_length| / 2).
31 | void WebRtcVad_Downsampling(const int16_t* signal_in,
32 |                             int16_t* signal_out,
33 |                             int32_t* filter_state,
34 |                             size_t in_length);
35 | 
36 | // Updates and returns the smoothed feature minimum. As minimum we use the
37 | // median of the five smallest feature values in a 100 frames long window.
38 | // As long as |handle->frame_counter| is zero, that is, we haven't received any
39 | // "valid" data, FindMinimum() outputs the default value of 1600.
40 | //
41 | // Inputs:
42 | //      - feature_value : New feature value to update with.
43 | //      - channel       : Channel number.
44 | //
45 | // Input & Output:
46 | //      - handle        : State information of the VAD.
47 | //
48 | // Returns:
49 | //                      : Smoothed minimum value for a moving window.
50 | int16_t WebRtcVad_FindMinimum(VadInstT* handle,
51 |                               int16_t feature_value,
52 |                               int channel);
53 | 
54 | #endif  // COMMON_AUDIO_VAD_VAD_SP_H_
55 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/vad/webrtc_vad.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  3 |  *
  4 |  *  Use of this source code is governed by a BSD-style license
  5 |  *  that can be found in the LICENSE file in the root of the source
  6 |  *  tree. An additional intellectual property rights grant can be found
  7 |  *  in the file PATENTS.  All contributing project authors may
  8 |  *  be found in the AUTHORS file in the root of the source tree.
  9 |  */
 10 | 
 11 | #include "webrtc/common_audio/vad/include/webrtc_vad.h"
 12 | 
 13 | #include <stdlib.h>
 14 | #include <string.h>
 15 | 
 16 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 17 | #include "webrtc/common_audio/vad/vad_core.h"
 18 | 
 19 | static const int kInitCheck = 42;  // |init_flag| value set_mode/Process require.
 20 | static const int kValidRates[] = { 8000, 16000, 32000, 48000 };  // In Hz.
 21 | static const size_t kRatesSize = sizeof(kValidRates) / sizeof(*kValidRates);
 22 | static const int kMaxFrameLengthMs = 30;  // Longest accepted frame, in ms.
 23 | 
 24 | VadInst* WebRtcVad_Create() {
 25 |   // Returns a new instance with |init_flag| cleared, or NULL if malloc() fails.
 26 |   VadInstT* self = (VadInstT*)malloc(sizeof(VadInstT));
 27 |   WebRtcSpl_Init();
 28 |   if (self == NULL) return NULL;  // Never dereference a failed allocation.
 29 |   self->init_flag = 0;  // Differs from kInitCheck, so set_mode/Process reject it.
 30 |   return (VadInst*)self;
 31 | }
 32 | 
 33 | void WebRtcVad_Free(VadInst* handle) {
 34 |   free(handle);  // Releases the instance memory; free(NULL) is a no-op.
 35 | }
 36 | 
 37 | // TODO(bjornv): Move WebRtcVad_InitCore() code here.
 38 | int WebRtcVad_Init(VadInst* handle) {
 39 |   // Initialize the core VAD component; its result is returned unchanged.
 40 |   return WebRtcVad_InitCore((VadInstT*) handle);
 41 | }
 42 | 
 43 | // TODO(bjornv): Move WebRtcVad_set_mode_core() code here.
 44 | int WebRtcVad_set_mode(VadInst* handle, int mode) {
 45 |   VadInstT* const instance = (VadInstT*) handle;
 46 | 
 47 |   // A missing instance and an instance whose |init_flag| is not kInitCheck
 48 |   // are both rejected before the core is consulted.
 49 |   const int usable = (instance != NULL) && (instance->init_flag == kInitCheck);
 50 |   if (!usable) {
 51 |     return -1;
 52 |   }
 53 | 
 54 |   return WebRtcVad_set_mode_core(instance, mode);
 55 | }
 56 | 
 57 | int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame,
 58 |                       size_t frame_length) {
 59 |   VadInstT* const instance = (VadInstT*) handle;
 60 |   int decision = -1;
 61 | 
 62 |   // Reject a missing or uninitialized instance and a missing frame buffer.
 63 |   if (instance == NULL || instance->init_flag != kInitCheck ||
 64 |       audio_frame == NULL) {
 65 |     return -1;
 66 |   }
 67 |   // Only the supported rate / frame-length pairs are processed.
 68 |   if (WebRtcVad_ValidRateAndFrameLength(fs, frame_length) != 0) {
 69 |     return -1;
 70 |   }
 71 | 
 72 |   // Dispatch to the core routine matching |fs|; otherwise keep -1.
 73 |   switch (fs) {
 74 |     case 48000:
 75 |       decision = WebRtcVad_CalcVad48khz(instance, audio_frame, frame_length);
 76 |       break;
 77 |     case 32000:
 78 |       decision = WebRtcVad_CalcVad32khz(instance, audio_frame, frame_length);
 79 |       break;
 80 |     case 16000:
 81 |       decision = WebRtcVad_CalcVad16khz(instance, audio_frame, frame_length);
 82 |       break;
 83 |     case 8000:
 84 |       decision = WebRtcVad_CalcVad8khz(instance, audio_frame, frame_length);
 85 |       break;
 86 |   }
 87 | 
 88 |   // Collapse every positive core result to 1; 0 and negatives pass through.
 89 |   return (decision > 0) ? 1 : decision;
 90 | }
 91 | 
 92 | int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length) {
 93 |   size_t rate_index = 0;
 94 |   int length_ms = 0;
 95 | 
 96 |   // Locate |rate| in the table of supported sampling rates.
 97 |   while (rate_index < kRatesSize && kValidRates[rate_index] != rate) {
 98 |     rate_index++;
 99 |   }
100 |   if (rate_index == kRatesSize) {
101 |     return -1;  // Unsupported sampling rate.
102 |   }
103 | 
104 |   // Accept frames lasting 10, 20, ... up to |kMaxFrameLengthMs| milliseconds.
105 |   for (length_ms = 10; length_ms <= kMaxFrameLengthMs; length_ms += 10) {
106 |     const size_t samples =
107 |         (size_t)(kValidRates[rate_index] / 1000 * length_ms);
108 |     if (samples == frame_length) {
109 |       return 0;
110 |     }
111 |   }
112 | 
113 |   // The rate is supported but the frame length is not.
114 |   return -1;
115 | }
116 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/rtc_base/checks.cc:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Copyright 2006 The WebRTC Project Authors. All rights reserved.
  3 |  *
  4 |  *  Use of this source code is governed by a BSD-style license
  5 |  *  that can be found in the LICENSE file in the root of the source
  6 |  *  tree. An additional intellectual property rights grant can be found
  7 |  *  in the file PATENTS.  All contributing project authors may
  8 |  *  be found in the AUTHORS file in the root of the source tree.
  9 |  */
 10 | 
 11 | // Most of this was borrowed (with minor modifications) from V8's and Chromium's
 12 | // src/base/logging.cc.
 13 | 
 14 | #include <cstdarg>
 15 | #include <cstdio>
 16 | #include <cstdlib>
 17 | 
 18 | #if defined(WEBRTC_ANDROID)
 19 | #define RTC_LOG_TAG_ANDROID "rtc"
 20 | #include <android/log.h>  // NOLINT
 21 | #endif
 22 | 
 23 | #if defined(WEBRTC_WIN)
 24 | #include <windows.h>
 25 | #endif
 26 | 
 27 | #if defined(WEBRTC_WIN)
 28 | #define LAST_SYSTEM_ERROR (::GetLastError())
 29 | #elif defined(__native_client__) && __native_client__
 30 | #define LAST_SYSTEM_ERROR (0)
 31 | #elif defined(WEBRTC_POSIX)
 32 | #include <errno.h>
 33 | #define LAST_SYSTEM_ERROR (errno)
 34 | #endif  // WEBRTC_WIN
 35 | 
 36 | #include "webrtc/rtc_base/checks.h"
 37 | 
 38 | namespace {  // File-local helper: appends printf-style output to |*s|.
 39 | #if defined(__GNUC__)
 40 | __attribute__((__format__(__printf__, 2, 3)))
 41 | #endif
 42 |   void AppendFormat(std::string* s, const char* fmt, ...) {
 43 |   va_list args, copy;
 44 |   va_start(args, fmt);
 45 |   va_copy(copy, args);  // |copy| is consumed by the sizing pass below.
 46 |   const int predicted_length = std::vsnprintf(nullptr, 0, fmt, copy);
 47 |   va_end(copy);
 48 |   // Nothing is appended when the sizing pass reports zero or a negative value.
 49 |   if (predicted_length > 0) {
 50 |     const size_t size = s->size();
 51 |     s->resize(size + predicted_length);
 52 |     // Pass "+ 1" to vsnprintf to include space for the '\0'.
 53 |     std::vsnprintf(&((*s)[size]), predicted_length + 1, fmt, args);
 54 |   }
 55 |   va_end(args);
 56 | }
 57 | }  // namespace
 58 | 
 59 | namespace rtc {
 60 | namespace webrtc_checks_impl {
 61 | 
 62 | // Reads one argument from args, appends it to s and advances fmt.
 63 | // Returns true iff an argument was successfully parsed.
 64 | bool ParseArg(va_list* args, const CheckArgType** fmt, std::string* s) {
 65 |   if (**fmt == CheckArgType::kEnd)
 66 |     return false;  // End of the type list: nothing left to read.
 67 | 
 68 |   switch (**fmt) {
 69 |     case CheckArgType::kInt:
 70 |       AppendFormat(s, "%d", va_arg(*args, int));
 71 |       break;
 72 |     case CheckArgType::kLong:
 73 |       AppendFormat(s, "%ld", va_arg(*args, long));
 74 |       break;
 75 |     case CheckArgType::kLongLong:
 76 |       AppendFormat(s, "%lld", va_arg(*args, long long));
 77 |       break;
 78 |     case CheckArgType::kUInt:
 79 |       AppendFormat(s, "%u", va_arg(*args, unsigned));
 80 |       break;
 81 |     case CheckArgType::kULong:
 82 |       AppendFormat(s, "%lu", va_arg(*args, unsigned long));
 83 |       break;
 84 |     case CheckArgType::kULongLong:
 85 |       AppendFormat(s, "%llu", va_arg(*args, unsigned long long));
 86 |       break;
 87 |     case CheckArgType::kDouble:
 88 |       AppendFormat(s, "%g", va_arg(*args, double));
 89 |       break;
 90 |     case CheckArgType::kLongDouble:
 91 |       AppendFormat(s, "%Lg", va_arg(*args, long double));
 92 |       break;
 93 |     case CheckArgType::kCharP:
 94 |       s->append(va_arg(*args, const char*));
 95 |       break;
 96 |     case CheckArgType::kStdString:
 97 |       s->append(*va_arg(*args, const std::string*));
 98 |       break;
 99 |     case CheckArgType::kVoidP:
100 |       AppendFormat(s, "%p", va_arg(*args, const void*));
101 |       break;
102 |     default:
103 |       s->append("[Invalid CheckArgType]");
104 |       return false;  // |*fmt| is not advanced for an unknown type.
105 |   }
106 |   (*fmt)++;  // Step to the next entry of the type list.
107 |   return true;
108 | }
109 | 
110 | RTC_NORETURN void FatalLog(const char* file,
111 |                            int line,
112 |                            const char* message,
113 |                            const CheckArgType* fmt,
114 |                            ...) {
115 |   va_list args;
116 |   va_start(args, fmt);
117 |   // Header: source location, LAST_SYSTEM_ERROR and the failed check's text.
118 |   std::string s;
119 |   AppendFormat(&s,
120 |                "\n\n"
121 |                "#\n"
122 |                "# Fatal error in: %s, line %d\n"
123 |                "# last system error: %u\n"
124 |                "# Check failed: %s",
125 |                file, line, LAST_SYSTEM_ERROR, message);
126 |   // NOTE(review): "%u" assumes LAST_SYSTEM_ERROR is unsigned-compatible - confirm.
127 |   if (*fmt == CheckArgType::kCheckOp) {
128 |     // This log message was generated by RTC_CHECK_OP, so we have to complete
129 |     // the error message using the operands that have been passed as the first
130 |     // two arguments.
131 |     fmt++;
132 | 
133 |     std::string s1, s2;
134 |     if (ParseArg(&args, &fmt, &s1) && ParseArg(&args, &fmt, &s2))
135 |       AppendFormat(&s, " (%s vs. %s)\n# ", s1.c_str(), s2.c_str());
136 |   } else {
137 |     s.append("\n# ");
138 |   }
139 | 
140 |   // Append all the user-supplied arguments to the message.
141 |   while (ParseArg(&args, &fmt, &s))
142 |     ;
143 | 
144 |   va_end(args);
145 | 
146 |   const char* output = s.c_str();
147 | 
148 | #if defined(WEBRTC_ANDROID)
149 |   __android_log_print(ANDROID_LOG_ERROR, RTC_LOG_TAG_ANDROID, "%s\n", output);
150 | #endif
151 |   // Flush stdout, write the message to stderr, then abort the process.
152 |   fflush(stdout);
153 |   fprintf(stderr, "%s", output);
154 |   fflush(stderr);
155 |   abort();
156 | }
157 | 
158 | }  // namespace webrtc_checks_impl
159 | }  // namespace rtc
160 | 
161 | // Function to call from the C version of the RTC_CHECK and RTC_DCHECK macros.
162 | RTC_NORETURN void rtc_FatalMessage(const char* file, int line,
163 |                                    const char* msg) {
164 |   static constexpr rtc::webrtc_checks_impl::CheckArgType t[] = {
165 |       rtc::webrtc_checks_impl::CheckArgType::kEnd};  // Type list holding only kEnd.
166 |   FatalLog(file, line, msg, t);  // No variadic arguments are passed along.
167 | }
168 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/rtc_base/compile_assert_c.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | #ifndef RTC_BASE_COMPILE_ASSERT_C_H_
12 | #define RTC_BASE_COMPILE_ASSERT_C_H_
13 | 
14 | // Use this macro to verify at compile time that certain restrictions are met.
15 | // The argument is the boolean expression to evaluate. It expands to a switch
16 | // statement, so a false (0) expression duplicates `case 0` and fails to compile.
17 | // Example:  RTC_COMPILE_ASSERT(sizeof(foo) < 128);
18 | // Note: In C++, use static_assert instead!
19 | #define RTC_COMPILE_ASSERT(expression) \
20 |   switch (0) {                         \
21 |     case 0:                            \
22 |     case expression:;                  \
23 |   }
24 | 
25 | #endif  // RTC_BASE_COMPILE_ASSERT_C_H_
26 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/rtc_base/sanitizer.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Copyright 2016 The WebRTC Project Authors. All rights reserved.
  3 |  *
  4 |  *  Use of this source code is governed by a BSD-style license
  5 |  *  that can be found in the LICENSE file in the root of the source
  6 |  *  tree. An additional intellectual property rights grant can be found
  7 |  *  in the file PATENTS.  All contributing project authors may
  8 |  *  be found in the AUTHORS file in the root of the source tree.
  9 |  */
 10 | 
 11 | #ifndef RTC_BASE_SANITIZER_H_
 12 | #define RTC_BASE_SANITIZER_H_
 13 | 
 14 | #include <stddef.h>  // For size_t.
 15 | 
 16 | #ifdef __cplusplus
 17 | #include <type_traits>
 18 | #endif
 19 | 
 20 | #if defined(__has_feature)
 21 | #if __has_feature(address_sanitizer)
 22 | #define RTC_HAS_ASAN 1
 23 | #endif
 24 | #if __has_feature(memory_sanitizer)
 25 | #define RTC_HAS_MSAN 1
 26 | #endif
 27 | #endif
 28 | #ifndef RTC_HAS_ASAN
 29 | #define RTC_HAS_ASAN 0
 30 | #endif
 31 | #ifndef RTC_HAS_MSAN
 32 | #define RTC_HAS_MSAN 0
 33 | #endif
 34 | 
 35 | #if RTC_HAS_ASAN
 36 | #include <sanitizer/asan_interface.h>
 37 | #endif
 38 | #if RTC_HAS_MSAN
 39 | #include <sanitizer/msan_interface.h>
 40 | #endif
 41 | 
 42 | #ifdef __has_attribute
 43 | #if __has_attribute(no_sanitize)
 44 | #define RTC_NO_SANITIZE(what) __attribute__((no_sanitize(what)))
 45 | #endif
 46 | #endif
 47 | #ifndef RTC_NO_SANITIZE
 48 | #define RTC_NO_SANITIZE(what)
 49 | #endif
 50 | 
 51 | // Ask ASan to mark the memory range [ptr, ptr + element_size * num_elements)
 52 | // as being unaddressable, so that reads and writes are not allowed. ASan may
 53 | // narrow the range to the nearest alignment boundaries.
 54 | static inline void rtc_AsanPoison(const volatile void* ptr,
 55 |                                   size_t element_size,
 56 |                                   size_t num_elements) {
 57 | #if RTC_HAS_ASAN  // The body is empty when ASan is not enabled.
 58 |   ASAN_POISON_MEMORY_REGION(ptr, element_size * num_elements);
 59 | #endif
 60 | }
 61 | 
 62 | // Ask ASan to mark the memory range [ptr, ptr + element_size * num_elements)
 63 | // as being addressable, so that reads and writes are allowed. ASan may widen
 64 | // the range to the nearest alignment boundaries.
 65 | static inline void rtc_AsanUnpoison(const volatile void* ptr,
 66 |                                     size_t element_size,
 67 |                                     size_t num_elements) {
 68 | #if RTC_HAS_ASAN  // The body is empty when ASan is not enabled.
 69 |   ASAN_UNPOISON_MEMORY_REGION(ptr, element_size * num_elements);
 70 | #endif
 71 | }
 72 | 
 73 | // Ask MSan to mark the memory range [ptr, ptr + element_size * num_elements)
 74 | // as being uninitialized.
 75 | static inline void rtc_MsanMarkUninitialized(const volatile void* ptr,
 76 |                                              size_t element_size,
 77 |                                              size_t num_elements) {
 78 | #if RTC_HAS_MSAN  // The body is empty when MSan is not enabled.
 79 |   __msan_poison(ptr, element_size * num_elements);
 80 | #endif
 81 | }
 82 | 
 83 | // Force an MSan check (if any bits in the memory range [ptr, ptr +
 84 | // element_size * num_elements) are uninitialized the call will crash with an
 85 | // MSan report).
 86 | static inline void rtc_MsanCheckInitialized(const volatile void* ptr,
 87 |                                             size_t element_size,
 88 |                                             size_t num_elements) {
 89 | #if RTC_HAS_MSAN  // The body is empty when MSan is not enabled.
 90 |   __msan_check_mem_is_initialized(ptr, element_size * num_elements);
 91 | #endif
 92 | }
 93 | 
 94 | #ifdef __cplusplus
 95 | 
 96 | namespace rtc {
 97 | namespace sanitizer_impl {
 98 | // Stand-in for std::is_trivially_copyable (see the TODO in MsanUninitialized).
 99 | template <typename T>
100 | constexpr bool IsTriviallyCopyable() {
101 |   return static_cast<bool>(std::is_trivially_copy_constructible<T>::value &&
102 |                            (std::is_trivially_copy_assignable<T>::value ||
103 |                             !std::is_copy_assignable<T>::value) &&  // Trivial or absent copy assignment.
104 |                            std::is_trivially_destructible<T>::value);
105 | }
106 | 
107 | }  // namespace sanitizer_impl
108 | // Passes mem.data(), the element size and mem.size() to rtc_AsanPoison.
109 | template <typename T>
110 | inline void AsanPoison(const T& mem) {
111 |   rtc_AsanPoison(mem.data(), sizeof(mem.data()[0]), mem.size());
112 | }
113 | // Passes mem.data(), the element size and mem.size() to rtc_AsanUnpoison.
114 | template <typename T>
115 | inline void AsanUnpoison(const T& mem) {
116 |   rtc_AsanUnpoison(mem.data(), sizeof(mem.data()[0]), mem.size());
117 | }
118 | // Passes mem.data(), the element size and mem.size() to rtc_MsanMarkUninitialized.
119 | template <typename T>
120 | inline void MsanMarkUninitialized(const T& mem) {
121 |   rtc_MsanMarkUninitialized(mem.data(), sizeof(mem.data()[0]), mem.size());
122 | }
123 | // Returns the by-value copy t after marking its sizeof(T) bytes uninitialized.
124 | template <typename T>
125 | inline T MsanUninitialized(T t) {
126 | #if RTC_HAS_MSAN
127 |   // TODO(bugs.webrtc.org/8762): Switch to std::is_trivially_copyable when it
128 |   // becomes available in downstream projects.
129 |   static_assert(sanitizer_impl::IsTriviallyCopyable<T>(), "");
130 | #endif
131 |   rtc_MsanMarkUninitialized(&t, sizeof(T), 1);
132 |   return t;
133 | }
134 | // Passes mem.data(), the element size and mem.size() to rtc_MsanCheckInitialized.
135 | template <typename T>
136 | inline void MsanCheckInitialized(const T& mem) {
137 |   rtc_MsanCheckInitialized(mem.data(), sizeof(mem.data()[0]), mem.size());
138 | }
139 | 
140 | }  // namespace rtc
141 | 
142 | #endif  // __cplusplus
143 | 
144 | #endif  // RTC_BASE_SANITIZER_H_
145 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/rtc_base/system/arch.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | // This file contains platform-specific typedefs and defines.
12 | // Much of it is derived from Chromium's build/build_config.h.
13 | 
14 | #ifndef RTC_BASE_SYSTEM_ARCH_H_
15 | #define RTC_BASE_SYSTEM_ARCH_H_
16 | 
17 | // Processor architecture detection.  For more info on what's defined, see:
18 | //   http://msdn.microsoft.com/en-us/library/b0084kay.aspx
19 | //   http://www.agner.org/optimize/calling_conventions.pdf
20 | //   or with gcc, run: "echo | gcc -E -dM -"
21 | #if defined(_M_X64) || defined(__x86_64__)
22 | #define WEBRTC_ARCH_X86_FAMILY
23 | #define WEBRTC_ARCH_X86_64
24 | #define WEBRTC_ARCH_64_BITS
25 | #define WEBRTC_ARCH_LITTLE_ENDIAN
26 | #elif defined(__aarch64__)
27 | #define WEBRTC_ARCH_ARM_FAMILY
28 | #define WEBRTC_ARCH_64_BITS
29 | #define WEBRTC_ARCH_LITTLE_ENDIAN
30 | #elif defined(_M_IX86) || defined(__i386__)
31 | #define WEBRTC_ARCH_X86_FAMILY
32 | #define WEBRTC_ARCH_X86
33 | #define WEBRTC_ARCH_32_BITS
34 | #define WEBRTC_ARCH_LITTLE_ENDIAN
35 | #elif defined(__ARMEL__)
36 | #define WEBRTC_ARCH_ARM_FAMILY
37 | #define WEBRTC_ARCH_32_BITS
38 | #define WEBRTC_ARCH_LITTLE_ENDIAN
39 | #elif defined(__MIPSEL__)
40 | #define WEBRTC_ARCH_MIPS_FAMILY
41 | #if defined(__LP64__)
42 | #define WEBRTC_ARCH_64_BITS
43 | #else
44 | #define WEBRTC_ARCH_32_BITS
45 | #endif
46 | #define WEBRTC_ARCH_LITTLE_ENDIAN
47 | #elif defined(__pnacl__)
48 | #define WEBRTC_ARCH_32_BITS
49 | #define WEBRTC_ARCH_LITTLE_ENDIAN
50 | #else
51 | #error Please add support for your architecture in typedefs.h
52 | #endif
53 | 
54 | #if !(defined(WEBRTC_ARCH_LITTLE_ENDIAN) ^ defined(WEBRTC_ARCH_BIG_ENDIAN))
55 | #error Define either WEBRTC_ARCH_LITTLE_ENDIAN or WEBRTC_ARCH_BIG_ENDIAN
56 | #endif
57 | 
58 | #endif  // RTC_BASE_SYSTEM_ARCH_H_
59 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/rtc_base/system/inline.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | #ifndef RTC_BASE_SYSTEM_INLINE_H_
12 | #define RTC_BASE_SYSTEM_INLINE_H_
13 | 
14 | #if defined(_MSC_VER)  // MSVC spellings.
15 | 
16 | #define RTC_FORCE_INLINE __forceinline
17 | #define RTC_NO_INLINE __declspec(noinline)
18 | 
19 | #elif defined(__GNUC__)  // GNU-style attribute spellings.
20 | 
21 | #define RTC_FORCE_INLINE __attribute__((__always_inline__))
22 | #define RTC_NO_INLINE __attribute__((__noinline__))
23 | 
24 | #else  // Any other compiler: both macros expand to nothing.
25 | 
26 | #define RTC_FORCE_INLINE
27 | #define RTC_NO_INLINE
28 | 
29 | #endif
30 | 
31 | #endif  // RTC_BASE_SYSTEM_INLINE_H_
32 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/rtc_base/type_traits.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Copyright 2016 The WebRTC Project Authors. All rights reserved.
  3 |  *
  4 |  *  Use of this source code is governed by a BSD-style license
  5 |  *  that can be found in the LICENSE file in the root of the source
  6 |  *  tree. An additional intellectual property rights grant can be found
  7 |  *  in the file PATENTS.  All contributing project authors may
  8 |  *  be found in the AUTHORS file in the root of the source tree.
  9 |  */
 10 | 
 11 | #ifndef RTC_BASE_TYPE_TRAITS_H_
 12 | #define RTC_BASE_TYPE_TRAITS_H_
 13 | 
 14 | #include <cstddef>
 15 | #include <type_traits>
 16 | 
 17 | namespace rtc {
 18 | 
 19 | // Determines if the given class has zero-argument .data() and .size() methods
 20 | // whose return values are convertible to T* and size_t, respectively.
 21 | template <typename DS, typename T>
 22 | class HasDataAndSize {
 23 |  private:
 24 |   template <
 25 |       typename C,
 26 |       typename std::enable_if<
 27 |           std::is_convertible<decltype(std::declval<C>().data()), T*>::value &&
 28 |           std::is_convertible<decltype(std::declval<C>().size()),
 29 |                               std::size_t>::value>::type* = nullptr>
 30 |   static int Test(int);  // Viable only when both convertibility checks hold.
 31 | 
 32 |   template <typename>
 33 |   static char Test(...);  // Fallback overload for every other type.
 34 | 
 35 |  public:
 36 |   static constexpr bool value = std::is_same<decltype(Test<DS>(0)), int>::value;  // True iff the int overload was picked.
 37 | };
 38 | 
 39 | namespace test_has_data_and_size {
 40 | 
 41 | template <typename DR, typename SR>
 42 | struct Test1 {
 43 |   DR data();
 44 |   SR size();
 45 | };
 46 | static_assert(HasDataAndSize<Test1<int*, int>, int>::value, "");
 47 | static_assert(HasDataAndSize<Test1<int*, int>, const int>::value, "");
 48 | static_assert(HasDataAndSize<Test1<const int*, int>, const int>::value, "");
 49 | static_assert(!HasDataAndSize<Test1<const int*, int>, int>::value,
 50 |               "implicit cast of const int* to int*");
 51 | static_assert(!HasDataAndSize<Test1<char*, size_t>, int>::value,
 52 |               "implicit cast of char* to int*");
 53 | 
 54 | struct Test2 {
 55 |   int* data;
 56 |   size_t size;
 57 | };
 58 | static_assert(!HasDataAndSize<Test2, int>::value,
 59 |               ".data and .size aren't functions");
 60 | 
 61 | struct Test3 {
 62 |   int* data();
 63 | };
 64 | static_assert(!HasDataAndSize<Test3, int>::value, ".size() is missing");
 65 | 
 66 | class Test4 {
 67 |   int* data();
 68 |   size_t size();
 69 | };
 70 | static_assert(!HasDataAndSize<Test4, int>::value,
 71 |               ".data() and .size() are private");
 72 | 
 73 | }  // namespace test_has_data_and_size
 74 | 
 75 | namespace type_traits_impl {
 76 | 
 77 | // Determines if the given type is an enum that converts implicitly to
 78 | // an integral type.
 79 | template <typename T>
 80 | struct IsIntEnum {
 81 |  private:
 82 |   // This overload is used if the type is an enum, and unary plus
 83 |   // compiles and turns it into an integral type.
 84 |   template <typename X,
 85 |             typename std::enable_if<
 86 |                 std::is_enum<X>::value &&
 87 |                 std::is_integral<decltype(+std::declval<X>())>::value>::type* =
 88 |                 nullptr>
 89 |   static int Test(int);
 90 | 
 91 |   // Otherwise, this overload is used.
 92 |   template <typename>
 93 |   static char Test(...);
 94 | 
 95 |  public:
 96 |   static constexpr bool value =
 97 |       std::is_same<decltype(Test<typename std::remove_reference<T>::type>(0)),
 98 |                    int>::value;
 99 | };
100 | 
101 | }  // namespace type_traits_impl
102 | 
103 | // Determines if the given type is integral, or an enum that
104 | // converts implicitly to an integral type.
105 | template <typename T>
106 | struct IsIntlike {
107 |  private:
108 |   using X = typename std::remove_reference<T>::type;
109 | 
110 |  public:
111 |   static constexpr bool value =
112 |       std::is_integral<X>::value || type_traits_impl::IsIntEnum<X>::value;
113 | };
114 | 
115 | namespace test_enum_intlike {
116 | 
117 | enum E1 { e1 };
118 | enum { e2 };
119 | enum class E3 { e3 };
120 | struct S {};
121 | 
122 | static_assert(type_traits_impl::IsIntEnum<E1>::value, "");
123 | static_assert(type_traits_impl::IsIntEnum<decltype(e2)>::value, "");
124 | static_assert(!type_traits_impl::IsIntEnum<E3>::value, "");
125 | static_assert(!type_traits_impl::IsIntEnum<int>::value, "");
126 | static_assert(!type_traits_impl::IsIntEnum<float>::value, "");
127 | static_assert(!type_traits_impl::IsIntEnum<S>::value, "");
128 | 
129 | static_assert(IsIntlike<E1>::value, "");
130 | static_assert(IsIntlike<decltype(e2)>::value, "");
131 | static_assert(!IsIntlike<E3>::value, "");
132 | static_assert(IsIntlike<int>::value, "");
133 | static_assert(!IsIntlike<float>::value, "");
134 | static_assert(!IsIntlike<S>::value, "");
135 | 
136 | }  // namespace test_enum_intlike
137 | 
138 | }  // namespace rtc
139 | 
140 | #endif  // RTC_BASE_TYPE_TRAITS_H_
141 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/system_wrappers/include/cpu_features_wrapper.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 3 |  *
 4 |  *  Use of this source code is governed by a BSD-style license
 5 |  *  that can be found in the LICENSE file in the root of the source
 6 |  *  tree. An additional intellectual property rights grant can be found
 7 |  *  in the file PATENTS.  All contributing project authors may
 8 |  *  be found in the AUTHORS file in the root of the source tree.
 9 |  */
10 | 
11 | #ifndef SYSTEM_WRAPPERS_INCLUDE_CPU_FEATURES_WRAPPER_H_
12 | #define SYSTEM_WRAPPERS_INCLUDE_CPU_FEATURES_WRAPPER_H_
13 | 
14 | #include <stdint.h>
15 | 
16 | #if defined(__cplusplus) || defined(c_plusplus)
17 | extern "C" {
18 | #endif
19 | 
20 | // List of features in x86.
21 | typedef enum { kSSE2, kSSE3 } CPUFeature;
22 | 
23 | // List of features in ARM.
24 | enum {
25 |   kCPUFeatureARMv7 = (1 << 0),
26 |   kCPUFeatureVFPv3 = (1 << 1),
27 |   kCPUFeatureNEON = (1 << 2),
28 |   kCPUFeatureLDREXSTREX = (1 << 3)
29 | };
30 | 
31 | typedef int (*WebRtc_CPUInfo)(CPUFeature feature);
32 | 
33 | // Returns true if the CPU supports the feature.
34 | extern WebRtc_CPUInfo WebRtc_GetCPUInfo;
35 | 
36 | // No CPU feature is available => straight C path.
37 | extern WebRtc_CPUInfo WebRtc_GetCPUInfoNoASM;
38 | 
39 | // Return the features in an ARM device.
40 | // It detects the features in the hardware platform, and returns supported
41 | // values in the above enum definition as a bitmask.
42 | extern uint64_t WebRtc_GetCPUFeaturesARM(void);
43 | 
44 | #if defined(__cplusplus) || defined(c_plusplus)
45 | }  // extern "C"
46 | #endif
47 | 
48 | #endif  // SYSTEM_WRAPPERS_INCLUDE_CPU_FEATURES_WRAPPER_H_
49 | 


--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/typedefs.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  3 |  *
  4 |  *  Use of this source code is governed by a BSD-style license
  5 |  *  that can be found in the LICENSE file in the root of the source
  6 |  *  tree. An additional intellectual property rights grant can be found
  7 |  *  in the file PATENTS.  All contributing project authors may
  8 |  *  be found in the AUTHORS file in the root of the source tree.
  9 |  */
 10 | 
 11 | // This file contains platform-specific typedefs and defines.
 12 | // Much of it is derived from Chromium's build/build_config.h.
 13 | 
 14 | #ifndef WEBRTC_TYPEDEFS_H_
 15 | #define WEBRTC_TYPEDEFS_H_
 16 | 
 17 | // Processor architecture detection.  For more info on what's defined, see:
 18 | //   http://msdn.microsoft.com/en-us/library/b0084kay.aspx
 19 | //   http://www.agner.org/optimize/calling_conventions.pdf
 20 | //   or with gcc, run: "echo | gcc -E -dM -"
 21 | #if defined(_M_X64) || defined(__x86_64__)
 22 | #define WEBRTC_ARCH_X86_FAMILY
 23 | #define WEBRTC_ARCH_X86_64
 24 | #define WEBRTC_ARCH_64_BITS
 25 | #define WEBRTC_ARCH_LITTLE_ENDIAN
 26 | #elif defined(__aarch64__)
 27 | #define WEBRTC_ARCH_64_BITS
 28 | #define WEBRTC_ARCH_LITTLE_ENDIAN
 29 | #elif defined(_M_IX86) || defined(__i386__)
 30 | #define WEBRTC_ARCH_X86_FAMILY
 31 | #define WEBRTC_ARCH_X86
 32 | #define WEBRTC_ARCH_32_BITS
 33 | #define WEBRTC_ARCH_LITTLE_ENDIAN
 34 | #elif defined(__ARMEL__)
 35 | // TODO(ajm): We'd prefer to control platform defines here, but this is
 36 | // currently provided by the Android makefiles. Commented to avoid duplicate
 37 | // definition warnings.
 38 | //#define WEBRTC_ARCH_ARM
 39 | // TODO(ajm): Chromium uses the following two defines. Should we switch?
 40 | //#define WEBRTC_ARCH_ARM_FAMILY
 41 | //#define WEBRTC_ARCH_ARMEL
 42 | #define WEBRTC_ARCH_32_BITS
 43 | #define WEBRTC_ARCH_LITTLE_ENDIAN
 44 | #elif defined(__MIPSEL__)
 45 | #define WEBRTC_ARCH_32_BITS
 46 | #define WEBRTC_ARCH_LITTLE_ENDIAN
 47 | #elif defined(__pnacl__)
 48 | #define WEBRTC_ARCH_32_BITS
 49 | #define WEBRTC_ARCH_LITTLE_ENDIAN
 50 | #elif defined(__PPC__)
 51 | #if defined(__PPC64__)
 52 | #define WEBRTC_ARCH_64_BITS
 53 | #else
 54 | #define WEBRTC_ARCH_32_BITS
 55 | #endif
 56 | #define WEBRTC_ARCH_BIG_ENDIAN
 57 | #else
 58 | #error Please add support for your architecture in typedefs.h
 59 | #endif
 60 | 
 61 | #if !(defined(WEBRTC_ARCH_LITTLE_ENDIAN) ^ defined(WEBRTC_ARCH_BIG_ENDIAN))
 62 | #error Define either WEBRTC_ARCH_LITTLE_ENDIAN or WEBRTC_ARCH_BIG_ENDIAN
 63 | #endif
 64 | 
 65 | #if (defined(WEBRTC_ARCH_X86_FAMILY) && !defined(__SSE2__)) ||  \
 66 |     (defined(WEBRTC_ARCH_ARM_V7) && !defined(WEBRTC_ARCH_ARM_NEON))
 67 | #define WEBRTC_CPU_DETECTION
 68 | #endif
 69 | 
 70 | #if !defined(_MSC_VER)
 71 | #include <stdint.h>
 72 | #else
 73 | // Define C99 equivalent types, since pre-2010 MSVC doesn't provide stdint.h.
 74 | typedef signed char         int8_t;
 75 | typedef signed short        int16_t;
 76 | typedef signed int          int32_t;
 77 | typedef __int64             int64_t;
 78 | typedef unsigned char       uint8_t;
 79 | typedef unsigned short      uint16_t;
 80 | typedef unsigned int        uint32_t;
 81 | typedef unsigned __int64    uint64_t;
 82 | #endif
 83 | 
 84 | // Borrowed from Chromium's base/compiler_specific.h.
 85 | // Annotate a virtual method indicating it must be overriding a virtual
 86 | // method in the parent class.
 87 | // Use like:
 88 | //   virtual void foo() OVERRIDE;
 89 | #if defined(_MSC_VER)
 90 | #define OVERRIDE override
 91 | #elif defined(__clang__)
 92 | // Clang defaults to C++03 and warns about using override. Squelch that.
 93 | // Intentionally no push/pop here so all users of OVERRIDE ignore the warning
 94 | // too. This is like passing -Wno-c++11-extensions, except that GCC won't die
 95 | // (because it won't see this pragma).
 96 | #pragma clang diagnostic ignored "-Wc++11-extensions"
 97 | #define OVERRIDE override
 98 | #elif defined(__GNUC__) && __cplusplus >= 201103 && \
 99 |     (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40700
100 | // GCC 4.7 supports explicit virtual overrides when C++11 support is enabled.
101 | #define OVERRIDE override
102 | #else
103 | #define OVERRIDE
104 | #endif
105 | 
106 | // Annotate a function indicating the caller must examine the return value.
107 | // Use like:
108 | //   int foo() WARN_UNUSED_RESULT;
109 | // TODO(ajm): Hack to avoid multiple definitions until the base/ of webrtc and
110 | // libjingle are merged.
111 | #if !defined(WARN_UNUSED_RESULT)
112 | #if defined(__GNUC__)
113 | #define WARN_UNUSED_RESULT __attribute__((warn_unused_result))
114 | #else
115 | #define WARN_UNUSED_RESULT
116 | #endif
117 | #endif  // WARN_UNUSED_RESULT
118 | 
119 | // Put after a variable that might not be used, to prevent compiler warnings:
120 | //   int result ATTRIBUTE_UNUSED = DoSomething();
121 | //   assert(result == 17);
122 | #ifndef ATTRIBUTE_UNUSED
123 | #if defined(__GNUC__) || defined(__clang__)
124 | #define ATTRIBUTE_UNUSED __attribute__((unused))
125 | #else
126 | #define ATTRIBUTE_UNUSED
127 | #endif
128 | #endif
129 | 
130 | // Macro to be used for switch-case fallthrough (required for enabling
131 | // -Wimplicit-fallthrough warning on Clang).
132 | #ifndef FALLTHROUGH
133 | #if defined(__clang__)
134 | #define FALLTHROUGH() [[clang::fallthrough]]
135 | #else
136 | #define FALLTHROUGH() do { } while (0)
137 | #endif
138 | #endif
139 | 
140 | // Annotate a function that will not return control flow to the caller.
141 | #if defined(_MSC_VER)
142 | #define NO_RETURN __declspec(noreturn)
143 | #elif defined(__GNUC__)
144 | #define NO_RETURN __attribute__((noreturn))
145 | #else
146 | #define NO_RETURN
147 | #endif
148 | 
149 | #endif  // WEBRTC_TYPEDEFS_H_
150 | 


--------------------------------------------------------------------------------
/cpp_onnx/wave/asr_example.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/wave/asr_example.wav


--------------------------------------------------------------------------------
/cpp_onnx/wave/long.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/wave/long.wav


--------------------------------------------------------------------------------
/cpp_onnx/wave/short.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/wave/short.wav


--------------------------------------------------------------------------------
/cpp_onnx/wave/test.pcm.bytes:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/wave/test.pcm.bytes


--------------------------------------------------------------------------------
/cpp_onnx/wave/test.pcm.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/wave/test.pcm.wav


--------------------------------------------------------------------------------
/cpp_onnx/win/bin/x64/libfftw3-3.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x64/libfftw3-3.dll


--------------------------------------------------------------------------------
/cpp_onnx/win/bin/x64/libfftw3f-3.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x64/libfftw3f-3.dll


--------------------------------------------------------------------------------
/cpp_onnx/win/bin/x64/libfftw3l-3.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x64/libfftw3l-3.dll


--------------------------------------------------------------------------------
/cpp_onnx/win/bin/x64/onnxruntime.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x64/onnxruntime.dll


--------------------------------------------------------------------------------
/cpp_onnx/win/bin/x86/libfftw3-3.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x86/libfftw3-3.dll


--------------------------------------------------------------------------------
/cpp_onnx/win/bin/x86/libfftw3f-3.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x86/libfftw3f-3.dll


--------------------------------------------------------------------------------
/cpp_onnx/win/bin/x86/libfftw3l-3.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x86/libfftw3l-3.dll


--------------------------------------------------------------------------------
/cpp_onnx/win/bin/x86/onnxruntime.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x86/onnxruntime.dll


--------------------------------------------------------------------------------
/cpp_onnx/win/images/sample.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/images/sample.png


--------------------------------------------------------------------------------
/cpp_onnx/win/include/cpu_provider_factory.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation. All rights reserved.
 2 | // Licensed under the MIT License.
 3 | 
 4 | #include "onnxruntime_c_api.h"
 5 | 
 6 | #ifdef __cplusplus
 7 | extern "C" {
 8 | #endif
 9 | 
10 | /**
11 |  * \param use_arena zero: false. non-zero: true.
12 |  */
13 | ORT_EXPORT
14 | ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_CPU, _In_ OrtSessionOptions* options, int use_arena)
15 | ORT_ALL_ARGS_NONNULL;
16 | 
17 | #ifdef __cplusplus
18 | }
19 | #endif
20 | 


--------------------------------------------------------------------------------
/cpp_onnx/win/include/onnxruntime_run_options_config_keys.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation. All rights reserved.
 2 | // Licensed under the MIT License.
 3 | 
 4 | #pragma once
 5 | 
 6 | /*
 7 |  * This file defines RunOptions Config Keys and format of the Config Values.
 8 |  *
 9 |  * The Naming Convention for a RunOptions Config Key,
10 |  * "[Area][.[SubArea1].[SubArea2]...].[Keyname]"
11 |  * Such as "ep.cuda.use_arena"
12 |  * The Config Key cannot be empty
13 |  * The maximum length of the Config Key is 128
14 |  *
15 |  * The string format of a RunOptions Config Value is defined individually for each Config.
16 |  * The maximum length of the Config Value is 1024
17 |  */
18 | 
19 | // Key for enabling shrinkage of user-listed device memory arenas.
20 | // Expects a semicolon-separated list of "device:device_id" pairs in the following format:
21 | // "device_0:device_id_0;device_1:device_id_1"
22 | // No whitespace is allowed in the provided list string.
23 | // Currently, the only supported devices are: "cpu", "gpu" (case sensitive).
24 | // If "cpu" is included in the list, the DisableCpuMemArena() API must not be called, i.e. the arena for cpu should be enabled.
25 | // Example usage: "cpu:0;gpu:0" (or) "gpu:0"
26 | // By default, the value for this key is empty, i.e. no memory arenas are shrunk.
27 | static const char* const kOrtRunOptionsConfigEnableMemoryArenaShrinkage = "memory.enable_memory_arena_shrinkage";
28 | 
29 | // Set to '1' to not synchronize execution providers with CPU at the end of session run.
30 | // By default it is set to '0'.
31 | // Taking the CUDA EP as an example, it omits triggering cudaStreamSynchronize on the compute stream.
32 | static const char* const kOrtRunOptionsConfigDisableSynchronizeExecutionProviders = "disable_synchronize_execution_providers";
33 | 


--------------------------------------------------------------------------------
/cpp_onnx/win/include/provider_options.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation. All rights reserved.
 2 | // Licensed under the MIT License.
 3 | 
 4 | #pragma once
 5 | 
 6 | #include <string>
 7 | #include <unordered_map>
 8 | #include <vector>
 9 | 
namespace onnxruntime {

// data types for execution provider options

// A single set of options: string keys mapped to string values.
using ProviderOptions = std::unordered_map<std::string, std::string>;
// A sequence of ProviderOptions sets.
using ProviderOptionsVector = std::vector<ProviderOptions>;
// ProviderOptions sets looked up by a string key
// (presumably the execution provider name -- confirm against callers).
using ProviderOptionsMap = std::unordered_map<std::string, ProviderOptions>;

}  // namespace onnxruntime
19 | 


--------------------------------------------------------------------------------
/cpp_onnx/win/include/tensorrt_provider_factory.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Microsoft Corporation. All rights reserved.
 2 | // Licensed under the MIT License.
 3 | 
 4 | #include "onnxruntime_c_api.h"
 5 | 
// Give the declaration below C linkage when this header is included from C++.
#ifdef __cplusplus
extern "C" {
#endif

// Declares OrtSessionOptionsAppendExecutionProvider_Tensorrt through the ORT_API_STATUS
// macro provided by onnxruntime_c_api.h.
// NOTE(review): judging by the name, this presumably registers the TensorRT execution
// provider on `options` for the device identified by `device_id` -- confirm against the
// ONNX Runtime C API documentation.
ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id);

#ifdef __cplusplus
}
#endif
15 | 


--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x64/libfftw3-3.exp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/libfftw3-3.exp


--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x64/libfftw3-3.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/libfftw3-3.lib


--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x64/libfftw3f-3.exp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/libfftw3f-3.exp


--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x64/libfftw3f-3.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/libfftw3f-3.lib


--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x64/libfftw3l-3.exp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/libfftw3l-3.exp


--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x64/libfftw3l-3.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/libfftw3l-3.lib


--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x64/onnxruntime.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/onnxruntime.lib


--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x86/libfftw3-3.exp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/libfftw3-3.exp


--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x86/libfftw3-3.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/libfftw3-3.lib


--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x86/libfftw3f-3.exp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/libfftw3f-3.exp


--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x86/libfftw3f-3.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/libfftw3f-3.lib


--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x86/libfftw3l-3.exp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/libfftw3l-3.exp


--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x86/libfftw3l-3.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/libfftw3l-3.lib


--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x86/onnxruntime.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/onnxruntime.lib


--------------------------------------------------------------------------------
/cpp_onnx/win/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/python/.gitattributes:
--------------------------------------------------------------------------------
 1 | *.7z filter=lfs diff=lfs merge=lfs -text
 2 | *.arrow filter=lfs diff=lfs merge=lfs -text
 3 | *.bin filter=lfs diff=lfs merge=lfs -text
 4 | *.bz2 filter=lfs diff=lfs merge=lfs -text
 5 | *.ckpt filter=lfs diff=lfs merge=lfs -text
 6 | *.ftz filter=lfs diff=lfs merge=lfs -text
 7 | *.gz filter=lfs diff=lfs merge=lfs -text
 8 | *.h5 filter=lfs diff=lfs merge=lfs -text
 9 | *.joblib filter=lfs diff=lfs merge=lfs -text
10 | *.lfs.* filter=lfs diff=lfs merge=lfs -text
11 | *.mlmodel filter=lfs diff=lfs merge=lfs -text
12 | *.model filter=lfs diff=lfs merge=lfs -text
13 | *.msgpack filter=lfs diff=lfs merge=lfs -text
14 | *.npy filter=lfs diff=lfs merge=lfs -text
15 | *.npz filter=lfs diff=lfs merge=lfs -text
16 | *.onnx filter=lfs diff=lfs merge=lfs -text
17 | *.ot filter=lfs diff=lfs merge=lfs -text
18 | *.parquet filter=lfs diff=lfs merge=lfs -text
19 | *.pb filter=lfs diff=lfs merge=lfs -text
20 | *.pickle filter=lfs diff=lfs merge=lfs -text
21 | *.pkl filter=lfs diff=lfs merge=lfs -text
22 | *.pt filter=lfs diff=lfs merge=lfs -text
23 | *.pth filter=lfs diff=lfs merge=lfs -text
24 | *.rar filter=lfs diff=lfs merge=lfs -text
25 | *.safetensors filter=lfs diff=lfs merge=lfs -text
26 | saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27 | *.tar.* filter=lfs diff=lfs merge=lfs -text
28 | *.tar filter=lfs diff=lfs merge=lfs -text
29 | *.tflite filter=lfs diff=lfs merge=lfs -text
30 | *.tgz filter=lfs diff=lfs merge=lfs -text
31 | *.wasm filter=lfs diff=lfs merge=lfs -text
32 | *.xz filter=lfs diff=lfs merge=lfs -text
33 | *.zip filter=lfs diff=lfs merge=lfs -text
34 | *.zst filter=lfs diff=lfs merge=lfs -text
35 | *tfevents* filter=lfs diff=lfs merge=lfs -text
36 | 


--------------------------------------------------------------------------------
/python/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | - repo: https://gitee.com/SWHL/autoflake
 3 |   rev: v2.1.1
 4 |   hooks:
 5 |     - id: autoflake
 6 |       args:
 7 |         [
 8 |           "--recursive",
 9 |           "--in-place",
10 |           "--remove-all-unused-imports",
11 |           "--remove-unused-variable",
12 |           "--ignore-init-module-imports",
13 |         ]
14 |       files: \.py$
15 | - repo: https://gitee.com/SWHL/black
16 |   rev: 23.1.0
17 |   hooks:
18 |     - id: black
19 |       files: \.py$


--------------------------------------------------------------------------------
/python/README.md:
--------------------------------------------------------------------------------
 1 | <div align="center">
 2 | <div align="center">
 3 |     <h1><b>Rapid Paraformer</b></h1>
 4 | </div>
 5 | 
 6 | <a href=""><img src="https://img.shields.io/badge/OS-Linux%2C%20Win%2C%20Mac-pink.svg"></a>
 7 | <a href=""><img src="https://img.shields.io/badge/Python->=3.6,<3.13-aff.svg"></a>
 8 | <a href="https://pepy.tech/project/rapid_paraformer"><img src="https://static.pepy.tech/personalized-badge/rapid_paraformer?period=total&units=abbreviation&left_color=grey&right_color=blue&left_text=Downloads"></a>
 9 | <a href="https://pypi.org/project/rapid_paraformer/"><img alt="PyPI" src="https://img.shields.io/pypi/v/rapid_paraformer"></a>
10 | <a href="https://semver.org/"><img alt="SemVer2.0" src="https://img.shields.io/badge/SemVer-2.0-brightgreen"></a>
11 | <a href="https://github.com/psf/black"><img src="https://img.shields.io/badge/code%20style-black-000000.svg"></a>
12 | </div>
13 | 
14 | ## 简介
15 | rapid_paraformer是一个基于阿里达摩院[Paraformer语音识别-中文-通用-16k-离线-large-pytorch](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary)的语音识别工具。
16 | 
17 | 🎉该项目核心代码已经并入[FunASR](https://github.com/alibaba-damo-academy/FunASR)
18 | 
19 | 本仓库仅对模型做了转换，只采用ONNXRuntime推理引擎
20 | 
21 | ## TODO
22 | - [ ] 整合vad + asr + pun三个模型，打造可部署使用的方案
23 | 
24 | ## 安装
25 | ```bash
26 | pip install rapid_paraformer
27 | ```
28 | 
29 | ## 模型下载
30 | 方法一：从Hugging Face上下载（[link](https://huggingface.co/SWHL/RapidParaformer)）
31 | ```python
32 | from rapid_paraformer import download_hf_model
33 | 
34 | download_hf_model(repo_id="SWHL/RapidParaformer", save_dir=".")
35 | ```
36 | 
37 | 方法二：([Google Drive](https://drive.google.com/drive/folders/1RVQtMe0eB_k6G5TJlmXwPELx4VtF2oCw?usp=sharing) | [百度网盘](https://pan.baidu.com/s/1zf8Ta6QxFHY3Z75fHNYKrQ?pwd=6ekq))
38 | ```bash
39 | resources
40 | ├── [ 700]  config.yaml
41 | └── [4.0K]  models
42 |     ├── [ 11K]  am.mvn
43 |     ├── [824M]  asr_paraformerv2.onnx
44 |     └── [ 50K]  token_list.pkl
45 | ```
46 | 
47 | ## 模型转换
48 | 基于ModelScope下的notebook环境自助转换：
49 | 1. 打开[快速体验](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary)
50 | 2. 打开notebook → Cell中输入以下命令, 执行即可。
51 |     ```bash
52 |     !python -m funasr.export.export_model --model-name 'damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch' --export-dir "./export"
53 |     ```
54 | 
55 | ## 使用
56 | ```python
57 | from rapid_paraformer import RapidParaformer
58 | 
59 | config_path = "resources/config.yaml"
60 | 
61 | paraformer = RapidParaformer(config_path)
62 | 
63 | wav_path = [
64 |     "test_wavs/0478_00017.wav",
65 |     "test_wavs/asr_example_zh.wav",
66 | ]
67 | 
68 | result = paraformer(wav_path)
69 | print(result)
70 | # ['y', '欢迎大家来体验达摩院推出的语音识别模型']
71 | ```
72 | 


--------------------------------------------------------------------------------
/python/demo.py:
--------------------------------------------------------------------------------
 1 | # -*- encoding: utf-8 -*-
 2 | # @Author: SWHL
 3 | # @Contact: liekkaskono@163.com
 4 | from rapid_paraformer import RapidParaformer, download_hf_model
 5 | 
 6 | download_hf_model(repo_id="SWHL/RapidParaformer", save_dir=".")
 7 | 
 8 | config_path = "resources/config.yaml"
 9 | 
10 | paraformer = RapidParaformer(config_path)
11 | 
12 | wav_path = [
13 |     "test_wavs/0478_00017.wav",
14 |     "test_wavs/asr_example_zh.wav",
15 | ]
16 | 
17 | print(wav_path)
18 | result = paraformer(wav_path)
19 | print(result)
20 | 


--------------------------------------------------------------------------------
/python/docs/doc_whl.md:
--------------------------------------------------------------------------------
1 | See [link](https://github.com/RapidAI/RapidASR/tree/main/python) for details.


--------------------------------------------------------------------------------
/python/rapid_paraformer/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .main import RapidParaformer
5 | from .utils import download_hf_model
6 | 


--------------------------------------------------------------------------------
/python/rapid_paraformer/kaldifeat/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | from .feature import compute_fbank_feats, compute_mfcc_feats, apply_cmvn_sliding
3 | from .ivector import compute_vad
4 | 


--------------------------------------------------------------------------------
/python/rapid_paraformer/kaldifeat/ivector.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from .feature import sliding_window
 4 | 
 5 | 
 6 | # ---------- compute-vad ----------
 7 | 
 8 | def compute_vad(log_energy, energy_mean_scale=0.5, energy_threshold=0.5, frames_context=0, proportion_threshold=0.6):
 9 |     """ Apply voice activity detection
10 | 
11 |     :param log_energy: Log mel energy.
12 |     :param energy_mean_scale: If this is set to s, to get the actual threshold we let m be the mean log-energy of the file, and use s*m + vad-energy-threshold (float, default = 0.5)
13 |     :param energy_threshold: Constant term in energy threshold for VAD (also see energy_mean_scale) (float, default = 5)
14 |     :param frames_context: Number of frames of context on each side of central frame, in window for which energy is monitored (int, default = 0)
15 |     :param proportion_threshold: Parameter controlling the proportion of frames within the window that need to have more energy than the threshold (float, default = 0.6)
16 |     :return: A vector of boolean that are True if we judge the frame voiced and False otherwise.
17 |     """
18 |     assert len(log_energy.shape) == 1
19 |     assert energy_mean_scale >= 0
20 |     assert frames_context >= 0
21 |     assert 0 < proportion_threshold < 1
22 |     dtype = log_energy.dtype
23 |     energy_threshold += energy_mean_scale * log_energy.mean()
24 |     if frames_context > 0:
25 |         num_frames = len(log_energy)
26 |         window_size = frames_context * 2 + 1
27 |         log_energy_pad = np.concatenate([
28 |             np.zeros(frames_context, dtype=dtype),
29 |             log_energy,
30 |             np.zeros(frames_context, dtype=dtype)
31 |         ])
32 |         log_energy_window = sliding_window(log_energy_pad, window_size, 1)
33 |         num_count = np.count_nonzero(log_energy_window > energy_threshold, axis=1)
34 |         den_count = np.ones(num_frames, dtype=dtype) * window_size
35 |         max_den_count = np.arange(frames_context + 1, min(window_size, num_frames) + 1, dtype=dtype)
36 |         den_count[:-(frames_context + 2):-1] = max_den_count
37 |         den_count[:frames_context + 1] = np.min([den_count[:frames_context + 1], max_den_count], axis=0)
38 |         vad = num_count / den_count >= proportion_threshold
39 |     else:
40 |         vad = log_energy > energy_threshold
41 |     return vad
42 | 
43 | # ---------- compute-vad ----------
44 | 


--------------------------------------------------------------------------------
/python/rapid_paraformer/main.py:
--------------------------------------------------------------------------------
  1 | # -*- encoding: utf-8 -*-
  2 | # @Author: SWHL
  3 | # @Contact: liekkaskono@163.com
  4 | from pathlib import Path
  5 | from typing import List, Tuple, Union
  6 | 
  7 | import librosa
  8 | import numpy as np
  9 | 
 10 | from .utils import (
 11 |     CharTokenizer,
 12 |     Hypothesis,
 13 |     ONNXRuntimeError,
 14 |     OrtInferSession,
 15 |     TokenIDConverter,
 16 |     WavFrontend,
 17 |     get_logger,
 18 |     read_yaml,
 19 | )
 20 | 
 21 | logging = get_logger()
 22 | 
 23 | 
 24 | class RapidParaformer:
 25 |     def __init__(self, config_path: Union[str, Path]) -> None:
 26 |         if not Path(config_path).exists():
 27 |             raise FileNotFoundError(f"{config_path} does not exist.")
 28 | 
 29 |         config = read_yaml(config_path)
 30 | 
 31 |         self.converter = TokenIDConverter(**config["TokenIDConverter"])
 32 |         self.tokenizer = CharTokenizer(**config["CharTokenizer"])
 33 |         self.frontend = WavFrontend(
 34 |             cmvn_file=config["WavFrontend"]["cmvn_file"],
 35 |             **config["WavFrontend"]["frontend_conf"],
 36 |         )
 37 |         self.ort_infer = OrtInferSession(config["Model"])
 38 |         self.batch_size = config["Model"]["batch_size"]
 39 | 
 40 |     def __call__(self, wav_content: Union[str, np.ndarray, List[str]]) -> List:
 41 |         waveform_list = self.load_data(wav_content)
 42 |         waveform_nums = len(waveform_list)
 43 | 
 44 |         asr_res = []
 45 |         for beg_idx in range(0, waveform_nums, self.batch_size):
 46 |             end_idx = min(waveform_nums, beg_idx + self.batch_size)
 47 | 
 48 |             feats, feats_len = self.extract_feat(waveform_list[beg_idx:end_idx])
 49 | 
 50 |             try:
 51 |                 am_scores, valid_token_lens = self.infer(feats, feats_len)
 52 |             except ONNXRuntimeError:
 53 |                 logging.warning("input wav is silence or noise")
 54 |                 preds = []
 55 |             else:
 56 |                 preds = self.decode(am_scores, valid_token_lens)
 57 | 
 58 |             asr_res.extend(preds)
 59 |         return asr_res
 60 | 
 61 |     def load_data(self, wav_content: Union[str, np.ndarray, List[str]]) -> List:
 62 |         def load_wav(path: str) -> np.ndarray:
 63 |             waveform, _ = librosa.load(path, sr=None)
 64 |             return waveform[None, ...]
 65 | 
 66 |         if isinstance(wav_content, np.ndarray):
 67 |             return [wav_content]
 68 | 
 69 |         if isinstance(wav_content, str):
 70 |             return [load_wav(wav_content)]
 71 | 
 72 |         if isinstance(wav_content, list):
 73 |             return [load_wav(path) for path in wav_content]
 74 | 
 75 |         raise TypeError(f"The type of {wav_content} is not in [str, np.ndarray, list]")
 76 | 
 77 |     def extract_feat(
 78 |         self, waveform_list: List[np.ndarray]
 79 |     ) -> Tuple[np.ndarray, np.ndarray]:
 80 |         feats, feats_len = [], []
 81 |         for waveform in waveform_list:
 82 |             speech, _ = self.frontend.fbank(waveform)
 83 |             feat, feat_len = self.frontend.lfr_cmvn(speech)
 84 |             feats.append(feat)
 85 |             feats_len.append(feat_len)
 86 | 
 87 |         feats = self.pad_feats(feats, np.max(feats_len))
 88 |         feats_len = np.array(feats_len).astype(np.int32)
 89 |         return feats, feats_len
 90 | 
 91 |     @staticmethod
 92 |     def pad_feats(feats: List[np.ndarray], max_feat_len: int) -> np.ndarray:
 93 |         def pad_feat(feat: np.ndarray, cur_len: int) -> np.ndarray:
 94 |             pad_width = ((0, max_feat_len - cur_len), (0, 0))
 95 |             return np.pad(feat, pad_width, "constant", constant_values=0)
 96 | 
 97 |         feat_res = [pad_feat(feat, feat.shape[0]) for feat in feats]
 98 |         feats = np.array(feat_res).astype(np.float32)
 99 |         return feats
100 | 
101 |     def infer(
102 |         self, feats: np.ndarray, feats_len: np.ndarray
103 |     ) -> Tuple[np.ndarray, np.ndarray]:
104 |         am_scores, token_nums = self.ort_infer([feats, feats_len])
105 |         return am_scores, token_nums
106 | 
107 |     def decode(self, am_scores: np.ndarray, token_nums: int) -> List[str]:
108 |         return [
109 |             self.decode_one(am_score, token_num)
110 |             for am_score, token_num in zip(am_scores, token_nums)
111 |         ]
112 | 
113 |     def decode_one(self, am_score: np.ndarray, valid_token_num: int) -> List[str]:
114 |         yseq = am_score.argmax(axis=-1)
115 |         score = am_score.max(axis=-1)
116 |         score = np.sum(score, axis=-1)
117 | 
118 |         # pad with mask tokens to ensure compatibility with sos/eos tokens
119 |         # asr_model.sos:1  asr_model.eos:2
120 |         yseq = np.array([1] + yseq.tolist() + [2])
121 |         hyp = Hypothesis(yseq=yseq, score=score)
122 | 
123 |         # remove sos/eos and get results
124 |         last_pos = -1
125 |         token_int = hyp.yseq[1:last_pos].tolist()
126 | 
127 |         # remove blank symbol id, which is assumed to be 0
128 |         token_int = list(filter(lambda x: x not in (0, 2), token_int))
129 | 
130 |         # Change integer-ids to tokens
131 |         token = self.converter.ids2tokens(token_int)
132 |         text = self.tokenizer.tokens2text(token)
133 |         return text[: valid_token_num - 1]
134 | 
135 | 
def main() -> None:
    """Command-line entry point (the target of the ``rapid_paraformer.main:main`` console script).

    Without arguments it reproduces the previous hard-coded run: the configuration at
    ``<package parent>/resources/config.yaml`` is loaded and ``0478_00017.wav`` is
    transcribed 1000 times, printing every result.
    """
    import argparse

    project_dir = Path(__file__).resolve().parent.parent

    parser = argparse.ArgumentParser(description="Transcribe a wav file with RapidParaformer.")
    parser.add_argument(
        "-c",
        "--config_path",
        type=str,
        default=str(project_dir / "resources" / "config.yaml"),
        help="Path of the YAML configuration file.",
    )
    parser.add_argument(
        "-w",
        "--wav_path",
        type=str,
        default="0478_00017.wav",
        help="Path of the wav file to transcribe.",
    )
    parser.add_argument(
        "-n",
        "--repeat",
        type=int,
        default=1000,  # kept from the original stress loop; pass -n 1 for a single run
        help="How many times the file is transcribed.",
    )
    args = parser.parse_args()

    paraformer = RapidParaformer(args.config_path)
    for _ in range(args.repeat):
        result = paraformer(args.wav_path)
        print(result)


if __name__ == "__main__":
    main()
145 | 


--------------------------------------------------------------------------------
/python/requirements.txt:
--------------------------------------------------------------------------------
1 | librosa
2 | numpy
3 | onnxruntime
4 | typeguard==2.13.3
5 | huggingface_hub


--------------------------------------------------------------------------------
/python/setup.py:
--------------------------------------------------------------------------------
 1 | # -*- encoding: utf-8 -*-
 2 | # @Author: SWHL
 3 | # @Contact: liekkaskono@163.com
 4 | import sys
 5 | from pathlib import Path
 6 | from typing import List, Union
 7 | 
 8 | import setuptools
 9 | from get_pypi_latest_version import GetPyPiLatestVersion
10 | 
11 | 
def read_txt(txt_path: Union[Path, str]) -> List[str]:
    """Read a UTF-8 text file and return its lines with trailing newline characters removed.

    :param txt_path: Location of the text file.
    :return: One string per line, in file order; other whitespace is kept.
    """
    lines: List[str] = []
    with open(txt_path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            lines.append(raw_line.rstrip("\n"))
    return lines
16 | 
17 | 
def get_readme() -> str:
    """Return the UTF-8 text of ``docs/doc_whl.md`` located next to this file."""
    doc_file = Path(__file__).resolve().parent / "docs" / "doc_whl.md"
    return doc_file.read_text(encoding="utf-8")
24 | 
25 | 
# Name used for the distribution, the top-level package and the console script.
MODULE_NAME = "rapid_paraformer"

# Helper from the third-party ``get_pypi_latest_version`` package.
# NOTE(review): presumably looks up the newest release of MODULE_NAME on PyPI -- confirm.
obtainer = GetPyPiLatestVersion()
try:
    latest_version = obtainer(MODULE_NAME)
except ValueError:
    # Fall back to a fixed starting point when the lookup raises ValueError.
    latest_version = "0.0.1"

# Default version of this build, derived from the looked-up version.
VERSION_NUM = obtainer.version_add_one(latest_version)

# Any arguments after the setup command (sys.argv[2:]) are joined and scanned for a
# version; a non-empty match overrides the default version.
if len(sys.argv) > 2:
    match_str = " ".join(sys.argv[2:])
    matched_versions = obtainer.extract_version(match_str)
    if matched_versions:
        VERSION_NUM = matched_versions
# Drop the extra arguments so setuptools.setup() only sees the script name and one command.
sys.argv = sys.argv[:2]

setuptools.setup(
    name=MODULE_NAME,
    version=VERSION_NUM,
    platforms="Any",
    description="Tool of speech recognition.",
    long_description=get_readme(),
    long_description_content_type="text/markdown",
    author="SWHL",
    author_email="liekkaskono@163.com",
    url="https://github.com/RapidAI/RapidASR",
    license="Apache-2.0",
    include_package_data=True,
    # Read relative to the current working directory, so run this script from its own folder.
    install_requires=read_txt("requirements.txt"),
    packages=[MODULE_NAME, f"{MODULE_NAME}/kaldifeat"],
    package_data={"": ["*.md", "LICENSE"]},
    keywords=["asr,paraformer,wenet"],
    classifiers=[
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3.12",
    ],
    python_requires=">=3.6,<3.13",
    # The console script resolves to a callable named ``main`` in rapid_paraformer/main.py.
    entry_points={
        "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"],
    },
)
73 | 


--------------------------------------------------------------------------------
/python/test_wavs/0478_00017.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/python/test_wavs/0478_00017.wav


--------------------------------------------------------------------------------
/python/test_wavs/asr_example_zh.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/python/test_wavs/asr_example_zh.wav


--------------------------------------------------------------------------------
/python/tests/test_infer.py:
--------------------------------------------------------------------------------
 1 | # -*- encoding: utf-8 -*-
 2 | # @Author: SWHL
 3 | # @Contact: liekkaskono@163.com
 4 | import os
 5 | from pathlib import Path
 6 | 
 7 | import pytest
 8 | import librosa
 9 | 
# Directory two levels above this test file; added to the import path so that
# ``rapid_paraformer`` can be imported from the source tree.
project_dir = Path(__file__).resolve().parent.parent
os.sys.path.append(str(project_dir))

from rapid_paraformer import RapidParaformer


# One recogniser instance shared by every test in this module.
cfg_path = project_dir / 'resources' / 'config.yaml'
paraformer = RapidParaformer(cfg_path)
18 | 
19 | 
def test_input_by_path():
    """A single wav path passed as ``str`` yields the expected transcription prefix."""
    # Anchor the path on project_dir (like cfg_path) so the test does not depend on the CWD.
    wav_file = str(project_dir / 'test_wavs' / '0478_00017.wav')
    result = paraformer(wav_file)
    assert result[0][:5] == '呃说不配合'
24 | 
25 | 
def test_input_by_ndarray():
    """An already loaded waveform array yields the same transcription as the file path."""
    # Anchor the path on project_dir (like cfg_path) so the test does not depend on the CWD.
    wav_file = str(project_dir / 'test_wavs' / '0478_00017.wav')
    # sr=None keeps the file's native sampling rate, matching how paths are loaded;
    # librosa's default would resample the audio first.
    waveform, _ = librosa.load(wav_file, sr=None)
    result = paraformer(waveform[None, ...])
    assert result[0][:5] == '呃说不配合'
31 | 
32 | 
def test_input_by_str_list():
    """A list of wav paths yields one result per file, in input order."""
    # Anchor the paths on project_dir (like cfg_path) so the test does not depend on the CWD.
    wave_list = [
        str(project_dir / 'test_wavs' / '0478_00017.wav'),
        str(project_dir / 'test_wavs' / 'asr_example_zh.wav'),
    ]
    result = paraformer(wave_list)
    assert len(result) == len(wave_list)
    assert result[0][:5] == '呃说不配合'
40 | 
41 | 
def test_empty():
    """Passing ``None`` as the input must raise ``TypeError``."""
    wav_file = None
    # Only the call under test lives inside the context manager: an extra ``raise`` here
    # would satisfy pytest.raises even when the call itself does not raise.
    with pytest.raises(TypeError) as exc_info:
        paraformer(wav_file)
    assert exc_info.type is TypeError
48 | 


--------------------------------------------------------------------------------