├── .github └── workflows │ └── gen_whl_to_pypi.yml ├── .gitignore ├── LICENSE ├── README.md ├── cpp_onnx ├── CMakeLists.txt ├── CMakeSettings.json ├── api.md ├── images │ ├── demo.png │ └── threadnum.png ├── include │ ├── Audio.h │ ├── ComDefine.h │ ├── Model.h │ ├── librapidasrapi.h │ ├── webrtc_vad.h │ └── win_func.h ├── models │ ├── readme.md │ └── vocab.txt ├── readme.md ├── src │ ├── Audio.cpp │ ├── CMakeLists.txt │ ├── CommonStruct.h │ ├── FeatureExtract.cpp │ ├── FeatureExtract.h │ ├── FeatureQueue.cpp │ ├── FeatureQueue.h │ ├── Model.cpp │ ├── SpeechWrap.cpp │ ├── SpeechWrap.h │ ├── Tensor.h │ ├── Vocab.cpp │ ├── Vocab.h │ ├── alignedmem.cpp │ ├── alignedmem.h │ ├── commonfunc.h │ ├── librapidasrapi.cpp │ ├── paraformer_onnx.cpp │ ├── paraformer_onnx.h │ ├── precomp.h │ ├── predefine_coe.h │ ├── tmp.h │ ├── util.cpp │ └── util.h ├── tester │ ├── CMakeLists.txt │ └── tester.cpp ├── third_party │ └── webrtc │ │ ├── CMakeLists.txt │ │ ├── common_audio │ │ ├── signal_processing │ │ │ ├── complex_bit_reverse.c │ │ │ ├── complex_fft.c │ │ │ ├── complex_fft_tables.h │ │ │ ├── cross_correlation.c │ │ │ ├── division_operations.c │ │ │ ├── dot_product_with_scale.cc │ │ │ ├── dot_product_with_scale.h │ │ │ ├── downsample_fast.c │ │ │ ├── energy.c │ │ │ ├── get_scaling_square.c │ │ │ ├── include │ │ │ │ ├── real_fft.h │ │ │ │ ├── signal_processing_library.h │ │ │ │ └── spl_inl.h │ │ │ ├── min_max_operations.c │ │ │ ├── resample_48khz.c │ │ │ ├── resample_by_2_internal.c │ │ │ ├── resample_by_2_internal.h │ │ │ ├── resample_fractional.c │ │ │ ├── spl_init.c │ │ │ ├── spl_inl.c │ │ │ ├── spl_sqrt.c │ │ │ └── vector_scaling_operations.c │ │ ├── third_party │ │ │ └── spl_sqrt_floor │ │ │ │ ├── spl_sqrt_floor.c │ │ │ │ └── spl_sqrt_floor.h │ │ └── vad │ │ │ ├── include │ │ │ └── webrtc_vad.h │ │ │ ├── vad_core.c │ │ │ ├── vad_core.h │ │ │ ├── vad_filterbank.c │ │ │ ├── vad_filterbank.h │ │ │ ├── vad_gmm.c │ │ │ ├── vad_gmm.h │ │ │ ├── vad_sp.c │ │ │ ├── vad_sp.h │ │ │ └── 
webrtc_vad.c │ │ ├── rtc_base │ │ ├── checks.cc │ │ ├── checks.h │ │ ├── compile_assert_c.h │ │ ├── numerics │ │ │ └── safe_compare.h │ │ ├── sanitizer.h │ │ ├── system │ │ │ ├── arch.h │ │ │ └── inline.h │ │ └── type_traits.h │ │ ├── system_wrappers │ │ └── include │ │ │ └── cpu_features_wrapper.h │ │ └── typedefs.h ├── wave │ ├── asr_example.wav │ ├── long.wav │ ├── short.wav │ ├── test.pcm.bytes │ └── test.pcm.wav └── win │ ├── bin │ ├── x64 │ │ ├── libfftw3-3.dll │ │ ├── libfftw3f-3.dll │ │ ├── libfftw3l-3.dll │ │ └── onnxruntime.dll │ └── x86 │ │ ├── libfftw3-3.dll │ │ ├── libfftw3f-3.dll │ │ ├── libfftw3l-3.dll │ │ └── onnxruntime.dll │ ├── images │ └── sample.png │ ├── include │ ├── cpu_provider_factory.h │ ├── fftw3.h │ ├── onnxruntime_c_api.h │ ├── onnxruntime_cxx_api.h │ ├── onnxruntime_cxx_inline.h │ ├── onnxruntime_run_options_config_keys.h │ ├── onnxruntime_session_options_config_keys.h │ ├── provider_options.h │ └── tensorrt_provider_factory.h │ ├── lib │ ├── x64 │ │ ├── libfftw3-3.def │ │ ├── libfftw3-3.exp │ │ ├── libfftw3-3.lib │ │ ├── libfftw3f-3.def │ │ ├── libfftw3f-3.exp │ │ ├── libfftw3f-3.lib │ │ ├── libfftw3l-3.def │ │ ├── libfftw3l-3.exp │ │ ├── libfftw3l-3.lib │ │ └── onnxruntime.lib │ └── x86 │ │ ├── libfftw3-3.def │ │ ├── libfftw3-3.exp │ │ ├── libfftw3-3.lib │ │ ├── libfftw3f-3.def │ │ ├── libfftw3f-3.exp │ │ ├── libfftw3f-3.lib │ │ ├── libfftw3l-3.def │ │ ├── libfftw3l-3.exp │ │ ├── libfftw3l-3.lib │ │ └── onnxruntime.lib │ └── readme.md └── python ├── .gitattributes ├── .pre-commit-config.yaml ├── README.md ├── demo.py ├── docs └── doc_whl.md ├── rapid_paraformer ├── __init__.py ├── kaldifeat │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── feature.py │ └── ivector.py ├── main.py └── utils.py ├── requirements.txt ├── setup.py ├── test_wavs ├── 0478_00017.wav └── asr_example_zh.wav └── tests └── test_infer.py /.github/workflows/gen_whl_to_pypi.yml: -------------------------------------------------------------------------------- 1 
| name: Push rapid_paraformer to pypi 2 | 3 | on: 4 | push: 5 | # branches: [ main ] 6 | # paths: 7 | # - 'python/rapid_paraformer/**' 8 | # - 'python/docs/doc_whl.md' 9 | # - 'python/setup.py' 10 | # - '.github/workflows/gen_whl_to_pypi.yml' 11 | tags: 12 | - v* 13 | 14 | # env: 15 | # RESOURCES_URL: https://github.com/RapidAI/RapidLatexOCR/releases/download/v0.0.0/models.zip 16 | 17 | jobs: 18 | # UnitTesting: 19 | # runs-on: ubuntu-latest 20 | # steps: 21 | # - name: Pull latest code 22 | # uses: actions/checkout@v3 23 | 24 | # - name: Set up Python 3.7 25 | # uses: actions/setup-python@v4 26 | # with: 27 | # python-version: '3.7' 28 | # architecture: 'x64' 29 | 30 | # - name: Display Python version 31 | # run: python -c "import sys; print(sys.version)" 32 | 33 | # - name: Download models 34 | # run: | 35 | # wget $RESOURCES_URL 36 | # ZIP_NAME=${RESOURCES_URL##*/} 37 | # DIR_NAME=${ZIP_NAME%.*} 38 | # unzip $ZIP_NAME 39 | 40 | # - name: Unit testings with rapid_latex_ocr 41 | # run: | 42 | # pip install -r requirements.txt 43 | # pip install pytest 44 | # pytest tests/test*.py 45 | 46 | GenerateWHL_PushPyPi: 47 | runs-on: ubuntu-latest 48 | 49 | steps: 50 | - uses: actions/checkout@v3 51 | 52 | - name: Set up Python 3.7 53 | uses: actions/setup-python@v4 54 | with: 55 | python-version: '3.7' 56 | architecture: 'x64' 57 | 58 | - name: Run setup.py 59 | run: | 60 | cd python 61 | pip install -r requirements.txt 62 | python -m pip install --upgrade pip 63 | pip install wheel get_pypi_latest_version 64 | python setup.py bdist_wheel ${{ github.ref_name }} 65 | 66 | # - name: Publish distribution 📦 to Test PyPI 67 | # uses: pypa/gh-action-pypi-publish@v1.5.0 68 | # with: 69 | # password: ${{ secrets.TEST_PYPI_API_TOKEN }} 70 | # repository_url: https://test.pypi.org/legacy/ 71 | # packages_dir: dist/ 72 | 73 | - name: Publish distribution 📦 to PyPI 74 | uses: pypa/gh-action-pypi-publish@v1.5.0 75 | with: 76 | password: ${{ secrets.PYPI_API_TOKEN }} 77 | packages_dir: 
python/dist/ 78 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.onnx 2 | *.json 3 | 4 | # Created by .ignore support plugin (hsz.mobi) 5 | ### Python template 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | .pytest_cache 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | pip-wheel-metadata/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | # *.manifest 40 | # *.spec 41 | *.res 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | 138 | #idea 139 | .vs 140 | .vscode 141 | .idea 142 | /models 143 | 144 | #models 145 | 146 | *.ttf 147 | *.ttc 148 | 149 | 150 | *.bin 151 | *.mapping 152 | *.xml 153 | 154 | *.pdiparams 155 | *.pdiparams.info 156 | *.pdmodel 157 | 158 | .DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 RapidAI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Rapid ASR 2 |

3 | 4 | 5 | 6 | 7 | 8 |

9 | 10 | - 🎉 推出知识星球[RapidAI私享群](https://t.zsxq.com/0duLBZczw),这里的提问会优先得到回答和支持,也会享受到RapidAI组织后续持续优质的服务。欢迎大家的加入。 11 | - Paraformer模型出自阿里达摩院[Paraformer语音识别-中文-通用-16k-离线-large-pytorch](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary)。 12 | - 本仓库仅对模型做了转换,只采用ONNXRuntime推理引擎。该项目核心代码已经并入[FunASR](https://github.com/alibaba-damo-academy/FunASR)。 13 | - 项目仍会持续更新,欢迎关注。 14 | - QQ群号:645751008 15 | 16 | #### 📖文档导航 17 | - 语音识别: 18 | - rapid_paraformer: 19 | - [rapid_paraformer-Python](./python/README.md) 20 | - [rapid_C++/C](./cpp_onnx/readme.md) 21 | - [rapid_wenet](https://github.com/RapidAI/RapidASR/tree/rapid_wenet) 22 | - [Python](https://github.com/RapidAI/RapidASR/tree/rapid_wenet/python) 23 | - [C++](https://github.com/RapidAI/RapidASR/tree/rapid_wenet/cpp) 24 | - [rapid_paddlespeech-Python](https://github.com/RapidAI/RapidASR/tree/rapid_paddlespeech) 25 | - 标点符号 26 | - [RapidPunc](https://github.com/RapidAI/RapidPunc) 27 | 28 | #### 📆TODO以及任务认领 29 | - 参见这里:[link](https://github.com/RapidAI/RapidASR/issues/15) 30 | 31 | #### 🎨整体框架 32 | ```mermaid 33 | flowchart LR 34 | 35 | A([wav]) --RapidVad--> B([各个小段的音频]) --RapidASR--> C([识别的文本内容]) --RapidPunc--> D([最终识别内容]) 36 | ``` 37 | 38 | #### 📣更新日志 39 |
40 | 详情 41 | - 2023-08-21 v2.0.4 update: 42 | - 添加whl包支持 43 | - 更新文档 44 | - 2023-02-25 45 | - 添加C++版本推理,使用onnxruntime引擎,预/后处理代码来自: [FastASR](https://github.com/chenkui164/FastASR) 46 | - 2023-02-14 v2.0.3 update: 47 | - 修复librosa读取wav文件错误 48 | - 修复fbank与torch下fbank提取结果不一致bug 49 | - 2023-02-11 v2.0.2 update: 50 | - 模型和推理代码解耦(`rapid_paraformer`和`resources`) 51 | - 支持批量推理(通过`resources/config.yaml`中`batch_size`指定) 52 | - 增加多种输入方式(`Union[str, np.ndarray, List[str]]`) 53 | - 2023-02-10 v2.0.1 update: 54 | - 添加对输入音频为噪音或者静音的文件推理结果捕捉。 55 | 56 |
57 | -------------------------------------------------------------------------------- /cpp_onnx/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | 3 | #-DONNXRUNTIME_DIR=D:\thirdpart\onnxruntime 4 | project(FastASR) 5 | 6 | set(CMAKE_CXX_STANDARD 11) 7 | set(CMAKE_POSITION_INDEPENDENT_CODE ON) 8 | 9 | # for onnxruntime 10 | 11 | IF(WIN32) 12 | 13 | 14 | if(CMAKE_CL_64) 15 | link_directories(${ONNXRUNTIME_DIR}\\lib) 16 | else() 17 | add_definitions(-D_WIN_X86) 18 | endif() 19 | ELSE() 20 | 21 | 22 | link_directories(${ONNXRUNTIME_DIR}/lib) 23 | 24 | endif() 25 | 26 | #option(FASTASR_BUILD_PYTHON_MODULE "build python module, using FastASR in Python" OFF) 27 | 28 | add_subdirectory("./third_party/webrtc") 29 | add_subdirectory(src) 30 | add_subdirectory(tester) 31 | -------------------------------------------------------------------------------- /cpp_onnx/CMakeSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "x64-Debug", 5 | "generator": "Ninja", 6 | "configurationType": "Debug", 7 | "inheritEnvironments": [ "msvc_x64_x64" ], 8 | "buildRoot": "${projectDir}\\out\\build\\${name}", 9 | "installRoot": "${projectDir}\\out\\install\\${name}", 10 | "buildCommandArgs": "", 11 | "ctestCommandArgs": "" 12 | }, 13 | { 14 | "name": "x64-Release", 15 | "generator": "Ninja", 16 | "configurationType": "RelWithDebInfo", 17 | "buildRoot": "${projectDir}\\out\\build\\${name}", 18 | "installRoot": "${projectDir}\\out\\install\\${name}", 19 | "cmakeCommandArgs": "", 20 | "buildCommandArgs": "", 21 | "ctestCommandArgs": "", 22 | "inheritEnvironments": [ "msvc_x64_x64" ] 23 | }, 24 | { 25 | "name": "Linux-GCC-Debug", 26 | "generator": "Unix Makefiles", 27 | "configurationType": "Debug", 28 | "cmakeExecutable": "cmake", 29 | "remoteCopySourcesExclusionList": [ ".vs", ".git", "out" ], 30 | 
"cmakeCommandArgs": "-DONNXRUNTIME_DIR=/data/linux/thirdpart/onnxruntime-linux-x64-1.14.1", 31 | "buildCommandArgs": "", 32 | "ctestCommandArgs": "", 33 | "inheritEnvironments": [ "linux_x64" ], 34 | "remoteMachineName": "${defaultRemoteMachineName}", 35 | "remoteCMakeListsRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/src", 36 | "remoteBuildRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/out/build/${name}", 37 | "remoteInstallRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/out/install/${name}", 38 | "remoteCopySources": true, 39 | "rsyncCommandArgs": "-t --delete", 40 | "remoteCopyBuildOutput": false, 41 | "remoteCopySourcesMethod": "rsync" 42 | } 43 | ] 44 | } -------------------------------------------------------------------------------- /cpp_onnx/api.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 参考代码: [tester.cpp](tester/tester.cpp) 4 | 5 | ``` 6 | 初始化程序库 7 | _RAPIDASRAPI RPASR_HANDLE RapidAsrInit(const char* szModelDir, int nThread); 8 | 9 | 10 | 11 | // if not give a fnCallback ,it should be NULL 12 | 识别内存缓冲区,完整的wav文件数据,包括文件头 13 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback); 14 | 识别内存缓冲区,只包括采样点数据,不包括wav文件头 15 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback); 16 | 识别文件,只包括采样点数据,不包括wav文件头 17 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback); 18 | 19 | 识别音频文件,完整的wav文件数据,包括文件头 20 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogFile(RPASR_HANDLE handle, const char* szWavfile, RPASR_MODE Mode, QM_CALLBACK fnCallback); 21 | 22 | 获取识别后的文本和相关数据 23 | _RAPIDASRAPI const char* RapidAsrGetResult(RPASR_RESULT Result,int nIndex); 24 | 25 | 获取结果块个数 26 | _RAPIDASRAPI const int RapidAsrGetRetNumber(RPASR_RESULT Result); 27 | 28 | 释放返回的结果块内存 29 | 
_RAPIDASRAPI void RapidAsrFreeResult(RPASR_RESULT Result); 30 | 31 | 32 | 使用完成后清理程序库 33 | _RAPIDASRAPI void RapidAsrUninit(RPASR_HANDLE Handle); 34 | 35 | 获取结果块中的数据所表示的音频长度,单位秒 36 | _RAPIDASRAPI const float RapidAsrGetRetSnippetTime(RPASR_RESULT Result); 37 | 38 | 39 | ``` 40 | -------------------------------------------------------------------------------- /cpp_onnx/images/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/images/demo.png -------------------------------------------------------------------------------- /cpp_onnx/images/threadnum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/images/threadnum.png -------------------------------------------------------------------------------- /cpp_onnx/include/Audio.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef AUDIO_H 3 | #define AUDIO_H 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | 11 | class AudioFrame { 12 | private: 13 | int start; 14 | int end; 15 | int len; 16 | 17 | public: 18 | AudioFrame(); 19 | AudioFrame(int len); 20 | 21 | ~AudioFrame(); 22 | int set_start(int val); 23 | int set_end(int val, int max_len); 24 | int get_start(); 25 | int get_len(); 26 | int disp(); 27 | }; 28 | 29 | class Audio { 30 | private: 31 | float *speech_data; 32 | int16_t *speech_buff; 33 | int speech_len; 34 | int speech_align_len; 35 | int16_t sample_rate; 36 | int offset; 37 | float align_size; 38 | int data_type; 39 | queue frame_queue; 40 | 41 | public: 42 | Audio(int data_type); 43 | Audio(int data_type, int size); 44 | ~Audio(); 45 | void disp(); 46 | bool loadwav(const char* filename); 47 | bool loadwav(const char* buf, int nLen); 48 | bool loadpcmwav(const char* 
buf, int nFileLen); 49 | bool loadpcmwav(const char* filename); 50 | int fetch_chunck(float *&dout, int len); 51 | int fetch(float *&dout, int &len, int &flag); 52 | void padding(); 53 | void split(); 54 | float get_time_len(); 55 | 56 | int get_queue_size() { return (int)frame_queue.size(); } 57 | }; 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /cpp_onnx/include/ComDefine.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef COMDEFINE_H 3 | #define COMDEFINE_H 4 | 5 | #define S_BEGIN 0 6 | #define S_MIDDLE 1 7 | #define S_END 2 8 | #define S_ALL 3 9 | #define S_ERR 4 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /cpp_onnx/include/Model.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef MODEL_H 3 | #define MODEL_H 4 | 5 | #include 6 | 7 | class Model { 8 | public: 9 | virtual ~Model(){}; 10 | virtual void reset() = 0; 11 | virtual std::string forward_chunk(float *din, int len, int flag) = 0; 12 | virtual std::string forward(float *din, int len, int flag) = 0; 13 | virtual std::string rescoring() = 0; 14 | }; 15 | 16 | Model *create_model(const char *path,int nThread=0); 17 | #endif 18 | -------------------------------------------------------------------------------- /cpp_onnx/include/librapidasrapi.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #ifdef WIN32 5 | 6 | 7 | #ifdef _RPASR_API_EXPORT 8 | 9 | #define _RAPIDASRAPI __declspec(dllexport) 10 | #else 11 | #define _RAPIDASRAPI __declspec(dllimport) 12 | #endif 13 | 14 | 15 | #else 16 | #define _RAPIDASRAPI 17 | #endif 18 | 19 | 20 | 21 | 22 | 23 | #ifndef _WIN32 24 | 25 | #define RPASR_CALLBCK_PREFIX __attribute__((__stdcall__)) 26 | 27 | #else 28 | #define RPASR_CALLBCK_PREFIX __stdcall 29 | #endif 30 | 31 | 32 | #ifdef __cplusplus 33 | 34 
| extern "C" { 35 | #endif 36 | 37 | typedef void* RPASR_HANDLE; 38 | 39 | typedef void* RPASR_RESULT; 40 | 41 | typedef unsigned char RPASR_BOOL; 42 | 43 | #define RPASR_TRUE 1 44 | #define RPASR_FALSE 0 45 | #define QM_DEFAULT_THREAD_NUM 4 46 | 47 | 48 | typedef enum 49 | { 50 | RASR_NONE=-1, 51 | RASRM_CTC_GREEDY_SEARCH=0, 52 | RASRM_CTC_RPEFIX_BEAM_SEARCH = 1, 53 | RASRM_ATTENSION_RESCORING = 2, 54 | 55 | }RPASR_MODE; 56 | 57 | typedef enum { 58 | 59 | RPASR_MODEL_PADDLE = 0, 60 | RPASR_MODEL_PADDLE_2 = 1, 61 | RPASR_MODEL_K2 = 2, 62 | RPASR_MODEL_PARAFORMER = 3, 63 | 64 | }RPASR_MODEL_TYPE; 65 | 66 | 67 | typedef void (* QM_CALLBACK)(int nCurStep, int nTotal); // nTotal: total steps; nCurStep: Current Step. 68 | 69 | // APIs for qmasr 70 | 71 | _RAPIDASRAPI RPASR_HANDLE RapidAsrInit(const char* szModelDir, int nThread); 72 | 73 | 74 | 75 | // if not give a fnCallback ,it should be NULL 76 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback); 77 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback); 78 | 79 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback); 80 | 81 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogFile(RPASR_HANDLE handle, const char* szWavfile, RPASR_MODE Mode, QM_CALLBACK fnCallback); 82 | 83 | _RAPIDASRAPI const char* RapidAsrGetResult(RPASR_RESULT Result,int nIndex); 84 | 85 | _RAPIDASRAPI const int RapidAsrGetRetNumber(RPASR_RESULT Result); 86 | _RAPIDASRAPI void RapidAsrFreeResult(RPASR_RESULT Result); 87 | 88 | 89 | _RAPIDASRAPI void RapidAsrUninit(RPASR_HANDLE Handle); 90 | 91 | _RAPIDASRAPI const float RapidAsrGetRetSnippetTime(RPASR_RESULT Result); 92 | 93 | #ifdef __cplusplus 94 | 95 | } 96 | #endif -------------------------------------------------------------------------------- 
/cpp_onnx/include/webrtc_vad.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | /* 12 | * This header file includes the VAD API calls. Specific function calls are 13 | * given below. 14 | */ 15 | 16 | #ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT 17 | #define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ 18 | 19 | #include 20 | #include 21 | 22 | typedef struct WebRtcVadInst VadInst; 23 | 24 | #ifdef __cplusplus 25 | extern "C" { 26 | #endif 27 | 28 | // Creates an instance to the VAD structure. 29 | VadInst* WebRtcVad_Create(void); 30 | 31 | // Frees the dynamic memory of a specified VAD instance. 32 | // 33 | // - handle [i] : Pointer to VAD instance that should be freed. 34 | void WebRtcVad_Free(VadInst* handle); 35 | 36 | // Initializes a VAD instance. 37 | // 38 | // - handle [i/o] : Instance that should be initialized. 39 | // 40 | // returns : 0 - (OK), 41 | // -1 - (null pointer or Default mode could not be set). 42 | int WebRtcVad_Init(VadInst* handle); 43 | 44 | // Sets the VAD operating mode. A more aggressive (higher mode) VAD is more 45 | // restrictive in reporting speech. Put in other words the probability of being 46 | // speech when the VAD returns 1 is increased with increasing mode. As a 47 | // consequence also the missed detection rate goes up. 48 | // 49 | // - handle [i/o] : VAD instance. 50 | // - mode [i] : Aggressiveness mode (0, 1, 2, or 3). 
51 | // 52 | // returns : 0 - (OK), 53 | // -1 - (null pointer, mode could not be set or the VAD instance 54 | // has not been initialized). 55 | int WebRtcVad_set_mode(VadInst* handle, int mode); 56 | 57 | // Calculates a VAD decision for the |audio_frame|. For valid sampling rates 58 | // frame lengths, see the description of WebRtcVad_ValidRatesAndFrameLengths(). 59 | // 60 | // - handle [i/o] : VAD Instance. Needs to be initialized by 61 | // WebRtcVad_Init() before call. 62 | // - fs [i] : Sampling frequency (Hz): 8000, 16000, or 32000 63 | // - audio_frame [i] : Audio frame buffer. 64 | // - frame_length [i] : Length of audio frame buffer in number of samples. 65 | // 66 | // returns : 1 - (Active Voice), 67 | // 0 - (Non-active Voice), 68 | // -1 - (Error) 69 | int WebRtcVad_Process(VadInst* handle, 70 | int fs, 71 | const int16_t* audio_frame, 72 | size_t frame_length); 73 | 74 | // Checks for valid combinations of |rate| and |frame_length|. We support 10, 75 | // 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz. 76 | // 77 | // - rate [i] : Sampling frequency (Hz). 78 | // - frame_length [i] : Speech frame buffer length in number of samples. 
79 | // 80 | // returns : 0 - (valid combination), -1 - (invalid combination) 81 | int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length); 82 | 83 | #ifdef __cplusplus 84 | } 85 | #endif 86 | 87 | #endif // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT 88 | -------------------------------------------------------------------------------- /cpp_onnx/include/win_func.h: -------------------------------------------------------------------------------- 1 | #include 2 | #ifdef WIN32 3 | #include 4 | #else 5 | #include 6 | #endif 7 | #ifdef WIN32 8 | int gettimeofday(struct timeval* tp, void* tzp) 9 | { 10 | time_t clock; 11 | struct tm tm; 12 | SYSTEMTIME wtm; 13 | 14 | GetLocalTime(&wtm); 15 | tm.tm_year = wtm.wYear - 1900; 16 | tm.tm_mon = wtm.wMonth - 1; 17 | tm.tm_mday = wtm.wDay; 18 | tm.tm_hour = wtm.wHour; 19 | tm.tm_min = wtm.wMinute; 20 | tm.tm_sec = wtm.wSecond; 21 | tm.tm_isdst = -1; 22 | 23 | clock = mktime(&tm); 24 | tp->tv_sec = clock; 25 | tp->tv_usec = wtm.wMilliseconds * 1000; 26 | return (0); 27 | } 28 | #endif -------------------------------------------------------------------------------- /cpp_onnx/models/readme.md: -------------------------------------------------------------------------------- 1 | Place model.onnx here! 
2 | -------------------------------------------------------------------------------- /cpp_onnx/readme.md: -------------------------------------------------------------------------------- 1 | 2 | ## 特别鸣谢 3 | 4 | 本程序中的预处理及后处理代码,来自于:https://github.com/chenkui164/FastASR 5 | 6 | 7 | ## 线程数与性能关系 8 | 9 | 测试环境Rocky Linux 8,仅测试cpp版本结果(未测python版本),@acely 10 | 11 | 简述: 12 | 在3台配置不同的机器上分别编译并测试,在fftw和onnxruntime版本都相同的前提下,识别同一个30分钟的音频文件,分别测试不同onnx线程数量的表现。 13 | 14 | ![线程数关系](images/threadnum.png "Windows ASR") 15 | 16 | 目前可以总结出大致规律: 17 | 18 | 并非onnx线程数越多越好 19 | 2线程比1线程提升显著,线程再多则提升较小 20 | 线程数等于CPU物理核心数时效率最好 21 | 实操建议: 22 | 23 | 大部分场景用3-4线程性价比最高 24 | 低配机器用2线程合适 25 | 26 | 27 | 28 | ## API 29 | [API文档](api.md) 30 | 31 | ## 演示 32 | 33 | ![Windows演示](images/demo.png "Windows ASR") 34 | 35 | ## 注意 36 | 本程序只支持 采样率16000hz, 位深16bit的 **单声道** 音频。 37 | 38 | ## 快速使用 39 | 40 | ### Windows 41 | 42 | 安装Vs2022 打开cpp_onnx目录下的cmake工程,直接 build即可。 本仓库已经准备好所有相关依赖库。 43 | 44 | Windows下已经预置fftw3、onnxruntime及openblas库 45 | 46 | 47 | ### Linux 48 | See the bottom of this page: Building Guidance 49 | 50 | 51 | ### 运行程序 52 | 53 | tester /path/to/models/dir /path/to/wave/file 54 | 55 | 例如: tester /data/models /data/test.wav 56 | 57 | /data/models 需要包括如下两个文件: model.onnx 和vocab.txt 58 | 59 | 60 | ## 支持平台 61 | - Windows 62 | - Linux/Unix 63 | 64 | ## 依赖 65 | - fftw3 66 | - onnxruntime 67 | 68 | ## 导出onnx格式模型文件 69 | 安装 modelscope与FunASR,依赖:torch,torchaudio,安装过程[详细参考文档](https://github.com/alibaba-damo-academy/FunASR/wiki) 70 | ```shell 71 | pip install "modelscope[audio_asr]" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html 72 | git clone https://github.com/alibaba/FunASR.git && cd FunASR 73 | pip install --editable ./ 74 | ``` 75 | 导出onnx模型,[详见](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/export),参考示例,从modelscope中模型导出: 76 | 77 | ``` 78 | python -m funasr.export.export_model 'damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch' "./export" true 79 | ``` 80 
| 81 | ## Building Guidance for Linux/Unix 82 | 83 | ``` 84 | git clone https://github.com/RapidAI/RapidASR.git 85 | cd RapidASR/cpp_onnx/ 86 | mkdir build 87 | cd build 88 | # download an appropriate onnxruntime from https://github.com/microsoft/onnxruntime/releases/tag/v1.14.0 89 | # here we get a copy of onnxruntime for linux 64 90 | wget https://github.com/microsoft/onnxruntime/releases/download/v1.14.0/onnxruntime-linux-x64-1.14.0.tgz 91 | # ls 92 | # onnxruntime-linux-x64-1.14.0 onnxruntime-linux-x64-1.14.0.tgz 93 | 94 | #install fftw3-dev 95 | apt install libfftw3-dev 96 | 97 | # build 98 | cmake -DCMAKE_BUILD_TYPE=release .. -DONNXRUNTIME_DIR=/mnt/c/Users/ma139/RapidASR/cpp_onnx/build/onnxruntime-linux-x64-1.14.0 99 | make 100 | 101 | # then, in the tester subfolder of the current directory, you will find the tester executable 102 | 103 | ```` 104 | 105 | ### The structure of a qualified onnxruntime package. 106 | ``` 107 | onnxruntime_xxx 108 | ├───include 109 | └───lib 110 | ``` 111 | -------------------------------------------------------------------------------- /cpp_onnx/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | file(GLOB files1 "*.cpp") 3 | file(GLOB files4 "paraformer/*.cpp") 4 | 5 | set(files ${files1} ${files2} ${files3} ${files4}) 6 | 7 | # message("${files}") 8 | 9 | add_library(rapidasr ${files}) 10 | 11 | if(WIN32) 12 | 13 | set(EXTRA_LIBS libfftw3f-3 webrtcvad) 14 | if(CMAKE_CL_64) 15 | target_link_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64) 16 | else() 17 | target_link_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x86) 18 | endif() 19 | target_include_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include ) 20 | 21 | target_compile_definitions(rapidasr PUBLIC -D_RPASR_API_EXPORT) 22 | else() 23 | 24 | set(EXTRA_LIBS fftw3f webrtcvad pthread) 25 | target_include_directories(rapidasr PUBLIC "/usr/local/opt/fftw/include") 26 | 
target_link_directories(rapidasr PUBLIC "/usr/local/opt/fftw/lib") 27 | 28 | target_include_directories(rapidasr PUBLIC "/usr/local/opt/openblas/include") 29 | target_link_directories(rapidasr PUBLIC "/usr/local/opt/openblas/lib") 30 | 31 | target_include_directories(rapidasr PUBLIC "/usr/include") 32 | target_link_directories(rapidasr PUBLIC "/usr/lib64") 33 | 34 | target_include_directories(rapidasr PUBLIC ${FFTW3F_INCLUDE_DIR}) 35 | target_link_directories(rapidasr PUBLIC ${FFTW3F_LIBRARY_DIR}) 36 | include_directories(${ONNXRUNTIME_DIR}/include) 37 | endif() 38 | 39 | include_directories(${CMAKE_SOURCE_DIR}/include) 40 | target_link_libraries(rapidasr PUBLIC onnxruntime ${EXTRA_LIBS}) 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /cpp_onnx/src/CommonStruct.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef COMMONSTRUCT_H 3 | #define COMMONSTRUCT_H 4 | 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /cpp_onnx/src/FeatureExtract.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef FEATUREEXTRACT_H 3 | #define FEATUREEXTRACT_H 4 | 5 | #include 6 | #include 7 | 8 | #include "FeatureQueue.h" 9 | #include "SpeechWrap.h" 10 | #include "Tensor.h" 11 | 12 | class FeatureExtract { 13 | private: 14 | SpeechWrap speech; 15 | FeatureQueue fqueue; 16 | int mode; 17 | 18 | float *fft_input; 19 | fftwf_complex *fft_out; 20 | fftwf_plan p; 21 | 22 | void fftw_init(); 23 | void melspect(float *din, float *dout); 24 | void global_cmvn(float *din); 25 | 26 | public: 27 | FeatureExtract(int mode); 28 | ~FeatureExtract(); 29 | int size(); 30 | int status(); 31 | void reset(); 32 | void insert(float *din, int len, int flag); 33 | bool fetch(Tensor *&dout); 34 | }; 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- 
/cpp_onnx/src/FeatureQueue.cpp: -------------------------------------------------------------------------------- 1 | #include "precomp.h" 2 | FeatureQueue::FeatureQueue() 3 | { 4 | buff = new Tensor(67, 80); 5 | window_size = 67; 6 | buff_idx = 0; 7 | } 8 | 9 | FeatureQueue::~FeatureQueue() 10 | { 11 | delete buff; 12 | } 13 | 14 | void FeatureQueue::reinit(int size) 15 | { 16 | delete buff; 17 | buff = new Tensor(size, 80); 18 | buff_idx = 0; 19 | window_size = size; 20 | } 21 | 22 | void FeatureQueue::reset() 23 | { 24 | buff_idx = 0; 25 | } 26 | 27 | void FeatureQueue::push(float *din, int flag) 28 | { 29 | int offset = buff_idx * 80; 30 | memcpy(buff->buff + offset, din, 80 * sizeof(float)); 31 | buff_idx++; 32 | 33 | if (flag == S_END) { 34 | Tensor *tmp = new Tensor(buff_idx, 80); 35 | memcpy(tmp->buff, buff->buff, buff_idx * 80 * sizeof(float)); 36 | feature_queue.push(tmp); 37 | buff_idx = 0; 38 | } else if (buff_idx == window_size) { 39 | feature_queue.push(buff); 40 | Tensor *tmp = new Tensor(window_size, 80); 41 | memcpy(tmp->buff, buff->buff + (window_size - 3) * 80, 42 | 3 * 80 * sizeof(float)); 43 | buff_idx = 3; 44 | buff = tmp; 45 | } 46 | } 47 | 48 | Tensor *FeatureQueue::pop() 49 | { 50 | 51 | Tensor *tmp = feature_queue.front(); 52 | feature_queue.pop(); 53 | return tmp; 54 | } 55 | 56 | int FeatureQueue::size() 57 | { 58 | return feature_queue.size(); 59 | } 60 | -------------------------------------------------------------------------------- /cpp_onnx/src/FeatureQueue.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef FEATUREQUEUE_H 3 | #define FEATUREQUEUE_H 4 | 5 | #include "Tensor.h" 6 | #include 7 | #include 8 | using namespace std; 9 | 10 | 11 | class FeatureQueue { 12 | private: 13 | queue *> feature_queue; 14 | Tensor *buff; 15 | int buff_idx; 16 | int window_size; 17 | 18 | public: 19 | FeatureQueue(); 20 | ~FeatureQueue(); 21 | void reinit(int size); 22 | void reset(); 23 | void 
push(float *din, int flag); 24 | Tensor *pop(); 25 | int size(); 26 | }; 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /cpp_onnx/src/Model.cpp: -------------------------------------------------------------------------------- 1 | #include "precomp.h" 2 | 3 | Model *create_model(const char *path,int nThread) 4 | { 5 | Model *mm; 6 | 7 | 8 | mm = new paraformer::ModelImp(path, nThread); 9 | 10 | return mm; 11 | } 12 | -------------------------------------------------------------------------------- /cpp_onnx/src/SpeechWrap.cpp: -------------------------------------------------------------------------------- 1 | #include "precomp.h" 2 | 3 | SpeechWrap::SpeechWrap() 4 | { 5 | cache_size = 0; 6 | } 7 | 8 | SpeechWrap::~SpeechWrap() 9 | { 10 | } 11 | 12 | void SpeechWrap::reset() 13 | { 14 | cache_size = 0; 15 | } 16 | 17 | void SpeechWrap::load(float *din, int len) 18 | { 19 | in = din; 20 | in_size = len; 21 | total_size = cache_size + in_size; 22 | } 23 | 24 | int SpeechWrap::size() 25 | { 26 | return total_size; 27 | } 28 | 29 | void SpeechWrap::update(int offset) 30 | { 31 | int in_offset = offset - cache_size; 32 | cache_size = (total_size - offset); 33 | memcpy(cache, in + in_offset, cache_size * sizeof(float)); 34 | } 35 | 36 | float &SpeechWrap::operator[](int i) 37 | { 38 | return i < cache_size ? 
cache[i] : in[i - cache_size]; 39 | } 40 | -------------------------------------------------------------------------------- /cpp_onnx/src/SpeechWrap.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef SPEECHWRAP_H 3 | #define SPEECHWRAP_H 4 | 5 | #include 6 | 7 | class SpeechWrap { 8 | private: 9 | float cache[400]; 10 | int cache_size; 11 | float *in; 12 | int in_size; 13 | int total_size; 14 | int next_cache_size; 15 | 16 | public: 17 | SpeechWrap(); 18 | ~SpeechWrap(); 19 | void load(float *din, int len); 20 | void update(int offset); 21 | void reset(); 22 | int size(); 23 | float &operator[](int i); 24 | }; 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /cpp_onnx/src/Tensor.h: -------------------------------------------------------------------------------- 1 | #ifndef TENSOR_H 2 | #define TENSOR_H 3 | 4 | #include "alignedmem.h" 5 | 6 | using namespace std; 7 | 8 | template class Tensor { 9 | private: 10 | void alloc_buff(); 11 | void free_buff(); 12 | int mem_size; 13 | 14 | public: 15 | T *buff; 16 | int size[4]; 17 | int buff_size; 18 | Tensor(Tensor *in); 19 | Tensor(int a); 20 | Tensor(int a, int b); 21 | Tensor(int a, int b, int c); 22 | Tensor(int a, int b, int c, int d); 23 | ~Tensor(); 24 | void zeros(); 25 | void shape(); 26 | void disp(); 27 | void dump(const char *mode); 28 | void concat(Tensor *din, int dim); 29 | void resize(int a, int b, int c, int d); 30 | void add(float coe, Tensor *in); 31 | void add(Tensor *in); 32 | void add(Tensor *in1, Tensor *in2); 33 | void reload(Tensor *in); 34 | }; 35 | 36 | template Tensor::Tensor(int a) : size{1, 1, 1, a} 37 | { 38 | alloc_buff(); 39 | } 40 | 41 | template Tensor::Tensor(int a, int b) : size{1, 1, a, b} 42 | { 43 | alloc_buff(); 44 | } 45 | 46 | template Tensor::Tensor(int a, int b, int c) : size{1, a, b, c} 47 | { 48 | 49 | alloc_buff(); 50 | } 51 | 52 | template 53 | Tensor::Tensor(int a, int b, 
int c, int d) : size{a, b, c, d} 54 | { 55 | alloc_buff(); 56 | } 57 | 58 | template Tensor::Tensor(Tensor *in) 59 | { 60 | memcpy(size, in->size, 4 * sizeof(int)); 61 | alloc_buff(); 62 | memcpy(buff, in->buff, in->buff_size * sizeof(T)); 63 | } 64 | 65 | template Tensor::~Tensor() 66 | { 67 | free_buff(); 68 | } 69 | 70 | template void Tensor::alloc_buff() 71 | { 72 | buff_size = size[0] * size[1] * size[2] * size[3]; 73 | mem_size = buff_size; 74 | buff = (T *)aligned_malloc(32, buff_size * sizeof(T)); 75 | } 76 | 77 | template void Tensor::free_buff() 78 | { 79 | aligned_free(buff); 80 | } 81 | 82 | template void Tensor::zeros() 83 | { 84 | memset(buff, 0, buff_size * sizeof(T)); 85 | } 86 | 87 | template void Tensor::shape() 88 | { 89 | printf("(%d,%d,%d,%d)\n", size[0], size[1], size[2], size[3]); 90 | } 91 | 92 | // TODO:: fix it!!!! 93 | template void Tensor::concat(Tensor *din, int dim) 94 | { 95 | memcpy(buff + buff_size, din->buff, din->buff_size * sizeof(T)); 96 | buff_size += din->buff_size; 97 | size[dim] += din->size[dim]; 98 | } 99 | 100 | // TODO:: fix it!!!! 
101 | template void Tensor::resize(int a, int b, int c, int d) 102 | { 103 | size[0] = a; 104 | size[1] = b; 105 | size[2] = c; 106 | size[3] = d; 107 | buff_size = size[0] * size[1] * size[2] * size[3]; 108 | } 109 | 110 | template void Tensor::add(float coe, Tensor *in) 111 | { 112 | int i; 113 | for (i = 0; i < buff_size; i++) { 114 | buff[i] = buff[i] + coe * in->buff[i]; 115 | } 116 | } 117 | 118 | template void Tensor::add(Tensor *in) 119 | { 120 | int i; 121 | for (i = 0; i < buff_size; i++) { 122 | buff[i] = buff[i] + in->buff[i]; 123 | } 124 | } 125 | 126 | template void Tensor::add(Tensor *in1, Tensor *in2) 127 | { 128 | int i; 129 | for (i = 0; i < buff_size; i++) { 130 | buff[i] = buff[i] + in1->buff[i] + in2->buff[i]; 131 | } 132 | } 133 | 134 | template void Tensor::reload(Tensor *in) 135 | { 136 | memcpy(buff, in->buff, in->buff_size * sizeof(T)); 137 | } 138 | 139 | template void Tensor::disp() 140 | { 141 | int i; 142 | for (i = 0; i < buff_size; i++) { 143 | cout << buff[i] << " "; 144 | } 145 | cout << endl; 146 | } 147 | 148 | template void Tensor::dump(const char *mode) 149 | { 150 | FILE *fp; 151 | fp = fopen("tmp.bin", mode); 152 | fwrite(buff, 1, buff_size * sizeof(T), fp); 153 | fclose(fp); 154 | } 155 | #endif 156 | -------------------------------------------------------------------------------- /cpp_onnx/src/Vocab.cpp: -------------------------------------------------------------------------------- 1 | #include "Vocab.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | 11 | Vocab::Vocab(const char *filename) 12 | { 13 | ifstream in(filename); 14 | string line; 15 | 16 | if (in) // 有该文件 17 | { 18 | while (getline(in, line)) // line中不包括每行的换行符 19 | { 20 | vocab.push_back(line); 21 | } 22 | // cout << vocab[1719] << endl; 23 | } 24 | // else // 没有该文件 25 | //{ 26 | // cout << "no such file" << endl; 27 | // } 28 | } 29 | Vocab::~Vocab() 30 | { 31 | } 32 | 33 | string 
Vocab::vector2string(vector in) 34 | { 35 | int i; 36 | stringstream ss; 37 | for (auto it = in.begin(); it != in.end(); it++) { 38 | ss << vocab[*it]; 39 | } 40 | 41 | return ss.str(); 42 | } 43 | 44 | int str2int(string str) 45 | { 46 | const char *ch_array = str.c_str(); 47 | if (((ch_array[0] & 0xf0) != 0xe0) || ((ch_array[1] & 0xc0) != 0x80) || 48 | ((ch_array[2] & 0xc0) != 0x80)) 49 | return 0; 50 | 51 | int val = ((ch_array[0] & 0x0f) << 12) | ((ch_array[1] & 0x3f) << 6) | 52 | (ch_array[2] & 0x3f); 53 | return val; 54 | } 55 | 56 | bool Vocab::isChinese(string ch) 57 | { 58 | if (ch.size() != 3) { 59 | return false; 60 | } 61 | 62 | int unicode = str2int(ch); 63 | if (unicode >= 19968 && unicode <= 40959) { 64 | return true; 65 | } 66 | 67 | return false; 68 | } 69 | 70 | 71 | string Vocab::vector2stringV2(vector in) 72 | { 73 | int i; 74 | list words; 75 | 76 | int is_pre_english = false; 77 | int pre_english_len = 0; 78 | 79 | int is_combining = false; 80 | string combine = ""; 81 | 82 | for (auto it = in.begin(); it != in.end(); it++) { 83 | string word = vocab[*it]; 84 | 85 | // step1 space character skips 86 | if (word == "" || word == "" || word == "") 87 | continue; 88 | 89 | // step2 combie phoneme to full word 90 | { 91 | int sub_word = !(word.find("@@") == string::npos); 92 | 93 | // process word start and middle part 94 | if (sub_word) { 95 | combine += word.erase(word.length() - 2); 96 | is_combining = true; 97 | continue; 98 | } 99 | // process word end part 100 | else if (is_combining) { 101 | combine += word; 102 | is_combining = false; 103 | word = combine; 104 | combine = ""; 105 | } 106 | } 107 | 108 | // step3 process english word deal with space , turn abbreviation to upper case 109 | { 110 | 111 | // input word is chinese, not need process 112 | if (isChinese(word)) { 113 | words.push_back(word); 114 | is_pre_english = false; 115 | } 116 | // input word is english word 117 | else { 118 | 119 | // pre word is chinese 120 | if 
(!is_pre_english) { 121 | word[0] = word[0] - 32; 122 | words.push_back(word); 123 | pre_english_len = word.size(); 124 | 125 | } 126 | 127 | // pre word is english word 128 | else { 129 | 130 | // single letter turn to upper case 131 | if (word.size() == 1) { 132 | word[0] = word[0] - 32; 133 | } 134 | 135 | if (pre_english_len > 1) { 136 | words.push_back(" "); 137 | words.push_back(word); 138 | pre_english_len = word.size(); 139 | } 140 | else { 141 | if (word.size() > 1) { 142 | words.push_back(" "); 143 | } 144 | words.push_back(word); 145 | pre_english_len = word.size(); 146 | } 147 | } 148 | 149 | is_pre_english = true; 150 | 151 | } 152 | } 153 | } 154 | 155 | // for (auto it = words.begin(); it != words.end(); it++) { 156 | // cout << *it << endl; 157 | // } 158 | 159 | stringstream ss; 160 | for (auto it = words.begin(); it != words.end(); it++) { 161 | ss << *it; 162 | } 163 | 164 | return ss.str(); 165 | } 166 | 167 | int Vocab::size() 168 | { 169 | return vocab.size(); 170 | } 171 | -------------------------------------------------------------------------------- /cpp_onnx/src/Vocab.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef VOCAB_H 3 | #define VOCAB_H 4 | 5 | #include 6 | #include 7 | #include 8 | using namespace std; 9 | 10 | class Vocab { 11 | private: 12 | vector vocab; 13 | bool isChinese(string ch); 14 | bool isEnglish(string ch); 15 | 16 | public: 17 | Vocab(const char *filename); 18 | ~Vocab(); 19 | int size(); 20 | string vector2string(vector in); 21 | string vector2stringV2(vector in); 22 | }; 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /cpp_onnx/src/alignedmem.cpp: -------------------------------------------------------------------------------- 1 | #include "precomp.h" 2 | void *aligned_malloc(size_t alignment, size_t required_bytes) 3 | { 4 | void *p1; // original block 5 | void **p2; // aligned block 6 | int offset = alignment 
// Releases a block returned by aligned_malloc().
// aligned_malloc() stores the pointer obtained from malloc() in the slot just
// before the aligned address, so that slot is what must be passed to free().
// A NULL argument is ignored, like free(NULL); previously it dereferenced
// ((void **)NULL)[-1].
void aligned_free(void *p)
{
    if (p == NULL) {
        return;
    }
    free(((void **)p)[-1]);
}
0) { 49 | Ort::AllocatorWithDefaultOptions allocator; 50 | { 51 | auto t = session->GetOutputNameAllocated(nIndex, allocator); 52 | outputName = t.get(); 53 | 54 | } 55 | } 56 | } -------------------------------------------------------------------------------- /cpp_onnx/src/librapidasrapi.cpp: -------------------------------------------------------------------------------- 1 | #include "precomp.h" 2 | #ifdef __cplusplus 3 | 4 | 5 | 6 | // void __attribute__ ((visibility ("default"))) fun(); 7 | extern "C" { 8 | #endif 9 | 10 | 11 | // APIs for qmasr 12 | _RAPIDASRAPI RPASR_HANDLE RapidAsrInit(const char* szModelDir, int nThreadNum) 13 | { 14 | 15 | 16 | Model* mm = create_model(szModelDir, nThreadNum); 17 | 18 | return mm; 19 | } 20 | 21 | 22 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback) 23 | { 24 | 25 | 26 | Model* pRecogObj = (Model*)handle; 27 | 28 | if (!pRecogObj) 29 | return nullptr; 30 | 31 | Audio audio(1); 32 | audio.loadwav(szBuf,nLen); 33 | audio.split(); 34 | 35 | float* buff; 36 | int len; 37 | int flag=0; 38 | RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT; 39 | pResult->snippet_time = audio.get_time_len(); 40 | int nStep = 0; 41 | int nTotal = audio.get_queue_size(); 42 | while (audio.fetch(buff, len, flag) > 0) { 43 | pRecogObj->reset(); 44 | string msg = pRecogObj->forward(buff, len, flag); 45 | pResult->msg += msg; 46 | nStep++; 47 | if (fnCallback) 48 | fnCallback(nStep, nTotal); 49 | } 50 | 51 | 52 | return pResult; 53 | } 54 | 55 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback) 56 | { 57 | 58 | Model* pRecogObj = (Model*)handle; 59 | 60 | if (!pRecogObj) 61 | return nullptr; 62 | 63 | Audio audio(1); 64 | audio.loadpcmwav(szBuf, nLen); 65 | audio.split(); 66 | 67 | float* buff; 68 | int len; 69 | int flag = 0; 70 | RPASR_RECOG_RESULT* pResult = new 
RPASR_RECOG_RESULT; 71 | pResult->snippet_time = audio.get_time_len(); 72 | int nStep = 0; 73 | int nTotal = audio.get_queue_size(); 74 | while (audio.fetch(buff, len, flag) > 0) { 75 | pRecogObj->reset(); 76 | string msg = pRecogObj->forward(buff, len, flag); 77 | pResult->msg += msg; 78 | nStep++; 79 | if (fnCallback) 80 | fnCallback(nStep, nTotal); 81 | } 82 | 83 | 84 | return pResult; 85 | 86 | } 87 | 88 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback) 89 | { 90 | 91 | Model* pRecogObj = (Model*)handle; 92 | 93 | if (!pRecogObj) 94 | return nullptr; 95 | 96 | Audio audio(1); 97 | audio.loadpcmwav(szFileName); 98 | audio.split(); 99 | 100 | float* buff; 101 | int len; 102 | int flag = 0; 103 | RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT; 104 | pResult->snippet_time = audio.get_time_len(); 105 | int nStep = 0; 106 | int nTotal = audio.get_queue_size(); 107 | while (audio.fetch(buff, len, flag) > 0) { 108 | pRecogObj->reset(); 109 | string msg = pRecogObj->forward(buff, len, flag); 110 | pResult->msg += msg; 111 | nStep++; 112 | if (fnCallback) 113 | fnCallback(nStep, nTotal); 114 | } 115 | 116 | 117 | return pResult; 118 | 119 | } 120 | 121 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogFile(RPASR_HANDLE handle, const char* szWavfile, RPASR_MODE Mode, QM_CALLBACK fnCallback) 122 | { 123 | Model* pRecogObj = (Model*)handle; 124 | 125 | if (!pRecogObj) 126 | return nullptr; 127 | 128 | Audio audio(1); 129 | if(!audio.loadwav(szWavfile)) 130 | return nullptr; 131 | audio.split(); 132 | 133 | float* buff; 134 | int len; 135 | int flag = 0; 136 | int nStep = 0; 137 | int nTotal = audio.get_queue_size(); 138 | RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT; 139 | pResult->snippet_time = audio.get_time_len(); 140 | while (audio.fetch(buff, len, flag) > 0) { 141 | pRecogObj->reset(); 142 | string msg = pRecogObj->forward(buff, len, flag); 143 | pResult->msg+= msg; 144 | 
nStep++; 145 | if (fnCallback) 146 | fnCallback(nStep, nTotal); 147 | } 148 | 149 | 150 | 151 | 152 | return pResult; 153 | } 154 | 155 | _RAPIDASRAPI const int RapidAsrGetRetNumber(RPASR_RESULT Result) 156 | { 157 | if (!Result) 158 | return 0; 159 | 160 | return 1; 161 | 162 | } 163 | 164 | 165 | _RAPIDASRAPI const float RapidAsrGetRetSnippetTime(RPASR_RESULT Result) 166 | { 167 | if (!Result) 168 | return 0.0f; 169 | 170 | return ((RPASR_RECOG_RESULT*)Result)->snippet_time; 171 | 172 | } 173 | 174 | _RAPIDASRAPI const char* RapidAsrGetResult(RPASR_RESULT Result,int nIndex) 175 | { 176 | RPASR_RECOG_RESULT * pResult = (RPASR_RECOG_RESULT*)Result; 177 | if(!pResult) 178 | return nullptr; 179 | 180 | return pResult->msg.c_str(); 181 | 182 | } 183 | 184 | _RAPIDASRAPI void RapidAsrFreeResult(RPASR_RESULT Result) 185 | { 186 | 187 | if (Result) 188 | { 189 | delete (RPASR_RECOG_RESULT*)Result; 190 | 191 | } 192 | } 193 | 194 | _RAPIDASRAPI void RapidAsrUninit(RPASR_HANDLE handle) 195 | { 196 | 197 | Model* pRecogObj = (Model*)handle; 198 | 199 | 200 | if (!pRecogObj) 201 | return; 202 | 203 | delete pRecogObj; 204 | 205 | } 206 | 207 | 208 | 209 | #ifdef __cplusplus 210 | 211 | } 212 | #endif 213 | 214 | -------------------------------------------------------------------------------- /cpp_onnx/src/paraformer_onnx.cpp: -------------------------------------------------------------------------------- 1 | #include "precomp.h" 2 | 3 | using namespace std; 4 | using namespace paraformer; 5 | 6 | ModelImp::ModelImp(const char* path,int nNumThread) 7 | { 8 | string model_path = pathAppend(path, "model.onnx"); 9 | string vocab_path = pathAppend(path, "vocab.txt"); 10 | 11 | fe = new FeatureExtract(3); 12 | 13 | sessionOptions.SetInterOpNumThreads(nNumThread); 14 | sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); 15 | 16 | #ifdef _WIN32 17 | wstring wstrPath = strToWstr(model_path); 18 | m_session = new Ort::Session(env, wstrPath.c_str(), 
sessionOptions); 19 | #else 20 | m_session = new Ort::Session(env, model_path.c_str(), sessionOptions); 21 | #endif 22 | 23 | string strName; 24 | getInputName(m_session, strName); 25 | m_strInputNames.push_back(strName.c_str()); 26 | getInputName(m_session, strName,1); 27 | m_strInputNames.push_back(strName); 28 | 29 | getOutputName(m_session, strName); 30 | m_strOutputNames.push_back(strName); 31 | getOutputName(m_session, strName,1); 32 | m_strOutputNames.push_back(strName); 33 | 34 | for (auto& item : m_strInputNames) 35 | m_szInputNames.push_back(item.c_str()); 36 | for (auto& item : m_strOutputNames) 37 | m_szOutputNames.push_back(item.c_str()); 38 | vocab = new Vocab(vocab_path.c_str()); 39 | } 40 | 41 | ModelImp::~ModelImp() 42 | { 43 | if(fe) 44 | delete fe; 45 | if (m_session) 46 | { 47 | delete m_session; 48 | m_session = nullptr; 49 | } 50 | if(vocab) 51 | delete vocab; 52 | } 53 | 54 | void ModelImp::reset() 55 | { 56 | fe->reset(); 57 | } 58 | 59 | void ModelImp::apply_lfr(Tensor*& din) 60 | { 61 | int mm = din->size[2]; 62 | int ll = ceil(mm / 6.0); 63 | Tensor* tmp = new Tensor(ll, 560); 64 | int out_offset = 0; 65 | for (int i = 0; i < ll; i++) { 66 | for (int j = 0; j < 7; j++) { 67 | int idx = i * 6 + j - 3; 68 | if (idx < 0) { 69 | idx = 0; 70 | } 71 | if (idx >= mm) { 72 | idx = mm - 1; 73 | } 74 | memcpy(tmp->buff + out_offset, din->buff + idx * 80, 75 | sizeof(float) * 80); 76 | out_offset += 80; 77 | } 78 | } 79 | delete din; 80 | din = tmp; 81 | } 82 | 83 | void ModelImp::apply_cmvn(Tensor* din) 84 | { 85 | const float* var; 86 | const float* mean; 87 | float scale = 22.6274169979695; 88 | int m = din->size[2]; 89 | int n = din->size[3]; 90 | 91 | var = (const float*)paraformer_cmvn_var_hex; 92 | mean = (const float*)paraformer_cmvn_mean_hex; 93 | for (int i = 0; i < m; i++) { 94 | for (int j = 0; j < n; j++) { 95 | int idx = i * n + j; 96 | din->buff[idx] = (din->buff[idx] + mean[j]) * var[j]; 97 | } 98 | } 99 | } 100 | 101 | string 
ModelImp::greedy_search(float * in, int nLen ) 102 | { 103 | vector hyps; 104 | int Tmax = nLen; 105 | for (int i = 0; i < Tmax; i++) { 106 | int max_idx; 107 | float max_val; 108 | findmax(in + i * 8404, 8404, max_val, max_idx); 109 | hyps.push_back(max_idx); 110 | } 111 | 112 | return vocab->vector2stringV2(hyps); 113 | } 114 | 115 | string ModelImp::forward(float* din, int len, int flag) 116 | { 117 | 118 | Tensor* in; 119 | fe->insert(din, len, flag); 120 | fe->fetch(in); 121 | apply_lfr(in); 122 | apply_cmvn(in); 123 | Ort::RunOptions run_option; 124 | 125 | std::array input_shape_{ in->size[0],in->size[2],in->size[3] }; 126 | Ort::Value onnx_feats = Ort::Value::CreateTensor(m_memoryInfo, 127 | in->buff, 128 | in->buff_size, 129 | input_shape_.data(), 130 | input_shape_.size()); 131 | 132 | std::vector feats_len{ in->size[2] }; 133 | std::vector feats_len_dim{ 1 }; 134 | Ort::Value onnx_feats_len = Ort::Value::CreateTensor( 135 | m_memoryInfo, 136 | feats_len.data(), 137 | feats_len.size() * sizeof(int32_t), 138 | feats_len_dim.data(), 139 | feats_len_dim.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32); 140 | std::vector input_onnx; 141 | input_onnx.emplace_back(std::move(onnx_feats)); 142 | input_onnx.emplace_back(std::move(onnx_feats_len)); 143 | 144 | string result; 145 | try { 146 | 147 | auto outputTensor = m_session->Run(run_option, m_szInputNames.data(), input_onnx.data(), m_szInputNames.size(), m_szOutputNames.data(), m_szOutputNames.size()); 148 | std::vector outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape(); 149 | 150 | 151 | int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies()); 152 | float* floatData = outputTensor[0].GetTensorMutableData(); 153 | auto encoder_out_lens = outputTensor[1].GetTensorMutableData(); 154 | result = greedy_search(floatData, *encoder_out_lens); 155 | } 156 | catch (...) 
157 | { 158 | result = ""; 159 | } 160 | 161 | 162 | if(in) 163 | delete in; 164 | 165 | return result; 166 | } 167 | 168 | string ModelImp::forward_chunk(float* din, int len, int flag) 169 | { 170 | 171 | printf("Not Imp!!!!!!\n"); 172 | return "Hello"; 173 | } 174 | 175 | string ModelImp::rescoring() 176 | { 177 | printf("Not Imp!!!!!!\n"); 178 | return "Hello"; 179 | } 180 | -------------------------------------------------------------------------------- /cpp_onnx/src/paraformer_onnx.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | #ifndef PARAFORMER_MODELIMP_H 5 | #define PARAFORMER_MODELIMP_H 6 | 7 | 8 | 9 | 10 | 11 | namespace paraformer { 12 | 13 | class ModelImp : public Model { 14 | private: 15 | FeatureExtract* fe; 16 | 17 | Vocab* vocab; 18 | 19 | void apply_lfr(Tensor*& din); 20 | void apply_cmvn(Tensor* din); 21 | 22 | 23 | string greedy_search( float* in, int nLen); 24 | 25 | #ifdef _WIN_X86 26 | Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); 27 | #else 28 | Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); 29 | #endif 30 | 31 | Ort::Session* m_session = nullptr; 32 | Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "paraformer"); 33 | Ort::SessionOptions sessionOptions = Ort::SessionOptions(); 34 | 35 | vector m_strInputNames, m_strOutputNames; 36 | vector m_szInputNames; 37 | vector m_szOutputNames; 38 | //string m_strInputName, m_strInputNameLen; 39 | //string m_strOutputName, m_strOutputNameLen; 40 | 41 | public: 42 | ModelImp(const char* path, int nNumThread=0); 43 | ~ModelImp(); 44 | void reset(); 45 | string forward_chunk(float* din, int len, int flag); 46 | string forward(float* din, int len, int flag); 47 | string rescoring(); 48 | 49 | }; 50 | 51 | } // namespace paraformer 52 | #endif 53 | -------------------------------------------------------------------------------- 
// Flat parameter layouts. Every struct below is a sequence of fixed-size float
// arrays, so a struct's size is fully determined by the array bounds written
// here. NOTE(review): the names suggest a conformer encoder/decoder weight
// file -- confirm against whatever code loads these structs.

// NOTE(review): lower-case macro; any identifier spelled vocab_size in a
// translation unit that includes this header will be rewritten to 5538.
#define vocab_size 5538

// Input embedding block: two convolution weight/bias pairs and one output
// projection.
typedef struct {
    float conv0_weight[512 * 9];
    float conv0_bias[512];

    float conv1_weight[512 * 512 * 9];
    float conv1_bias[512];

    float out0_weight[9728 * 512];
    float out0_bias[512];

} EncEmbedParams;

// Four 512x512 linear layers (q, k, v, out), each with a 512-entry bias.
typedef struct {
    float linear_q_weight[512 * 512];
    float linear_q_bias[512];
    float linear_k_weight[512 * 512];
    float linear_k_bias[512];
    float linear_v_weight[512 * 512];
    float linear_v_bias[512];
    float linear_out_weight[512 * 512];
    float linear_out_bias[512];
} SelfAttnParams;

// SelfAttnParams plus a positional projection and two 512-entry bias vectors.
typedef struct {
    SelfAttnParams linear0;
    float linear_pos_weight[512 * 512];
    float pos_bias_u[512];
    float pos_bias_v[512];

} EncSelfAttnParams;

// Two linear layers: 512 -> 2048 and 2048 -> 512.
typedef struct {
    float w1_weight[512 * 2048];
    float w1_bias[2048];
    float w2_weight[2048 * 512];
    float w2_bias[512];
} FeedForwardParams;

// Per-channel scale and shift, 512 entries each.
typedef struct {
    float weight[512];
    float bias[512];
} NormParams;

// Convolution module: pointwise (1024x512), depthwise (512x15), pointwise
// (512x512) and a trailing norm.
typedef struct {
    float pointwise_conv1_weight[1024 * 512];
    float pointwise_conv1_bias[1024];

    float depthwise_conv_weight[512 * 15];
    float depthwise_conv_bias[512];

    float pointwise_conv2_weight[512 * 512];
    float pointwise_conv2_bias[512];
    NormParams norm;
} EncConvParams;

// One encoder layer.
typedef struct {
    EncSelfAttnParams self_attn;
    FeedForwardParams feedforward;
    FeedForwardParams feedforward_macaron;
    EncConvParams conv_module;
    NormParams norm_ff;
    NormParams norm_mha;
    NormParams norm_macaron;
    NormParams norm_conv;
    NormParams norm_final;
    // float concat_weight[1024 * 512];
    // float concat_bias[512];
} SubEncoderParams;

// Embedding, 12 encoder layers and a final norm.
typedef struct {
    EncEmbedParams embed;
    SubEncoderParams sub_encoder[12];
    NormParams after_norm;
} EncoderParams;

// One decoder layer: two attention blocks, a feed-forward block and three
// norms.
typedef struct {
    SelfAttnParams self_attn;
    SelfAttnParams src_attn;
    FeedForwardParams feedward;
    NormParams norm1;
    NormParams norm2;
    NormParams norm3;
    // float concat_weight1[1024 * 512];
    // float concat_bias1[512];
    // float concat_weight2[1024 * 512];
    // float concat_bias2[512];
} SubDecoderParams;

// Token embedding, 6 decoder layers, final norm and the output projection
// sized by vocab_size.
typedef struct {
    float embed_weight[vocab_size * 512];
    SubDecoderParams sub_decoder[6];
    NormParams after_norm;
    float output_weight[vocab_size * 512];
    float output_bias[vocab_size];
} DecoderParams;
// Joins a directory and a file name with the platform path separator
// ('\\' under _WIN32, '/' elsewhere).
//   - p1 already ending in a separator is not given a second one;
//   - an empty p1 yields p2 unchanged (the original indexed
//     p1[p1.length() - 1], i.e. p1[size_t(-1)], which is undefined behaviour).
std::string pathAppend(const std::string &p1, const std::string &p2)
{
#ifdef _WIN32
    const char sep = '\\';
#else
    const char sep = '/';
#endif

    if (p1.empty()) {
        return p2;
    }

    const char last = p1[p1.length() - 1];
    bool terminated = (last == sep);
#ifdef _WIN32
    // Windows accepts '/' as a separator too; do not append a second one.
    terminated = terminated || (last == '/');
#endif

    if (terminated) {
        return p1 + p2;
    }
    return p1 + sep + p2;
}
din[i] : max; 137 | } 138 | 139 | for (i = 0; i < mask; i++) { 140 | tmp[i] = exp(din[i] - max); 141 | sum += tmp[i]; 142 | } 143 | for (i = 0; i < mask; i++) { 144 | din[i] = tmp[i] / sum; 145 | } 146 | free(tmp); 147 | for (i = mask; i < len; i++) { 148 | din[i] = 0; 149 | } 150 | } 151 | 152 | void log_softmax(float *din, int len) 153 | { 154 | float *tmp = (float *)malloc(len * sizeof(float)); 155 | int i; 156 | float sum = 0; 157 | for (i = 0; i < len; i++) { 158 | tmp[i] = exp(din[i]); 159 | sum += tmp[i]; 160 | } 161 | for (i = 0; i < len; i++) { 162 | din[i] = log(tmp[i] / sum); 163 | } 164 | free(tmp); 165 | } 166 | 167 | void glu(Tensor *din, Tensor *dout) 168 | { 169 | int mm = din->buff_size / 1024; 170 | int i, j; 171 | for (i = 0; i < mm; i++) { 172 | for (j = 0; j < 512; j++) { 173 | int in_off = i * 1024 + j; 174 | int out_off = i * 512 + j; 175 | float a = din->buff[in_off]; 176 | float b = din->buff[in_off + 512]; 177 | dout->buff[out_off] = a / (1 + exp(-b)); 178 | } 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /cpp_onnx/src/util.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef UTIL_H 4 | #define UTIL_H 5 | 6 | using namespace std; 7 | 8 | extern float *loadparams(const char *filename); 9 | 10 | extern void SaveDataFile(const char *filename, void *data, uint32_t len); 11 | extern void relu(Tensor *din); 12 | extern void swish(Tensor *din); 13 | extern void sigmoid(Tensor *din); 14 | extern void doubleswish(Tensor *din); 15 | 16 | extern void softmax(float *din, int mask, int len); 17 | 18 | extern void log_softmax(float *din, int len); 19 | extern int val_align(int val, int align); 20 | extern void disp_params(float *din, int size); 21 | 22 | extern void basic_norm(Tensor *&din, float norm); 23 | 24 | extern void findmax(float *din, int len, float &max_val, int &max_idx); 25 | 26 | extern void glu(Tensor *din, Tensor *dout); 27 | 28 | 
string pathAppend(const string &p1, const string &p2); 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /cpp_onnx/tester/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | if(WIN32) 4 | if(CMAKE_CL_64) 5 | link_directories( ${CMAKE_SOURCE_DIR}/win/lib/x64 ) 6 | else() 7 | link_directories( ${CMAKE_SOURCE_DIR}/win/lib/x86 ) 8 | endif() 9 | endif() 10 | 11 | set(EXTRA_LIBS rapidasr) 12 | 13 | 14 | include_directories(${CMAKE_SOURCE_DIR}/include) 15 | set(EXECNAME "tester") 16 | 17 | add_executable(${EXECNAME} "tester.cpp") 18 | target_link_libraries(${EXECNAME} PUBLIC ${EXTRA_LIBS}) 19 | 20 | 21 | -------------------------------------------------------------------------------- /cpp_onnx/tester/tester.cpp: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _WIN32 3 | #include 4 | #else 5 | #include 6 | #endif 7 | 8 | #include "librapidasrapi.h" 9 | 10 | #include 11 | 12 | using namespace std; 13 | 14 | int main(int argc, char *argv[]) 15 | { 16 | 17 | if (argc < 2) 18 | { 19 | printf("Usage: %s /path/to/model_dir /path/to/wav/file", argv[0]); 20 | exit(-1); 21 | } 22 | struct timeval start, end; 23 | gettimeofday(&start, NULL); 24 | int nThreadNum = 4; 25 | RPASR_HANDLE AsrHanlde=RapidAsrInit(argv[1], nThreadNum); 26 | 27 | if (!AsrHanlde) 28 | { 29 | printf("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]); 30 | exit(-1); 31 | } 32 | 33 | 34 | 35 | gettimeofday(&end, NULL); 36 | long seconds = (end.tv_sec - start.tv_sec); 37 | long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec); 38 | printf("Model initialization takes %lfs.\n", (double)modle_init_micros / 1000000); 39 | 40 | 41 | 42 | gettimeofday(&start, NULL); 43 | 44 | RPASR_RESULT Result=RapidAsrRecogFile(AsrHanlde, argv[2], RASR_NONE, NULL); 45 | gettimeofday(&end, NULL); 46 | float 
snippet_time = 0.0f; 47 | if (Result) 48 | { 49 | string msg = RapidAsrGetResult(Result, 0); 50 | setbuf(stdout, NULL); 51 | cout << "Result: \""; 52 | cout << msg << endl; 53 | cout << "\"." << endl; 54 | snippet_time = RapidAsrGetRetSnippetTime(Result); 55 | RapidAsrFreeResult(Result); 56 | } 57 | else 58 | { 59 | cout <<("no return data!"); 60 | } 61 | 62 | printf("Audio length %lfs.\n", (double)snippet_time); 63 | 64 | seconds = (end.tv_sec - start.tv_sec); 65 | long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec); 66 | printf("Model inference takes %lfs.\n", (double)taking_micros / 1000000); 67 | 68 | printf("Model inference RTF: %04lf.\n", (double)taking_micros/ (snippet_time*1000000)); 69 | 70 | RapidAsrUninit(AsrHanlde); 71 | 72 | return 0; 73 | } 74 | 75 | 76 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | if(WIN32) 4 | add_definitions(-DWEBRTC_WIN) 5 | else() 6 | add_definitions(-DWEBRTC_POSIX) 7 | endif() 8 | 9 | 10 | include_directories("..") 11 | 12 | file(GLOB_RECURSE files "*.c" "rtc_base/checks.cc") 13 | 14 | message("${files}") 15 | 16 | add_library(webrtcvad ${files}) 17 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/signal_processing/complex_bit_reverse.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 12 | 13 | /* Tables for data buffer indexes that are bit reversed and thus need to be 14 | * swapped. Note that, index_7[{0, 2, 4, ...}] are for the left side of the swap 15 | * operations, while index_7[{1, 3, 5, ...}] are for the right side of the 16 | * operation. Same for index_8. 17 | */ 18 | 19 | /* Indexes for the case of stages == 7. */ 20 | static const int16_t index_7[112] = { 21 | 1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104, 22 | 12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52, 23 | 23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98, 24 | 37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70, 25 | 51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69, 26 | 81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125, 27 | 103, 115, 111, 123 28 | }; 29 | 30 | /* Indexes for the case of stages == 8. 
*/ 31 | static const int16_t index_8[240] = { 32 | 1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80, 33 | 11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20, 34 | 40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184, 35 | 30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41, 36 | 148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76, 37 | 51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62, 38 | 124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82, 39 | 75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87, 40 | 234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101, 41 | 166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142, 42 | 115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131, 43 | 193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201, 44 | 149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171, 45 | 213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227, 46 | 203, 211, 207, 243, 215, 235, 223, 251, 239, 247 47 | }; 48 | 49 | void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) { 50 | /* For any specific value of stages, we know exactly the indexes that are 51 | * bit reversed. Currently (Feb. 2012) in WebRTC the only possible values of 52 | * stages are 7 and 8, so we use tables to save unnecessary iterations and 53 | * calculations for these two cases. 54 | */ 55 | if (stages == 7 || stages == 8) { 56 | int m = 0; 57 | int length = 112; 58 | const int16_t* index = index_7; 59 | 60 | if (stages == 8) { 61 | length = 240; 62 | index = index_8; 63 | } 64 | 65 | /* Decimation in time. Swap the elements with bit-reversed indexes. 
*/ 66 | for (m = 0; m < length; m += 2) { 67 | /* We declare a int32_t* type pointer, to load both the 16-bit real 68 | * and imaginary elements from complex_data in one instruction, reducing 69 | * complexity. 70 | */ 71 | int32_t* complex_data_ptr = (int32_t*)complex_data; 72 | int32_t temp = 0; 73 | 74 | temp = complex_data_ptr[index[m]]; /* Real and imaginary */ 75 | complex_data_ptr[index[m]] = complex_data_ptr[index[m + 1]]; 76 | complex_data_ptr[index[m + 1]] = temp; 77 | } 78 | } 79 | else { 80 | int m = 0, mr = 0, l = 0; 81 | int n = 1 << stages; 82 | int nn = n - 1; 83 | 84 | /* Decimation in time - re-order data */ 85 | for (m = 1; m <= nn; ++m) { 86 | int32_t* complex_data_ptr = (int32_t*)complex_data; 87 | int32_t temp = 0; 88 | 89 | /* Find out indexes that are bit-reversed. */ 90 | l = n; 91 | do { 92 | l >>= 1; 93 | } while (l > nn - mr); 94 | mr = (mr & (l - 1)) + l; 95 | 96 | if (mr <= m) { 97 | continue; 98 | } 99 | 100 | /* Swap the elements with bit-reversed indexes. 101 | * This is similar to the loop in the stages == 7 or 8 cases. 102 | */ 103 | temp = complex_data_ptr[m]; /* Real and imaginary */ 104 | complex_data_ptr[m] = complex_data_ptr[mr]; 105 | complex_data_ptr[mr] = temp; 106 | } 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/signal_processing/cross_correlation.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 12 | 13 | /* C version of WebRtcSpl_CrossCorrelation() for generic platforms. */ 14 | void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation, 15 | const int16_t* seq1, 16 | const int16_t* seq2, 17 | size_t dim_seq, 18 | size_t dim_cross_correlation, 19 | int right_shifts, 20 | int step_seq2) { 21 | size_t i = 0, j = 0; 22 | 23 | for (i = 0; i < dim_cross_correlation; i++) { 24 | int32_t corr = 0; 25 | for (j = 0; j < dim_seq; j++) 26 | corr += (seq1[j] * seq2[j]) >> right_shifts; 27 | seq2 += step_seq2; 28 | *cross_correlation++ = corr; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/signal_processing/division_operations.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | 12 | /* 13 | * This file contains implementations of the divisions 14 | * WebRtcSpl_DivU32U16() 15 | * WebRtcSpl_DivW32W16() 16 | * WebRtcSpl_DivW32W16ResW16() 17 | * WebRtcSpl_DivResultInQ31() 18 | * WebRtcSpl_DivW32HiLow() 19 | * 20 | * The description header can be found in signal_processing_library.h 21 | * 22 | */ 23 | 24 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 25 | #include "webrtc/rtc_base/sanitizer.h" 26 | 27 | uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den) 28 | { 29 | // Guard against division with 0 30 | if (den != 0) 31 | { 32 | return (uint32_t)(num / den); 33 | } else 34 | { 35 | return (uint32_t)0xFFFFFFFF; 36 | } 37 | } 38 | 39 | int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den) 40 | { 41 | // Guard against division with 0 42 | if (den != 0) 43 | { 44 | return (int32_t)(num / den); 45 | } else 46 | { 47 | return (int32_t)0x7FFFFFFF; 48 | } 49 | } 50 | 51 | int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den) 52 | { 53 | // Guard against division with 0 54 | if (den != 0) 55 | { 56 | return (int16_t)(num / den); 57 | } else 58 | { 59 | return (int16_t)0x7FFF; 60 | } 61 | } 62 | 63 | int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den) 64 | { 65 | int32_t L_num = num; 66 | int32_t L_den = den; 67 | int32_t div = 0; 68 | int k = 31; 69 | int change_sign = 0; 70 | 71 | if (num == 0) 72 | return 0; 73 | 74 | if (num < 0) 75 | { 76 | change_sign++; 77 | L_num = -num; 78 | } 79 | if (den < 0) 80 | { 81 | change_sign++; 82 | L_den = -den; 83 | } 84 | while (k--) 85 | { 86 | div <<= 1; 87 | L_num <<= 1; 88 | if (L_num >= L_den) 89 | { 90 | L_num -= L_den; 91 | div++; 92 | } 93 | } 94 | if (change_sign == 1) 95 | { 96 | div = -div; 97 | } 98 | return div; 99 | } 100 | 101 | int32_t RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486 102 | WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low) 103 | { 104 | int16_t approx, tmp_hi, tmp_low, 
num_hi, num_low; 105 | int32_t tmpW32; 106 | 107 | approx = (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi); 108 | // result in Q14 (Note: 3FFFFFFF = 0.5 in Q30) 109 | 110 | // tmpW32 = 1/den = approx * (2.0 - den * approx) (in Q30) 111 | tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1); 112 | // tmpW32 = den * approx 113 | 114 | tmpW32 = (int32_t)0x7fffffffL - tmpW32; // result in Q30 (tmpW32 = 2.0-(den*approx)) 115 | // UBSan: 2147483647 - -2 cannot be represented in type 'int' 116 | 117 | // Store tmpW32 in hi and low format 118 | tmp_hi = (int16_t)(tmpW32 >> 16); 119 | tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1); 120 | 121 | // tmpW32 = 1/den in Q29 122 | tmpW32 = (tmp_hi * approx + (tmp_low * approx >> 15)) << 1; 123 | 124 | // 1/den in hi and low format 125 | tmp_hi = (int16_t)(tmpW32 >> 16); 126 | tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1); 127 | 128 | // Store num in hi and low format 129 | num_hi = (int16_t)(num >> 16); 130 | num_low = (int16_t)((num - ((int32_t)num_hi << 16)) >> 1); 131 | 132 | // num * (1/den) by 32 bit multiplication (result in Q28) 133 | 134 | tmpW32 = num_hi * tmp_hi + (num_hi * tmp_low >> 15) + 135 | (num_low * tmp_hi >> 15); 136 | 137 | // Put result in Q31 (convert from Q28) 138 | tmpW32 = WEBRTC_SPL_LSHIFT_W32(tmpW32, 3); 139 | 140 | return tmpW32; 141 | } 142 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/signal_processing/dot_product_with_scale.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. 
All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "webrtc/common_audio/signal_processing/dot_product_with_scale.h" 12 | 13 | #include "webrtc/rtc_base/numerics/safe_conversions.h" 14 | 15 | int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1, 16 | const int16_t* vector2, 17 | size_t length, 18 | int scaling) { 19 | int64_t sum = 0; 20 | size_t i = 0; 21 | 22 | /* Unroll the loop to improve performance. */ 23 | for (i = 0; i + 3 < length; i += 4) { 24 | sum += (vector1[i + 0] * vector2[i + 0]) >> scaling; 25 | sum += (vector1[i + 1] * vector2[i + 1]) >> scaling; 26 | sum += (vector1[i + 2] * vector2[i + 2]) >> scaling; 27 | sum += (vector1[i + 3] * vector2[i + 3]) >> scaling; 28 | } 29 | for (; i < length; i++) { 30 | sum += (vector1[i] * vector2[i]) >> scaling; 31 | } 32 | 33 | return rtc::saturated_cast(sum); 34 | } 35 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/signal_processing/dot_product_with_scale.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_ 12 | #define COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_ 13 | 14 | #include 15 | #include 16 | 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | // Calculates the dot product between two (int16_t) vectors. 
22 | // 23 | // Input: 24 | // - vector1 : Vector 1 25 | // - vector2 : Vector 2 26 | // - vector_length : Number of samples used in the dot product 27 | // - scaling : The number of right bit shifts to apply on each term 28 | // during calculation to avoid overflow, i.e., the 29 | // output will be in Q(-|scaling|) 30 | // 31 | // Return value : The dot product in Q(-scaling) 32 | int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1, 33 | const int16_t* vector2, 34 | size_t length, 35 | int scaling); 36 | 37 | #ifdef __cplusplus 38 | } 39 | #endif // __cplusplus 40 | #endif // COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_ 41 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/signal_processing/downsample_fast.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 12 | 13 | #include "webrtc/rtc_base/checks.h" 14 | #include "webrtc/rtc_base/sanitizer.h" 15 | 16 | // TODO(Bjornv): Change the function parameter order to WebRTC code style. 17 | // C version of WebRtcSpl_DownsampleFast() for generic platforms. 
18 | int WebRtcSpl_DownsampleFastC(const int16_t* data_in, 19 | size_t data_in_length, 20 | int16_t* data_out, 21 | size_t data_out_length, 22 | const int16_t* __restrict coefficients, 23 | size_t coefficients_length, 24 | int factor, 25 | size_t delay) { 26 | int16_t* const original_data_out = data_out; 27 | size_t i = 0; 28 | size_t j = 0; 29 | int32_t out_s32 = 0; 30 | size_t endpos = delay + factor * (data_out_length - 1) + 1; 31 | 32 | // Return error if any of the running conditions doesn't meet. 33 | if (data_out_length == 0 || coefficients_length == 0 34 | || data_in_length < endpos) { 35 | return -1; 36 | } 37 | 38 | rtc_MsanCheckInitialized(coefficients, sizeof(coefficients[0]), 39 | coefficients_length); 40 | 41 | for (i = delay; i < endpos; i += factor) { 42 | out_s32 = 2048; // Round value, 0.5 in Q12. 43 | 44 | for (j = 0; j < coefficients_length; j++) { 45 | // Negative overflow is permitted here, because this is 46 | // auto-regressive filters, and the state for each batch run is 47 | // stored in the "negative" positions of the output vector. 48 | rtc_MsanCheckInitialized(&data_in[(ptrdiff_t) i - (ptrdiff_t) j], 49 | sizeof(data_in[0]), 1); 50 | // out_s32 is in Q12 domain. 51 | out_s32 += coefficients[j] * data_in[(ptrdiff_t) i - (ptrdiff_t) j]; 52 | } 53 | 54 | out_s32 >>= 12; // Q0. 55 | 56 | // Saturate and store the output. 57 | *data_out++ = WebRtcSpl_SatW32ToW16(out_s32); 58 | } 59 | 60 | RTC_DCHECK_EQ(original_data_out + data_out_length, data_out); 61 | rtc_MsanCheckInitialized(original_data_out, sizeof(original_data_out[0]), 62 | data_out_length); 63 | 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/signal_processing/energy.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains the function WebRtcSpl_Energy(). 14 | * The description header can be found in signal_processing_library.h 15 | * 16 | */ 17 | 18 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 19 | 20 | int32_t WebRtcSpl_Energy(int16_t* vector, 21 | size_t vector_length, 22 | int* scale_factor) 23 | { 24 | int32_t en = 0; 25 | size_t i; 26 | int scaling = 27 | WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length); 28 | size_t looptimes = vector_length; 29 | int16_t *vectorptr = vector; 30 | 31 | for (i = 0; i < looptimes; i++) 32 | { 33 | en += (*vectorptr * *vectorptr) >> scaling; 34 | vectorptr++; 35 | } 36 | *scale_factor = scaling; 37 | 38 | return en; 39 | } 40 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/signal_processing/get_scaling_square.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains the function WebRtcSpl_GetScalingSquare(). 
14 | * The description header can be found in signal_processing_library.h 15 | * 16 | */ 17 | 18 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 19 | 20 | int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector, 21 | size_t in_vector_length, 22 | size_t times) 23 | { 24 | int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t)times); 25 | size_t i; 26 | int16_t smax = -1; 27 | int16_t sabs; 28 | int16_t *sptr = in_vector; 29 | int16_t t; 30 | size_t looptimes = in_vector_length; 31 | 32 | for (i = looptimes; i > 0; i--) 33 | { 34 | sabs = (*sptr > 0 ? *sptr++ : -*sptr++); 35 | smax = (sabs > smax ? sabs : smax); 36 | } 37 | t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax)); 38 | 39 | if (smax == 0) 40 | { 41 | return 0; // Since norm(0) returns 0 42 | } else 43 | { 44 | return (t > nbits) ? 0 : nbits - t; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/signal_processing/include/real_fft.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_ 12 | #define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_ 13 | 14 | #include 15 | 16 | // For ComplexFFT(), the maximum fft order is 10; 17 | // WebRTC APM uses orders of only 7 and 8. 
18 | enum { kMaxFFTOrder = 10 }; 19 | 20 | struct RealFFT; 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | struct RealFFT* WebRtcSpl_CreateRealFFT(int order); 27 | void WebRtcSpl_FreeRealFFT(struct RealFFT* self); 28 | 29 | // Compute an FFT for a real-valued signal of length of 2^order, 30 | // where 1 < order <= MAX_FFT_ORDER. Transform length is determined by the 31 | // specification structure, which must be initialized prior to calling the FFT 32 | // function with WebRtcSpl_CreateRealFFT(). 33 | // The relationship between the input and output sequences can 34 | // be expressed in terms of the DFT, i.e.: 35 | // x[n] = (2^(-scalefactor)/N) . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N) 36 | // n=0,1,2,...N-1 37 | // N=2^order. 38 | // The conjugate-symmetric output sequence is represented using a CCS vector, 39 | // which is of length N+2, and is organized as follows: 40 | // Index: 0 1 2 3 4 5 . . . N-2 N-1 N N+1 41 | // Component: R0 0 R1 I1 R2 I2 . . . R[N/2-1] I[N/2-1] R[N/2] 0 42 | // where R[n] and I[n], respectively, denote the real and imaginary components 43 | // for FFT bin 'n'. Bins are numbered from 0 to N/2, where N is the FFT length. 44 | // Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to 45 | // the foldover frequency. 46 | // 47 | // Input Arguments: 48 | // self - pointer to preallocated and initialized FFT specification structure. 49 | // real_data_in - the input signal. For an ARM Neon platform, it must be 50 | // aligned on a 32-byte boundary. 51 | // 52 | // Output Arguments: 53 | // complex_data_out - the output complex signal with (2^order + 2) 16-bit 54 | // elements. For an ARM Neon platform, it must be different 55 | // from real_data_in, and aligned on a 32-byte boundary. 56 | // 57 | // Return Value: 58 | // 0 - FFT calculation is successful. 59 | // -1 - Error with bad arguments (null pointers). 
60 | int WebRtcSpl_RealForwardFFT(struct RealFFT* self, 61 | const int16_t* real_data_in, 62 | int16_t* complex_data_out); 63 | 64 | // Compute the inverse FFT for a conjugate-symmetric input sequence of length of 65 | // 2^order, where 1 < order <= MAX_FFT_ORDER. Transform length is determined by 66 | // the specification structure, which must be initialized prior to calling the 67 | // FFT function with WebRtcSpl_CreateRealFFT(). 68 | // For a transform of length M, the input sequence is represented using a packed 69 | // CCS vector of length M+2, which is explained in the comments for 70 | // WebRtcSpl_RealForwardFFTC above. 71 | // 72 | // Input Arguments: 73 | // self - pointer to preallocated and initialized FFT specification structure. 74 | // complex_data_in - the input complex signal with (2^order + 2) 16-bit 75 | // elements. For an ARM Neon platform, it must be aligned on 76 | // a 32-byte boundary. 77 | // 78 | // Output Arguments: 79 | // real_data_out - the output real signal. For an ARM Neon platform, it must 80 | // be different to complex_data_in, and aligned on a 32-byte 81 | // boundary. 82 | // 83 | // Return Value: 84 | // 0 or a positive number - a value that the elements in the |real_data_out| 85 | // should be shifted left with in order to get 86 | // correct physical values. 87 | // -1 - Error with bad arguments (null pointers). 88 | int WebRtcSpl_RealInverseFFT(struct RealFFT* self, 89 | const int16_t* complex_data_in, 90 | int16_t* real_data_out); 91 | 92 | #ifdef __cplusplus 93 | } 94 | #endif 95 | 96 | #endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_ 97 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/signal_processing/include/spl_inl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | // This header file includes the inline functions in 12 | // the fix point signal processing library. 13 | 14 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_ 15 | #define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_ 16 | 17 | #include "webrtc/rtc_base/compile_assert_c.h" 18 | 19 | extern const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64]; 20 | 21 | // Don't call this directly except in tests! 22 | static __inline int WebRtcSpl_CountLeadingZeros32_NotBuiltin(uint32_t n) { 23 | // Normalize n by rounding up to the nearest number that is a sequence of 0 24 | // bits followed by a sequence of 1 bits. This number has the same number of 25 | // leading zeros as the original n. There are exactly 33 such values. 26 | n |= n >> 1; 27 | n |= n >> 2; 28 | n |= n >> 4; 29 | n |= n >> 8; 30 | n |= n >> 16; 31 | 32 | // Multiply the modified n with a constant selected (by exhaustive search) 33 | // such that each of the 33 possible values of n give a product whose 6 most 34 | // significant bits are unique. Then look up the answer in the table. 35 | return kWebRtcSpl_CountLeadingZeros32_Table[(n * 0x8c0b2891) >> 26]; 36 | } 37 | 38 | // Don't call this directly except in tests! 39 | static __inline int WebRtcSpl_CountLeadingZeros64_NotBuiltin(uint64_t n) { 40 | const int leading_zeros = n >> 32 == 0 ? 32 : 0; 41 | return leading_zeros + WebRtcSpl_CountLeadingZeros32_NotBuiltin( 42 | (uint32_t)(n >> (32 - leading_zeros))); 43 | } 44 | 45 | // Returns the number of leading zero bits in the argument. 
46 | static __inline int WebRtcSpl_CountLeadingZeros32(uint32_t n) { 47 | #ifdef __GNUC__ 48 | RTC_COMPILE_ASSERT(sizeof(unsigned int) == sizeof(uint32_t)); 49 | return n == 0 ? 32 : __builtin_clz(n); 50 | #else 51 | return WebRtcSpl_CountLeadingZeros32_NotBuiltin(n); 52 | #endif 53 | } 54 | 55 | // Returns the number of leading zero bits in the argument. 56 | static __inline int WebRtcSpl_CountLeadingZeros64(uint64_t n) { 57 | #ifdef __GNUC__ 58 | RTC_COMPILE_ASSERT(sizeof(unsigned long long) == sizeof(uint64_t)); // NOLINT 59 | return n == 0 ? 64 : __builtin_clzll(n); 60 | #else 61 | return WebRtcSpl_CountLeadingZeros64_NotBuiltin(n); 62 | #endif 63 | } 64 | 65 | #ifdef WEBRTC_ARCH_ARM_V7 66 | #include "webrtc/common_audio/signal_processing/include/spl_inl_armv7.h" 67 | #else 68 | 69 | #if defined(MIPS32_LE) 70 | #include "webrtc/common_audio/signal_processing/include/spl_inl_mips.h" 71 | #endif 72 | 73 | #if !defined(MIPS_DSP_R1_LE) 74 | static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) { 75 | int16_t out16 = (int16_t)value32; 76 | 77 | if (value32 > 32767) 78 | out16 = 32767; 79 | else if (value32 < -32768) 80 | out16 = -32768; 81 | 82 | return out16; 83 | } 84 | 85 | static __inline int32_t WebRtcSpl_AddSatW32(int32_t a, int32_t b) { 86 | // Do the addition in unsigned numbers, since signed overflow is undefined 87 | // behavior. 88 | const int32_t sum = (int32_t)((uint32_t)a + (uint32_t)b); 89 | 90 | // a + b can't overflow if a and b have different signs. If they have the 91 | // same sign, a + b also has the same sign iff it didn't overflow. 92 | if ((a < 0) == (b < 0) && (a < 0) != (sum < 0)) { 93 | // The direction of the overflow is obvious from the sign of a + b. 94 | return sum < 0 ? INT32_MAX : INT32_MIN; 95 | } 96 | return sum; 97 | } 98 | 99 | static __inline int32_t WebRtcSpl_SubSatW32(int32_t a, int32_t b) { 100 | // Do the subtraction in unsigned numbers, since signed overflow is undefined 101 | // behavior. 
102 | const int32_t diff = (int32_t)((uint32_t)a - (uint32_t)b); 103 | 104 | // a - b can't overflow if a and b have the same sign. If they have different 105 | // signs, a - b has the same sign as a iff it didn't overflow. 106 | if ((a < 0) != (b < 0) && (a < 0) != (diff < 0)) { 107 | // The direction of the overflow is obvious from the sign of a - b. 108 | return diff < 0 ? INT32_MAX : INT32_MIN; 109 | } 110 | return diff; 111 | } 112 | 113 | static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) { 114 | return WebRtcSpl_SatW32ToW16((int32_t)a + (int32_t)b); 115 | } 116 | 117 | static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) { 118 | return WebRtcSpl_SatW32ToW16((int32_t)var1 - (int32_t)var2); 119 | } 120 | #endif // #if !defined(MIPS_DSP_R1_LE) 121 | 122 | #if !defined(MIPS32_LE) 123 | static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) { 124 | return 32 - WebRtcSpl_CountLeadingZeros32(n); 125 | } 126 | 127 | // Return the number of steps a can be left-shifted without overflow, 128 | // or 0 if a == 0. 129 | static __inline int16_t WebRtcSpl_NormW32(int32_t a) { 130 | return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? ~a : a) - 1; 131 | } 132 | 133 | // Return the number of steps a can be left-shifted without overflow, 134 | // or 0 if a == 0. 135 | static __inline int16_t WebRtcSpl_NormU32(uint32_t a) { 136 | return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a); 137 | } 138 | 139 | // Return the number of steps a can be left-shifted without overflow, 140 | // or 0 if a == 0. 141 | static __inline int16_t WebRtcSpl_NormW16(int16_t a) { 142 | const int32_t a32 = a; 143 | return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? 
~a32 : a32) - 17; 144 | } 145 | 146 | static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) { 147 | return (a * b + c); 148 | } 149 | #endif // #if !defined(MIPS32_LE) 150 | 151 | #endif // WEBRTC_ARCH_ARM_V7 152 | 153 | #endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_ 154 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/signal_processing/min_max_operations.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | /* 12 | * This file contains the implementation of functions 13 | * WebRtcSpl_MaxAbsValueW16C() 14 | * WebRtcSpl_MaxAbsValueW32C() 15 | * WebRtcSpl_MaxValueW16C() 16 | * WebRtcSpl_MaxValueW32C() 17 | * WebRtcSpl_MinValueW16C() 18 | * WebRtcSpl_MinValueW32C() 19 | * WebRtcSpl_MaxAbsIndexW16() 20 | * WebRtcSpl_MaxIndexW16() 21 | * WebRtcSpl_MaxIndexW32() 22 | * WebRtcSpl_MinIndexW16() 23 | * WebRtcSpl_MinIndexW32() 24 | * 25 | */ 26 | 27 | #include 28 | 29 | #include "webrtc/rtc_base/checks.h" 30 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 31 | 32 | // TODO(bjorn/kma): Consolidate function pairs (e.g. combine 33 | // WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.) 34 | // TODO(kma): Move the next six functions into min_max_operations_c.c. 35 | 36 | // Maximum absolute value of word16 vector. C version for generic platforms. 
37 | int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length) { 38 | size_t i = 0; 39 | int absolute = 0, maximum = 0; 40 | 41 | RTC_DCHECK_GT(length, 0); 42 | 43 | for (i = 0; i < length; i++) { 44 | absolute = abs((int)vector[i]); 45 | 46 | if (absolute > maximum) { 47 | maximum = absolute; 48 | } 49 | } 50 | 51 | // Guard the case for abs(-32768). 52 | if (maximum > WEBRTC_SPL_WORD16_MAX) { 53 | maximum = WEBRTC_SPL_WORD16_MAX; 54 | } 55 | 56 | return (int16_t)maximum; 57 | } 58 | 59 | // Maximum absolute value of word32 vector. C version for generic platforms. 60 | int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length) { 61 | // Use uint32_t for the local variables, to accommodate the return value 62 | // of abs(0x80000000), which is 0x80000000. 63 | 64 | uint32_t absolute = 0, maximum = 0; 65 | size_t i = 0; 66 | 67 | RTC_DCHECK_GT(length, 0); 68 | 69 | for (i = 0; i < length; i++) { 70 | absolute = abs((int)vector[i]); 71 | if (absolute > maximum) { 72 | maximum = absolute; 73 | } 74 | } 75 | 76 | maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX); 77 | 78 | return (int32_t)maximum; 79 | } 80 | 81 | // Maximum value of word16 vector. C version for generic platforms. 82 | int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length) { 83 | int16_t maximum = WEBRTC_SPL_WORD16_MIN; 84 | size_t i = 0; 85 | 86 | RTC_DCHECK_GT(length, 0); 87 | 88 | for (i = 0; i < length; i++) { 89 | if (vector[i] > maximum) 90 | maximum = vector[i]; 91 | } 92 | return maximum; 93 | } 94 | 95 | // Maximum value of word32 vector. C version for generic platforms. 96 | int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length) { 97 | int32_t maximum = WEBRTC_SPL_WORD32_MIN; 98 | size_t i = 0; 99 | 100 | RTC_DCHECK_GT(length, 0); 101 | 102 | for (i = 0; i < length; i++) { 103 | if (vector[i] > maximum) 104 | maximum = vector[i]; 105 | } 106 | return maximum; 107 | } 108 | 109 | // Minimum value of word16 vector. 
C version for generic platforms. 110 | int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length) { 111 | int16_t minimum = WEBRTC_SPL_WORD16_MAX; 112 | size_t i = 0; 113 | 114 | RTC_DCHECK_GT(length, 0); 115 | 116 | for (i = 0; i < length; i++) { 117 | if (vector[i] < minimum) 118 | minimum = vector[i]; 119 | } 120 | return minimum; 121 | } 122 | 123 | // Minimum value of word32 vector. C version for generic platforms. 124 | int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length) { 125 | int32_t minimum = WEBRTC_SPL_WORD32_MAX; 126 | size_t i = 0; 127 | 128 | RTC_DCHECK_GT(length, 0); 129 | 130 | for (i = 0; i < length; i++) { 131 | if (vector[i] < minimum) 132 | minimum = vector[i]; 133 | } 134 | return minimum; 135 | } 136 | 137 | // Index of maximum absolute value in a word16 vector. 138 | size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length) { 139 | // Use type int for local variables, to accomodate the value of abs(-32768). 140 | 141 | size_t i = 0, index = 0; 142 | int absolute = 0, maximum = 0; 143 | 144 | RTC_DCHECK_GT(length, 0); 145 | 146 | for (i = 0; i < length; i++) { 147 | absolute = abs((int)vector[i]); 148 | 149 | if (absolute > maximum) { 150 | maximum = absolute; 151 | index = i; 152 | } 153 | } 154 | 155 | return index; 156 | } 157 | 158 | // Index of maximum value in a word16 vector. 159 | size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length) { 160 | size_t i = 0, index = 0; 161 | int16_t maximum = WEBRTC_SPL_WORD16_MIN; 162 | 163 | RTC_DCHECK_GT(length, 0); 164 | 165 | for (i = 0; i < length; i++) { 166 | if (vector[i] > maximum) { 167 | maximum = vector[i]; 168 | index = i; 169 | } 170 | } 171 | 172 | return index; 173 | } 174 | 175 | // Index of maximum value in a word32 vector. 
176 | size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length) { 177 | size_t i = 0, index = 0; 178 | int32_t maximum = WEBRTC_SPL_WORD32_MIN; 179 | 180 | RTC_DCHECK_GT(length, 0); 181 | 182 | for (i = 0; i < length; i++) { 183 | if (vector[i] > maximum) { 184 | maximum = vector[i]; 185 | index = i; 186 | } 187 | } 188 | 189 | return index; 190 | } 191 | 192 | // Index of minimum value in a word16 vector. 193 | size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length) { 194 | size_t i = 0, index = 0; 195 | int16_t minimum = WEBRTC_SPL_WORD16_MAX; 196 | 197 | RTC_DCHECK_GT(length, 0); 198 | 199 | for (i = 0; i < length; i++) { 200 | if (vector[i] < minimum) { 201 | minimum = vector[i]; 202 | index = i; 203 | } 204 | } 205 | 206 | return index; 207 | } 208 | 209 | // Index of minimum value in a word32 vector. 210 | size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) { 211 | size_t i = 0, index = 0; 212 | int32_t minimum = WEBRTC_SPL_WORD32_MAX; 213 | 214 | RTC_DCHECK_GT(length, 0); 215 | 216 | for (i = 0; i < length; i++) { 217 | if (vector[i] < minimum) { 218 | minimum = vector[i]; 219 | index = i; 220 | } 221 | } 222 | 223 | return index; 224 | } 225 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/signal_processing/resample_48khz.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains resampling functions between 48 kHz and nb/wb. 
14 | * The description header can be found in signal_processing_library.h 15 | * 16 | */ 17 | 18 | #include 19 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 20 | #include "webrtc/common_audio/signal_processing/resample_by_2_internal.h" 21 | 22 | //////////////////////////// 23 | ///// 48 kHz -> 16 kHz ///// 24 | //////////////////////////// 25 | 26 | // 48 -> 16 resampler 27 | void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out, 28 | WebRtcSpl_State48khzTo16khz* state, int32_t* tmpmem) 29 | { 30 | ///// 48 --> 48(LP) ///// 31 | // int16_t in[480] 32 | // int32_t out[480] 33 | ///// 34 | WebRtcSpl_LPBy2ShortToInt(in, 480, tmpmem + 16, state->S_48_48); 35 | 36 | ///// 48 --> 32 ///// 37 | // int32_t in[480] 38 | // int32_t out[320] 39 | ///// 40 | // copy state to and from input array 41 | memcpy(tmpmem + 8, state->S_48_32, 8 * sizeof(int32_t)); 42 | memcpy(state->S_48_32, tmpmem + 488, 8 * sizeof(int32_t)); 43 | WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 160); 44 | 45 | ///// 32 --> 16 ///// 46 | // int32_t in[320] 47 | // int16_t out[160] 48 | ///// 49 | WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16); 50 | } 51 | 52 | // initialize state of 48 -> 16 resampler 53 | void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state) 54 | { 55 | memset(state->S_48_48, 0, 16 * sizeof(int32_t)); 56 | memset(state->S_48_32, 0, 8 * sizeof(int32_t)); 57 | memset(state->S_32_16, 0, 8 * sizeof(int32_t)); 58 | } 59 | 60 | //////////////////////////// 61 | ///// 16 kHz -> 48 kHz ///// 62 | //////////////////////////// 63 | 64 | // 16 -> 48 resampler 65 | void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out, 66 | WebRtcSpl_State16khzTo48khz* state, int32_t* tmpmem) 67 | { 68 | ///// 16 --> 32 ///// 69 | // int16_t in[160] 70 | // int32_t out[320] 71 | ///// 72 | WebRtcSpl_UpBy2ShortToInt(in, 160, tmpmem + 16, state->S_16_32); 73 | 74 | ///// 32 --> 24 ///// 75 | // int32_t 
in[320] 76 | // int32_t out[240] 77 | // copy state to and from input array 78 | ///// 79 | memcpy(tmpmem + 8, state->S_32_24, 8 * sizeof(int32_t)); 80 | memcpy(state->S_32_24, tmpmem + 328, 8 * sizeof(int32_t)); 81 | WebRtcSpl_Resample32khzTo24khz(tmpmem + 8, tmpmem, 80); 82 | 83 | ///// 24 --> 48 ///// 84 | // int32_t in[240] 85 | // int16_t out[480] 86 | ///// 87 | WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48); 88 | } 89 | 90 | // initialize state of 16 -> 48 resampler 91 | void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state) 92 | { 93 | memset(state->S_16_32, 0, 8 * sizeof(int32_t)); 94 | memset(state->S_32_24, 0, 8 * sizeof(int32_t)); 95 | memset(state->S_24_48, 0, 8 * sizeof(int32_t)); 96 | } 97 | 98 | //////////////////////////// 99 | ///// 48 kHz -> 8 kHz ///// 100 | //////////////////////////// 101 | 102 | // 48 -> 8 resampler 103 | void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out, 104 | WebRtcSpl_State48khzTo8khz* state, int32_t* tmpmem) 105 | { 106 | ///// 48 --> 24 ///// 107 | // int16_t in[480] 108 | // int32_t out[240] 109 | ///// 110 | WebRtcSpl_DownBy2ShortToInt(in, 480, tmpmem + 256, state->S_48_24); 111 | 112 | ///// 24 --> 24(LP) ///// 113 | // int32_t in[240] 114 | // int32_t out[240] 115 | ///// 116 | WebRtcSpl_LPBy2IntToInt(tmpmem + 256, 240, tmpmem + 16, state->S_24_24); 117 | 118 | ///// 24 --> 16 ///// 119 | // int32_t in[240] 120 | // int32_t out[160] 121 | ///// 122 | // copy state to and from input array 123 | memcpy(tmpmem + 8, state->S_24_16, 8 * sizeof(int32_t)); 124 | memcpy(state->S_24_16, tmpmem + 248, 8 * sizeof(int32_t)); 125 | WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 80); 126 | 127 | ///// 16 --> 8 ///// 128 | // int32_t in[160] 129 | // int16_t out[80] 130 | ///// 131 | WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8); 132 | } 133 | 134 | // initialize state of 48 -> 8 resampler 135 | void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* 
state) 136 | { 137 | memset(state->S_48_24, 0, 8 * sizeof(int32_t)); 138 | memset(state->S_24_24, 0, 16 * sizeof(int32_t)); 139 | memset(state->S_24_16, 0, 8 * sizeof(int32_t)); 140 | memset(state->S_16_8, 0, 8 * sizeof(int32_t)); 141 | } 142 | 143 | //////////////////////////// 144 | ///// 8 kHz -> 48 kHz ///// 145 | //////////////////////////// 146 | 147 | // 8 -> 48 resampler 148 | void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out, 149 | WebRtcSpl_State8khzTo48khz* state, int32_t* tmpmem) 150 | { 151 | ///// 8 --> 16 ///// 152 | // int16_t in[80] 153 | // int32_t out[160] 154 | ///// 155 | WebRtcSpl_UpBy2ShortToInt(in, 80, tmpmem + 264, state->S_8_16); 156 | 157 | ///// 16 --> 12 ///// 158 | // int32_t in[160] 159 | // int32_t out[120] 160 | ///// 161 | // copy state to and from input array 162 | memcpy(tmpmem + 256, state->S_16_12, 8 * sizeof(int32_t)); 163 | memcpy(state->S_16_12, tmpmem + 416, 8 * sizeof(int32_t)); 164 | WebRtcSpl_Resample32khzTo24khz(tmpmem + 256, tmpmem + 240, 40); 165 | 166 | ///// 12 --> 24 ///// 167 | // int32_t in[120] 168 | // int16_t out[240] 169 | ///// 170 | WebRtcSpl_UpBy2IntToInt(tmpmem + 240, 120, tmpmem, state->S_12_24); 171 | 172 | ///// 24 --> 48 ///// 173 | // int32_t in[240] 174 | // int16_t out[480] 175 | ///// 176 | WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48); 177 | } 178 | 179 | // initialize state of 8 -> 48 resampler 180 | void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state) 181 | { 182 | memset(state->S_8_16, 0, 8 * sizeof(int32_t)); 183 | memset(state->S_16_12, 0, 8 * sizeof(int32_t)); 184 | memset(state->S_12_24, 0, 8 * sizeof(int32_t)); 185 | memset(state->S_24_48, 0, 8 * sizeof(int32_t)); 186 | } 187 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/signal_processing/resample_by_2_internal.h: -------------------------------------------------------------------------------- 1 | /* 2 | * 
Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | /* 12 | * This header file contains some internal resampling functions. 13 | * 14 | */ 15 | 16 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_ 17 | #define COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_ 18 | 19 | #include 20 | 21 | /******************************************************************* 22 | * resample_by_2_fast.c 23 | * Functions for internal use in the other resample functions 24 | ******************************************************************/ 25 | void WebRtcSpl_DownBy2IntToShort(int32_t* in, 26 | int32_t len, 27 | int16_t* out, 28 | int32_t* state); 29 | 30 | void WebRtcSpl_DownBy2ShortToInt(const int16_t* in, 31 | int32_t len, 32 | int32_t* out, 33 | int32_t* state); 34 | 35 | void WebRtcSpl_UpBy2ShortToInt(const int16_t* in, 36 | int32_t len, 37 | int32_t* out, 38 | int32_t* state); 39 | 40 | void WebRtcSpl_UpBy2IntToInt(const int32_t* in, 41 | int32_t len, 42 | int32_t* out, 43 | int32_t* state); 44 | 45 | void WebRtcSpl_UpBy2IntToShort(const int32_t* in, 46 | int32_t len, 47 | int16_t* out, 48 | int32_t* state); 49 | 50 | void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, 51 | int32_t len, 52 | int32_t* out, 53 | int32_t* state); 54 | 55 | void WebRtcSpl_LPBy2IntToInt(const int32_t* in, 56 | int32_t len, 57 | int32_t* out, 58 | int32_t* state); 59 | 60 | #endif // COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_ 61 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/signal_processing/spl_init.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | /* The global function contained in this file initializes SPL function 12 | * pointers, currently only for ARM platforms. 13 | * 14 | * Some code came from common/rtcd.c in the WebM project. 15 | */ 16 | 17 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 18 | #include "webrtc/system_wrappers/include/cpu_features_wrapper.h" 19 | 20 | /* Declare function pointers. */ 21 | MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16; 22 | MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32; 23 | MaxValueW16 WebRtcSpl_MaxValueW16; 24 | MaxValueW32 WebRtcSpl_MaxValueW32; 25 | MinValueW16 WebRtcSpl_MinValueW16; 26 | MinValueW32 WebRtcSpl_MinValueW32; 27 | CrossCorrelation WebRtcSpl_CrossCorrelation; 28 | DownsampleFast WebRtcSpl_DownsampleFast; 29 | ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound; 30 | 31 | #if (!defined(WEBRTC_HAS_NEON)) && !defined(MIPS32_LE) 32 | /* Initialize function pointers to the generic C version. 
*/ 33 | static void InitPointersToC(void) { 34 | WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C; 35 | WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C; 36 | WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C; 37 | WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C; 38 | WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C; 39 | WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C; 40 | WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC; 41 | WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC; 42 | WebRtcSpl_ScaleAndAddVectorsWithRound = 43 | WebRtcSpl_ScaleAndAddVectorsWithRoundC; 44 | } 45 | #endif 46 | 47 | #if defined(WEBRTC_HAS_NEON) 48 | /* Initialize function pointers to the Neon version. */ 49 | static void InitPointersToNeon(void) { 50 | WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon; 51 | WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon; 52 | WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon; 53 | WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon; 54 | WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon; 55 | WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon; 56 | WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationNeon; 57 | WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon; 58 | WebRtcSpl_ScaleAndAddVectorsWithRound = 59 | WebRtcSpl_ScaleAndAddVectorsWithRoundC; 60 | } 61 | #endif 62 | 63 | #if defined(MIPS32_LE) 64 | /* Initialize function pointers to the MIPS version. 
*/ 65 | static void InitPointersToMIPS(void) { 66 | WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips; 67 | WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips; 68 | WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips; 69 | WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips; 70 | WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips; 71 | WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelation_mips; 72 | WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips; 73 | #if defined(MIPS_DSP_R1_LE) 74 | WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips; 75 | WebRtcSpl_ScaleAndAddVectorsWithRound = 76 | WebRtcSpl_ScaleAndAddVectorsWithRound_mips; 77 | #else 78 | WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C; 79 | WebRtcSpl_ScaleAndAddVectorsWithRound = 80 | WebRtcSpl_ScaleAndAddVectorsWithRoundC; 81 | #endif 82 | } 83 | #endif 84 | 85 | static void InitFunctionPointers(void) { 86 | #if defined(WEBRTC_HAS_NEON) 87 | InitPointersToNeon(); 88 | #elif defined(MIPS32_LE) 89 | InitPointersToMIPS(); 90 | #else 91 | InitPointersToC(); 92 | #endif /* WEBRTC_HAS_NEON */ 93 | } 94 | 95 | #if defined(WEBRTC_POSIX) 96 | #include 97 | 98 | static void once(void (*func)(void)) { 99 | static pthread_once_t lock = PTHREAD_ONCE_INIT; 100 | pthread_once(&lock, func); 101 | } 102 | 103 | #elif defined(_WIN32) 104 | #include 105 | 106 | static void once(void (*func)(void)) { 107 | /* Didn't use InitializeCriticalSection() since there's no race-free context 108 | * in which to execute it. 109 | * 110 | * TODO(kma): Change to different implementation (e.g. 111 | * InterlockedCompareExchangePointer) to avoid issues similar to 112 | * http://code.google.com/p/webm/issues/detail?id=467. 
113 | */ 114 | static CRITICAL_SECTION lock = {(void *)((size_t)-1), -1, 0, 0, 0, 0}; 115 | static int done = 0; 116 | 117 | EnterCriticalSection(&lock); 118 | if (!done) { 119 | func(); 120 | done = 1; 121 | } 122 | LeaveCriticalSection(&lock); 123 | } 124 | 125 | /* There's no fallback version as an #else block here to ensure thread safety. 126 | * In case of neither pthread for WEBRTC_POSIX nor _WIN32 is present, build 127 | * system should pick it up. 128 | */ 129 | #endif /* WEBRTC_POSIX */ 130 | 131 | void WebRtcSpl_Init(void) { 132 | once(InitFunctionPointers); 133 | } 134 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/signal_processing/spl_inl.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include 12 | 13 | #include "webrtc/common_audio/signal_processing/include/spl_inl.h" 14 | 15 | // Table used by WebRtcSpl_CountLeadingZeros32_NotBuiltin. For each uint32_t n 16 | // that's a sequence of 0 bits followed by a sequence of 1 bits, the entry at 17 | // index (n * 0x8c0b2891) >> 26 in this table gives the number of zero bits in 18 | // n. 
// 33 of the 64 slots hold a real answer (0..32); the remaining slots are
// filled with -1.
const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64] = {
    32, 8,  17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0,  26, 25, 24,
    4,  11, 23, 31, 3,  7,  10, 16, 22, 30, -1, -1, 2,  6,  13, 9,
    -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1,  27, 5,  12,
};

--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/spl_sqrt.c:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


/*
 * This file contains the function WebRtcSpl_Sqrt().
 * The description header can be found in signal_processing_library.h
 *
 */

#include "webrtc/rtc_base/checks.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"

int32_t WebRtcSpl_SqrtLocal(int32_t in);

// Evaluates the polynomial approximation described in the block comment below
// on a normalized fixed-point input. Called by WebRtcSpl_Sqrt(), which
// performs the normalization and de-normalization around this call.
int32_t WebRtcSpl_SqrtLocal(int32_t in)
{

    int16_t x_half, t16;
    int32_t A, B, x2;

    /* The following block performs:
     y=in/2
     x=y-2^30
     x_half=x/2^31
     t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
         + 0.875*((x_half)^5)
     */

    B = in / 2;

    B = B - ((int32_t)0x40000000); // B = in/2 - 1/2
    x_half = (int16_t)(B >> 16); // x_half = x/2 = (in-1)/2
    B = B + ((int32_t)0x40000000); // B = 1 + x/2
    B = B + ((int32_t)0x40000000); // Add 0.5 twice (since 1.0 does not exist in Q31)

    x2 = ((int32_t)x_half) * ((int32_t)x_half) * 2; // x2 = (x/2)^2
    A = -x2; // A = -(x/2)^2
    B = B + (A >> 1); // B = 1 + x/2 - 0.5*(x/2)^2

    A >>= 16;
    A = A * A * 2; // A = (x/2)^4
    t16 = (int16_t)(A >> 16);
    B += -20480 * t16 * 2; // B = B - 0.625*A
    // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4

    A = x_half * t16 * 2; // A = (x/2)^5
    t16 = (int16_t)(A >> 16);
    B += 28672 * t16 * 2; // B = B + 0.875*A
    // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + 0.875*(x/2)^5

    t16 = (int16_t)(x2 >> 16);
    A = x_half * t16 * 2; // A = (x/2)^3

    B = B + (A >> 1); // B = B + 0.5*A
    // After this, B = 1 + x/2 - 0.5*(x/2)^2 + 0.5*(x/2)^3 - 0.625*(x/2)^4 + 0.875*(x/2)^5

    B = B + ((int32_t)32768); // Round off bit

    return B;
}

// Returns the integer square root of abs(value); see the algorithm notes
// inside the function. value == 0 returns 0, and the most negative int32_t is
// treated as WEBRTC_SPL_WORD32_MAX because it cannot be negated.
int32_t WebRtcSpl_Sqrt(int32_t value)
{
    /*
     Algorithm:

     Six term Taylor Series is used here to compute the square root of a number
     y^0.5 = (1+x)^0.5 where x = y-1
     = 1+(x/2)-0.5*((x/2)^2)+0.5*((x/2)^3)-0.625*((x/2)^4)+0.875*((x/2)^5)
     0.5 <= x < 1

     Example of how the algorithm works, with ut=sqrt(in), and
     with in=73632 and ut=271 (even shift value case):

     in=73632
     y= in/131072
     x=y-1
     t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
     ut=t*(1/sqrt(2))*512

     or:

     in=73632
     in2=73632*2^14
     y= in2/2^31
     x=y-1
     t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
     ut=t*(1/sqrt(2))
     ut2=ut*2^9

     which gives:

     in = 73632
     in2 = 1206386688
     y = 0.56176757812500
     x = -0.43823242187500
     t = 0.74973506527313
     ut = 0.53014274874797
     ut2 = 2.714330873589594e+002

     or:

     in=73632
     in2=73632*2^14
     y=in2/2
     x=y-2^30
     x_half=x/2^31
     t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
         + 0.875*((x_half)^5)
     ut=t*(1/sqrt(2))
     ut2=ut*2^9

     which gives:

     in = 73632
     in2 = 1206386688
     y = 603193344
     x = -470548480
     x_half = -0.21911621093750
     t = 0.74973506527313
     ut = 0.53014274874797
     ut2 = 2.714330873589594e+002

     */

    int16_t x_norm, nshift, t16, sh;
    int32_t A;

    int16_t k_sqrt_2 = 23170; // 1/sqrt2 (==5a82)

    A = value;

    // The convention in this function is to calculate sqrt(abs(A)). Negate the
    // input if it is negative.
    if (A < 0) {
        if (A == WEBRTC_SPL_WORD32_MIN) {
            // This number cannot be held in an int32_t after negating.
            // Map it to the maximum positive value.
            A = WEBRTC_SPL_WORD32_MAX;
        } else {
            A = -A;
        }
    } else if (A == 0) {
        return 0; // sqrt(0) = 0
    }

    sh = WebRtcSpl_NormW32(A); // # shifts to normalize A
    A = WEBRTC_SPL_LSHIFT_W32(A, sh); // Normalize A
    // Add the rounding bit only when it cannot overflow; otherwise saturate.
    if (A < (WEBRTC_SPL_WORD32_MAX - 32767))
    {
        A = A + ((int32_t)32768); // Round off bit
    } else
    {
        A = WEBRTC_SPL_WORD32_MAX;
    }

    x_norm = (int16_t)(A >> 16); // x_norm = AH

    nshift = (sh / 2);
    RTC_DCHECK_GE(nshift, 0);

    A = (int32_t)WEBRTC_SPL_LSHIFT_W32((int32_t)x_norm, 16);
    A = WEBRTC_SPL_ABS_W32(A); // A = abs(x_norm<<16)
    A = WebRtcSpl_SqrtLocal(A); // A = sqrt(A)

    if (2 * nshift == sh) {
        // Even shift value case

        t16 = (int16_t)(A >> 16); // t16 = AH

        A = k_sqrt_2 * t16 * 2; // A = 1/sqrt(2)*t16
        A = A + ((int32_t)32768); // Round off
        A = A & ((int32_t)0x7fff0000); // Round off

        A >>= 15; // A = A>>15

    } else
    {
        A >>= 16; // A = A>>16
    }

    A = A & ((int32_t)0x0000ffff);
    A >>= nshift; // De-normalize the result.

    return A;
}

--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/vector_scaling_operations.c:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
9 | */ 10 | 11 | 12 | /* 13 | * This file contains implementations of the functions 14 | * WebRtcSpl_VectorBitShiftW16() 15 | * WebRtcSpl_VectorBitShiftW32() 16 | * WebRtcSpl_VectorBitShiftW32ToW16() 17 | * WebRtcSpl_ScaleVector() 18 | * WebRtcSpl_ScaleVectorWithSat() 19 | * WebRtcSpl_ScaleAndAddVectors() 20 | * WebRtcSpl_ScaleAndAddVectorsWithRoundC() 21 | */ 22 | 23 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 24 | 25 | void WebRtcSpl_VectorBitShiftW16(int16_t *res, size_t length, 26 | const int16_t *in, int16_t right_shifts) 27 | { 28 | size_t i; 29 | 30 | if (right_shifts > 0) 31 | { 32 | for (i = length; i > 0; i--) 33 | { 34 | (*res++) = ((*in++) >> right_shifts); 35 | } 36 | } else 37 | { 38 | for (i = length; i > 0; i--) 39 | { 40 | (*res++) = ((*in++) * (1 << (-right_shifts))); 41 | } 42 | } 43 | } 44 | 45 | void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector, 46 | size_t vector_length, 47 | const int32_t *in_vector, 48 | int16_t right_shifts) 49 | { 50 | size_t i; 51 | 52 | if (right_shifts > 0) 53 | { 54 | for (i = vector_length; i > 0; i--) 55 | { 56 | (*out_vector++) = ((*in_vector++) >> right_shifts); 57 | } 58 | } else 59 | { 60 | for (i = vector_length; i > 0; i--) 61 | { 62 | (*out_vector++) = ((*in_vector++) << (-right_shifts)); 63 | } 64 | } 65 | } 66 | 67 | void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length, 68 | const int32_t* in, int right_shifts) { 69 | size_t i; 70 | int32_t tmp_w32; 71 | 72 | if (right_shifts >= 0) { 73 | for (i = length; i > 0; i--) { 74 | tmp_w32 = (*in++) >> right_shifts; 75 | (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32); 76 | } 77 | } else { 78 | int left_shifts = -right_shifts; 79 | for (i = length; i > 0; i--) { 80 | tmp_w32 = (*in++) << left_shifts; 81 | (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32); 82 | } 83 | } 84 | } 85 | 86 | void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector, 87 | int16_t gain, size_t in_vector_length, 88 | int16_t 
right_shifts) 89 | { 90 | // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts 91 | size_t i; 92 | const int16_t *inptr; 93 | int16_t *outptr; 94 | 95 | inptr = in_vector; 96 | outptr = out_vector; 97 | 98 | for (i = 0; i < in_vector_length; i++) 99 | { 100 | *outptr++ = (int16_t)((*inptr++ * gain) >> right_shifts); 101 | } 102 | } 103 | 104 | void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector, 105 | int16_t gain, size_t in_vector_length, 106 | int16_t right_shifts) 107 | { 108 | // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts 109 | size_t i; 110 | const int16_t *inptr; 111 | int16_t *outptr; 112 | 113 | inptr = in_vector; 114 | outptr = out_vector; 115 | 116 | for (i = 0; i < in_vector_length; i++) { 117 | *outptr++ = WebRtcSpl_SatW32ToW16((*inptr++ * gain) >> right_shifts); 118 | } 119 | } 120 | 121 | void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1, 122 | const int16_t *in2, int16_t gain2, int shift2, 123 | int16_t *out, size_t vector_length) 124 | { 125 | // Performs vector operation: out = (gain1*in1)>>shift1 + (gain2*in2)>>shift2 126 | size_t i; 127 | const int16_t *in1ptr; 128 | const int16_t *in2ptr; 129 | int16_t *outptr; 130 | 131 | in1ptr = in1; 132 | in2ptr = in2; 133 | outptr = out; 134 | 135 | for (i = 0; i < vector_length; i++) 136 | { 137 | *outptr++ = (int16_t)((gain1 * *in1ptr++) >> shift1) + 138 | (int16_t)((gain2 * *in2ptr++) >> shift2); 139 | } 140 | } 141 | 142 | // C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms. 
143 | int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1, 144 | int16_t in_vector1_scale, 145 | const int16_t* in_vector2, 146 | int16_t in_vector2_scale, 147 | int right_shifts, 148 | int16_t* out_vector, 149 | size_t length) { 150 | size_t i = 0; 151 | int round_value = (1 << right_shifts) >> 1; 152 | 153 | if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL || 154 | length == 0 || right_shifts < 0) { 155 | return -1; 156 | } 157 | 158 | for (i = 0; i < length; i++) { 159 | out_vector[i] = (int16_t)(( 160 | in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale + 161 | round_value) >> right_shifts); 162 | } 163 | 164 | return 0; 165 | } 166 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Written by Wilco Dijkstra, 1996. The following email exchange establishes the 3 | * license. 4 | * 5 | * From: Wilco Dijkstra 6 | * Date: Fri, Jun 24, 2011 at 3:20 AM 7 | * Subject: Re: sqrt routine 8 | * To: Kevin Ma 9 | * Hi Kevin, 10 | * Thanks for asking. Those routines are public domain (originally posted to 11 | * comp.sys.arm a long time ago), so you can use them freely for any purpose. 12 | * Cheers, 13 | * Wilco 14 | * 15 | * ----- Original Message ----- 16 | * From: "Kevin Ma" 17 | * To: 18 | * Sent: Thursday, June 23, 2011 11:44 PM 19 | * Subject: Fwd: sqrt routine 20 | * Hi Wilco, 21 | * I saw your sqrt routine from several web sites, including 22 | * http://www.finesse.demon.co.uk/steven/sqrt.html. 23 | * Just wonder if there's any copyright information with your Successive 24 | * approximation routines, or if I can freely use it for any purpose. 25 | * Thanks. 26 | * Kevin 27 | */ 28 | 29 | // Minor modifications in code style for WebRTC, 2012. 
30 | 31 | #include "webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h" 32 | 33 | /* 34 | * Algorithm: 35 | * Successive approximation of the equation (root + delta) ^ 2 = N 36 | * until delta < 1. If delta < 1 we have the integer part of SQRT (N). 37 | * Use delta = 2^i for i = 15 .. 0. 38 | * 39 | * Output precision is 16 bits. Note for large input values (close to 40 | * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word) 41 | * contains the MSB information (a non-sign value). Do with caution 42 | * if you need to cast the output to int16_t type. 43 | * 44 | * If the input value is negative, it returns 0. 45 | */ 46 | 47 | #define WEBRTC_SPL_SQRT_ITER(N) \ 48 | try1 = root + (1 << (N)); \ 49 | if (value >= try1 << (N)) \ 50 | { \ 51 | value -= try1 << (N); \ 52 | root |= 2 << (N); \ 53 | } 54 | 55 | int32_t WebRtcSpl_SqrtFloor(int32_t value) 56 | { 57 | int32_t root = 0, try1; 58 | 59 | WEBRTC_SPL_SQRT_ITER (15); 60 | WEBRTC_SPL_SQRT_ITER (14); 61 | WEBRTC_SPL_SQRT_ITER (13); 62 | WEBRTC_SPL_SQRT_ITER (12); 63 | WEBRTC_SPL_SQRT_ITER (11); 64 | WEBRTC_SPL_SQRT_ITER (10); 65 | WEBRTC_SPL_SQRT_ITER ( 9); 66 | WEBRTC_SPL_SQRT_ITER ( 8); 67 | WEBRTC_SPL_SQRT_ITER ( 7); 68 | WEBRTC_SPL_SQRT_ITER ( 6); 69 | WEBRTC_SPL_SQRT_ITER ( 5); 70 | WEBRTC_SPL_SQRT_ITER ( 4); 71 | WEBRTC_SPL_SQRT_ITER ( 3); 72 | WEBRTC_SPL_SQRT_ITER ( 2); 73 | WEBRTC_SPL_SQRT_ITER ( 1); 74 | WEBRTC_SPL_SQRT_ITER ( 0); 75 | 76 | return root >> 1; 77 | } 78 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. 
An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include 12 | 13 | // 14 | // WebRtcSpl_SqrtFloor(...) 15 | // 16 | // Returns the square root of the input value |value|. The precision of this 17 | // function is rounding down integer precision, i.e., sqrt(8) gives 2 as answer. 18 | // If |value| is a negative number then 0 is returned. 19 | // 20 | // Algorithm: 21 | // 22 | // An iterative 4 cylce/bit routine 23 | // 24 | // Input: 25 | // - value : Value to calculate sqrt of 26 | // 27 | // Return value : Result of the sqrt calculation 28 | // 29 | int32_t WebRtcSpl_SqrtFloor(int32_t value); 30 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/vad/include/webrtc_vad.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | /* 12 | * This header file includes the VAD API calls. Specific function calls are 13 | * given below. 14 | */ 15 | 16 | #ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT 17 | #define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ 18 | 19 | #include 20 | #include 21 | 22 | typedef struct WebRtcVadInst VadInst; 23 | 24 | #ifdef __cplusplus 25 | extern "C" { 26 | #endif 27 | 28 | // Creates an instance to the VAD structure. 29 | VadInst* WebRtcVad_Create(void); 30 | 31 | // Frees the dynamic memory of a specified VAD instance. 
32 | // 33 | // - handle [i] : Pointer to VAD instance that should be freed. 34 | void WebRtcVad_Free(VadInst* handle); 35 | 36 | // Initializes a VAD instance. 37 | // 38 | // - handle [i/o] : Instance that should be initialized. 39 | // 40 | // returns : 0 - (OK), 41 | // -1 - (null pointer or Default mode could not be set). 42 | int WebRtcVad_Init(VadInst* handle); 43 | 44 | // Sets the VAD operating mode. A more aggressive (higher mode) VAD is more 45 | // restrictive in reporting speech. Put in other words the probability of being 46 | // speech when the VAD returns 1 is increased with increasing mode. As a 47 | // consequence also the missed detection rate goes up. 48 | // 49 | // - handle [i/o] : VAD instance. 50 | // - mode [i] : Aggressiveness mode (0, 1, 2, or 3). 51 | // 52 | // returns : 0 - (OK), 53 | // -1 - (null pointer, mode could not be set or the VAD instance 54 | // has not been initialized). 55 | int WebRtcVad_set_mode(VadInst* handle, int mode); 56 | 57 | // Calculates a VAD decision for the |audio_frame|. For valid sampling rates 58 | // frame lengths, see the description of WebRtcVad_ValidRatesAndFrameLengths(). 59 | // 60 | // - handle [i/o] : VAD Instance. Needs to be initialized by 61 | // WebRtcVad_Init() before call. 62 | // - fs [i] : Sampling frequency (Hz): 8000, 16000, or 32000 63 | // - audio_frame [i] : Audio frame buffer. 64 | // - frame_length [i] : Length of audio frame buffer in number of samples. 65 | // 66 | // returns : 1 - (Active Voice), 67 | // 0 - (Non-active Voice), 68 | // -1 - (Error) 69 | int WebRtcVad_Process(VadInst* handle, 70 | int fs, 71 | const int16_t* audio_frame, 72 | size_t frame_length); 73 | 74 | // Checks for valid combinations of |rate| and |frame_length|. We support 10, 75 | // 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz. 76 | // 77 | // - rate [i] : Sampling frequency (Hz). 78 | // - frame_length [i] : Speech frame buffer length in number of samples. 
79 | // 80 | // returns : 0 - (valid combination), -1 - (invalid combination) 81 | int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length); 82 | 83 | #ifdef __cplusplus 84 | } 85 | #endif 86 | 87 | #endif // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT 88 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/vad/vad_core.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | /* 12 | * This header file includes the descriptions of the core VAD calls. 13 | */ 14 | 15 | #ifndef COMMON_AUDIO_VAD_VAD_CORE_H_ 16 | #define COMMON_AUDIO_VAD_VAD_CORE_H_ 17 | 18 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 19 | 20 | enum { kNumChannels = 6 }; // Number of frequency bands (named channels). 21 | enum { kNumGaussians = 2 }; // Number of Gaussians per channel in the GMM. 22 | enum { kTableSize = kNumChannels * kNumGaussians }; 23 | enum { kMinEnergy = 10 }; // Minimum energy required to trigger audio signal. 24 | 25 | typedef struct VadInstT_ { 26 | int vad; 27 | int32_t downsampling_filter_states[4]; 28 | WebRtcSpl_State48khzTo8khz state_48_to_8; 29 | int16_t noise_means[kTableSize]; 30 | int16_t speech_means[kTableSize]; 31 | int16_t noise_stds[kTableSize]; 32 | int16_t speech_stds[kTableSize]; 33 | // TODO(bjornv): Change to |frame_count|. 34 | int32_t frame_counter; 35 | int16_t over_hang; // Over Hang 36 | int16_t num_of_speech; 37 | // TODO(bjornv): Change to |age_vector|. 
38 | int16_t index_vector[16 * kNumChannels]; 39 | int16_t low_value_vector[16 * kNumChannels]; 40 | // TODO(bjornv): Change to |median|. 41 | int16_t mean_value[kNumChannels]; 42 | int16_t upper_state[5]; 43 | int16_t lower_state[5]; 44 | int16_t hp_filter_state[4]; 45 | int16_t over_hang_max_1[3]; 46 | int16_t over_hang_max_2[3]; 47 | int16_t individual[3]; 48 | int16_t total[3]; 49 | 50 | int init_flag; 51 | } VadInstT; 52 | 53 | // Initializes the core VAD component. The default aggressiveness mode is 54 | // controlled by |kDefaultMode| in vad_core.c. 55 | // 56 | // - self [i/o] : Instance that should be initialized 57 | // 58 | // returns : 0 (OK), -1 (null pointer in or if the default mode can't be 59 | // set) 60 | int WebRtcVad_InitCore(VadInstT* self); 61 | 62 | /**************************************************************************** 63 | * WebRtcVad_set_mode_core(...) 64 | * 65 | * This function changes the VAD settings 66 | * 67 | * Input: 68 | * - inst : VAD instance 69 | * - mode : Aggressiveness degree 70 | * 0 (High quality) - 3 (Highly aggressive) 71 | * 72 | * Output: 73 | * - inst : Changed instance 74 | * 75 | * Return value : 0 - Ok 76 | * -1 - Error 77 | */ 78 | 79 | int WebRtcVad_set_mode_core(VadInstT* self, int mode); 80 | 81 | /**************************************************************************** 82 | * WebRtcVad_CalcVad48khz(...) 83 | * WebRtcVad_CalcVad32khz(...) 84 | * WebRtcVad_CalcVad16khz(...) 85 | * WebRtcVad_CalcVad8khz(...) 86 | * 87 | * Calculate probability for active speech and make VAD decision. 88 | * 89 | * Input: 90 | * - inst : Instance that should be initialized 91 | * - speech_frame : Input speech frame 92 | * - frame_length : Number of input samples 93 | * 94 | * Output: 95 | * - inst : Updated filter states etc. 
96 | * 97 | * Return value : VAD decision 98 | * 0 - No active speech 99 | * 1-6 - Active speech 100 | */ 101 | int WebRtcVad_CalcVad48khz(VadInstT* inst, 102 | const int16_t* speech_frame, 103 | size_t frame_length); 104 | int WebRtcVad_CalcVad32khz(VadInstT* inst, 105 | const int16_t* speech_frame, 106 | size_t frame_length); 107 | int WebRtcVad_CalcVad16khz(VadInstT* inst, 108 | const int16_t* speech_frame, 109 | size_t frame_length); 110 | int WebRtcVad_CalcVad8khz(VadInstT* inst, 111 | const int16_t* speech_frame, 112 | size_t frame_length); 113 | 114 | #endif // COMMON_AUDIO_VAD_VAD_CORE_H_ 115 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/vad/vad_filterbank.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | /* 12 | * This file includes feature calculating functionality used in vad_core.c. 13 | */ 14 | 15 | #ifndef COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ 16 | #define COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ 17 | 18 | #include "webrtc/common_audio/vad/vad_core.h" 19 | 20 | // Takes |data_length| samples of |data_in| and calculates the logarithm of the 21 | // energy of each of the |kNumChannels| = 6 frequency bands used by the VAD: 22 | // 80 Hz - 250 Hz 23 | // 250 Hz - 500 Hz 24 | // 500 Hz - 1000 Hz 25 | // 1000 Hz - 2000 Hz 26 | // 2000 Hz - 3000 Hz 27 | // 3000 Hz - 4000 Hz 28 | // 29 | // The values are given in Q4 and written to |features|. Further, an approximate 30 | // overall energy is returned. 
The return value is used in 31 | // WebRtcVad_GmmProbability() as a signal indicator, hence it is arbitrary above 32 | // the threshold |kMinEnergy|. 33 | // 34 | // - self [i/o] : State information of the VAD. 35 | // - data_in [i] : Input audio data, for feature extraction. 36 | // - data_length [i] : Audio data size, in number of samples. 37 | // - features [o] : 10 * log10(energy in each frequency band), Q4. 38 | // - returns : Total energy of the signal (NOTE! This value is not 39 | // exact. It is only used in a comparison.) 40 | int16_t WebRtcVad_CalculateFeatures(VadInstT* self, 41 | const int16_t* data_in, 42 | size_t data_length, 43 | int16_t* features); 44 | 45 | #endif // COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ 46 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/vad/vad_gmm.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "webrtc/common_audio/vad/vad_gmm.h" 12 | 13 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 14 | 15 | static const int32_t kCompVar = 22005; 16 | static const int16_t kLog2Exp = 5909; // log2(exp(1)) in Q12. 17 | 18 | // For a normal distribution, the probability of |input| is calculated and 19 | // returned (in Q20). 
The formula for normal distributed probability is 20 | // 21 | // 1 / s * exp(-(x - m)^2 / (2 * s^2)) 22 | // 23 | // where the parameters are given in the following Q domains: 24 | // m = |mean| (Q7) 25 | // s = |std| (Q7) 26 | // x = |input| (Q4) 27 | // in addition to the probability we output |delta| (in Q11) used when updating 28 | // the noise/speech model. 29 | int32_t WebRtcVad_GaussianProbability(int16_t input, 30 | int16_t mean, 31 | int16_t std, 32 | int16_t* delta) { 33 | int16_t tmp16, inv_std, inv_std2, exp_value = 0; 34 | int32_t tmp32; 35 | 36 | // Calculate |inv_std| = 1 / s, in Q10. 37 | // 131072 = 1 in Q17, and (|std| >> 1) is for rounding instead of truncation. 38 | // Q-domain: Q17 / Q7 = Q10. 39 | tmp32 = (int32_t) 131072 + (int32_t) (std >> 1); 40 | inv_std = (int16_t) WebRtcSpl_DivW32W16(tmp32, std); 41 | 42 | // Calculate |inv_std2| = 1 / s^2, in Q14. 43 | tmp16 = (inv_std >> 2); // Q10 -> Q8. 44 | // Q-domain: (Q8 * Q8) >> 2 = Q14. 45 | inv_std2 = (int16_t)((tmp16 * tmp16) >> 2); 46 | // TODO(bjornv): Investigate if changing to 47 | // inv_std2 = (int16_t)((inv_std * inv_std) >> 6); 48 | // gives better accuracy. 49 | 50 | tmp16 = (input << 3); // Q4 -> Q7 51 | tmp16 = tmp16 - mean; // Q7 - Q7 = Q7 52 | 53 | // To be used later, when updating noise/speech model. 54 | // |delta| = (x - m) / s^2, in Q11. 55 | // Q-domain: (Q14 * Q7) >> 10 = Q11. 56 | *delta = (int16_t)((inv_std2 * tmp16) >> 10); 57 | 58 | // Calculate the exponent |tmp32| = (x - m)^2 / (2 * s^2), in Q10. Replacing 59 | // division by two with one shift. 60 | // Q-domain: (Q11 * Q7) >> 8 = Q10. 61 | tmp32 = (*delta * tmp16) >> 9; 62 | 63 | // If the exponent is small enough to give a non-zero probability we calculate 64 | // |exp_value| ~= exp(-(x - m)^2 / (2 * s^2)) 65 | // ~= exp2(-log2(exp(1)) * |tmp32|). 66 | if (tmp32 < kCompVar) { 67 | // Calculate |tmp16| = log2(exp(1)) * |tmp32|, in Q10. 68 | // Q-domain: (Q12 * Q10) >> 12 = Q10. 
69 | tmp16 = (int16_t)((kLog2Exp * tmp32) >> 12); 70 | tmp16 = -tmp16; 71 | exp_value = (0x0400 | (tmp16 & 0x03FF)); 72 | tmp16 ^= 0xFFFF; 73 | tmp16 >>= 10; 74 | tmp16 += 1; 75 | // Get |exp_value| = exp(-|tmp32|) in Q10. 76 | exp_value >>= tmp16; 77 | } 78 | 79 | // Calculate and return (1 / s) * exp(-(x - m)^2 / (2 * s^2)), in Q20. 80 | // Q-domain: Q10 * Q10 = Q20. 81 | return inv_std * exp_value; 82 | } 83 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/vad/vad_gmm.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | // Gaussian probability calculations internally used in vad_core.c. 12 | 13 | #ifndef COMMON_AUDIO_VAD_VAD_GMM_H_ 14 | #define COMMON_AUDIO_VAD_VAD_GMM_H_ 15 | 16 | #include 17 | 18 | // Calculates the probability for |input|, given that |input| comes from a 19 | // normal distribution with mean and standard deviation (|mean|, |std|). 20 | // 21 | // Inputs: 22 | // - input : input sample in Q4. 23 | // - mean : mean input in the statistical model, Q7. 24 | // - std : standard deviation, Q7. 25 | // 26 | // Output: 27 | // 28 | // - delta : input used when updating the model, Q11. 29 | // |delta| = (|input| - |mean|) / |std|^2. 
30 | // 31 | // Return: 32 | // (probability for |input|) = 33 | // 1 / |std| * exp(-(|input| - |mean|)^2 / (2 * |std|^2)); 34 | int32_t WebRtcVad_GaussianProbability(int16_t input, 35 | int16_t mean, 36 | int16_t std, 37 | int16_t* delta); 38 | 39 | #endif // COMMON_AUDIO_VAD_VAD_GMM_H_ 40 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/vad/vad_sp.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "webrtc/common_audio/vad/vad_sp.h" 12 | 13 | #include "webrtc/rtc_base/checks.h" 14 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 15 | #include "webrtc/common_audio/vad/vad_core.h" 16 | 17 | // Allpass filter coefficients, upper and lower, in Q13. 18 | // Upper: 0.64, Lower: 0.17. 19 | static const int16_t kAllPassCoefsQ13[2] = { 5243, 1392 }; // Q13. 20 | static const int16_t kSmoothingDown = 6553; // 0.2 in Q15. 21 | static const int16_t kSmoothingUp = 32439; // 0.99 in Q15. 22 | 23 | // TODO(bjornv): Move this function to vad_filterbank.c. 24 | // Downsampling filter based on splitting filter and allpass functions. 25 | void WebRtcVad_Downsampling(const int16_t* signal_in, 26 | int16_t* signal_out, 27 | int32_t* filter_state, 28 | size_t in_length) { 29 | int16_t tmp16_1 = 0, tmp16_2 = 0; 30 | int32_t tmp32_1 = filter_state[0]; 31 | int32_t tmp32_2 = filter_state[1]; 32 | size_t n = 0; 33 | // Downsampling by 2 gives half length. 
34 | size_t half_length = (in_length >> 1); 35 | 36 | // Filter coefficients in Q13, filter state in Q0. 37 | for (n = 0; n < half_length; n++) { 38 | // All-pass filtering upper branch. 39 | tmp16_1 = (int16_t) ((tmp32_1 >> 1) + 40 | ((kAllPassCoefsQ13[0] * *signal_in) >> 14)); 41 | *signal_out = tmp16_1; 42 | tmp32_1 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[0] * tmp16_1) >> 12); 43 | 44 | // All-pass filtering lower branch. 45 | tmp16_2 = (int16_t) ((tmp32_2 >> 1) + 46 | ((kAllPassCoefsQ13[1] * *signal_in) >> 14)); 47 | *signal_out++ += tmp16_2; 48 | tmp32_2 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[1] * tmp16_2) >> 12); 49 | } 50 | // Store the filter states. 51 | filter_state[0] = tmp32_1; 52 | filter_state[1] = tmp32_2; 53 | } 54 | 55 | // Inserts |feature_value| into |low_value_vector|, if it is one of the 16 56 | // smallest values the last 100 frames. Then calculates and returns the median 57 | // of the five smallest values. 58 | int16_t WebRtcVad_FindMinimum(VadInstT* self, 59 | int16_t feature_value, 60 | int channel) { 61 | int i = 0, j = 0; 62 | int position = -1; 63 | // Offset to beginning of the 16 minimum values in memory. 64 | const int offset = (channel << 4); 65 | int16_t current_median = 1600; 66 | int16_t alpha = 0; 67 | int32_t tmp32 = 0; 68 | // Pointer to memory for the 16 minimum values and the age of each value of 69 | // the |channel|. 70 | int16_t* age = &self->index_vector[offset]; 71 | int16_t* smallest_values = &self->low_value_vector[offset]; 72 | 73 | RTC_DCHECK_LT(channel, kNumChannels); 74 | 75 | // Each value in |smallest_values| is getting 1 loop older. Update |age|, and 76 | // remove old values. 77 | for (i = 0; i < 16; i++) { 78 | if (age[i] != 100) { 79 | age[i]++; 80 | } else { 81 | // Too old value. Remove from memory and shift larger values downwards. 
82 | for (j = i; j < 15; j++) { 83 | smallest_values[j] = smallest_values[j + 1]; 84 | age[j] = age[j + 1]; 85 | } 86 | age[15] = 101; 87 | smallest_values[15] = 10000; 88 | } 89 | } 90 | 91 | // Check if |feature_value| is smaller than any of the values in 92 | // |smallest_values|. If so, find the |position| where to insert the new value 93 | // (|feature_value|). 94 | if (feature_value < smallest_values[7]) { 95 | if (feature_value < smallest_values[3]) { 96 | if (feature_value < smallest_values[1]) { 97 | if (feature_value < smallest_values[0]) { 98 | position = 0; 99 | } else { 100 | position = 1; 101 | } 102 | } else if (feature_value < smallest_values[2]) { 103 | position = 2; 104 | } else { 105 | position = 3; 106 | } 107 | } else if (feature_value < smallest_values[5]) { 108 | if (feature_value < smallest_values[4]) { 109 | position = 4; 110 | } else { 111 | position = 5; 112 | } 113 | } else if (feature_value < smallest_values[6]) { 114 | position = 6; 115 | } else { 116 | position = 7; 117 | } 118 | } else if (feature_value < smallest_values[15]) { 119 | if (feature_value < smallest_values[11]) { 120 | if (feature_value < smallest_values[9]) { 121 | if (feature_value < smallest_values[8]) { 122 | position = 8; 123 | } else { 124 | position = 9; 125 | } 126 | } else if (feature_value < smallest_values[10]) { 127 | position = 10; 128 | } else { 129 | position = 11; 130 | } 131 | } else if (feature_value < smallest_values[13]) { 132 | if (feature_value < smallest_values[12]) { 133 | position = 12; 134 | } else { 135 | position = 13; 136 | } 137 | } else if (feature_value < smallest_values[14]) { 138 | position = 14; 139 | } else { 140 | position = 15; 141 | } 142 | } 143 | 144 | // If we have detected a new small value, insert it at the correct position 145 | // and shift larger values up. 
146 | if (position > -1) { 147 | for (i = 15; i > position; i--) { 148 | smallest_values[i] = smallest_values[i - 1]; 149 | age[i] = age[i - 1]; 150 | } 151 | smallest_values[position] = feature_value; 152 | age[position] = 1; 153 | } 154 | 155 | // Get |current_median|. 156 | if (self->frame_counter > 2) { 157 | current_median = smallest_values[2]; 158 | } else if (self->frame_counter > 0) { 159 | current_median = smallest_values[0]; 160 | } 161 | 162 | // Smooth the median value. 163 | if (self->frame_counter > 0) { 164 | if (current_median < self->mean_value[channel]) { 165 | alpha = kSmoothingDown; // 0.2 in Q15. 166 | } else { 167 | alpha = kSmoothingUp; // 0.99 in Q15. 168 | } 169 | } 170 | tmp32 = (alpha + 1) * self->mean_value[channel]; 171 | tmp32 += (WEBRTC_SPL_WORD16_MAX - alpha) * current_median; 172 | tmp32 += 16384; 173 | self->mean_value[channel] = (int16_t) (tmp32 >> 15); 174 | 175 | return self->mean_value[channel]; 176 | } 177 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/vad/vad_sp.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | // This file includes specific signal processing tools used in vad_core.c. 12 | 13 | #ifndef COMMON_AUDIO_VAD_VAD_SP_H_ 14 | #define COMMON_AUDIO_VAD_VAD_SP_H_ 15 | 16 | #include "webrtc/common_audio/vad/vad_core.h" 17 | 18 | // Downsamples the signal by a factor 2, eg. 32->16 or 16->8. 19 | // 20 | // Inputs: 21 | // - signal_in : Input signal. 
22 | // - in_length : Length of input signal in samples. 23 | // 24 | // Input & Output: 25 | // - filter_state : Current filter states of the two all-pass filters. The 26 | // |filter_state| is updated after all samples have been 27 | // processed. 28 | // 29 | // Output: 30 | // - signal_out : Downsampled signal (of length |in_length| / 2). 31 | void WebRtcVad_Downsampling(const int16_t* signal_in, 32 | int16_t* signal_out, 33 | int32_t* filter_state, 34 | size_t in_length); 35 | 36 | // Updates and returns the smoothed feature minimum. As minimum we use the 37 | // median of the five smallest feature values in a 100 frames long window. 38 | // As long as |handle->frame_counter| is zero, that is, we haven't received any 39 | // "valid" data, FindMinimum() outputs the default value of 1600. 40 | // 41 | // Inputs: 42 | // - feature_value : New feature value to update with. 43 | // - channel : Channel number. 44 | // 45 | // Input & Output: 46 | // - handle : State information of the VAD. 47 | // 48 | // Returns: 49 | // : Smoothed minimum value for a moving window. 50 | int16_t WebRtcVad_FindMinimum(VadInstT* handle, 51 | int16_t feature_value, 52 | int channel); 53 | 54 | #endif // COMMON_AUDIO_VAD_VAD_SP_H_ 55 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/common_audio/vad/webrtc_vad.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #include "webrtc/common_audio/vad/include/webrtc_vad.h" 12 | 13 | #include <stdlib.h> 14 | #include <string.h> 15 | 16 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 17 | #include "webrtc/common_audio/vad/vad_core.h" 18 | 19 | static const int kInitCheck = 42; 20 | static const int kValidRates[] = { 8000, 16000, 32000, 48000 }; 21 | static const size_t kRatesSize = sizeof(kValidRates) / sizeof(*kValidRates); 22 | static const int kMaxFrameLengthMs = 30; 23 | /* Allocates a VAD instance with init_flag cleared; WebRtcVad_set_mode() and WebRtcVad_Process() reject the instance until init_flag equals kInitCheck. Returns NULL if malloc() fails. Release the instance with WebRtcVad_Free(). */ 24 | VadInst* WebRtcVad_Create() { 25 | VadInstT* self = (VadInstT*)malloc(sizeof(VadInstT)); 26 | 27 | WebRtcSpl_Init(); 28 | if (self != NULL) { self->init_flag = 0; } /* malloc() may fail; never dereference a NULL instance. */ 29 | 30 | return (VadInst*)self; 31 | } 32 | 33 | void WebRtcVad_Free(VadInst* handle) { 34 | free(handle); 35 | } 36 | 37 | // TODO(bjornv): Move WebRtcVad_InitCore() code here. 38 | int WebRtcVad_Init(VadInst* handle) { 39 | // Initialize the core VAD component. 40 | return WebRtcVad_InitCore((VadInstT*) handle); 41 | } 42 | 43 | // TODO(bjornv): Move WebRtcVad_set_mode_core() code here.
44 | int WebRtcVad_set_mode(VadInst* handle, int mode) { 45 | VadInstT* self = (VadInstT*) handle; 46 | /* Fail with -1 on a NULL handle or when init_flag != kInitCheck; otherwise forward to WebRtcVad_set_mode_core(). */ 47 | if (handle == NULL) { 48 | return -1; 49 | } 50 | if (self->init_flag != kInitCheck) { 51 | return -1; 52 | } 53 | 54 | return WebRtcVad_set_mode_core(self, mode); 55 | } 56 | /* Classifies one audio frame. Returns -1 for a NULL handle, an instance whose init_flag != kInitCheck, a NULL audio_frame, or an (fs, frame_length) pair rejected by WebRtcVad_ValidRateAndFrameLength(); otherwise returns the result of the rate-specific WebRtcVad_CalcVad*khz() routine, with any positive value reported as 1. */ 57 | int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame, 58 | size_t frame_length) { 59 | int vad = -1; 60 | VadInstT* self = (VadInstT*) handle; 61 | 62 | if (handle == NULL) { 63 | return -1; 64 | } 65 | 66 | if (self->init_flag != kInitCheck) { 67 | return -1; 68 | } 69 | if (audio_frame == NULL) { 70 | return -1; 71 | } 72 | if (WebRtcVad_ValidRateAndFrameLength(fs, frame_length) != 0) { 73 | return -1; 74 | } 75 | 76 | if (fs == 48000) { 77 | vad = WebRtcVad_CalcVad48khz(self, audio_frame, frame_length); 78 | } else if (fs == 32000) { 79 | vad = WebRtcVad_CalcVad32khz(self, audio_frame, frame_length); 80 | } else if (fs == 16000) { 81 | vad = WebRtcVad_CalcVad16khz(self, audio_frame, frame_length); 82 | } else if (fs == 8000) { 83 | vad = WebRtcVad_CalcVad8khz(self, audio_frame, frame_length); 84 | } 85 | /* Report any positive result as exactly 1. */ 86 | if (vad > 0) { 87 | vad = 1; 88 | } 89 | return vad; 90 | } 91 | /* Returns 0 if |rate| is one of kValidRates and |frame_length| equals 10, 20 or 30 ms worth of samples at that rate; returns -1 otherwise. */ 92 | int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length) { 93 | int return_value = -1; 94 | size_t i; 95 | int valid_length_ms; 96 | size_t valid_length; 97 | 98 | // We only allow 10, 20 or 30 ms frames. Loop through valid frame rates and 99 | // see if we have a matching pair.
100 | for (i = 0; i < kRatesSize; i++) { 101 | if (kValidRates[i] == rate) { 102 | for (valid_length_ms = 10; valid_length_ms <= kMaxFrameLengthMs; 103 | valid_length_ms += 10) { 104 | valid_length = (size_t)(kValidRates[i] / 1000 * valid_length_ms); 105 | if (frame_length == valid_length) { 106 | return_value = 0; 107 | break; 108 | } 109 | } 110 | break; 111 | } 112 | } 113 | 114 | return return_value; 115 | } 116 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/rtc_base/checks.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2006 The WebRTC Project Authors. All rights reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | // Most of this was borrowed (with minor modifications) from V8's and Chromium's 12 | // src/base/logging.cc. 13 | 14 | #include <cstdarg> 15 | #include <cstdio> 16 | #include <cstdlib> 17 | 18 | #if defined(WEBRTC_ANDROID) 19 | #define RTC_LOG_TAG_ANDROID "rtc" 20 | #include <android/log.h> // NOLINT 21 | #endif 22 | 23 | #if defined(WEBRTC_WIN) 24 | #include <windows.h> 25 | #endif 26 | 27 | #if defined(WEBRTC_WIN) 28 | #define LAST_SYSTEM_ERROR (::GetLastError()) 29 | #elif defined(__native_client__) && __native_client__ 30 | #define LAST_SYSTEM_ERROR (0) 31 | #elif defined(WEBRTC_POSIX) 32 | #include <errno.h> 33 | #define LAST_SYSTEM_ERROR (errno) 34 | #endif // WEBRTC_WIN 35 | 36 | #include "webrtc/rtc_base/checks.h" 37 | 38 | namespace { 39 | #if defined(__GNUC__) 40 | __attribute__((__format__(__printf__, 2, 3))) 41 | #endif 42 | void AppendFormat(std::string* s, const char* fmt, ...)
{ 43 | va_list args, copy; 44 | va_start(args, fmt); 45 | va_copy(copy, args); 46 | const int predicted_length = std::vsnprintf(nullptr, 0, fmt, copy); 47 | va_end(copy); 48 | 49 | if (predicted_length > 0) { 50 | const size_t size = s->size(); 51 | s->resize(size + predicted_length); 52 | // Pass "+ 1" to vsnprintf to include space for the '\0'. 53 | std::vsnprintf(&((*s)[size]), predicted_length + 1, fmt, args); 54 | } 55 | va_end(args); 56 | } 57 | } 58 | 59 | namespace rtc { 60 | namespace webrtc_checks_impl { 61 | 62 | // Reads one argument from args, appends it to s and advances fmt. 63 | // Returns true iff an argument was successfully parsed. 64 | bool ParseArg(va_list* args, const CheckArgType** fmt, std::string* s) { 65 | if (**fmt == CheckArgType::kEnd) 66 | return false; 67 | 68 | switch (**fmt) { 69 | case CheckArgType::kInt: 70 | AppendFormat(s, "%d", va_arg(*args, int)); 71 | break; 72 | case CheckArgType::kLong: 73 | AppendFormat(s, "%ld", va_arg(*args, long)); 74 | break; 75 | case CheckArgType::kLongLong: 76 | AppendFormat(s, "%lld", va_arg(*args, long long)); 77 | break; 78 | case CheckArgType::kUInt: 79 | AppendFormat(s, "%u", va_arg(*args, unsigned)); 80 | break; 81 | case CheckArgType::kULong: 82 | AppendFormat(s, "%lu", va_arg(*args, unsigned long)); 83 | break; 84 | case CheckArgType::kULongLong: 85 | AppendFormat(s, "%llu", va_arg(*args, unsigned long long)); 86 | break; 87 | case CheckArgType::kDouble: 88 | AppendFormat(s, "%g", va_arg(*args, double)); 89 | break; 90 | case CheckArgType::kLongDouble: 91 | AppendFormat(s, "%Lg", va_arg(*args, long double)); 92 | break; 93 | case CheckArgType::kCharP: 94 | s->append(va_arg(*args, const char*)); 95 | break; 96 | case CheckArgType::kStdString: 97 | s->append(*va_arg(*args, const std::string*)); 98 | break; 99 | case CheckArgType::kVoidP: 100 | AppendFormat(s, "%p", va_arg(*args, const void*)); 101 | break; 102 | default: 103 | s->append("[Invalid CheckArgType]"); 104 | return false; 105 | } 106
| (*fmt)++; 107 | return true; 108 | } 109 | /* Builds a fatal-error report from |file|, |line|, LAST_SYSTEM_ERROR and |message|, appends the variadic arguments described by the |fmt| array (a leading kCheckOp means the first two arguments are the compared operands, printed as "(a vs. b)"), writes the report to stderr (and to the Android log when WEBRTC_ANDROID is defined), then calls abort(). */ 110 | RTC_NORETURN void FatalLog(const char* file, 111 | int line, 112 | const char* message, 113 | const CheckArgType* fmt, 114 | ...) { 115 | va_list args; 116 | va_start(args, fmt); 117 | 118 | std::string s; 119 | AppendFormat(&s, 120 | "\n\n" 121 | "#\n" 122 | "# Fatal error in: %s, line %d\n" 123 | "# last system error: %u\n" 124 | "# Check failed: %s", 125 | file, line, LAST_SYSTEM_ERROR, message); 126 | 127 | if (*fmt == CheckArgType::kCheckOp) { 128 | // This log message was generated by RTC_CHECK_OP, so we have to complete 129 | // the error message using the operands that have been passed as the first 130 | // two arguments. 131 | fmt++; 132 | 133 | std::string s1, s2; 134 | if (ParseArg(&args, &fmt, &s1) && ParseArg(&args, &fmt, &s2)) 135 | AppendFormat(&s, " (%s vs. %s)\n# ", s1.c_str(), s2.c_str()); 136 | } else { 137 | s.append("\n# "); 138 | } 139 | 140 | // Append all the user-supplied arguments to the message. 141 | while (ParseArg(&args, &fmt, &s)) 142 | ; 143 | 144 | va_end(args); 145 | 146 | const char* output = s.c_str(); 147 | 148 | #if defined(WEBRTC_ANDROID) 149 | __android_log_print(ANDROID_LOG_ERROR, RTC_LOG_TAG_ANDROID, "%s\n", output); 150 | #endif 151 | /* NOTE(review): stdout is flushed before the report is written to stderr, presumably so earlier program output is not lost when abort() runs - confirm. */ 152 | fflush(stdout); 153 | fprintf(stderr, "%s", output); 154 | fflush(stderr); 155 | abort(); 156 | } 157 | 158 | } // namespace webrtc_checks_impl 159 | } // namespace rtc 160 | 161 | // Function to call from the C version of the RTC_CHECK and RTC_DCHECK macros.
162 | RTC_NORETURN void rtc_FatalMessage(const char* file, int line, 163 | const char* msg) { 164 | static constexpr rtc::webrtc_checks_impl::CheckArgType t[] = { 165 | rtc::webrtc_checks_impl::CheckArgType::kEnd}; 166 | FatalLog(file, line, msg, t); 167 | } 168 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/rtc_base/compile_assert_c.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef RTC_BASE_COMPILE_ASSERT_C_H_ 12 | #define RTC_BASE_COMPILE_ASSERT_C_H_ 13 | 14 | // Use this macro to verify at compile time that certain restrictions are met. 15 | // The argument is the boolean expression to evaluate. 16 | // Example: 17 | // RTC_COMPILE_ASSERT(sizeof(foo) < 128); 18 | // Note: In C++, use static_assert instead! 19 | #define RTC_COMPILE_ASSERT(expression) \ 20 | switch (0) { \ 21 | case 0: \ 22 | case expression:; \ 23 | } 24 | 25 | #endif // RTC_BASE_COMPILE_ASSERT_C_H_ 26 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/rtc_base/sanitizer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 The WebRTC Project Authors. All rights reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. 
All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef RTC_BASE_SANITIZER_H_ 12 | #define RTC_BASE_SANITIZER_H_ 13 | 14 | #include <stddef.h> // For size_t. 15 | 16 | #ifdef __cplusplus 17 | #include <type_traits> 18 | #endif 19 | 20 | #if defined(__has_feature) 21 | #if __has_feature(address_sanitizer) 22 | #define RTC_HAS_ASAN 1 23 | #endif 24 | #if __has_feature(memory_sanitizer) 25 | #define RTC_HAS_MSAN 1 26 | #endif 27 | #endif 28 | #ifndef RTC_HAS_ASAN 29 | #define RTC_HAS_ASAN 0 30 | #endif 31 | #ifndef RTC_HAS_MSAN 32 | #define RTC_HAS_MSAN 0 33 | #endif 34 | 35 | #if RTC_HAS_ASAN 36 | #include <sanitizer/asan_interface.h> 37 | #endif 38 | #if RTC_HAS_MSAN 39 | #include <sanitizer/msan_interface.h> 40 | #endif 41 | 42 | #ifdef __has_attribute 43 | #if __has_attribute(no_sanitize) 44 | #define RTC_NO_SANITIZE(what) __attribute__((no_sanitize(what))) 45 | #endif 46 | #endif 47 | #ifndef RTC_NO_SANITIZE 48 | #define RTC_NO_SANITIZE(what) 49 | #endif 50 | 51 | // Ask ASan to mark the memory range [ptr, ptr + element_size * num_elements) 52 | // as being unaddressable, so that reads and writes are not allowed. ASan may 53 | // narrow the range to the nearest alignment boundaries. 54 | static inline void rtc_AsanPoison(const volatile void* ptr, 55 | size_t element_size, 56 | size_t num_elements) { 57 | #if RTC_HAS_ASAN 58 | ASAN_POISON_MEMORY_REGION(ptr, element_size * num_elements); 59 | #endif 60 | } 61 | 62 | // Ask ASan to mark the memory range [ptr, ptr + element_size * num_elements) 63 | // as being addressable, so that reads and writes are allowed. ASan may widen 64 | // the range to the nearest alignment boundaries. 65 | static inline void rtc_AsanUnpoison(const volatile void* ptr, 66 | size_t element_size, 67 | size_t num_elements) { 68 | #if RTC_HAS_ASAN 69 | ASAN_UNPOISON_MEMORY_REGION(ptr, element_size * num_elements); 70 | #endif 71 | } 72 | 73 | // Ask MSan to mark the memory range [ptr, ptr + element_size * num_elements) 74 | // as being uninitialized.
75 | static inline void rtc_MsanMarkUninitialized(const volatile void* ptr, 76 | size_t element_size, 77 | size_t num_elements) { 78 | #if RTC_HAS_MSAN 79 | __msan_poison(ptr, element_size * num_elements); 80 | #endif 81 | } 82 | 83 | // Force an MSan check (if any bits in the memory range [ptr, ptr + 84 | // element_size * num_elements) are uninitialized the call will crash with an 85 | // MSan report). 86 | static inline void rtc_MsanCheckInitialized(const volatile void* ptr, 87 | size_t element_size, 88 | size_t num_elements) { 89 | #if RTC_HAS_MSAN 90 | __msan_check_mem_is_initialized(ptr, element_size * num_elements); 91 | #endif 92 | } 93 | 94 | #ifdef __cplusplus 95 | 96 | namespace rtc { 97 | namespace sanitizer_impl { 98 | 99 | template <typename T> 100 | constexpr bool IsTriviallyCopyable() { 101 | return static_cast<bool>(std::is_trivially_copy_constructible<T>::value && 102 | (std::is_trivially_copy_assignable<T>::value || 103 | !std::is_copy_assignable<T>::value) && 104 | std::is_trivially_destructible<T>::value); 105 | } 106 | 107 | } // namespace sanitizer_impl 108 | 109 | template <typename T> 110 | inline void AsanPoison(const T& mem) { 111 | rtc_AsanPoison(mem.data(), sizeof(mem.data()[0]), mem.size()); 112 | } 113 | 114 | template <typename T> 115 | inline void AsanUnpoison(const T& mem) { 116 | rtc_AsanUnpoison(mem.data(), sizeof(mem.data()[0]), mem.size()); 117 | } 118 | 119 | template <typename T> 120 | inline void MsanMarkUninitialized(const T& mem) { 121 | rtc_MsanMarkUninitialized(mem.data(), sizeof(mem.data()[0]), mem.size()); 122 | } 123 | 124 | template <typename T> 125 | inline T MsanUninitialized(T t) { 126 | #if RTC_HAS_MSAN 127 | // TODO(bugs.webrtc.org/8762): Switch to std::is_trivially_copyable when it 128 | // becomes available in downstream projects.
129 | static_assert(sanitizer_impl::IsTriviallyCopyable<T>(), ""); 130 | #endif 131 | rtc_MsanMarkUninitialized(&t, sizeof(T), 1); 132 | return t; 133 | } 134 | 135 | template <typename T> 136 | inline void MsanCheckInitialized(const T& mem) { 137 | rtc_MsanCheckInitialized(mem.data(), sizeof(mem.data()[0]), mem.size()); 138 | } 139 | 140 | } // namespace rtc 141 | 142 | #endif // __cplusplus 143 | 144 | #endif // RTC_BASE_SANITIZER_H_ 145 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/rtc_base/system/arch.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | // This file contains platform-specific typedefs and defines. 12 | // Much of it is derived from Chromium's build/build_config.h. 13 | 14 | #ifndef RTC_BASE_SYSTEM_ARCH_H_ 15 | #define RTC_BASE_SYSTEM_ARCH_H_ 16 | 17 | // Processor architecture detection.
For more info on what's defined, see: 18 | // http://msdn.microsoft.com/en-us/library/b0084kay.aspx 19 | // http://www.agner.org/optimize/calling_conventions.pdf 20 | // or with gcc, run: "echo | gcc -E -dM -" 21 | #if defined(_M_X64) || defined(__x86_64__) 22 | #define WEBRTC_ARCH_X86_FAMILY 23 | #define WEBRTC_ARCH_X86_64 24 | #define WEBRTC_ARCH_64_BITS 25 | #define WEBRTC_ARCH_LITTLE_ENDIAN 26 | #elif defined(__aarch64__) 27 | #define WEBRTC_ARCH_ARM_FAMILY 28 | #define WEBRTC_ARCH_64_BITS 29 | #define WEBRTC_ARCH_LITTLE_ENDIAN 30 | #elif defined(_M_IX86) || defined(__i386__) 31 | #define WEBRTC_ARCH_X86_FAMILY 32 | #define WEBRTC_ARCH_X86 33 | #define WEBRTC_ARCH_32_BITS 34 | #define WEBRTC_ARCH_LITTLE_ENDIAN 35 | #elif defined(__ARMEL__) 36 | #define WEBRTC_ARCH_ARM_FAMILY 37 | #define WEBRTC_ARCH_32_BITS 38 | #define WEBRTC_ARCH_LITTLE_ENDIAN 39 | #elif defined(__MIPSEL__) 40 | #define WEBRTC_ARCH_MIPS_FAMILY 41 | #if defined(__LP64__) 42 | #define WEBRTC_ARCH_64_BITS 43 | #else 44 | #define WEBRTC_ARCH_32_BITS 45 | #endif 46 | #define WEBRTC_ARCH_LITTLE_ENDIAN 47 | #elif defined(__pnacl__) 48 | #define WEBRTC_ARCH_32_BITS 49 | #define WEBRTC_ARCH_LITTLE_ENDIAN 50 | #else 51 | #error Please add support for your architecture in typedefs.h 52 | #endif 53 | 54 | #if !(defined(WEBRTC_ARCH_LITTLE_ENDIAN) ^ defined(WEBRTC_ARCH_BIG_ENDIAN)) 55 | #error Define either WEBRTC_ARCH_LITTLE_ENDIAN or WEBRTC_ARCH_BIG_ENDIAN 56 | #endif 57 | 58 | #endif // RTC_BASE_SYSTEM_ARCH_H_ 59 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/rtc_base/system/inline.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. 
An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef RTC_BASE_SYSTEM_INLINE_H_ 12 | #define RTC_BASE_SYSTEM_INLINE_H_ 13 | 14 | #if defined(_MSC_VER) 15 | 16 | #define RTC_FORCE_INLINE __forceinline 17 | #define RTC_NO_INLINE __declspec(noinline) 18 | 19 | #elif defined(__GNUC__) 20 | 21 | #define RTC_FORCE_INLINE __attribute__((__always_inline__)) 22 | #define RTC_NO_INLINE __attribute__((__noinline__)) 23 | 24 | #else 25 | 26 | #define RTC_FORCE_INLINE 27 | #define RTC_NO_INLINE 28 | 29 | #endif 30 | 31 | #endif // RTC_BASE_SYSTEM_INLINE_H_ 32 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/rtc_base/type_traits.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 The WebRTC Project Authors. All rights reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef RTC_BASE_TYPE_TRAITS_H_ 12 | #define RTC_BASE_TYPE_TRAITS_H_ 13 | 14 | #include <cstddef> 15 | #include <type_traits> 16 | 17 | namespace rtc { 18 | 19 | // Determines if the given class has zero-argument .data() and .size() methods 20 | // whose return values are convertible to T* and size_t, respectively.
21 | template <typename DS, typename T> 22 | class HasDataAndSize { 23 | private: 24 | template < 25 | typename C, 26 | typename std::enable_if< 27 | std::is_convertible<decltype(std::declval<C>().data()), T*>::value && 28 | std::is_convertible<decltype(std::declval<C>().size()), 29 | std::size_t>::value>::type* = nullptr> 30 | static int Test(int); 31 | 32 | template <typename> 33 | static char Test(...); 34 | 35 | public: 36 | static constexpr bool value = std::is_same<decltype(Test<DS>(0)), int>::value; 37 | }; 38 | 39 | namespace test_has_data_and_size { 40 | 41 | template <typename DR, typename SR> 42 | struct Test1 { 43 | DR data(); 44 | SR size(); 45 | }; 46 | static_assert(HasDataAndSize<Test1<int*, int>, int>::value, ""); 47 | static_assert(HasDataAndSize<Test1<int*, int>, const int>::value, ""); 48 | static_assert(HasDataAndSize<Test1<const int*, int>, const int>::value, ""); 49 | static_assert(!HasDataAndSize<Test1<const int*, int>, int>::value, 50 | "implicit cast of const int* to int*"); 51 | static_assert(!HasDataAndSize<Test1<char*, size_t>, int>::value, 52 | "implicit cast of char* to int*"); 53 | 54 | struct Test2 { 55 | int* data; 56 | size_t size; 57 | }; 58 | static_assert(!HasDataAndSize<Test2, int>::value, 59 | ".data and .size aren't functions"); 60 | 61 | struct Test3 { 62 | int* data(); 63 | }; 64 | static_assert(!HasDataAndSize<Test3, int>::value, ".size() is missing"); 65 | 66 | class Test4 { 67 | int* data(); 68 | size_t size(); 69 | }; 70 | static_assert(!HasDataAndSize<Test4, int>::value, 71 | ".data() and .size() are private"); 72 | 73 | } // namespace test_has_data_and_size 74 | 75 | namespace type_traits_impl { 76 | 77 | // Determines if the given type is an enum that converts implicitly to 78 | // an integral type. 79 | template <typename T> 80 | struct IsIntEnum { 81 | private: 82 | // This overload is used if the type is an enum, and unary plus 83 | // compiles and turns it into an integral type. 84 | template <typename X, 85 | typename std::enable_if< 86 | std::is_enum<X>::value && 87 | std::is_integral<decltype(+std::declval<X>())>::value>::type* = 88 | nullptr> 89 | static int Test(int); 90 | 91 | // Otherwise, this overload is used.
92 | template <typename> 93 | static char Test(...); 94 | 95 | public: 96 | static constexpr bool value = 97 | std::is_same<decltype(Test<typename std::remove_reference<T>::type>(0)), 98 | int>::value; 99 | }; 100 | 101 | } // namespace type_traits_impl 102 | 103 | // Determines if the given type is integral, or an enum that 104 | // converts implicitly to an integral type. 105 | template <typename T> 106 | struct IsIntlike { 107 | private: 108 | using X = typename std::remove_reference<T>::type; 109 | 110 | public: 111 | static constexpr bool value = 112 | std::is_integral<X>::value || type_traits_impl::IsIntEnum<X>::value; 113 | }; 114 | 115 | namespace test_enum_intlike { 116 | 117 | enum E1 { e1 }; 118 | enum { e2 }; 119 | enum class E3 { e3 }; 120 | struct S {}; 121 | 122 | static_assert(type_traits_impl::IsIntEnum<E1>::value, ""); 123 | static_assert(type_traits_impl::IsIntEnum<decltype(e2)>::value, ""); 124 | static_assert(!type_traits_impl::IsIntEnum<E3>::value, ""); 125 | static_assert(!type_traits_impl::IsIntEnum<int>::value, ""); 126 | static_assert(!type_traits_impl::IsIntEnum<float>::value, ""); 127 | static_assert(!type_traits_impl::IsIntEnum<S>::value, ""); 128 | 129 | static_assert(IsIntlike<E1>::value, ""); 130 | static_assert(IsIntlike<decltype(e2)>::value, ""); 131 | static_assert(!IsIntlike<E3>::value, ""); 132 | static_assert(IsIntlike<int>::value, ""); 133 | static_assert(!IsIntlike<float>::value, ""); 134 | static_assert(!IsIntlike<S>::value, ""); 135 | 136 | } // namespace test_enum_intlike 137 | 138 | } // namespace rtc 139 | 140 | #endif // RTC_BASE_TYPE_TRAITS_H_ 141 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/system_wrappers/include/cpu_features_wrapper.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree.
An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef SYSTEM_WRAPPERS_INCLUDE_CPU_FEATURES_WRAPPER_H_ 12 | #define SYSTEM_WRAPPERS_INCLUDE_CPU_FEATURES_WRAPPER_H_ 13 | 14 | #include <stdint.h> 15 | 16 | #if defined(__cplusplus) || defined(c_plusplus) 17 | extern "C" { 18 | #endif 19 | 20 | // List of features in x86. 21 | typedef enum { kSSE2, kSSE3 } CPUFeature; 22 | 23 | // List of features in ARM. 24 | enum { 25 | kCPUFeatureARMv7 = (1 << 0), 26 | kCPUFeatureVFPv3 = (1 << 1), 27 | kCPUFeatureNEON = (1 << 2), 28 | kCPUFeatureLDREXSTREX = (1 << 3) 29 | }; 30 | 31 | typedef int (*WebRtc_CPUInfo)(CPUFeature feature); 32 | 33 | // Returns true if the CPU supports the feature. 34 | extern WebRtc_CPUInfo WebRtc_GetCPUInfo; 35 | 36 | // No CPU feature is available => straight C path. 37 | extern WebRtc_CPUInfo WebRtc_GetCPUInfoNoASM; 38 | 39 | // Return the features in an ARM device. 40 | // It detects the features in the hardware platform, and returns supported 41 | // values in the above enum definition as a bitmask. 42 | extern uint64_t WebRtc_GetCPUFeaturesARM(void); 43 | 44 | #if defined(__cplusplus) || defined(c_plusplus) 45 | } // extern "C" 46 | #endif 47 | 48 | #endif // SYSTEM_WRAPPERS_INCLUDE_CPU_FEATURES_WRAPPER_H_ 49 | -------------------------------------------------------------------------------- /cpp_onnx/third_party/webrtc/typedefs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS.
All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | // This file contains platform-specific typedefs and defines. 12 | // Much of it is derived from Chromium's build/build_config.h. 13 | 14 | #ifndef WEBRTC_TYPEDEFS_H_ 15 | #define WEBRTC_TYPEDEFS_H_ 16 | 17 | // Processor architecture detection. For more info on what's defined, see: 18 | // http://msdn.microsoft.com/en-us/library/b0084kay.aspx 19 | // http://www.agner.org/optimize/calling_conventions.pdf 20 | // or with gcc, run: "echo | gcc -E -dM -" 21 | #if defined(_M_X64) || defined(__x86_64__) 22 | #define WEBRTC_ARCH_X86_FAMILY 23 | #define WEBRTC_ARCH_X86_64 24 | #define WEBRTC_ARCH_64_BITS 25 | #define WEBRTC_ARCH_LITTLE_ENDIAN 26 | #elif defined(__aarch64__) 27 | #define WEBRTC_ARCH_64_BITS 28 | #define WEBRTC_ARCH_LITTLE_ENDIAN 29 | #elif defined(_M_IX86) || defined(__i386__) 30 | #define WEBRTC_ARCH_X86_FAMILY 31 | #define WEBRTC_ARCH_X86 32 | #define WEBRTC_ARCH_32_BITS 33 | #define WEBRTC_ARCH_LITTLE_ENDIAN 34 | #elif defined(__ARMEL__) 35 | // TODO(ajm): We'd prefer to control platform defines here, but this is 36 | // currently provided by the Android makefiles. Commented to avoid duplicate 37 | // definition warnings. 38 | //#define WEBRTC_ARCH_ARM 39 | // TODO(ajm): Chromium uses the following two defines. Should we switch? 
40 | //#define WEBRTC_ARCH_ARM_FAMILY 41 | //#define WEBRTC_ARCH_ARMEL 42 | #define WEBRTC_ARCH_32_BITS 43 | #define WEBRTC_ARCH_LITTLE_ENDIAN 44 | #elif defined(__MIPSEL__) 45 | #define WEBRTC_ARCH_32_BITS 46 | #define WEBRTC_ARCH_LITTLE_ENDIAN 47 | #elif defined(__pnacl__) 48 | #define WEBRTC_ARCH_32_BITS 49 | #define WEBRTC_ARCH_LITTLE_ENDIAN 50 | #elif defined(__PPC__) 51 | #if defined(__PPC64__) 52 | #define WEBRTC_ARCH_64_BITS 53 | #else 54 | #define WEBRTC_ARCH_32_BITS 55 | #endif 56 | #define WEBRTC_ARCH_BIG_ENDIAN 57 | #else 58 | #error Please add support for your architecture in typedefs.h 59 | #endif 60 | 61 | #if !(defined(WEBRTC_ARCH_LITTLE_ENDIAN) ^ defined(WEBRTC_ARCH_BIG_ENDIAN)) 62 | #error Define either WEBRTC_ARCH_LITTLE_ENDIAN or WEBRTC_ARCH_BIG_ENDIAN 63 | #endif 64 | 65 | #if (defined(WEBRTC_ARCH_X86_FAMILY) && !defined(__SSE2__)) || \ 66 | (defined(WEBRTC_ARCH_ARM_V7) && !defined(WEBRTC_ARCH_ARM_NEON)) 67 | #define WEBRTC_CPU_DETECTION 68 | #endif 69 | 70 | #if !defined(_MSC_VER) 71 | #include <stdint.h> 72 | #else 73 | // Define C99 equivalent types, since pre-2010 MSVC doesn't provide stdint.h. 74 | typedef signed char int8_t; 75 | typedef signed short int16_t; 76 | typedef signed int int32_t; 77 | typedef __int64 int64_t; 78 | typedef unsigned char uint8_t; 79 | typedef unsigned short uint16_t; 80 | typedef unsigned int uint32_t; 81 | typedef unsigned __int64 uint64_t; 82 | #endif 83 | 84 | // Borrowed from Chromium's base/compiler_specific.h. 85 | // Annotate a virtual method indicating it must be overriding a virtual 86 | // method in the parent class. 87 | // Use like: 88 | // virtual void foo() OVERRIDE; 89 | #if defined(_MSC_VER) 90 | #define OVERRIDE override 91 | #elif defined(__clang__) 92 | // Clang defaults to C++03 and warns about using override. Squelch that. 93 | // Intentionally no push/pop here so all users of OVERRIDE ignore the warning 94 | // too.
This is like passing -Wno-c++11-extensions, except that GCC won't die 95 | // (because it won't see this pragma). 96 | #pragma clang diagnostic ignored "-Wc++11-extensions" 97 | #define OVERRIDE override 98 | #elif defined(__GNUC__) && __cplusplus >= 201103 && \ 99 | (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40700 100 | // GCC 4.7 supports explicit virtual overrides when C++11 support is enabled. 101 | #define OVERRIDE override 102 | #else 103 | #define OVERRIDE 104 | #endif 105 | 106 | // Annotate a function indicating the caller must examine the return value. 107 | // Use like: 108 | // int foo() WARN_UNUSED_RESULT; 109 | // TODO(ajm): Hack to avoid multiple definitions until the base/ of webrtc and 110 | // libjingle are merged. 111 | #if !defined(WARN_UNUSED_RESULT) 112 | #if defined(__GNUC__) 113 | #define WARN_UNUSED_RESULT __attribute__((warn_unused_result)) 114 | #else 115 | #define WARN_UNUSED_RESULT 116 | #endif 117 | #endif // WARN_UNUSED_RESULT 118 | 119 | // Put after a variable that might not be used, to prevent compiler warnings: 120 | // int result ATTRIBUTE_UNUSED = DoSomething(); 121 | // assert(result == 17); 122 | #ifndef ATTRIBUTE_UNUSED 123 | #if defined(__GNUC__) || defined(__clang__) 124 | #define ATTRIBUTE_UNUSED __attribute__((unused)) 125 | #else 126 | #define ATTRIBUTE_UNUSED 127 | #endif 128 | #endif 129 | 130 | // Macro to be used for switch-case fallthrough (required for enabling 131 | // -Wimplicit-fallthrough warning on Clang). 132 | #ifndef FALLTHROUGH 133 | #if defined(__clang__) 134 | #define FALLTHROUGH() [[clang::fallthrough]] 135 | #else 136 | #define FALLTHROUGH() do { } while (0) 137 | #endif 138 | #endif 139 | 140 | // Annotate a function that will not return control flow to the caller. 
141 | #if defined(_MSC_VER) 142 | #define NO_RETURN __declspec(noreturn) 143 | #elif defined(__GNUC__) 144 | #define NO_RETURN __attribute__((noreturn)) 145 | #else 146 | #define NO_RETURN 147 | #endif 148 | 149 | #endif // WEBRTC_TYPEDEFS_H_ 150 | -------------------------------------------------------------------------------- /cpp_onnx/wave/asr_example.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/wave/asr_example.wav -------------------------------------------------------------------------------- /cpp_onnx/wave/long.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/wave/long.wav -------------------------------------------------------------------------------- /cpp_onnx/wave/short.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/wave/short.wav -------------------------------------------------------------------------------- /cpp_onnx/wave/test.pcm.bytes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/wave/test.pcm.bytes -------------------------------------------------------------------------------- /cpp_onnx/wave/test.pcm.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/wave/test.pcm.wav -------------------------------------------------------------------------------- /cpp_onnx/win/bin/x64/libfftw3-3.dll: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x64/libfftw3-3.dll -------------------------------------------------------------------------------- /cpp_onnx/win/bin/x64/libfftw3f-3.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x64/libfftw3f-3.dll -------------------------------------------------------------------------------- /cpp_onnx/win/bin/x64/libfftw3l-3.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x64/libfftw3l-3.dll -------------------------------------------------------------------------------- /cpp_onnx/win/bin/x64/onnxruntime.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x64/onnxruntime.dll -------------------------------------------------------------------------------- /cpp_onnx/win/bin/x86/libfftw3-3.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x86/libfftw3-3.dll -------------------------------------------------------------------------------- /cpp_onnx/win/bin/x86/libfftw3f-3.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x86/libfftw3f-3.dll -------------------------------------------------------------------------------- /cpp_onnx/win/bin/x86/libfftw3l-3.dll: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x86/libfftw3l-3.dll -------------------------------------------------------------------------------- /cpp_onnx/win/bin/x86/onnxruntime.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x86/onnxruntime.dll -------------------------------------------------------------------------------- /cpp_onnx/win/images/sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/images/sample.png -------------------------------------------------------------------------------- /cpp_onnx/win/include/cpu_provider_factory.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #include "onnxruntime_c_api.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | /** 11 | * \param use_arena zero: false. non-zero: true. 12 | */ 13 | ORT_EXPORT 14 | ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_CPU, _In_ OrtSessionOptions* options, int use_arena) 15 | ORT_ALL_ARGS_NONNULL; 16 | 17 | #ifdef __cplusplus 18 | } 19 | #endif 20 | -------------------------------------------------------------------------------- /cpp_onnx/win/include/onnxruntime_run_options_config_keys.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #pragma once 5 | 6 | /* 7 | * This file defines RunOptions Config Keys and format of the Config Values. 
8 | * 9 | * The Naming Convention for a RunOptions Config Key, 10 | * "[Area][.[SubArea1].[SubArea2]...].[Keyname]" 11 | * Such as "ep.cuda.use_arena" 12 | * The Config Key cannot be empty 13 | * The maximum length of the Config Key is 128 14 | * 15 | * The string format of a RunOptions Config Value is defined individually for each Config. 16 | * The maximum length of the Config Value is 1024 17 | */ 18 | 19 | // Key for enabling shrinkages of user listed device memory arenas. 20 | // Expects a list of semi-colon separated key value pairs separated by colon in the following format: 21 | // "device_0:device_id_0;device_1:device_id_1" 22 | // No white-spaces allowed in the provided list string. 23 | // Currently, the only supported devices are : "cpu", "gpu" (case sensitive). 24 | // If "cpu" is included in the list, DisableCpuMemArena() API must not be called (i.e.) arena for cpu should be enabled. 25 | // Example usage: "cpu:0;gpu:0" (or) "gpu:0" 26 | // By default, the value for this key is empty (i.e.) no memory arenas are shrunk 27 | static const char* const kOrtRunOptionsConfigEnableMemoryArenaShrinkage = "memory.enable_memory_arena_shrinkage"; 28 | 29 | // Set to '1' to not synchronize execution providers with CPU at the end of session run. 30 | // Per default it will be set to '0' 31 | // Taking CUDA EP as an example, it omit triggering cudaStreamSynchronize on the compute stream. 32 | static const char* const kOrtRunOptionsConfigDisableSynchronizeExecutionProviders = "disable_synchronize_execution_providers"; 33 | -------------------------------------------------------------------------------- /cpp_onnx/win/include/provider_options.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 
3 | 4 | #pragma once 5 | 6 | #include <string> 7 | #include <unordered_map> 8 | #include <vector> 9 | 10 | namespace onnxruntime { 11 | 12 | // data types for execution provider options 13 | 14 | using ProviderOptions = std::unordered_map<std::string, std::string>; 15 | using ProviderOptionsVector = std::vector<ProviderOptions>; 16 | using ProviderOptionsMap = std::unordered_map<std::string, ProviderOptions>; 17 | 18 | } // namespace onnxruntime 19 | -------------------------------------------------------------------------------- /cpp_onnx/win/include/tensorrt_provider_factory.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. All rights reserved. 2 | // Licensed under the MIT License. 3 | 4 | #include "onnxruntime_c_api.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | -------------------------------------------------------------------------------- /cpp_onnx/win/lib/x64/libfftw3-3.exp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/libfftw3-3.exp -------------------------------------------------------------------------------- /cpp_onnx/win/lib/x64/libfftw3-3.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/libfftw3-3.lib -------------------------------------------------------------------------------- /cpp_onnx/win/lib/x64/libfftw3f-3.exp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/libfftw3f-3.exp 
-------------------------------------------------------------------------------- /cpp_onnx/win/lib/x64/libfftw3f-3.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/libfftw3f-3.lib -------------------------------------------------------------------------------- /cpp_onnx/win/lib/x64/libfftw3l-3.exp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/libfftw3l-3.exp -------------------------------------------------------------------------------- /cpp_onnx/win/lib/x64/libfftw3l-3.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/libfftw3l-3.lib -------------------------------------------------------------------------------- /cpp_onnx/win/lib/x64/onnxruntime.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/onnxruntime.lib -------------------------------------------------------------------------------- /cpp_onnx/win/lib/x86/libfftw3-3.exp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/libfftw3-3.exp -------------------------------------------------------------------------------- /cpp_onnx/win/lib/x86/libfftw3-3.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/libfftw3-3.lib 
-------------------------------------------------------------------------------- /cpp_onnx/win/lib/x86/libfftw3f-3.exp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/libfftw3f-3.exp -------------------------------------------------------------------------------- /cpp_onnx/win/lib/x86/libfftw3f-3.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/libfftw3f-3.lib -------------------------------------------------------------------------------- /cpp_onnx/win/lib/x86/libfftw3l-3.exp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/libfftw3l-3.exp -------------------------------------------------------------------------------- /cpp_onnx/win/lib/x86/libfftw3l-3.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/libfftw3l-3.lib -------------------------------------------------------------------------------- /cpp_onnx/win/lib/x86/onnxruntime.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/onnxruntime.lib -------------------------------------------------------------------------------- /cpp_onnx/win/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /python/.gitattributes: 
-------------------------------------------------------------------------------- 1 | *.7z filter=lfs diff=lfs merge=lfs -text 2 | *.arrow filter=lfs diff=lfs merge=lfs -text 3 | *.bin filter=lfs diff=lfs merge=lfs -text 4 | *.bz2 filter=lfs diff=lfs merge=lfs -text 5 | *.ckpt filter=lfs diff=lfs merge=lfs -text 6 | *.ftz filter=lfs diff=lfs merge=lfs -text 7 | *.gz filter=lfs diff=lfs merge=lfs -text 8 | *.h5 filter=lfs diff=lfs merge=lfs -text 9 | *.joblib filter=lfs diff=lfs merge=lfs -text 10 | *.lfs.* filter=lfs diff=lfs merge=lfs -text 11 | *.mlmodel filter=lfs diff=lfs merge=lfs -text 12 | *.model filter=lfs diff=lfs merge=lfs -text 13 | *.msgpack filter=lfs diff=lfs merge=lfs -text 14 | *.npy filter=lfs diff=lfs merge=lfs -text 15 | *.npz filter=lfs diff=lfs merge=lfs -text 16 | *.onnx filter=lfs diff=lfs merge=lfs -text 17 | *.ot filter=lfs diff=lfs merge=lfs -text 18 | *.parquet filter=lfs diff=lfs merge=lfs -text 19 | *.pb filter=lfs diff=lfs merge=lfs -text 20 | *.pickle filter=lfs diff=lfs merge=lfs -text 21 | *.pkl filter=lfs diff=lfs merge=lfs -text 22 | *.pt filter=lfs diff=lfs merge=lfs -text 23 | *.pth filter=lfs diff=lfs merge=lfs -text 24 | *.rar filter=lfs diff=lfs merge=lfs -text 25 | *.safetensors filter=lfs diff=lfs merge=lfs -text 26 | saved_model/**/* filter=lfs diff=lfs merge=lfs -text 27 | *.tar.* filter=lfs diff=lfs merge=lfs -text 28 | *.tar filter=lfs diff=lfs merge=lfs -text 29 | *.tflite filter=lfs diff=lfs merge=lfs -text 30 | *.tgz filter=lfs diff=lfs merge=lfs -text 31 | *.wasm filter=lfs diff=lfs merge=lfs -text 32 | *.xz filter=lfs diff=lfs merge=lfs -text 33 | *.zip filter=lfs diff=lfs merge=lfs -text 34 | *.zst filter=lfs diff=lfs merge=lfs -text 35 | *tfevents* filter=lfs diff=lfs merge=lfs -text 36 | -------------------------------------------------------------------------------- /python/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: 
https://gitee.com/SWHL/autoflake 3 | rev: v2.1.1 4 | hooks: 5 | - id: autoflake 6 | args: 7 | [ 8 | "--recursive", 9 | "--in-place", 10 | "--remove-all-unused-imports", 11 | "--remove-unused-variable", 12 | "--ignore-init-module-imports", 13 | ] 14 | files: \.py$ 15 | - repo: https://gitee.com/SWHL/black 16 | rev: 23.1.0 17 | hooks: 18 | - id: black 19 | files: \.py$ -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 |
2 |
3 |

Rapid Paraformer

4 |
5 | 6 | 7 | 8 | 9 | PyPI 10 | SemVer2.0 11 | 12 |
13 | 14 | ## 简介 15 | rapid_paraformer是一个基于阿里达摩院[Paraformer语音识别-中文-通用-16k-离线-large-pytorch](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary)的语音识别工具。 16 | 17 | 🎉该项目核心代码已经并入[FunASR](https://github.com/alibaba-damo-academy/FunASR) 18 | 19 | 本仓库仅对模型做了转换,只采用ONNXRuntime推理引擎 20 | 21 | ## TODO 22 | - [ ] 整合vad + asr + pun三个模型,打造可部署使用的方案 23 | 24 | ## 安装 25 | ```bash 26 | pip install rapid_paraformer 27 | ``` 28 | 29 | ## 模型下载 30 | 方法一:从Hugging Face上下载([link](https://huggingface.co/SWHL/RapidParaformer)) 31 | ```python 32 | from rapid_paraformer import download_hf_model 33 | 34 | download_hf_model(repo_id="SWHL/RapidParaformer", save_dir=".") 35 | ``` 36 | 37 | 方法二:([Google Drive](https://drive.google.com/drive/folders/1RVQtMe0eB_k6G5TJlmXwPELx4VtF2oCw?usp=sharing) | [百度网盘](https://pan.baidu.com/s/1zf8Ta6QxFHY3Z75fHNYKrQ?pwd=6ekq)) 38 | ```bash 39 | resources 40 | ├── [ 700] config.yaml 41 | └── [4.0K] models 42 | ├── [ 11K] am.mvn 43 | ├── [824M] asr_paraformerv2.onnx 44 | └── [ 50K] token_list.pkl 45 | ``` 46 | 47 | ## 模型转换 48 | 基于ModelScope下的notebook环境自助转换: 49 | 1. 打开[快速体验](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary) 50 | 2. 
打开notebook → Cell中输入以下命令, 执行即可。 51 | ```bash 52 | !python -m funasr.export.export_model --model-name 'damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch' --export-dir "./export" 53 | ``` 54 | 55 | ## 使用 56 | ```python 57 | from rapid_paraformer import RapidParaformer 58 | 59 | config_path = "resources/config.yaml" 60 | 61 | paraformer = RapidParaformer(config_path) 62 | 63 | wav_path = [ 64 | "test_wavs/0478_00017.wav", 65 | "test_wavs/asr_example_zh.wav", 66 | ] 67 | 68 | result = paraformer(wav_path) 69 | print(result) 70 | # ['y', '欢迎大家来体验达摩院推出的语音识别模型'] 71 | ``` 72 | -------------------------------------------------------------------------------- /python/demo.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from rapid_paraformer import RapidParaformer, download_hf_model 5 | 6 | download_hf_model(repo_id="SWHL/RapidParaformer", save_dir=".") 7 | 8 | config_path = "resources/config.yaml" 9 | 10 | paraformer = RapidParaformer(config_path) 11 | 12 | wav_path = [ 13 | "test_wavs/0478_00017.wav", 14 | "test_wavs/asr_example_zh.wav", 15 | ] 16 | 17 | print(wav_path) 18 | result = paraformer(wav_path) 19 | print(result) 20 | -------------------------------------------------------------------------------- /python/docs/doc_whl.md: -------------------------------------------------------------------------------- 1 | See [link](https://github.com/RapidAI/RapidASR/tree/main/python) for details. 
-------------------------------------------------------------------------------- /python/rapid_paraformer/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from .main import RapidParaformer 5 | from .utils import download_hf_model 6 | -------------------------------------------------------------------------------- /python/rapid_paraformer/kaldifeat/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | from .feature import compute_fbank_feats, compute_mfcc_feats, apply_cmvn_sliding 3 | from .ivector import compute_vad 4 | -------------------------------------------------------------------------------- /python/rapid_paraformer/kaldifeat/ivector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .feature import sliding_window 4 | 5 | 6 | # ---------- compute-vad ---------- 7 | 8 | def compute_vad(log_energy, energy_mean_scale=0.5, energy_threshold=0.5, frames_context=0, proportion_threshold=0.6): 9 | """ Apply voice activity detection 10 | 11 | :param log_energy: Log mel energy. 12 | :param energy_mean_scale: If this is set to s, to get the actual threshold we let m be the mean log-energy of the file, and use s*m + vad-energy-threshold (float, default = 0.5) 13 | :param energy_threshold: Constant term in energy threshold for VAD (also see energy_mean_scale) (float, default = 0.5) 14 | :param frames_context: Number of frames of context on each side of central frame, in window for which energy is monitored (int, default = 0) 15 | :param proportion_threshold: Parameter controlling the proportion of frames within the window that need to have more energy than the threshold (float, default = 0.6) 16 | :return: A vector of booleans that are True if we judge the frame voiced and False otherwise. 
17 | """ 18 | assert len(log_energy.shape) == 1 19 | assert energy_mean_scale >= 0 20 | assert frames_context >= 0 21 | assert 0 < proportion_threshold < 1 22 | dtype = log_energy.dtype 23 | energy_threshold += energy_mean_scale * log_energy.mean() 24 | if frames_context > 0: 25 | num_frames = len(log_energy) 26 | window_size = frames_context * 2 + 1 27 | log_energy_pad = np.concatenate([ 28 | np.zeros(frames_context, dtype=dtype), 29 | log_energy, 30 | np.zeros(frames_context, dtype=dtype) 31 | ]) 32 | log_energy_window = sliding_window(log_energy_pad, window_size, 1) 33 | num_count = np.count_nonzero(log_energy_window > energy_threshold, axis=1) 34 | den_count = np.ones(num_frames, dtype=dtype) * window_size 35 | max_den_count = np.arange(frames_context + 1, min(window_size, num_frames) + 1, dtype=dtype) 36 | den_count[:-(frames_context + 2):-1] = max_den_count 37 | den_count[:frames_context + 1] = np.min([den_count[:frames_context + 1], max_den_count], axis=0) 38 | vad = num_count / den_count >= proportion_threshold 39 | else: 40 | vad = log_energy > energy_threshold 41 | return vad 42 | 43 | # ---------- compute-vad ---------- 44 | -------------------------------------------------------------------------------- /python/rapid_paraformer/main.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | from pathlib import Path 5 | from typing import List, Tuple, Union 6 | 7 | import librosa 8 | import numpy as np 9 | 10 | from .utils import ( 11 | CharTokenizer, 12 | Hypothesis, 13 | ONNXRuntimeError, 14 | OrtInferSession, 15 | TokenIDConverter, 16 | WavFrontend, 17 | get_logger, 18 | read_yaml, 19 | ) 20 | 21 | logging = get_logger() 22 | 23 | 24 | class RapidParaformer: 25 | def __init__(self, config_path: Union[str, Path]) -> None: 26 | if not Path(config_path).exists(): 27 | raise FileNotFoundError(f"{config_path} does not exist.") 28 | 29 | 
config = read_yaml(config_path) 30 | 31 | self.converter = TokenIDConverter(**config["TokenIDConverter"]) 32 | self.tokenizer = CharTokenizer(**config["CharTokenizer"]) 33 | self.frontend = WavFrontend( 34 | cmvn_file=config["WavFrontend"]["cmvn_file"], 35 | **config["WavFrontend"]["frontend_conf"], 36 | ) 37 | self.ort_infer = OrtInferSession(config["Model"]) 38 | self.batch_size = config["Model"]["batch_size"] 39 | 40 | def __call__(self, wav_content: Union[str, np.ndarray, List[str]]) -> List: 41 | waveform_list = self.load_data(wav_content) 42 | waveform_nums = len(waveform_list) 43 | 44 | asr_res = [] 45 | for beg_idx in range(0, waveform_nums, self.batch_size): 46 | end_idx = min(waveform_nums, beg_idx + self.batch_size) 47 | 48 | feats, feats_len = self.extract_feat(waveform_list[beg_idx:end_idx]) 49 | 50 | try: 51 | am_scores, valid_token_lens = self.infer(feats, feats_len) 52 | except ONNXRuntimeError: 53 | logging.warning("input wav is silence or noise") 54 | preds = [] 55 | else: 56 | preds = self.decode(am_scores, valid_token_lens) 57 | 58 | asr_res.extend(preds) 59 | return asr_res 60 | 61 | def load_data(self, wav_content: Union[str, np.ndarray, List[str]]) -> List: 62 | def load_wav(path: str) -> np.ndarray: 63 | waveform, _ = librosa.load(path, sr=None) 64 | return waveform[None, ...] 
65 | 66 | if isinstance(wav_content, np.ndarray): 67 | return [wav_content] 68 | 69 | if isinstance(wav_content, str): 70 | return [load_wav(wav_content)] 71 | 72 | if isinstance(wav_content, list): 73 | return [load_wav(path) for path in wav_content] 74 | 75 | raise TypeError(f"The type of {wav_content} is not in [str, np.ndarray, list]") 76 | 77 | def extract_feat( 78 | self, waveform_list: List[np.ndarray] 79 | ) -> Tuple[np.ndarray, np.ndarray]: 80 | feats, feats_len = [], [] 81 | for waveform in waveform_list: 82 | speech, _ = self.frontend.fbank(waveform) 83 | feat, feat_len = self.frontend.lfr_cmvn(speech) 84 | feats.append(feat) 85 | feats_len.append(feat_len) 86 | 87 | feats = self.pad_feats(feats, np.max(feats_len)) 88 | feats_len = np.array(feats_len).astype(np.int32) 89 | return feats, feats_len 90 | 91 | @staticmethod 92 | def pad_feats(feats: List[np.ndarray], max_feat_len: int) -> np.ndarray: 93 | def pad_feat(feat: np.ndarray, cur_len: int) -> np.ndarray: 94 | pad_width = ((0, max_feat_len - cur_len), (0, 0)) 95 | return np.pad(feat, pad_width, "constant", constant_values=0) 96 | 97 | feat_res = [pad_feat(feat, feat.shape[0]) for feat in feats] 98 | feats = np.array(feat_res).astype(np.float32) 99 | return feats 100 | 101 | def infer( 102 | self, feats: np.ndarray, feats_len: np.ndarray 103 | ) -> Tuple[np.ndarray, np.ndarray]: 104 | am_scores, token_nums = self.ort_infer([feats, feats_len]) 105 | return am_scores, token_nums 106 | 107 | def decode(self, am_scores: np.ndarray, token_nums: int) -> List[str]: 108 | return [ 109 | self.decode_one(am_score, token_num) 110 | for am_score, token_num in zip(am_scores, token_nums) 111 | ] 112 | 113 | def decode_one(self, am_score: np.ndarray, valid_token_num: int) -> List[str]: 114 | yseq = am_score.argmax(axis=-1) 115 | score = am_score.max(axis=-1) 116 | score = np.sum(score, axis=-1) 117 | 118 | # pad with mask tokens to ensure compatibility with sos/eos tokens 119 | # asr_model.sos:1 asr_model.eos:2 120 
| yseq = np.array([1] + yseq.tolist() + [2]) 121 | hyp = Hypothesis(yseq=yseq, score=score) 122 | 123 | # remove sos/eos and get results 124 | last_pos = -1 125 | token_int = hyp.yseq[1:last_pos].tolist() 126 | 127 | # remove blank symbol id, which is assumed to be 0 128 | token_int = list(filter(lambda x: x not in (0, 2), token_int)) 129 | 130 | # Change integer-ids to tokens 131 | token = self.converter.ids2tokens(token_int) 132 | text = self.tokenizer.tokens2text(token) 133 | return text[: valid_token_num - 1] 134 | 135 | 136 | if __name__ == "__main__": 137 | project_dir = Path(__file__).resolve().parent.parent 138 | cfg_path = project_dir / "resources" / "config.yaml" 139 | paraformer = RapidParaformer(cfg_path) 140 | 141 | wav_file = "0478_00017.wav" 142 | for i in range(1000): 143 | result = paraformer(wav_file) 144 | print(result) 145 | -------------------------------------------------------------------------------- /python/requirements.txt: -------------------------------------------------------------------------------- 1 | librosa 2 | numpy 3 | onnxruntime 4 | typeguard==2.13.3 5 | huggingface_hub -------------------------------------------------------------------------------- /python/setup.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import sys 5 | from pathlib import Path 6 | from typing import List, Union 7 | 8 | import setuptools 9 | from get_pypi_latest_version import GetPyPiLatestVersion 10 | 11 | 12 | def read_txt(txt_path: Union[Path, str]) -> List[str]: 13 | with open(txt_path, "r", encoding="utf-8") as f: 14 | data = [v.rstrip("\n") for v in f] 15 | return data 16 | 17 | 18 | def get_readme() -> str: 19 | root_dir = Path(__file__).resolve().parent 20 | readme_path = str(root_dir / "docs" / "doc_whl.md") 21 | with open(readme_path, "r", encoding="utf-8") as f: 22 | readme = f.read() 23 | return readme 24 | 25 | 26 | 
MODULE_NAME = "rapid_paraformer" 27 | 28 | obtainer = GetPyPiLatestVersion() 29 | try: 30 | latest_version = obtainer(MODULE_NAME) 31 | except ValueError: 32 | latest_version = "0.0.1" 33 | 34 | VERSION_NUM = obtainer.version_add_one(latest_version) 35 | 36 | if len(sys.argv) > 2: 37 | match_str = " ".join(sys.argv[2:]) 38 | matched_versions = obtainer.extract_version(match_str) 39 | if matched_versions: 40 | VERSION_NUM = matched_versions 41 | sys.argv = sys.argv[:2] 42 | 43 | setuptools.setup( 44 | name=MODULE_NAME, 45 | version=VERSION_NUM, 46 | platforms="Any", 47 | description="Tool of speech recognition.", 48 | long_description=get_readme(), 49 | long_description_content_type="text/markdown", 50 | author="SWHL", 51 | author_email="liekkaskono@163.com", 52 | url="https://github.com/RapidAI/RapidASR", 53 | license="Apache-2.0", 54 | include_package_data=True, 55 | install_requires=read_txt("requirements.txt"), 56 | packages=[MODULE_NAME, f"{MODULE_NAME}/kaldifeat"], 57 | package_data={"": ["*.md", "LICENSE"]}, 58 | keywords=["asr,paraformer,wenet"], 59 | classifiers=[ 60 | "Programming Language :: Python :: 3.6", 61 | "Programming Language :: Python :: 3.7", 62 | "Programming Language :: Python :: 3.8", 63 | "Programming Language :: Python :: 3.9", 64 | "Programming Language :: Python :: 3.10", 65 | "Programming Language :: Python :: 3.11", 66 | "Programming Language :: Python :: 3.12", 67 | ], 68 | python_requires=">=3.6,<3.13", 69 | entry_points={ 70 | "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"], 71 | }, 72 | ) 73 | -------------------------------------------------------------------------------- /python/test_wavs/0478_00017.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/python/test_wavs/0478_00017.wav -------------------------------------------------------------------------------- 
/python/test_wavs/asr_example_zh.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/python/test_wavs/asr_example_zh.wav -------------------------------------------------------------------------------- /python/tests/test_infer.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | # @Author: SWHL 3 | # @Contact: liekkaskono@163.com 4 | import os 5 | from pathlib import Path 6 | 7 | import pytest 8 | import librosa 9 | 10 | project_dir = Path(__file__).resolve().parent.parent 11 | os.sys.path.append(str(project_dir)) 12 | 13 | from rapid_paraformer import RapidParaformer 14 | 15 | 16 | cfg_path = project_dir / 'resources' / 'config.yaml' 17 | paraformer = RapidParaformer(cfg_path) 18 | 19 | 20 | def test_input_by_path(): 21 | wav_file = 'test_wavs/0478_00017.wav' 22 | result = paraformer(wav_file) 23 | assert result[0][:5] == '呃说不配合' 24 | 25 | 26 | def test_input_by_ndarray(): 27 | wav_file = 'test_wavs/0478_00017.wav' 28 | waveform, _ = librosa.load(wav_file) 29 | result = paraformer(waveform[None, ...]) 30 | assert result[0][:5] == '呃说不配合' 31 | 32 | 33 | def test_input_by_str_list(): 34 | wave_list = [ 35 | 'test_wavs/0478_00017.wav', 36 | 'test_wavs/asr_example_zh.wav', 37 | ] 38 | result = paraformer(wave_list) 39 | assert result[0][:5] == '呃说不配合' 40 | 41 | 42 | def test_empty(): 43 | wav_file = None 44 | with pytest.raises(TypeError) as exc_info: 45 | paraformer(wav_file) 46 | raise TypeError() 47 | assert exc_info.type is TypeError 48 | --------------------------------------------------------------------------------