├── .github
└── workflows
│ └── gen_whl_to_pypi.yml
├── .gitignore
├── LICENSE
├── README.md
├── cpp_onnx
├── CMakeLists.txt
├── CMakeSettings.json
├── api.md
├── images
│ ├── demo.png
│ └── threadnum.png
├── include
│ ├── Audio.h
│ ├── ComDefine.h
│ ├── Model.h
│ ├── librapidasrapi.h
│ ├── webrtc_vad.h
│ └── win_func.h
├── models
│ ├── readme.md
│ └── vocab.txt
├── readme.md
├── src
│ ├── Audio.cpp
│ ├── CMakeLists.txt
│ ├── CommonStruct.h
│ ├── FeatureExtract.cpp
│ ├── FeatureExtract.h
│ ├── FeatureQueue.cpp
│ ├── FeatureQueue.h
│ ├── Model.cpp
│ ├── SpeechWrap.cpp
│ ├── SpeechWrap.h
│ ├── Tensor.h
│ ├── Vocab.cpp
│ ├── Vocab.h
│ ├── alignedmem.cpp
│ ├── alignedmem.h
│ ├── commonfunc.h
│ ├── librapidasrapi.cpp
│ ├── paraformer_onnx.cpp
│ ├── paraformer_onnx.h
│ ├── precomp.h
│ ├── predefine_coe.h
│ ├── tmp.h
│ ├── util.cpp
│ └── util.h
├── tester
│ ├── CMakeLists.txt
│ └── tester.cpp
├── third_party
│ └── webrtc
│ │ ├── CMakeLists.txt
│ │ ├── common_audio
│ │ ├── signal_processing
│ │ │ ├── complex_bit_reverse.c
│ │ │ ├── complex_fft.c
│ │ │ ├── complex_fft_tables.h
│ │ │ ├── cross_correlation.c
│ │ │ ├── division_operations.c
│ │ │ ├── dot_product_with_scale.cc
│ │ │ ├── dot_product_with_scale.h
│ │ │ ├── downsample_fast.c
│ │ │ ├── energy.c
│ │ │ ├── get_scaling_square.c
│ │ │ ├── include
│ │ │ │ ├── real_fft.h
│ │ │ │ ├── signal_processing_library.h
│ │ │ │ └── spl_inl.h
│ │ │ ├── min_max_operations.c
│ │ │ ├── resample_48khz.c
│ │ │ ├── resample_by_2_internal.c
│ │ │ ├── resample_by_2_internal.h
│ │ │ ├── resample_fractional.c
│ │ │ ├── spl_init.c
│ │ │ ├── spl_inl.c
│ │ │ ├── spl_sqrt.c
│ │ │ └── vector_scaling_operations.c
│ │ ├── third_party
│ │ │ └── spl_sqrt_floor
│ │ │ │ ├── spl_sqrt_floor.c
│ │ │ │ └── spl_sqrt_floor.h
│ │ └── vad
│ │ │ ├── include
│ │ │ └── webrtc_vad.h
│ │ │ ├── vad_core.c
│ │ │ ├── vad_core.h
│ │ │ ├── vad_filterbank.c
│ │ │ ├── vad_filterbank.h
│ │ │ ├── vad_gmm.c
│ │ │ ├── vad_gmm.h
│ │ │ ├── vad_sp.c
│ │ │ ├── vad_sp.h
│ │ │ └── webrtc_vad.c
│ │ ├── rtc_base
│ │ ├── checks.cc
│ │ ├── checks.h
│ │ ├── compile_assert_c.h
│ │ ├── numerics
│ │ │ └── safe_compare.h
│ │ ├── sanitizer.h
│ │ ├── system
│ │ │ ├── arch.h
│ │ │ └── inline.h
│ │ └── type_traits.h
│ │ ├── system_wrappers
│ │ └── include
│ │ │ └── cpu_features_wrapper.h
│ │ └── typedefs.h
├── wave
│ ├── asr_example.wav
│ ├── long.wav
│ ├── short.wav
│ ├── test.pcm.bytes
│ └── test.pcm.wav
└── win
│ ├── bin
│ ├── x64
│ │ ├── libfftw3-3.dll
│ │ ├── libfftw3f-3.dll
│ │ ├── libfftw3l-3.dll
│ │ └── onnxruntime.dll
│ └── x86
│ │ ├── libfftw3-3.dll
│ │ ├── libfftw3f-3.dll
│ │ ├── libfftw3l-3.dll
│ │ └── onnxruntime.dll
│ ├── images
│ └── sample.png
│ ├── include
│ ├── cpu_provider_factory.h
│ ├── fftw3.h
│ ├── onnxruntime_c_api.h
│ ├── onnxruntime_cxx_api.h
│ ├── onnxruntime_cxx_inline.h
│ ├── onnxruntime_run_options_config_keys.h
│ ├── onnxruntime_session_options_config_keys.h
│ ├── provider_options.h
│ └── tensorrt_provider_factory.h
│ ├── lib
│ ├── x64
│ │ ├── libfftw3-3.def
│ │ ├── libfftw3-3.exp
│ │ ├── libfftw3-3.lib
│ │ ├── libfftw3f-3.def
│ │ ├── libfftw3f-3.exp
│ │ ├── libfftw3f-3.lib
│ │ ├── libfftw3l-3.def
│ │ ├── libfftw3l-3.exp
│ │ ├── libfftw3l-3.lib
│ │ └── onnxruntime.lib
│ └── x86
│ │ ├── libfftw3-3.def
│ │ ├── libfftw3-3.exp
│ │ ├── libfftw3-3.lib
│ │ ├── libfftw3f-3.def
│ │ ├── libfftw3f-3.exp
│ │ ├── libfftw3f-3.lib
│ │ ├── libfftw3l-3.def
│ │ ├── libfftw3l-3.exp
│ │ ├── libfftw3l-3.lib
│ │ └── onnxruntime.lib
│ └── readme.md
└── python
├── .gitattributes
├── .pre-commit-config.yaml
├── README.md
├── demo.py
├── docs
└── doc_whl.md
├── rapid_paraformer
├── __init__.py
├── kaldifeat
│ ├── LICENSE
│ ├── README.md
│ ├── __init__.py
│ ├── feature.py
│ └── ivector.py
├── main.py
└── utils.py
├── requirements.txt
├── setup.py
├── test_wavs
├── 0478_00017.wav
└── asr_example_zh.wav
└── tests
└── test_infer.py
/.github/workflows/gen_whl_to_pypi.yml:
--------------------------------------------------------------------------------
1 | name: Push rapid_paraformer to pypi
2 |
3 | on:
4 | push:
5 | # branches: [ main ]
6 | # paths:
7 | # - 'python/rapid_paraformer/**'
8 | # - 'python/docs/doc_whl.md'
9 | # - 'python/setup.py'
10 | # - '.github/workflows/gen_whl_to_pypi.yml'
11 | tags:
12 | - v*
13 |
14 | # env:
15 | # RESOURCES_URL: https://github.com/RapidAI/RapidLatexOCR/releases/download/v0.0.0/models.zip
16 |
17 | jobs:
18 | # UnitTesting:
19 | # runs-on: ubuntu-latest
20 | # steps:
21 | # - name: Pull latest code
22 | # uses: actions/checkout@v3
23 |
24 | # - name: Set up Python 3.7
25 | # uses: actions/setup-python@v4
26 | # with:
27 | # python-version: '3.7'
28 | # architecture: 'x64'
29 |
30 | # - name: Display Python version
31 | # run: python -c "import sys; print(sys.version)"
32 |
33 | # - name: Download models
34 | # run: |
35 | # wget $RESOURCES_URL
36 | # ZIP_NAME=${RESOURCES_URL##*/}
37 | # DIR_NAME=${ZIP_NAME%.*}
38 | # unzip $ZIP_NAME
39 |
40 | # - name: Unit testings with rapid_latex_ocr
41 | # run: |
42 | # pip install -r requirements.txt
43 | # pip install pytest
44 | # pytest tests/test*.py
45 |
46 | GenerateWHL_PushPyPi:
47 | runs-on: ubuntu-latest
48 |
49 | steps:
50 | - uses: actions/checkout@v3
51 |
52 | - name: Set up Python 3.7
53 | uses: actions/setup-python@v4
54 | with:
55 | python-version: '3.7'
56 | architecture: 'x64'
57 |
58 | - name: Run setup.py
59 | run: |
60 | cd python
61 | pip install -r requirements.txt
62 | python -m pip install --upgrade pip
63 | pip install wheel get_pypi_latest_version
64 | python setup.py bdist_wheel ${{ github.ref_name }}
65 |
66 | # - name: Publish distribution 📦 to Test PyPI
67 | # uses: pypa/gh-action-pypi-publish@v1.5.0
68 | # with:
69 | # password: ${{ secrets.TEST_PYPI_API_TOKEN }}
70 | # repository_url: https://test.pypi.org/legacy/
71 | # packages_dir: dist/
72 |
73 | - name: Publish distribution 📦 to PyPI
74 | uses: pypa/gh-action-pypi-publish@v1.5.0
75 | with:
76 | password: ${{ secrets.PYPI_API_TOKEN }}
77 | packages_dir: python/dist/
78 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.onnx
2 | *.json
3 |
4 | # Created by .ignore support plugin (hsz.mobi)
5 | ### Python template
6 | # Byte-compiled / optimized / DLL files
7 | __pycache__/
8 | *.py[cod]
9 | *$py.class
10 | .pytest_cache
11 |
12 | # C extensions
13 | *.so
14 |
15 | # Distribution / packaging
16 | .Python
17 | build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | wheels/
29 | pip-wheel-metadata/
30 | share/python-wheels/
31 | *.egg-info/
32 | .installed.cfg
33 | *.egg
34 | MANIFEST
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | # *.manifest
40 | # *.spec
41 | *.res
42 |
43 | # Installer logs
44 | pip-log.txt
45 | pip-delete-this-directory.txt
46 |
47 | # Unit test / coverage reports
48 | htmlcov/
49 | .tox/
50 | .nox/
51 | .coverage
52 | .coverage.*
53 | .cache
54 | nosetests.xml
55 | coverage.xml
56 | *.cover
57 | *.py,cover
58 | .hypothesis/
59 | .pytest_cache/
60 |
61 | # Translations
62 | *.mo
63 | *.pot
64 |
65 | # Django stuff:
66 | *.log
67 | local_settings.py
68 | db.sqlite3
69 | db.sqlite3-journal
70 |
71 | # Flask stuff:
72 | instance/
73 | .webassets-cache
74 |
75 | # Scrapy stuff:
76 | .scrapy
77 |
78 | # Sphinx documentation
79 | docs/_build/
80 |
81 | # PyBuilder
82 | target/
83 |
84 | # Jupyter Notebook
85 | .ipynb_checkpoints
86 |
87 | # IPython
88 | profile_default/
89 | ipython_config.py
90 |
91 | # pyenv
92 | .python-version
93 |
94 | # pipenv
95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
98 | # install all needed dependencies.
99 | #Pipfile.lock
100 |
101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
102 | __pypackages__/
103 |
104 | # Celery stuff
105 | celerybeat-schedule
106 | celerybeat.pid
107 |
108 | # SageMath parsed files
109 | *.sage.py
110 |
111 | # Environments
112 | .env
113 | .venv
114 | env/
115 | venv/
116 | ENV/
117 | env.bak/
118 | venv.bak/
119 |
120 | # Spyder project settings
121 | .spyderproject
122 | .spyproject
123 |
124 | # Rope project settings
125 | .ropeproject
126 |
127 | # mkdocs documentation
128 | /site
129 |
130 | # mypy
131 | .mypy_cache/
132 | .dmypy.json
133 | dmypy.json
134 |
135 | # Pyre type checker
136 | .pyre/
137 |
138 | #idea
139 | .vs
140 | .vscode
141 | .idea
142 | /models
143 |
144 | #models
145 |
146 | *.ttf
147 | *.ttc
148 |
149 |
150 | *.bin
151 | *.mapping
152 | *.xml
153 |
154 | *.pdiparams
155 | *.pdiparams.info
156 | *.pdmodel
157 |
158 | .DS_Store
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 RapidAI
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Rapid ASR
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | - 🎉 推出知识星球[RapidAI私享群](https://t.zsxq.com/0duLBZczw),这里的提问会优先得到回答和支持,也会享受到RapidAI组织后续持续优质的服务。欢迎大家的加入。
11 | - Paraformer模型出自阿里达摩院[Paraformer语音识别-中文-通用-16k-离线-large-pytorch](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary)。
12 | - 本仓库仅对模型做了转换,只采用ONNXRuntime推理引擎。该项目核心代码已经并入[FunASR](https://github.com/alibaba-damo-academy/FunASR)。
13 | - 项目仍会持续更新,欢迎关注。
14 | - QQ群号:645751008
15 |
16 | #### 📖文档导航
17 | - 语音识别:
18 | - rapid_paraformer:
19 | - [rapid_paraformer-Python](./python/README.md)
20 | - [rapid_C++/C](./cpp_onnx/readme.md)
21 | - [rapid_wenet](https://github.com/RapidAI/RapidASR/tree/rapid_wenet)
22 | - [Python](https://github.com/RapidAI/RapidASR/tree/rapid_wenet/python)
23 | - [C++](https://github.com/RapidAI/RapidASR/tree/rapid_wenet/cpp)
24 | - [rapid_paddlespeech-Python](https://github.com/RapidAI/RapidASR/tree/rapid_paddlespeech)
25 | - 标点符号
26 | - [RapidPunc](https://github.com/RapidAI/RapidPunc)
27 |
28 | #### 📆TODO以及任务认领
29 | - 参见这里:[link](https://github.com/RapidAI/RapidASR/issues/15)
30 |
31 | #### 🎨整体框架
32 | ```mermaid
33 | flowchart LR
34 |
35 | A([wav]) --RapidVad--> B([各个小段的音频]) --RapidASR--> C([识别的文本内容]) --RapidPunc--> D([最终识别内容])
36 | ```
37 |
38 | #### 📣更新日志
39 |
40 | 详情
41 | - 2023-08-21 v2.0.4 update:
42 | - 添加whl包支持
43 | - 更新文档
44 | - 2023-02-25
45 | - 添加C++版本推理,使用onnxruntime引擎,预/后处理代码来自: [FastASR](https://github.com/chenkui164/FastASR)
46 | - 2023-02-14 v2.0.3 update:
47 | - 修复librosa读取wav文件错误
48 | - 修复fbank与torch下fbank提取结果不一致bug
49 | - 2023-02-11 v2.0.2 update:
50 | - 模型和推理代码解耦(`rapid_paraformer`和`resources`)
51 | - 支持批量推理(通过`resources/config.yaml`中`batch_size`指定)
52 | - 增加多种输入方式(`Union[str, np.ndarray, List[str]]`)
53 | - 2023-02-10 v2.0.1 update:
54 | - 添加对输入音频为噪音或者静音的文件推理结果捕捉。
55 |
56 |
57 |
--------------------------------------------------------------------------------
/cpp_onnx/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.10)
2 |
3 | #-DONNXRUNTIME_DIR=D:\thirdpart\onnxruntime
4 | project(FastASR)
5 |
6 | set(CMAKE_CXX_STANDARD 11)
7 | set(CMAKE_POSITION_INDEPENDENT_CODE ON)
8 |
9 | # for onnxruntime
10 |
11 | IF(WIN32)
12 |
13 |
14 | if(CMAKE_CL_64)
15 | link_directories(${ONNXRUNTIME_DIR}\\lib)
16 | else()
17 | add_definitions(-D_WIN_X86)
18 | endif()
19 | ELSE()
20 |
21 |
22 | link_directories(${ONNXRUNTIME_DIR}/lib)
23 |
24 | endif()
25 |
26 | #option(FASTASR_BUILD_PYTHON_MODULE "build python module, using FastASR in Python" OFF)
27 |
28 | add_subdirectory("./third_party/webrtc")
29 | add_subdirectory(src)
30 | add_subdirectory(tester)
31 |
--------------------------------------------------------------------------------
/cpp_onnx/CMakeSettings.json:
--------------------------------------------------------------------------------
1 | {
2 | "configurations": [
3 | {
4 | "name": "x64-Debug",
5 | "generator": "Ninja",
6 | "configurationType": "Debug",
7 | "inheritEnvironments": [ "msvc_x64_x64" ],
8 | "buildRoot": "${projectDir}\\out\\build\\${name}",
9 | "installRoot": "${projectDir}\\out\\install\\${name}",
10 | "buildCommandArgs": "",
11 | "ctestCommandArgs": ""
12 | },
13 | {
14 | "name": "x64-Release",
15 | "generator": "Ninja",
16 | "configurationType": "RelWithDebInfo",
17 | "buildRoot": "${projectDir}\\out\\build\\${name}",
18 | "installRoot": "${projectDir}\\out\\install\\${name}",
19 | "cmakeCommandArgs": "",
20 | "buildCommandArgs": "",
21 | "ctestCommandArgs": "",
22 | "inheritEnvironments": [ "msvc_x64_x64" ]
23 | },
24 | {
25 | "name": "Linux-GCC-Debug",
26 | "generator": "Unix Makefiles",
27 | "configurationType": "Debug",
28 | "cmakeExecutable": "cmake",
29 | "remoteCopySourcesExclusionList": [ ".vs", ".git", "out" ],
30 | "cmakeCommandArgs": "-DONNXRUNTIME_DIR=/data/linux/thirdpart/onnxruntime-linux-x64-1.14.1",
31 | "buildCommandArgs": "",
32 | "ctestCommandArgs": "",
33 | "inheritEnvironments": [ "linux_x64" ],
34 | "remoteMachineName": "${defaultRemoteMachineName}",
35 | "remoteCMakeListsRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/src",
36 | "remoteBuildRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/out/build/${name}",
37 | "remoteInstallRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/out/install/${name}",
38 | "remoteCopySources": true,
39 | "rsyncCommandArgs": "-t --delete",
40 | "remoteCopyBuildOutput": false,
41 | "remoteCopySourcesMethod": "rsync"
42 | }
43 | ]
44 | }
--------------------------------------------------------------------------------
/cpp_onnx/api.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | 参考代码: [tester.cpp](tester/tester.cpp)
4 |
5 | ```
6 | 初始化程序库
7 | _RAPIDASRAPI RPASR_HANDLE RapidAsrInit(const char* szModelDir, int nThread);
8 |
9 |
10 |
11 | // If no progress callback is needed, pass NULL as fnCallback.
12 | 识别内存缓冲区,完整的wav文件数据,包括文件头
13 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback);
14 | 识别内存缓冲区,只包括采样点数据,不包括wav文件头
15 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback);
16 | 识别文件,只包括采样点数据,不包括wav文件头
17 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback);
18 |
19 | 识别音频文件,完整的wav文件数据,包括文件头
20 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogFile(RPASR_HANDLE handle, const char* szWavfile, RPASR_MODE Mode, QM_CALLBACK fnCallback);
21 |
22 | 获取识别后的文本和相关数据
23 | _RAPIDASRAPI const char* RapidAsrGetResult(RPASR_RESULT Result,int nIndex);
24 |
25 | 获取结果块个数
26 | _RAPIDASRAPI const int RapidAsrGetRetNumber(RPASR_RESULT Result);
27 |
28 | 释放返回的结果块内存
29 | _RAPIDASRAPI void RapidAsrFreeResult(RPASR_RESULT Result);
30 |
31 |
32 | 使用完成后清理程序库
33 | _RAPIDASRAPI void RapidAsrUninit(RPASR_HANDLE Handle);
34 |
35 | 获取结果块中的数据所表示的音频长度,单位秒
36 | _RAPIDASRAPI const float RapidAsrGetRetSnippetTime(RPASR_RESULT Result);
37 |
38 |
39 | ```
40 |
--------------------------------------------------------------------------------
/cpp_onnx/images/demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/images/demo.png
--------------------------------------------------------------------------------
/cpp_onnx/images/threadnum.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/images/threadnum.png
--------------------------------------------------------------------------------
/cpp_onnx/include/Audio.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef AUDIO_H
3 | #define AUDIO_H
4 |
5 | #include
6 | #include
7 | #include
8 |
9 | using namespace std;
10 |
11 | class AudioFrame { // Start/end/length record for one segment; instances are presumably what Audio keeps in frame_queue — confirm in Audio.cpp.
12 | private:
13 | int start; // NOTE(review): unit (samples vs. bytes) is not visible in this header — confirm
14 | int end;
15 | int len;
16 | // Implementations of the members below are not in this header; semantics inferred from names should be verified.
17 | public:
18 | AudioFrame();
19 | AudioFrame(int len);
20 |
21 | ~AudioFrame();
22 | int set_start(int val);
23 | int set_end(int val, int max_len); // presumably clamps against max_len — TODO confirm
24 | int get_start();
25 | int get_len();
26 | int disp();
27 | };
28 |
29 | class Audio { // Holds loaded speech samples plus a queue of frames that get_queue_size() reports on.
30 | private:
31 | float *speech_data; // float sample buffer; ownership/lifetime is decided in Audio.cpp (not visible here)
32 | int16_t *speech_buff; // 16-bit sample buffer
33 | int speech_len;
34 | int speech_align_len;
35 | int16_t sample_rate; // NOTE(review): int16_t cannot hold rates above 32767 Hz (e.g. 44100) — confirm only 16 kHz input is expected
36 | int offset;
37 | float align_size;
38 | int data_type;
39 | queue frame_queue; // NOTE(review): the element type is missing here (template arguments look stripped); restore it from the original header
40 | // Public interface: loaders take either a file name or an in-memory buffer with its length.
41 | public:
42 | Audio(int data_type);
43 | Audio(int data_type, int size);
44 | ~Audio();
45 | void disp();
46 | bool loadwav(const char* filename); // load from a file path
47 | bool loadwav(const char* buf, int nLen); // load from a memory buffer of nLen bytes
48 | bool loadpcmwav(const char* buf, int nFileLen); // presumably raw PCM without a WAV header — confirm in Audio.cpp
49 | bool loadpcmwav(const char* filename);
50 | int fetch_chunck(float *&dout, int len); // name is misspelled ("chunck") but is part of the public interface
51 | int fetch(float *&dout, int &len, int &flag); // dout/len/flag are output references; flag is presumably an S_* value from ComDefine.h — confirm
52 | void padding();
53 | void split();
54 | float get_time_len();
55 | // Number of entries currently held in frame_queue, narrowed to int.
56 | int get_queue_size() { return (int)frame_queue.size(); }
57 | };
58 |
59 | #endif
60 |
--------------------------------------------------------------------------------
/cpp_onnx/include/ComDefine.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef COMDEFINE_H
3 | #define COMDEFINE_H
4 |
5 | #define S_BEGIN 0
6 | #define S_MIDDLE 1
7 | #define S_END 2
8 | #define S_ALL 3
9 | #define S_ERR 4
10 |
11 | #endif
12 |
--------------------------------------------------------------------------------
/cpp_onnx/include/Model.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef MODEL_H
3 | #define MODEL_H
4 |
5 | #include <string>
6 |
7 | class Model { // Abstract recognizer interface; every operation is pure virtual and must be supplied by a concrete model.
8 | public:
9 | virtual ~Model(){}; // virtual so a concrete model can be destroyed through a Model pointer
10 | virtual void reset() = 0;
11 | virtual std::string forward_chunk(float *din, int len, int flag) = 0; // flag is presumably one of the S_* values from ComDefine.h — confirm
12 | virtual std::string forward(float *din, int len, int flag) = 0; // returns a std::string, presumably the recognized text — confirm
13 | virtual std::string rescoring() = 0;
14 | };
15 |
16 | Model *create_model(const char *path,int nThread=0);
17 | #endif
18 |
--------------------------------------------------------------------------------
/cpp_onnx/include/librapidasrapi.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 |
4 | #if defined(_WIN32) || defined(WIN32)
5 | // _WIN32 is predefined by Windows compilers; WIN32 is only set by some build systems, so accept either one.
6 | // With _RPASR_API_EXPORT defined the API is declared dllexport, otherwise dllimport.
7 | #ifdef _RPASR_API_EXPORT
8 |
9 | #define _RAPIDASRAPI __declspec(dllexport)
10 | #else
11 | #define _RAPIDASRAPI __declspec(dllimport)
12 | #endif
13 |
14 | // Non-Windows builds need no import/export decoration.
15 | #else
16 | #define _RAPIDASRAPI
17 | #endif
18 |
19 |
20 |
21 |
22 |
23 | #ifndef _WIN32
24 |
25 | #define RPASR_CALLBCK_PREFIX __attribute__((__stdcall__))
26 |
27 | #else
28 | #define RPASR_CALLBCK_PREFIX __stdcall
29 | #endif
30 |
31 |
32 | #ifdef __cplusplus
33 |
34 | extern "C" {
35 | #endif
36 |
37 | typedef void* RPASR_HANDLE;
38 |
39 | typedef void* RPASR_RESULT;
40 |
41 | typedef unsigned char RPASR_BOOL;
42 |
43 | #define RPASR_TRUE 1
44 | #define RPASR_FALSE 0
45 | #define QM_DEFAULT_THREAD_NUM 4
46 |
47 |
48 | typedef enum
49 | {
50 | RASR_NONE=-1,
51 | RASRM_CTC_GREEDY_SEARCH=0,
52 | RASRM_CTC_RPEFIX_BEAM_SEARCH = 1,
53 | RASRM_ATTENSION_RESCORING = 2,
54 |
55 | }RPASR_MODE;
56 |
57 | typedef enum {
58 |
59 | RPASR_MODEL_PADDLE = 0,
60 | RPASR_MODEL_PADDLE_2 = 1,
61 | RPASR_MODEL_K2 = 2,
62 | RPASR_MODEL_PARAFORMER = 3,
63 |
64 | }RPASR_MODEL_TYPE;
65 |
66 |
67 | typedef void (* QM_CALLBACK)(int nCurStep, int nTotal); // nTotal: total steps; nCurStep: Current Step.
68 |
69 | // APIs for qmasr
70 |
71 | _RAPIDASRAPI RPASR_HANDLE RapidAsrInit(const char* szModelDir, int nThread); // Initialize the library and return a handle for the calls below.
72 |
73 |
74 | // Recognition entry points. Each returns an RPASR_RESULT that is read with the RapidAsrGet* functions and released with RapidAsrFreeResult.
75 | // If no progress callback is needed, pass NULL as fnCallback.
76 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback); // memory buffer holding a complete WAV file, header included
77 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback); // memory buffer holding sample data only, no WAV header
78 | // File holding sample data only, no WAV header.
79 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback);
80 | // Complete WAV file, header included.
81 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogFile(RPASR_HANDLE handle, const char* szWavfile, RPASR_MODE Mode, QM_CALLBACK fnCallback);
82 | // Recognized text and related data for result block nIndex.
83 | _RAPIDASRAPI const char* RapidAsrGetResult(RPASR_RESULT Result,int nIndex);
84 | // Number of result blocks contained in Result.
85 | _RAPIDASRAPI const int RapidAsrGetRetNumber(RPASR_RESULT Result);
86 | _RAPIDASRAPI void RapidAsrFreeResult(RPASR_RESULT Result); // Release the memory of a returned result.
87 |
88 | // Clean up the library once the handle is no longer needed.
89 | _RAPIDASRAPI void RapidAsrUninit(RPASR_HANDLE Handle);
90 | // Length, in seconds, of the audio represented by the data in Result.
91 | _RAPIDASRAPI const float RapidAsrGetRetSnippetTime(RPASR_RESULT Result);
92 |
93 | #ifdef __cplusplus
94 |
95 | }
96 | #endif
--------------------------------------------------------------------------------
/cpp_onnx/include/webrtc_vad.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | /*
12 | * This header file includes the VAD API calls. Specific function calls are
13 | * given below.
14 | */
15 |
16 | #ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT
17 | #define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_
18 |
19 | #include <stddef.h>
20 | #include <stdint.h>
21 |
22 | typedef struct WebRtcVadInst VadInst;
23 |
24 | #ifdef __cplusplus
25 | extern "C" {
26 | #endif
27 |
28 | // Creates an instance to the VAD structure.
29 | VadInst* WebRtcVad_Create(void);
30 |
31 | // Frees the dynamic memory of a specified VAD instance.
32 | //
33 | // - handle [i] : Pointer to VAD instance that should be freed.
34 | void WebRtcVad_Free(VadInst* handle);
35 |
36 | // Initializes a VAD instance.
37 | //
38 | // - handle [i/o] : Instance that should be initialized.
39 | //
40 | // returns : 0 - (OK),
41 | // -1 - (null pointer or Default mode could not be set).
42 | int WebRtcVad_Init(VadInst* handle);
43 |
44 | // Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
45 | // restrictive in reporting speech. Put in other words the probability of being
46 | // speech when the VAD returns 1 is increased with increasing mode. As a
47 | // consequence also the missed detection rate goes up.
48 | //
49 | // - handle [i/o] : VAD instance.
50 | // - mode [i] : Aggressiveness mode (0, 1, 2, or 3).
51 | //
52 | // returns : 0 - (OK),
53 | // -1 - (null pointer, mode could not be set or the VAD instance
54 | // has not been initialized).
55 | int WebRtcVad_set_mode(VadInst* handle, int mode);
56 |
57 | // Calculates a VAD decision for the |audio_frame|. For valid sampling rates
58 | // frame lengths, see the description of WebRtcVad_ValidRatesAndFrameLengths().
59 | //
60 | // - handle [i/o] : VAD Instance. Needs to be initialized by
61 | // WebRtcVad_Init() before call.
62 | // - fs [i] : Sampling frequency (Hz): 8000, 16000, or 32000
63 | // - audio_frame [i] : Audio frame buffer.
64 | // - frame_length [i] : Length of audio frame buffer in number of samples.
65 | //
66 | // returns : 1 - (Active Voice),
67 | // 0 - (Non-active Voice),
68 | // -1 - (Error)
69 | int WebRtcVad_Process(VadInst* handle,
70 | int fs,
71 | const int16_t* audio_frame,
72 | size_t frame_length);
73 |
74 | // Checks for valid combinations of |rate| and |frame_length|. We support 10,
75 | // 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz.
76 | //
77 | // - rate [i] : Sampling frequency (Hz).
78 | // - frame_length [i] : Speech frame buffer length in number of samples.
79 | //
80 | // returns : 0 - (valid combination), -1 - (invalid combination)
81 | int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length);
82 |
83 | #ifdef __cplusplus
84 | }
85 | #endif
86 |
87 | #endif // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT
88 |
--------------------------------------------------------------------------------
/cpp_onnx/include/win_func.h:
--------------------------------------------------------------------------------
1 | #include <time.h>
2 | #ifdef WIN32
3 | #include <windows.h>
4 | #else
5 | #include <sys/time.h>
6 | #endif
7 | #ifdef WIN32
8 | static inline int gettimeofday(struct timeval* tp, void* tzp) // static inline: this header-defined function must not emit one external symbol per including file
9 | { // Windows stand-in for POSIX gettimeofday: fills tp from the local wall-clock time; always returns 0.
10 | time_t clock;
11 | struct tm tm;
12 | SYSTEMTIME wtm;
13 | (void)tzp; // the time-zone argument is accepted for signature compatibility but never used
14 | GetLocalTime(&wtm);
15 | tm.tm_year = wtm.wYear - 1900; // struct tm counts years from 1900
16 | tm.tm_mon = wtm.wMonth - 1; // struct tm months are 0-based, SYSTEMTIME months are 1-based
17 | tm.tm_mday = wtm.wDay;
18 | tm.tm_hour = wtm.wHour;
19 | tm.tm_min = wtm.wMinute;
20 | tm.tm_sec = wtm.wSecond;
21 | tm.tm_isdst = -1; // let mktime decide whether daylight saving time applies
22 |
23 | clock = mktime(&tm);
24 | tp->tv_sec = clock;
25 | tp->tv_usec = wtm.wMilliseconds * 1000; // SYSTEMTIME only offers millisecond resolution
26 | return (0);
27 | }
28 | #endif
--------------------------------------------------------------------------------
/cpp_onnx/models/readme.md:
--------------------------------------------------------------------------------
1 | Place model.onnx here!
2 |
--------------------------------------------------------------------------------
/cpp_onnx/readme.md:
--------------------------------------------------------------------------------
1 |
2 | ## 特别鸣谢
3 |
4 | 本程序中的预处理及后处理代码,来自于:https://github.com/chenkui164/FastASR
5 |
6 |
7 | ## 线程数与性能关系
8 |
9 | 测试环境Rocky Linux 8,仅测试cpp版本结果(未测python版本),@acely
10 |
11 | 简述:
12 | 在3台配置不同的机器上分别编译并测试,在fftw和onnxruntime版本都相同的前提下,识别同一个30分钟的音频文件,分别测试不同onnx线程数量的表现。
13 |
14 | 
15 |
16 | 目前可以总结出大致规律:
17 |
18 | - 并非onnx线程数越多越好
19 | - 2线程比1线程提升显著,线程再多则提升较小
20 | - 线程数等于CPU物理核心数时效率最好
21 |
22 | 实操建议:
23 | - 大部分场景用3-4线程性价比最高
24 | - 低配机器用2线程合适
25 |
26 |
27 |
28 | ## API
29 | [API文档](api.md)
30 |
31 | ## 演示
32 |
33 | 
34 |
35 | ## 注意
36 | 本程序只支持 采样率16000hz, 位深16bit的 **单声道** 音频。
37 |
38 | ## 快速使用
39 |
40 | ### Windows
41 |
42 | 安装Vs2022 打开cpp_onnx目录下的cmake工程,直接 build即可。 本仓库已经准备好所有相关依赖库。
43 |
44 | Windows下已经预置fftw3、onnxruntime及openblas库
45 |
46 |
47 | ### Linux
48 | See the bottom of this page: Building Guidance
49 |
50 |
51 | ### 运行程序
52 |
53 | tester /path/to/models/dir /path/to/wave/file
54 |
55 | 例如: tester /data/models /data/test.wav
56 |
57 | /data/models 需要包括如下两个文件: model.onnx 和vocab.txt
58 |
59 |
60 | ## 支持平台
61 | - Windows
62 | - Linux/Unix
63 |
64 | ## 依赖
65 | - fftw3
66 | - onnxruntime
67 |
68 | ## 导出onnx格式模型文件
69 | 安装 modelscope与FunASR,依赖:torch,torchaudio,安装过程[详细参考文档](https://github.com/alibaba-damo-academy/FunASR/wiki)
70 | ```shell
71 | pip install "modelscope[audio_asr]" -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
72 | git clone https://github.com/alibaba/FunASR.git && cd FunASR
73 | pip install --editable ./
74 | ```
75 | 导出onnx模型,[详见](https://github.com/alibaba-damo-academy/FunASR/tree/main/funasr/export),参考示例,从modelscope中模型导出:
76 |
77 | ```
78 | python -m funasr.export.export_model 'damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch' "./export" true
79 | ```
80 |
81 | ## Building Guidance for Linux/Unix
82 |
83 | ```
84 | git clone https://github.com/RapidAI/RapidASR.git
85 | cd RapidASR/cpp_onnx/
86 | mkdir build
87 | cd build
88 | # download an appropriate onnxruntime from https://github.com/microsoft/onnxruntime/releases/tag/v1.14.0
89 | # here we get a copy of onnxruntime for linux 64
90 | wget https://github.com/microsoft/onnxruntime/releases/download/v1.14.0/onnxruntime-linux-x64-1.14.0.tgz
91 | tar -zxvf onnxruntime-linux-x64-1.14.0.tgz
92 | # ls now shows: onnxruntime-linux-x64-1.14.0 onnxruntime-linux-x64-1.14.0.tgz
93 |
94 | #install fftw3-dev
95 | apt install libfftw3-dev
96 |
97 | # build
98 | cmake -DCMAKE_BUILD_TYPE=release .. -DONNXRUNTIME_DIR=$(pwd)/onnxruntime-linux-x64-1.14.0
99 | make
100 |
101 | # then, in the tester subfolder of the current directory, you will find the program tester
102 |
103 | ```
104 |
105 | ### The structure of a qualified onnxruntime package.
106 | ```
107 | onnxruntime_xxx
108 | ├───include
109 | └───lib
110 | ```
111 |
--------------------------------------------------------------------------------
/cpp_onnx/src/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
2 | file(GLOB files1 "*.cpp")
3 | file(GLOB files4 "paraformer/*.cpp")
4 |
5 | set(files ${files1} ${files2} ${files3} ${files4})
6 |
7 | # message("${files}")
8 |
9 | add_library(rapidasr ${files})
10 |
11 | if(WIN32)
12 |
13 | set(EXTRA_LIBS libfftw3f-3 webrtcvad)
14 | if(CMAKE_CL_64)
15 | target_link_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64)
16 | else()
17 | target_link_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x86)
18 | endif()
19 | target_include_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include )
20 |
21 | target_compile_definitions(rapidasr PUBLIC -D_RPASR_API_EXPORT)
22 | else()
23 |
24 | set(EXTRA_LIBS fftw3f webrtcvad pthread)
25 | target_include_directories(rapidasr PUBLIC "/usr/local/opt/fftw/include")
26 | target_link_directories(rapidasr PUBLIC "/usr/local/opt/fftw/lib")
27 |
28 | target_include_directories(rapidasr PUBLIC "/usr/local/opt/openblas/include")
29 | target_link_directories(rapidasr PUBLIC "/usr/local/opt/openblas/lib")
30 |
31 | target_include_directories(rapidasr PUBLIC "/usr/include")
32 | target_link_directories(rapidasr PUBLIC "/usr/lib64")
33 |
34 | target_include_directories(rapidasr PUBLIC ${FFTW3F_INCLUDE_DIR})
35 | target_link_directories(rapidasr PUBLIC ${FFTW3F_LIBRARY_DIR})
36 | include_directories(${ONNXRUNTIME_DIR}/include)
37 | endif()
38 |
39 | include_directories(${CMAKE_SOURCE_DIR}/include)
40 | target_link_libraries(rapidasr PUBLIC onnxruntime ${EXTRA_LIBS})
41 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/cpp_onnx/src/CommonStruct.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef COMMONSTRUCT_H
3 | #define COMMONSTRUCT_H
4 |
5 |
6 | #endif
7 |
--------------------------------------------------------------------------------
/cpp_onnx/src/FeatureExtract.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef FEATUREEXTRACT_H
3 | #define FEATUREEXTRACT_H
4 |
5 | #include
6 | #include
7 |
8 | #include "FeatureQueue.h"
9 | #include "SpeechWrap.h"
10 | #include "Tensor.h"
11 |
12 | class FeatureExtract { // Feature front end: samples go in through insert(), feature tensors come out through fetch().
13 | private:
14 | SpeechWrap speech;
15 | FeatureQueue fqueue; // queue of completed feature tensors
16 | int mode;
17 | // FFTW single-precision (fftwf_) state; presumably set up by fftw_init() — confirm in FeatureExtract.cpp.
18 | float *fft_input;
19 | fftwf_complex *fft_out;
20 | fftwf_plan p;
21 |
22 | void fftw_init();
23 | void melspect(float *din, float *dout); // presumably computes a mel spectrum from din into dout — confirm
24 | void global_cmvn(float *din); // presumably applies global mean/variance normalization in place — confirm
25 |
26 | public:
27 | FeatureExtract(int mode);
28 | ~FeatureExtract();
29 | int size();
30 | int status();
31 | void reset();
32 | void insert(float *din, int len, int flag); // flag is presumably one of the S_* values from ComDefine.h — confirm
33 | bool fetch(Tensor *&dout); // dout is an output reference; NOTE(review): Tensor's template arguments look stripped here
34 | };
35 |
36 | #endif
37 |
--------------------------------------------------------------------------------
/cpp_onnx/src/FeatureQueue.cpp:
--------------------------------------------------------------------------------
1 | #include "precomp.h"
2 | FeatureQueue::FeatureQueue()
3 | {
4 | buff = new Tensor(67, 80);
5 | window_size = 67;
6 | buff_idx = 0;
7 | }
8 |
9 | FeatureQueue::~FeatureQueue()
10 | {
11 | delete buff;
12 | }
13 |
14 | void FeatureQueue::reinit(int size)
15 | {
16 | delete buff;
17 | buff = new Tensor(size, 80);
18 | buff_idx = 0;
19 | window_size = size;
20 | }
21 |
22 | void FeatureQueue::reset()
23 | {
24 | buff_idx = 0;
25 | }
26 |
27 | void FeatureQueue::push(float *din, int flag)
28 | {
29 | int offset = buff_idx * 80;
30 | memcpy(buff->buff + offset, din, 80 * sizeof(float));
31 | buff_idx++;
32 |
33 | if (flag == S_END) {
34 | Tensor *tmp = new Tensor(buff_idx, 80);
35 | memcpy(tmp->buff, buff->buff, buff_idx * 80 * sizeof(float));
36 | feature_queue.push(tmp);
37 | buff_idx = 0;
38 | } else if (buff_idx == window_size) {
39 | feature_queue.push(buff);
40 | Tensor *tmp = new Tensor(window_size, 80);
41 | memcpy(tmp->buff, buff->buff + (window_size - 3) * 80,
42 | 3 * 80 * sizeof(float));
43 | buff_idx = 3;
44 | buff = tmp;
45 | }
46 | }
47 |
48 | Tensor *FeatureQueue::pop()
49 | {
50 |
51 | Tensor *tmp = feature_queue.front();
52 | feature_queue.pop();
53 | return tmp;
54 | }
55 |
56 | int FeatureQueue::size()
57 | {
58 | return feature_queue.size();
59 | }
60 |
--------------------------------------------------------------------------------
/cpp_onnx/src/FeatureQueue.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef FEATUREQUEUE_H
3 | #define FEATUREQUEUE_H
4 |
5 | #include "Tensor.h"
6 | #include
7 | #include
8 | using namespace std;
9 |
10 |
11 | class FeatureQueue {
12 | private:
13 | queue *> feature_queue;
14 | Tensor *buff;
15 | int buff_idx;
16 | int window_size;
17 |
18 | public:
19 | FeatureQueue();
20 | ~FeatureQueue();
21 | void reinit(int size);
22 | void reset();
23 | void push(float *din, int flag);
24 | Tensor *pop();
25 | int size();
26 | };
27 |
28 | #endif
29 |
--------------------------------------------------------------------------------
/cpp_onnx/src/Model.cpp:
--------------------------------------------------------------------------------
1 | #include "precomp.h"
2 |
3 | Model *create_model(const char *path,int nThread)
4 | {
5 | Model *mm;
6 |
7 |
8 | mm = new paraformer::ModelImp(path, nThread);
9 |
10 | return mm;
11 | }
12 |
--------------------------------------------------------------------------------
/cpp_onnx/src/SpeechWrap.cpp:
--------------------------------------------------------------------------------
1 | #include "precomp.h"
2 |
3 | SpeechWrap::SpeechWrap()
4 | {
5 | cache_size = 0;
6 | }
7 |
8 | SpeechWrap::~SpeechWrap()
9 | {
10 | }
11 |
12 | void SpeechWrap::reset()
13 | {
14 | cache_size = 0;
15 | }
16 |
17 | void SpeechWrap::load(float *din, int len)
18 | {
19 | in = din;
20 | in_size = len;
21 | total_size = cache_size + in_size;
22 | }
23 |
24 | int SpeechWrap::size()
25 | {
26 | return total_size;
27 | }
28 |
29 | void SpeechWrap::update(int offset)
30 | {
31 | int in_offset = offset - cache_size;
32 | cache_size = (total_size - offset);
33 | memcpy(cache, in + in_offset, cache_size * sizeof(float));
34 | }
35 |
36 | float &SpeechWrap::operator[](int i)
37 | {
38 | return i < cache_size ? cache[i] : in[i - cache_size];
39 | }
40 |
--------------------------------------------------------------------------------
/cpp_onnx/src/SpeechWrap.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef SPEECHWRAP_H
3 | #define SPEECHWRAP_H
4 |
5 | #include
6 |
// Presents a small cache of earlier samples followed by a caller-owned input
// buffer as one indexable sequence.
class SpeechWrap {
  private:
    float cache[400];    // samples carried over from earlier input
    int cache_size;      // valid entries in cache
    float *in;           // current input buffer (not owned)
    int in_size;         // samples in `in`
    int total_size;      // cache_size + in_size at the last load()
    int next_cache_size; // not referenced in SpeechWrap.cpp

  public:
    SpeechWrap();
    ~SpeechWrap();
    void load(float *din, int len);
    void update(int offset);
    void reset();
    int size();
    float &operator[](int i);
};
25 |
26 | #endif
27 |
--------------------------------------------------------------------------------
/cpp_onnx/src/Tensor.h:
--------------------------------------------------------------------------------
1 | #ifndef TENSOR_H
2 | #define TENSOR_H
3 |
4 | #include "alignedmem.h"
5 |
6 | using namespace std;
7 |
8 | template class Tensor {
9 | private:
10 | void alloc_buff();
11 | void free_buff();
12 | int mem_size;
13 |
14 | public:
15 | T *buff;
16 | int size[4];
17 | int buff_size;
18 | Tensor(Tensor *in);
19 | Tensor(int a);
20 | Tensor(int a, int b);
21 | Tensor(int a, int b, int c);
22 | Tensor(int a, int b, int c, int d);
23 | ~Tensor();
24 | void zeros();
25 | void shape();
26 | void disp();
27 | void dump(const char *mode);
28 | void concat(Tensor *din, int dim);
29 | void resize(int a, int b, int c, int d);
30 | void add(float coe, Tensor *in);
31 | void add(Tensor *in);
32 | void add(Tensor *in1, Tensor *in2);
33 | void reload(Tensor *in);
34 | };
35 |
36 | template Tensor::Tensor(int a) : size{1, 1, 1, a}
37 | {
38 | alloc_buff();
39 | }
40 |
41 | template Tensor::Tensor(int a, int b) : size{1, 1, a, b}
42 | {
43 | alloc_buff();
44 | }
45 |
46 | template Tensor::Tensor(int a, int b, int c) : size{1, a, b, c}
47 | {
48 |
49 | alloc_buff();
50 | }
51 |
52 | template
53 | Tensor::Tensor(int a, int b, int c, int d) : size{a, b, c, d}
54 | {
55 | alloc_buff();
56 | }
57 |
58 | template Tensor::Tensor(Tensor *in)
59 | {
60 | memcpy(size, in->size, 4 * sizeof(int));
61 | alloc_buff();
62 | memcpy(buff, in->buff, in->buff_size * sizeof(T));
63 | }
64 |
65 | template Tensor::~Tensor()
66 | {
67 | free_buff();
68 | }
69 |
70 | template void Tensor::alloc_buff()
71 | {
72 | buff_size = size[0] * size[1] * size[2] * size[3];
73 | mem_size = buff_size;
74 | buff = (T *)aligned_malloc(32, buff_size * sizeof(T));
75 | }
76 |
77 | template void Tensor::free_buff()
78 | {
79 | aligned_free(buff);
80 | }
81 |
82 | template void Tensor::zeros()
83 | {
84 | memset(buff, 0, buff_size * sizeof(T));
85 | }
86 |
87 | template void Tensor::shape()
88 | {
89 | printf("(%d,%d,%d,%d)\n", size[0], size[1], size[2], size[3]);
90 | }
91 |
92 | // TODO:: fix it!!!!
93 | template void Tensor::concat(Tensor *din, int dim)
94 | {
95 | memcpy(buff + buff_size, din->buff, din->buff_size * sizeof(T));
96 | buff_size += din->buff_size;
97 | size[dim] += din->size[dim];
98 | }
99 |
100 | // TODO:: fix it!!!!
101 | template void Tensor::resize(int a, int b, int c, int d)
102 | {
103 | size[0] = a;
104 | size[1] = b;
105 | size[2] = c;
106 | size[3] = d;
107 | buff_size = size[0] * size[1] * size[2] * size[3];
108 | }
109 |
110 | template void Tensor::add(float coe, Tensor *in)
111 | {
112 | int i;
113 | for (i = 0; i < buff_size; i++) {
114 | buff[i] = buff[i] + coe * in->buff[i];
115 | }
116 | }
117 |
118 | template void Tensor::add(Tensor *in)
119 | {
120 | int i;
121 | for (i = 0; i < buff_size; i++) {
122 | buff[i] = buff[i] + in->buff[i];
123 | }
124 | }
125 |
126 | template void Tensor::add(Tensor *in1, Tensor *in2)
127 | {
128 | int i;
129 | for (i = 0; i < buff_size; i++) {
130 | buff[i] = buff[i] + in1->buff[i] + in2->buff[i];
131 | }
132 | }
133 |
134 | template void Tensor::reload(Tensor *in)
135 | {
136 | memcpy(buff, in->buff, in->buff_size * sizeof(T));
137 | }
138 |
139 | template void Tensor::disp()
140 | {
141 | int i;
142 | for (i = 0; i < buff_size; i++) {
143 | cout << buff[i] << " ";
144 | }
145 | cout << endl;
146 | }
147 |
148 | template void Tensor::dump(const char *mode)
149 | {
150 | FILE *fp;
151 | fp = fopen("tmp.bin", mode);
152 | fwrite(buff, 1, buff_size * sizeof(T), fp);
153 | fclose(fp);
154 | }
155 | #endif
156 |
--------------------------------------------------------------------------------
/cpp_onnx/src/Vocab.cpp:
--------------------------------------------------------------------------------
1 | #include "Vocab.h"
2 |
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | using namespace std;
10 |
11 | Vocab::Vocab(const char *filename)
12 | {
13 | ifstream in(filename);
14 | string line;
15 |
16 | if (in) // 有该文件
17 | {
18 | while (getline(in, line)) // line中不包括每行的换行符
19 | {
20 | vocab.push_back(line);
21 | }
22 | // cout << vocab[1719] << endl;
23 | }
24 | // else // 没有该文件
25 | //{
26 | // cout << "no such file" << endl;
27 | // }
28 | }
29 | Vocab::~Vocab()
30 | {
31 | }
32 |
33 | string Vocab::vector2string(vector in)
34 | {
35 | int i;
36 | stringstream ss;
37 | for (auto it = in.begin(); it != in.end(); it++) {
38 | ss << vocab[*it];
39 | }
40 |
41 | return ss.str();
42 | }
43 |
// Decodes the first three bytes of `str` as one three-byte UTF-8 sequence
// (1110xxxx 10xxxxxx 10xxxxxx) and returns the code point, or 0 if the bytes
// do not have that form.
int str2int(string str)
{
    const char *bytes = str.c_str();

    // Checked one byte at a time so a short string is never read past its
    // terminating NUL.
    if ((bytes[0] & 0xf0) != 0xe0)
        return 0;
    if ((bytes[1] & 0xc0) != 0x80)
        return 0;
    if ((bytes[2] & 0xc0) != 0x80)
        return 0;

    int code = (bytes[0] & 0x0f) << 12;
    code |= (bytes[1] & 0x3f) << 6;
    code |= (bytes[2] & 0x3f);
    return code;
}
55 |
56 | bool Vocab::isChinese(string ch)
57 | {
58 | if (ch.size() != 3) {
59 | return false;
60 | }
61 |
62 | int unicode = str2int(ch);
63 | if (unicode >= 19968 && unicode <= 40959) {
64 | return true;
65 | }
66 |
67 | return false;
68 | }
69 |
70 |
71 | string Vocab::vector2stringV2(vector in)
72 | {
73 | int i;
74 | list words;
75 |
76 | int is_pre_english = false;
77 | int pre_english_len = 0;
78 |
79 | int is_combining = false;
80 | string combine = "";
81 |
82 | for (auto it = in.begin(); it != in.end(); it++) {
83 | string word = vocab[*it];
84 |
85 | // step1 space character skips
86 | if (word == "" || word == "" || word == "")
87 | continue;
88 |
89 | // step2 combie phoneme to full word
90 | {
91 | int sub_word = !(word.find("@@") == string::npos);
92 |
93 | // process word start and middle part
94 | if (sub_word) {
95 | combine += word.erase(word.length() - 2);
96 | is_combining = true;
97 | continue;
98 | }
99 | // process word end part
100 | else if (is_combining) {
101 | combine += word;
102 | is_combining = false;
103 | word = combine;
104 | combine = "";
105 | }
106 | }
107 |
108 | // step3 process english word deal with space , turn abbreviation to upper case
109 | {
110 |
111 | // input word is chinese, not need process
112 | if (isChinese(word)) {
113 | words.push_back(word);
114 | is_pre_english = false;
115 | }
116 | // input word is english word
117 | else {
118 |
119 | // pre word is chinese
120 | if (!is_pre_english) {
121 | word[0] = word[0] - 32;
122 | words.push_back(word);
123 | pre_english_len = word.size();
124 |
125 | }
126 |
127 | // pre word is english word
128 | else {
129 |
130 | // single letter turn to upper case
131 | if (word.size() == 1) {
132 | word[0] = word[0] - 32;
133 | }
134 |
135 | if (pre_english_len > 1) {
136 | words.push_back(" ");
137 | words.push_back(word);
138 | pre_english_len = word.size();
139 | }
140 | else {
141 | if (word.size() > 1) {
142 | words.push_back(" ");
143 | }
144 | words.push_back(word);
145 | pre_english_len = word.size();
146 | }
147 | }
148 |
149 | is_pre_english = true;
150 |
151 | }
152 | }
153 | }
154 |
155 | // for (auto it = words.begin(); it != words.end(); it++) {
156 | // cout << *it << endl;
157 | // }
158 |
159 | stringstream ss;
160 | for (auto it = words.begin(); it != words.end(); it++) {
161 | ss << *it;
162 | }
163 |
164 | return ss.str();
165 | }
166 |
167 | int Vocab::size()
168 | {
169 | return vocab.size();
170 | }
171 |
--------------------------------------------------------------------------------
/cpp_onnx/src/Vocab.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef VOCAB_H
3 | #define VOCAB_H
4 |
5 | #include
6 | #include
7 | #include
8 | using namespace std;
9 |
// Token table loaded from a text file, plus helpers that turn id sequences
// into text. The container template arguments were stripped from this listing
// and are restored (tokens are strings, ids are ints).
class Vocab {
  private:
    vector<string> vocab; // token for each id, in file order
    bool isChinese(string ch);
    bool isEnglish(string ch); // declared only; no definition in Vocab.cpp

  public:
    Vocab(const char *filename);
    ~Vocab();
    int size();
    string vector2string(vector<int> in);
    string vector2stringV2(vector<int> in);
};
23 |
24 | #endif
25 |
--------------------------------------------------------------------------------
/cpp_onnx/src/alignedmem.cpp:
--------------------------------------------------------------------------------
1 | #include "precomp.h"
// Allocates `required_bytes` bytes whose address is a multiple of `alignment`.
//
// The block is over-allocated with malloc(); the pointer malloc() returned is
// stored in the word immediately before the aligned address so aligned_free()
// can recover it.
//
// Returns NULL if `alignment` is zero or not a power of two, if the size
// computation would overflow, or if malloc() fails.
void *aligned_malloc(size_t alignment, size_t required_bytes)
{
    // The mask arithmetic below is only valid for powers of two.
    if (alignment == 0 || (alignment & (alignment - 1)) != 0)
        return NULL;
    // The bookkeeping slot in front of the block must itself be aligned.
    if (alignment < sizeof(void *))
        alignment = sizeof(void *);

    const size_t offset = alignment - 1 + sizeof(void *);
    if (required_bytes > (size_t)-1 - offset)
        return NULL;

    void *raw = malloc(required_bytes + offset); // original block
    if (raw == NULL)
        return NULL;

    // aligned block
    void **aligned = (void **)(((size_t)raw + offset) & ~(alignment - 1));
    aligned[-1] = raw;
    return aligned;
}
14 |
// Releases a block obtained from aligned_malloc(). NULL is ignored, like
// free(NULL); previously it dereferenced the word in front of address 0.
void aligned_free(void *p)
{
    if (p == NULL)
        return;
    // aligned_malloc() stored the original malloc() pointer just before p.
    free(((void **)p)[-1]);
}
19 |
--------------------------------------------------------------------------------
/cpp_onnx/src/alignedmem.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef ALIGNEDMEM_H
3 | #define ALIGNEDMEM_H
4 |
5 |
6 |
// Allocate `required_bytes` bytes aligned to `alignment`; release only with
// aligned_free().
void *aligned_malloc(size_t alignment, size_t required_bytes);
// Release a block returned by aligned_malloc().
void aligned_free(void *p);
9 |
10 | #endif
11 |
--------------------------------------------------------------------------------
/cpp_onnx/src/commonfunc.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 |
// Result object handed out by the RapidAsrRecog* API calls.
struct RPASR_RECOG_RESULT
{
    std::string msg;    // recognised text, concatenated over all segments
    float snippet_time; // value of Audio::get_time_len() for the input
};
9 |
10 |
11 | #ifdef _WIN32
12 | #include
13 |
14 |
15 |
16 | inline std::wstring string2wstring(const std::string& str, const std::string& locale)
17 | {
18 | typedef std::codecvt_byname F;
19 | std::wstring_convert strCnv(new F(locale));
20 | return strCnv.from_bytes(str);
21 | }
22 |
23 | inline std::wstring strToWstr(std::string str) {
24 | if (str.length() == 0)
25 | return L"";
26 | return string2wstring(str, "zh-CN");
27 |
28 | }
29 |
30 | #endif
31 |
32 |
33 |
34 | inline void getInputName(Ort::Session* session, string& inputName,int nIndex=0) {
35 | size_t numInputNodes = session->GetInputCount();
36 | if (numInputNodes > 0) {
37 | Ort::AllocatorWithDefaultOptions allocator;
38 | {
39 | auto t = session->GetInputNameAllocated(nIndex, allocator);
40 | inputName = t.get();
41 |
42 | }
43 | }
44 | }
45 |
46 | inline void getOutputName(Ort::Session* session, string& outputName, int nIndex = 0) {
47 | size_t numOutputNodes = session->GetOutputCount();
48 | if (numOutputNodes > 0) {
49 | Ort::AllocatorWithDefaultOptions allocator;
50 | {
51 | auto t = session->GetOutputNameAllocated(nIndex, allocator);
52 | outputName = t.get();
53 |
54 | }
55 | }
56 | }
--------------------------------------------------------------------------------
/cpp_onnx/src/librapidasrapi.cpp:
--------------------------------------------------------------------------------
1 | #include "precomp.h"
2 | #ifdef __cplusplus
3 |
4 |
5 |
6 | // void __attribute__ ((visibility ("default"))) fun();
7 | extern "C" {
8 | #endif
9 |
10 |
11 | // APIs for qmasr
12 | _RAPIDASRAPI RPASR_HANDLE RapidAsrInit(const char* szModelDir, int nThreadNum)
13 | {
14 |
15 |
16 | Model* mm = create_model(szModelDir, nThreadNum);
17 |
18 | return mm;
19 | }
20 |
21 |
22 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback)
23 | {
24 |
25 |
26 | Model* pRecogObj = (Model*)handle;
27 |
28 | if (!pRecogObj)
29 | return nullptr;
30 |
31 | Audio audio(1);
32 | audio.loadwav(szBuf,nLen);
33 | audio.split();
34 |
35 | float* buff;
36 | int len;
37 | int flag=0;
38 | RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT;
39 | pResult->snippet_time = audio.get_time_len();
40 | int nStep = 0;
41 | int nTotal = audio.get_queue_size();
42 | while (audio.fetch(buff, len, flag) > 0) {
43 | pRecogObj->reset();
44 | string msg = pRecogObj->forward(buff, len, flag);
45 | pResult->msg += msg;
46 | nStep++;
47 | if (fnCallback)
48 | fnCallback(nStep, nTotal);
49 | }
50 |
51 |
52 | return pResult;
53 | }
54 |
55 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMBuffer(RPASR_HANDLE handle, const char* szBuf, int nLen, RPASR_MODE Mode, QM_CALLBACK fnCallback)
56 | {
57 |
58 | Model* pRecogObj = (Model*)handle;
59 |
60 | if (!pRecogObj)
61 | return nullptr;
62 |
63 | Audio audio(1);
64 | audio.loadpcmwav(szBuf, nLen);
65 | audio.split();
66 |
67 | float* buff;
68 | int len;
69 | int flag = 0;
70 | RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT;
71 | pResult->snippet_time = audio.get_time_len();
72 | int nStep = 0;
73 | int nTotal = audio.get_queue_size();
74 | while (audio.fetch(buff, len, flag) > 0) {
75 | pRecogObj->reset();
76 | string msg = pRecogObj->forward(buff, len, flag);
77 | pResult->msg += msg;
78 | nStep++;
79 | if (fnCallback)
80 | fnCallback(nStep, nTotal);
81 | }
82 |
83 |
84 | return pResult;
85 |
86 | }
87 |
88 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogPCMFile(RPASR_HANDLE handle, const char* szFileName, RPASR_MODE Mode, QM_CALLBACK fnCallback)
89 | {
90 |
91 | Model* pRecogObj = (Model*)handle;
92 |
93 | if (!pRecogObj)
94 | return nullptr;
95 |
96 | Audio audio(1);
97 | audio.loadpcmwav(szFileName);
98 | audio.split();
99 |
100 | float* buff;
101 | int len;
102 | int flag = 0;
103 | RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT;
104 | pResult->snippet_time = audio.get_time_len();
105 | int nStep = 0;
106 | int nTotal = audio.get_queue_size();
107 | while (audio.fetch(buff, len, flag) > 0) {
108 | pRecogObj->reset();
109 | string msg = pRecogObj->forward(buff, len, flag);
110 | pResult->msg += msg;
111 | nStep++;
112 | if (fnCallback)
113 | fnCallback(nStep, nTotal);
114 | }
115 |
116 |
117 | return pResult;
118 |
119 | }
120 |
121 | _RAPIDASRAPI RPASR_RESULT RapidAsrRecogFile(RPASR_HANDLE handle, const char* szWavfile, RPASR_MODE Mode, QM_CALLBACK fnCallback)
122 | {
123 | Model* pRecogObj = (Model*)handle;
124 |
125 | if (!pRecogObj)
126 | return nullptr;
127 |
128 | Audio audio(1);
129 | if(!audio.loadwav(szWavfile))
130 | return nullptr;
131 | audio.split();
132 |
133 | float* buff;
134 | int len;
135 | int flag = 0;
136 | int nStep = 0;
137 | int nTotal = audio.get_queue_size();
138 | RPASR_RECOG_RESULT* pResult = new RPASR_RECOG_RESULT;
139 | pResult->snippet_time = audio.get_time_len();
140 | while (audio.fetch(buff, len, flag) > 0) {
141 | pRecogObj->reset();
142 | string msg = pRecogObj->forward(buff, len, flag);
143 | pResult->msg+= msg;
144 | nStep++;
145 | if (fnCallback)
146 | fnCallback(nStep, nTotal);
147 | }
148 |
149 |
150 |
151 |
152 | return pResult;
153 | }
154 |
155 | _RAPIDASRAPI const int RapidAsrGetRetNumber(RPASR_RESULT Result)
156 | {
157 | if (!Result)
158 | return 0;
159 |
160 | return 1;
161 |
162 | }
163 |
164 |
165 | _RAPIDASRAPI const float RapidAsrGetRetSnippetTime(RPASR_RESULT Result)
166 | {
167 | if (!Result)
168 | return 0.0f;
169 |
170 | return ((RPASR_RECOG_RESULT*)Result)->snippet_time;
171 |
172 | }
173 |
174 | _RAPIDASRAPI const char* RapidAsrGetResult(RPASR_RESULT Result,int nIndex)
175 | {
176 | RPASR_RECOG_RESULT * pResult = (RPASR_RECOG_RESULT*)Result;
177 | if(!pResult)
178 | return nullptr;
179 |
180 | return pResult->msg.c_str();
181 |
182 | }
183 |
184 | _RAPIDASRAPI void RapidAsrFreeResult(RPASR_RESULT Result)
185 | {
186 |
187 | if (Result)
188 | {
189 | delete (RPASR_RECOG_RESULT*)Result;
190 |
191 | }
192 | }
193 |
194 | _RAPIDASRAPI void RapidAsrUninit(RPASR_HANDLE handle)
195 | {
196 |
197 | Model* pRecogObj = (Model*)handle;
198 |
199 |
200 | if (!pRecogObj)
201 | return;
202 |
203 | delete pRecogObj;
204 |
205 | }
206 |
207 |
208 |
209 | #ifdef __cplusplus
210 |
211 | }
212 | #endif
213 |
214 |
--------------------------------------------------------------------------------
/cpp_onnx/src/paraformer_onnx.cpp:
--------------------------------------------------------------------------------
1 | #include "precomp.h"
2 |
3 | using namespace std;
4 | using namespace paraformer;
5 |
6 | ModelImp::ModelImp(const char* path,int nNumThread)
7 | {
8 | string model_path = pathAppend(path, "model.onnx");
9 | string vocab_path = pathAppend(path, "vocab.txt");
10 |
11 | fe = new FeatureExtract(3);
12 |
13 | sessionOptions.SetInterOpNumThreads(nNumThread);
14 | sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
15 |
16 | #ifdef _WIN32
17 | wstring wstrPath = strToWstr(model_path);
18 | m_session = new Ort::Session(env, wstrPath.c_str(), sessionOptions);
19 | #else
20 | m_session = new Ort::Session(env, model_path.c_str(), sessionOptions);
21 | #endif
22 |
23 | string strName;
24 | getInputName(m_session, strName);
25 | m_strInputNames.push_back(strName.c_str());
26 | getInputName(m_session, strName,1);
27 | m_strInputNames.push_back(strName);
28 |
29 | getOutputName(m_session, strName);
30 | m_strOutputNames.push_back(strName);
31 | getOutputName(m_session, strName,1);
32 | m_strOutputNames.push_back(strName);
33 |
34 | for (auto& item : m_strInputNames)
35 | m_szInputNames.push_back(item.c_str());
36 | for (auto& item : m_strOutputNames)
37 | m_szOutputNames.push_back(item.c_str());
38 | vocab = new Vocab(vocab_path.c_str());
39 | }
40 |
41 | ModelImp::~ModelImp()
42 | {
43 | if(fe)
44 | delete fe;
45 | if (m_session)
46 | {
47 | delete m_session;
48 | m_session = nullptr;
49 | }
50 | if(vocab)
51 | delete vocab;
52 | }
53 |
54 | void ModelImp::reset()
55 | {
56 | fe->reset();
57 | }
58 |
59 | void ModelImp::apply_lfr(Tensor*& din)
60 | {
61 | int mm = din->size[2];
62 | int ll = ceil(mm / 6.0);
63 | Tensor* tmp = new Tensor(ll, 560);
64 | int out_offset = 0;
65 | for (int i = 0; i < ll; i++) {
66 | for (int j = 0; j < 7; j++) {
67 | int idx = i * 6 + j - 3;
68 | if (idx < 0) {
69 | idx = 0;
70 | }
71 | if (idx >= mm) {
72 | idx = mm - 1;
73 | }
74 | memcpy(tmp->buff + out_offset, din->buff + idx * 80,
75 | sizeof(float) * 80);
76 | out_offset += 80;
77 | }
78 | }
79 | delete din;
80 | din = tmp;
81 | }
82 |
83 | void ModelImp::apply_cmvn(Tensor* din)
84 | {
85 | const float* var;
86 | const float* mean;
87 | float scale = 22.6274169979695;
88 | int m = din->size[2];
89 | int n = din->size[3];
90 |
91 | var = (const float*)paraformer_cmvn_var_hex;
92 | mean = (const float*)paraformer_cmvn_mean_hex;
93 | for (int i = 0; i < m; i++) {
94 | for (int j = 0; j < n; j++) {
95 | int idx = i * n + j;
96 | din->buff[idx] = (din->buff[idx] + mean[j]) * var[j];
97 | }
98 | }
99 | }
100 |
101 | string ModelImp::greedy_search(float * in, int nLen )
102 | {
103 | vector hyps;
104 | int Tmax = nLen;
105 | for (int i = 0; i < Tmax; i++) {
106 | int max_idx;
107 | float max_val;
108 | findmax(in + i * 8404, 8404, max_val, max_idx);
109 | hyps.push_back(max_idx);
110 | }
111 |
112 | return vocab->vector2stringV2(hyps);
113 | }
114 |
115 | string ModelImp::forward(float* din, int len, int flag)
116 | {
117 |
118 | Tensor* in;
119 | fe->insert(din, len, flag);
120 | fe->fetch(in);
121 | apply_lfr(in);
122 | apply_cmvn(in);
123 | Ort::RunOptions run_option;
124 |
125 | std::array input_shape_{ in->size[0],in->size[2],in->size[3] };
126 | Ort::Value onnx_feats = Ort::Value::CreateTensor(m_memoryInfo,
127 | in->buff,
128 | in->buff_size,
129 | input_shape_.data(),
130 | input_shape_.size());
131 |
132 | std::vector feats_len{ in->size[2] };
133 | std::vector feats_len_dim{ 1 };
134 | Ort::Value onnx_feats_len = Ort::Value::CreateTensor(
135 | m_memoryInfo,
136 | feats_len.data(),
137 | feats_len.size() * sizeof(int32_t),
138 | feats_len_dim.data(),
139 | feats_len_dim.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32);
140 | std::vector input_onnx;
141 | input_onnx.emplace_back(std::move(onnx_feats));
142 | input_onnx.emplace_back(std::move(onnx_feats_len));
143 |
144 | string result;
145 | try {
146 |
147 | auto outputTensor = m_session->Run(run_option, m_szInputNames.data(), input_onnx.data(), m_szInputNames.size(), m_szOutputNames.data(), m_szOutputNames.size());
148 | std::vector outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
149 |
150 |
151 | int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies());
152 | float* floatData = outputTensor[0].GetTensorMutableData();
153 | auto encoder_out_lens = outputTensor[1].GetTensorMutableData();
154 | result = greedy_search(floatData, *encoder_out_lens);
155 | }
156 | catch (...)
157 | {
158 | result = "";
159 | }
160 |
161 |
162 | if(in)
163 | delete in;
164 |
165 | return result;
166 | }
167 |
168 | string ModelImp::forward_chunk(float* din, int len, int flag)
169 | {
170 |
171 | printf("Not Imp!!!!!!\n");
172 | return "Hello";
173 | }
174 |
175 | string ModelImp::rescoring()
176 | {
177 | printf("Not Imp!!!!!!\n");
178 | return "Hello";
179 | }
180 |
--------------------------------------------------------------------------------
/cpp_onnx/src/paraformer_onnx.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 |
4 | #ifndef PARAFORMER_MODELIMP_H
5 | #define PARAFORMER_MODELIMP_H
6 |
7 |
8 |
9 |
10 |
11 | namespace paraformer {
12 |
13 | class ModelImp : public Model {
14 | private:
15 | FeatureExtract* fe;
16 |
17 | Vocab* vocab;
18 |
19 | void apply_lfr(Tensor*& din);
20 | void apply_cmvn(Tensor* din);
21 |
22 |
23 | string greedy_search( float* in, int nLen);
24 |
25 | #ifdef _WIN_X86
26 | Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
27 | #else
28 | Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
29 | #endif
30 |
31 | Ort::Session* m_session = nullptr;
32 | Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "paraformer");
33 | Ort::SessionOptions sessionOptions = Ort::SessionOptions();
34 |
35 | vector m_strInputNames, m_strOutputNames;
36 | vector m_szInputNames;
37 | vector m_szOutputNames;
38 | //string m_strInputName, m_strInputNameLen;
39 | //string m_strOutputName, m_strOutputNameLen;
40 |
41 | public:
42 | ModelImp(const char* path, int nNumThread=0);
43 | ~ModelImp();
44 | void reset();
45 | string forward_chunk(float* din, int len, int flag);
46 | string forward(float* din, int len, int flag);
47 | string rescoring();
48 |
49 | };
50 |
51 | } // namespace paraformer
52 | #endif
53 |
--------------------------------------------------------------------------------
/cpp_onnx/src/precomp.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | // system
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include
17 |
18 |
19 | #include
20 |
21 | using namespace std;
22 | // third part
23 |
24 | #include
25 | #include "onnxruntime_run_options_config_keys.h"
26 | #include "onnxruntime_cxx_api.h"
27 |
28 |
29 | // mine
30 |
31 | #include "commonfunc.h"
32 | #include
33 | #include "predefine_coe.h"
34 |
35 | #include
36 | //#include "alignedmem.h"
37 | #include "Vocab.h"
38 | #include "Tensor.h"
39 | #include "util.h"
40 | #include "CommonStruct.h"
41 | #include "FeatureExtract.h"
42 | #include "FeatureQueue.h"
43 | #include "SpeechWrap.h"
44 | #include
45 | #include "Model.h"
46 | #include "paraformer_onnx.h"
47 | #include "librapidasrapi.h"
48 |
49 |
50 | using namespace paraformer;
51 |
--------------------------------------------------------------------------------
/cpp_onnx/src/tmp.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef WENETPARAMS_H
3 | #define WENETPARAMS_H
4 | // #pragma pack(1)
5 |
6 | #define vocab_size 5538
7 |
8 | typedef struct {
9 | float conv0_weight[512 * 9];
10 | float conv0_bias[512];
11 |
12 | float conv1_weight[512 * 512 * 9];
13 | float conv1_bias[512];
14 |
15 | float out0_weight[9728 * 512];
16 | float out0_bias[512];
17 |
18 | } EncEmbedParams;
19 |
20 | typedef struct {
21 | float linear_q_weight[512 * 512];
22 | float linear_q_bias[512];
23 | float linear_k_weight[512 * 512];
24 | float linear_k_bias[512];
25 | float linear_v_weight[512 * 512];
26 | float linear_v_bias[512];
27 | float linear_out_weight[512 * 512];
28 | float linear_out_bias[512];
29 | } SelfAttnParams;
30 |
31 | typedef struct {
32 | SelfAttnParams linear0;
33 | float linear_pos_weight[512 * 512];
34 | float pos_bias_u[512];
35 | float pos_bias_v[512];
36 |
37 | } EncSelfAttnParams;
38 |
39 | typedef struct {
40 | float w1_weight[512 * 2048];
41 | float w1_bias[2048];
42 | float w2_weight[2048 * 512];
43 | float w2_bias[512];
44 | } FeedForwardParams;
45 |
46 | typedef struct {
47 | float weight[512];
48 | float bias[512];
49 | } NormParams;
50 |
51 | typedef struct {
52 | float pointwise_conv1_weight[1024 * 512];
53 | float pointwise_conv1_bias[1024];
54 |
55 | float depthwise_conv_weight[512 * 15];
56 | float depthwise_conv_bias[512];
57 |
58 | float pointwise_conv2_weight[512 * 512];
59 | float pointwise_conv2_bias[512];
60 | NormParams norm;
61 | } EncConvParams;
62 |
63 | typedef struct {
64 | EncSelfAttnParams self_attn;
65 | FeedForwardParams feedforward;
66 | FeedForwardParams feedforward_macaron;
67 | EncConvParams conv_module;
68 | NormParams norm_ff;
69 | NormParams norm_mha;
70 | NormParams norm_macaron;
71 | NormParams norm_conv;
72 | NormParams norm_final;
73 | // float concat_weight[1024 * 512];
74 | // float concat_bias[512];
75 | } SubEncoderParams;
76 |
77 | typedef struct {
78 | EncEmbedParams embed;
79 | SubEncoderParams sub_encoder[12];
80 | NormParams after_norm;
81 | } EncoderParams;
82 |
83 | typedef struct {
84 | SelfAttnParams self_attn;
85 | SelfAttnParams src_attn;
86 | FeedForwardParams feedward;
87 | NormParams norm1;
88 | NormParams norm2;
89 | NormParams norm3;
90 | // float concat_weight1[1024 * 512];
91 | // float concat_bias1[512];
92 | // float concat_weight2[1024 * 512];
93 | // float concat_bias2[512];
94 | } SubDecoderParams;
95 |
96 | typedef struct {
97 | float embed_weight[vocab_size * 512];
98 | SubDecoderParams sub_decoder[6];
99 | NormParams after_norm;
100 | float output_weight[vocab_size * 512];
101 | float output_bias[vocab_size];
102 | } DecoderParams;
103 |
104 | typedef struct {
105 | EncoderParams encoder;
106 | float ctc_weight[512 * vocab_size];
107 | float ctc_bias[vocab_size];
108 | DecoderParams decoder;
109 | } WenetParams;
110 |
111 | // #pragma pack()
112 | #endif
113 |
--------------------------------------------------------------------------------
/cpp_onnx/src/util.cpp:
--------------------------------------------------------------------------------
1 |
2 | #include "precomp.h"
3 |
4 | float *loadparams(const char *filename)
5 | {
6 |
7 | FILE *fp;
8 | fp = fopen(filename, "rb");
9 | fseek(fp, 0, SEEK_END);
10 | uint32_t nFileLen = ftell(fp);
11 | fseek(fp, 0, SEEK_SET);
12 |
13 | float *params_addr = (float *)aligned_malloc(32, nFileLen);
14 | int n = fread(params_addr, 1, nFileLen, fp);
15 | fclose(fp);
16 |
17 | return params_addr;
18 | }
19 |
// Rounds `val` up to the nearest multiple of `align`
// (ceil(val / align) * align).
// Uses integer arithmetic: the float version lost precision for values above
// 2^24 and divided by zero for align == 0. With align == 0 the value is
// returned unchanged.
int val_align(int val, int align)
{
    if (align == 0)
        return val;

    int quot = val / align; // truncates toward zero
    const int rem = val % align;
    // Truncation equals the ceiling unless the exact quotient is positive and
    // fractional, i.e. the remainder is non-zero with the same sign as align.
    if (rem != 0 && ((rem < 0) == (align < 0)))
        quot++;
    return quot * align;
}
25 |
// Prints `size` floats from `din` with "%f", separated by spaces, then a
// newline.
void disp_params(float *din, int size)
{
    int remaining = size;
    while (remaining-- > 0) {
        printf("%f ", *din++);
    }
    printf("\n");
}
// Writes `len` bytes starting at `data` to `filename`, replacing any existing
// file. Does nothing if the file cannot be opened (previously a failed
// fopen() led to fwrite() on a NULL stream).
void SaveDataFile(const char *filename, void *data, uint32_t len)
{
    FILE *fp = fopen(filename, "wb+");
    if (fp == NULL)
        return;
    if (data != NULL && len > 0)
        fwrite(data, 1, len, fp);
    fclose(fp);
}
41 |
42 | void basic_norm(Tensor *&din, float norm)
43 | {
44 |
45 | int Tmax = din->size[2];
46 |
47 | int i, j;
48 | for (i = 0; i < Tmax; i++) {
49 | float sum = 0;
50 | for (j = 0; j < 512; j++) {
51 | int ii = i * 512 + j;
52 | sum += din->buff[ii] * din->buff[ii];
53 | }
54 | float mean = sqrt(sum / 512 + norm);
55 | for (j = 0; j < 512; j++) {
56 | int ii = i * 512 + j;
57 | din->buff[ii] = din->buff[ii] / mean;
58 | }
59 | }
60 | }
61 |
/**
 * Locate the largest element of din[0 .. len).
 *
 * @param din      Input values.
 * @param len      Number of values to scan.
 * @param max_val  Receives the maximum, or -INFINITY when nothing is larger.
 * @param max_idx  Receives the index of the first maximum, or -1 when no
 *                 element exceeds -INFINITY (including len <= 0).
 */
void findmax(float *din, int len, float &max_val, int &max_idx)
{
    max_idx = -1;
    max_val = -INFINITY;

    int pos = 0;
    while (pos < len) {
        // Strict comparison keeps the earliest index among equal maxima.
        if (din[pos] > max_val) {
            max_idx = pos;
            max_val = din[pos];
        }
        ++pos;
    }
}
74 |
/**
 * Join two path fragments with the platform separator ('\\' on Windows,
 * '/' elsewhere), inserting the separator only when p1 does not already end
 * with it.
 *
 * @param p1  Leading fragment. When empty, p2 is returned unchanged; the
 *            previous version indexed p1[length() - 1] out of range here.
 * @param p2  Trailing fragment.
 * @return The joined path.
 */
std::string pathAppend(const std::string &p1, const std::string &p2)
{
#ifdef _WIN32
    const char sep = '\\';
#else
    const char sep = '/';
#endif

    if (p1.empty())
        return p2;

    if (p1[p1.length() - 1] == sep)
        return p1 + p2;

    std::string joined = p1;
    joined += sep;
    joined += p2;
    return joined;
}
91 |
92 | void relu(Tensor *din)
93 | {
94 | int i;
95 | for (i = 0; i < din->buff_size; i++) {
96 | float val = din->buff[i];
97 | din->buff[i] = val < 0 ? 0 : val;
98 | }
99 | }
100 |
101 | void swish(Tensor *din)
102 | {
103 | int i;
104 | for (i = 0; i < din->buff_size; i++) {
105 | float val = din->buff[i];
106 | din->buff[i] = val / (1 + exp(-val));
107 | }
108 | }
109 |
110 | void sigmoid(Tensor *din)
111 | {
112 | int i;
113 | for (i = 0; i < din->buff_size; i++) {
114 | float val = din->buff[i];
115 | din->buff[i] = 1 / (1 + exp(-val));
116 | }
117 | }
118 |
119 | void doubleswish(Tensor *din)
120 | {
121 | int i;
122 | for (i = 0; i < din->buff_size; i++) {
123 | float val = din->buff[i];
124 | din->buff[i] = val / (1 + exp(-val + 1));
125 | }
126 | }
127 |
/**
 * Masked softmax, computed in place.
 *
 * The first mask entries of din are replaced by their softmax (the maximum is
 * subtracted before exponentiation); entries mask .. len-1 are set to 0.
 *
 * @param din   Buffer holding at least max(mask, len) floats.
 * @param mask  Number of leading entries that take part in the softmax.
 *              Negative values are treated as 0 (previously they caused a
 *              write before the start of the buffer).
 * @param len   Total number of entries in din.
 *
 * The temporary heap buffer of the previous version (and its unchecked
 * malloc) is gone; each exponential is stored directly in din.
 */
void softmax(float *din, int mask, int len)
{
    if (mask < 0)
        mask = 0;

    float peak = -INFINITY;
    for (int i = 0; i < mask; i++) {
        peak = peak < din[i] ? din[i] : peak;
    }

    float sum = 0;
    for (int i = 0; i < mask; i++) {
        din[i] = exp(din[i] - peak);
        sum += din[i];
    }
    for (int i = 0; i < mask; i++) {
        din[i] = din[i] / sum;
    }

    // Everything past the mask carries no probability mass.
    for (int i = mask; i < len; i++) {
        din[i] = 0;
    }
}
151 |
/**
 * Log-softmax over din[0 .. len), computed in place.
 *
 * Uses the log-sum-exp form: din[i] = (din[i] - max) - log(sum(exp(din - max))).
 * The previous version exponentiated the raw inputs, which produced NaN for
 * large values (inf / inf) and -inf for strongly negative ones (log(0)), and
 * relied on an unchecked malloc.
 *
 * @param din  Values to transform; ignored when NULL.
 * @param len  Number of values; nothing happens when len <= 0.
 */
void log_softmax(float *din, int len)
{
    if (din == NULL || len <= 0)
        return;

    float peak = din[0];
    for (int i = 1; i < len; i++) {
        if (din[i] > peak)
            peak = din[i];
    }

    float sum = 0;
    for (int i = 0; i < len; i++) {
        sum += exp(din[i] - peak);
    }

    // sum >= 1 because the maximum element contributes exp(0).
    const float log_sum = log(sum);
    for (int i = 0; i < len; i++) {
        din[i] = (din[i] - peak) - log_sum;
    }
}
166 |
167 | void glu(Tensor *din, Tensor *dout)
168 | {
169 | int mm = din->buff_size / 1024;
170 | int i, j;
171 | for (i = 0; i < mm; i++) {
172 | for (j = 0; j < 512; j++) {
173 | int in_off = i * 1024 + j;
174 | int out_off = i * 512 + j;
175 | float a = din->buff[in_off];
176 | float b = din->buff[in_off + 512];
177 | dout->buff[out_off] = a / (1 + exp(-b));
178 | }
179 | }
180 | }
181 |
--------------------------------------------------------------------------------
/cpp_onnx/src/util.h:
--------------------------------------------------------------------------------
1 |
2 |
#ifndef UTIL_H
#define UTIL_H

// NOTE(review): this directive affects every file that includes this header.
using namespace std;

// Reads a whole file into a buffer obtained from aligned_malloc(32, size).
extern float *loadparams(const char *filename);

// Writes len bytes starting at data to filename (opened with mode "wb+").
extern void SaveDataFile(const char *filename, void *data, uint32_t len);
// In-place element-wise activations over din->buff[0 .. buff_size).
extern void relu(Tensor *din);
extern void swish(Tensor *din);
extern void sigmoid(Tensor *din);
extern void doubleswish(Tensor *din);

// In-place softmax over the first mask entries; entries mask .. len-1 are zeroed.
extern void softmax(float *din, int mask, int len);

// In-place log-softmax over din[0 .. len).
extern void log_softmax(float *din, int len);
// Rounds val up to a multiple of align.
extern int val_align(int val, int align);
// Prints size floats from din on one line of stdout.
extern void disp_params(float *din, int size);

// Divides each group of 512 values in din->buff by sqrt(mean square + norm).
extern void basic_norm(Tensor *&din, float norm);

// Stores the largest value of din[0 .. len) and its index; index is -1 if none.
extern void findmax(float *din, int len, float &max_val, int &max_idx);

// Gated linear unit: 1024 inputs per group in din produce 512 outputs in dout.
extern void glu(Tensor *din, Tensor *dout);

// Joins two path fragments with the platform path separator.
string pathAppend(const string &p1, const string &p2);

#endif
31 |
--------------------------------------------------------------------------------
/cpp_onnx/tester/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
2 |
# On Windows, add the library search directory that matches the target
# architecture (x64 when CMAKE_CL_64 is set, x86 otherwise).
if(WIN32)
    if(CMAKE_CL_64)
        link_directories( ${CMAKE_SOURCE_DIR}/win/lib/x64 )
    else()
        link_directories( ${CMAKE_SOURCE_DIR}/win/lib/x86 )
    endif()
endif()

# Libraries the tester executable is linked against.
set(EXTRA_LIBS rapidasr)


# Headers come from the include directory at the source root.
include_directories(${CMAKE_SOURCE_DIR}/include)
set(EXECNAME "tester")

# Build the tester from its single source file and link it.
add_executable(${EXECNAME} "tester.cpp")
target_link_libraries(${EXECNAME} PUBLIC ${EXTRA_LIBS})
19 |
20 |
21 |
--------------------------------------------------------------------------------
/cpp_onnx/tester/tester.cpp:
--------------------------------------------------------------------------------
1 |
2 | #ifndef _WIN32
3 | #include
4 | #else
5 | #include
6 | #endif
7 |
8 | #include "librapidasrapi.h"
9 |
10 | #include
11 |
12 | using namespace std;
13 |
14 | int main(int argc, char *argv[])
15 | {
16 |
17 | if (argc < 2)
18 | {
19 | printf("Usage: %s /path/to/model_dir /path/to/wav/file", argv[0]);
20 | exit(-1);
21 | }
22 | struct timeval start, end;
23 | gettimeofday(&start, NULL);
24 | int nThreadNum = 4;
25 | RPASR_HANDLE AsrHanlde=RapidAsrInit(argv[1], nThreadNum);
26 |
27 | if (!AsrHanlde)
28 | {
29 | printf("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]);
30 | exit(-1);
31 | }
32 |
33 |
34 |
35 | gettimeofday(&end, NULL);
36 | long seconds = (end.tv_sec - start.tv_sec);
37 | long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
38 | printf("Model initialization takes %lfs.\n", (double)modle_init_micros / 1000000);
39 |
40 |
41 |
42 | gettimeofday(&start, NULL);
43 |
44 | RPASR_RESULT Result=RapidAsrRecogFile(AsrHanlde, argv[2], RASR_NONE, NULL);
45 | gettimeofday(&end, NULL);
46 | float snippet_time = 0.0f;
47 | if (Result)
48 | {
49 | string msg = RapidAsrGetResult(Result, 0);
50 | setbuf(stdout, NULL);
51 | cout << "Result: \"";
52 | cout << msg << endl;
53 | cout << "\"." << endl;
54 | snippet_time = RapidAsrGetRetSnippetTime(Result);
55 | RapidAsrFreeResult(Result);
56 | }
57 | else
58 | {
59 | cout <<("no return data!");
60 | }
61 |
62 | printf("Audio length %lfs.\n", (double)snippet_time);
63 |
64 | seconds = (end.tv_sec - start.tv_sec);
65 | long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
66 | printf("Model inference takes %lfs.\n", (double)taking_micros / 1000000);
67 |
68 | printf("Model inference RTF: %04lf.\n", (double)taking_micros/ (snippet_time*1000000));
69 |
70 | RapidAsrUninit(AsrHanlde);
71 |
72 | return 0;
73 | }
74 |
75 |
76 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/CMakeLists.txt:
--------------------------------------------------------------------------------
1 |
2 |
# Select the platform macro for the compiled sources.
if(WIN32)
    add_definitions(-DWEBRTC_WIN)
else()
    add_definitions(-DWEBRTC_POSIX)
endif()


# Add the parent directory to the header search path.
include_directories("..")

# Collect every .c file below this directory, plus rtc_base/checks.cc.
file(GLOB_RECURSE files "*.c" "rtc_base/checks.cc")

# Print the collected file list at configure time.
message("${files}")

add_library(webrtcvad ${files})
17 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/complex_bit_reverse.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
12 |
13 | /* Tables for data buffer indexes that are bit reversed and thus need to be
14 | * swapped. Note that, index_7[{0, 2, 4, ...}] are for the left side of the swap
15 | * operations, while index_7[{1, 3, 5, ...}] are for the right side of the
16 | * operation. Same for index_8.
17 | */
18 |
19 | /* Indexes for the case of stages == 7. */
20 | static const int16_t index_7[112] = {
21 | 1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104,
22 | 12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52,
23 | 23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98,
24 | 37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70,
25 | 51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69,
26 | 81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125,
27 | 103, 115, 111, 123
28 | };
29 |
30 | /* Indexes for the case of stages == 8. */
31 | static const int16_t index_8[240] = {
32 | 1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80,
33 | 11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20,
34 | 40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184,
35 | 30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41,
36 | 148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76,
37 | 51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62,
38 | 124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82,
39 | 75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87,
40 | 234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101,
41 | 166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142,
42 | 115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131,
43 | 193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201,
44 | 149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171,
45 | 213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227,
46 | 203, 211, 207, 243, 215, 235, 223, 251, 239, 247
47 | };
48 |
/* Reorders complex_data in place into bit-reversed index order.
 *
 * complex_data is accessed through an int32_t pointer, so each swapped element
 * is one 32-bit word holding a 16-bit real and a 16-bit imaginary part.
 * stages is the log2 of the number of complex elements: 7 and 8 use the
 * precomputed swap tables index_7 / index_8, any other value computes the
 * bit-reversed partner of each index on the fly.
 */
void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) {
  /* For any specific value of stages, we know exactly the indexes that are
   * bit reversed. Currently (Feb. 2012) in WebRTC the only possible values of
   * stages are 7 and 8, so we use tables to save unnecessary iterations and
   * calculations for these two cases.
   */
  if (stages == 7 || stages == 8) {
    int m = 0;
    int length = 112;
    const int16_t* index = index_7;

    if (stages == 8) {
      length = 240;
      index = index_8;
    }

    /* Decimation in time. Swap the elements with bit-reversed indexes. */
    /* The table stores pairs: index[m] and index[m + 1] are swapped. */
    for (m = 0; m < length; m += 2) {
      /* We declare a int32_t* type pointer, to load both the 16-bit real
       * and imaginary elements from complex_data in one instruction, reducing
       * complexity.
       */
      int32_t* complex_data_ptr = (int32_t*)complex_data;
      int32_t temp = 0;

      temp = complex_data_ptr[index[m]];  /* Real and imaginary */
      complex_data_ptr[index[m]] = complex_data_ptr[index[m + 1]];
      complex_data_ptr[index[m + 1]] = temp;
    }
  }
  else {
    int m = 0, mr = 0, l = 0;
    int n = 1 << stages;
    int nn = n - 1;

    /* Decimation in time - re-order data */
    for (m = 1; m <= nn; ++m) {
      int32_t* complex_data_ptr = (int32_t*)complex_data;
      int32_t temp = 0;

      /* Find out indexes that are bit-reversed. */
      /* mr is carried across iterations and updated to the partner of m. */
      l = n;
      do {
        l >>= 1;
      } while (l > nn - mr);
      mr = (mr & (l - 1)) + l;

      /* Swap each pair only once, and skip self-paired indexes. */
      if (mr <= m) {
        continue;
      }

      /* Swap the elements with bit-reversed indexes.
       * This is similar to the loop in the stages == 7 or 8 cases.
       */
      temp = complex_data_ptr[m];  /* Real and imaginary */
      complex_data_ptr[m] = complex_data_ptr[mr];
      complex_data_ptr[mr] = temp;
    }
  }
}
109 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/cross_correlation.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
12 |
/* Generic C implementation of WebRtcSpl_CrossCorrelation().
 *
 * Writes dim_cross_correlation values to cross_correlation. Each value is the
 * sum over dim_seq samples of (seq1[j] * seq2[j]) >> right_shifts; after every
 * output value seq2 is advanced by step_seq2 samples.
 */
void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
                                 const int16_t* seq1,
                                 const int16_t* seq2,
                                 size_t dim_seq,
                                 size_t dim_cross_correlation,
                                 int right_shifts,
                                 int step_seq2) {
  size_t remaining = dim_cross_correlation;

  while (remaining > 0) {
    int32_t acc = 0;
    size_t k;

    for (k = 0; k < dim_seq; ++k) {
      acc += (seq1[k] * seq2[k]) >> right_shifts;
    }

    seq2 += step_seq2;
    *cross_correlation = acc;
    ++cross_correlation;
    --remaining;
  }
}
31 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/division_operations.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 |
12 | /*
13 | * This file contains implementations of the divisions
14 | * WebRtcSpl_DivU32U16()
15 | * WebRtcSpl_DivW32W16()
16 | * WebRtcSpl_DivW32W16ResW16()
17 | * WebRtcSpl_DivResultInQ31()
18 | * WebRtcSpl_DivW32HiLow()
19 | *
20 | * The description header can be found in signal_processing_library.h
21 | *
22 | */
23 |
24 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
25 | #include "webrtc/rtc_base/sanitizer.h"
26 |
/* Unsigned division num / den; returns 0xFFFFFFFF when den is 0. */
uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den)
{
    if (den == 0)
    {
        /* Division by zero: report the largest representable value. */
        return (uint32_t)0xFFFFFFFF;
    }
    return (uint32_t)(num / den);
}
38 |
/* Signed division num / den; returns 0x7FFFFFFF when den is 0. */
int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den)
{
    if (den == 0)
    {
        /* Division by zero: report the largest positive value. */
        return (int32_t)0x7FFFFFFF;
    }
    return (int32_t)(num / den);
}
50 |
/* Signed division num / den with the quotient converted to int16_t;
 * returns 0x7FFF when den is 0. */
int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den)
{
    if (den == 0)
    {
        /* Division by zero: report the largest positive 16-bit value. */
        return (int16_t)0x7FFF;
    }
    return (int16_t)(num / den);
}
62 |
/* Bit-by-bit (restoring) division of |num| by |den| producing 31 quotient
 * bits; the result is negated when exactly one operand is negative.
 * Returns 0 immediately when num is 0.
 */
int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den)
{
    int32_t remainder = num;
    int32_t divisor = den;
    int32_t quotient = 0;
    int negatives = 0;
    int bit;

    if (num == 0)
    {
        return 0;
    }

    /* Work on magnitudes and remember how many operands were negative. */
    if (num < 0)
    {
        remainder = -num;
        negatives += 1;
    }
    if (den < 0)
    {
        divisor = -den;
        negatives += 1;
    }

    /* Produce one quotient bit per iteration, most significant first. */
    for (bit = 0; bit < 31; ++bit)
    {
        quotient <<= 1;
        remainder <<= 1;
        if (remainder >= divisor)
        {
            remainder -= divisor;
            quotient += 1;
        }
    }

    return (negatives == 1) ? -quotient : quotient;
}
100 |
/* Divides num by a denominator supplied as two 16-bit halves (den_hi, den_low).
 *
 * The code first forms an approximate reciprocal of den_hi, refines it once
 * using approx * (2.0 - den * approx), and then multiplies num by the refined
 * reciprocal in hi/low form. The final left shift by 3 converts the product
 * from Q28 to Q31 (per the inline comments).
 *
 * Signed-overflow sanitizing is disabled for this function on purpose; see the
 * bug reference on the declaration line.
 */
int32_t RTC_NO_SANITIZE("signed-integer-overflow")  // bugs.webrtc.org/5486
WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low)
{
    int16_t approx, tmp_hi, tmp_low, num_hi, num_low;
    int32_t tmpW32;

    approx = (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi);
    // result in Q14 (Note: 3FFFFFFF = 0.5 in Q30)
    // NOTE(review): the note above mentions 3FFFFFFF while the code divides
    // 0x1FFFFFFF -- confirm which constant the comment is meant to describe.

    // tmpW32 = 1/den = approx * (2.0 - den * approx) (in Q30)
    tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1);
    // tmpW32 = den * approx

    tmpW32 = (int32_t)0x7fffffffL - tmpW32;  // result in Q30 (tmpW32 = 2.0-(den*approx))
    // UBSan: 2147483647 - -2 cannot be represented in type 'int'

    // Store tmpW32 in hi and low format
    tmp_hi = (int16_t)(tmpW32 >> 16);
    tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);

    // tmpW32 = 1/den in Q29
    tmpW32 = (tmp_hi * approx + (tmp_low * approx >> 15)) << 1;

    // 1/den in hi and low format
    tmp_hi = (int16_t)(tmpW32 >> 16);
    tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);

    // Store num in hi and low format
    num_hi = (int16_t)(num >> 16);
    num_low = (int16_t)((num - ((int32_t)num_hi << 16)) >> 1);

    // num * (1/den) by 32 bit multiplication (result in Q28)

    tmpW32 = num_hi * tmp_hi + (num_hi * tmp_low >> 15) +
        (num_low * tmp_hi >> 15);

    // Put result in Q31 (convert from Q28)
    tmpW32 = WEBRTC_SPL_LSHIFT_W32(tmpW32, 3);

    return tmpW32;
}
142 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/dot_product_with_scale.cc:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | #include "webrtc/common_audio/signal_processing/dot_product_with_scale.h"
12 |
13 | #include "webrtc/rtc_base/numerics/safe_conversions.h"
14 |
15 | int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
16 | const int16_t* vector2,
17 | size_t length,
18 | int scaling) {
19 | int64_t sum = 0;
20 | size_t i = 0;
21 |
22 | /* Unroll the loop to improve performance. */
23 | for (i = 0; i + 3 < length; i += 4) {
24 | sum += (vector1[i + 0] * vector2[i + 0]) >> scaling;
25 | sum += (vector1[i + 1] * vector2[i + 1]) >> scaling;
26 | sum += (vector1[i + 2] * vector2[i + 2]) >> scaling;
27 | sum += (vector1[i + 3] * vector2[i + 3]) >> scaling;
28 | }
29 | for (; i < length; i++) {
30 | sum += (vector1[i] * vector2[i]) >> scaling;
31 | }
32 |
33 | return rtc::saturated_cast(sum);
34 | }
35 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/dot_product_with_scale.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
12 | #define COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
13 |
14 | #include
15 | #include
16 |
17 | #ifdef __cplusplus
18 | extern "C" {
19 | #endif
20 |
// Calculates the dot product between two (int16_t) vectors.
//
// Input:
//      - vector1       : Vector 1
//      - vector2       : Vector 2
//      - length        : Number of samples used in the dot product
//      - scaling       : The number of right bit shifts to apply on each term
//                        during calculation to avoid overflow, i.e., the
//                        output will be in Q(-|scaling|)
//
// Return value         : The dot product in Q(-scaling)
int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
                                      const int16_t* vector2,
                                      size_t length,
                                      int scaling);
36 |
37 | #ifdef __cplusplus
38 | }
39 | #endif // __cplusplus
40 | #endif // COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
41 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/downsample_fast.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
12 |
13 | #include "webrtc/rtc_base/checks.h"
14 | #include "webrtc/rtc_base/sanitizer.h"
15 |
// TODO(Bjornv): Change the function parameter order to WebRTC code style.
// C version of WebRtcSpl_DownsampleFast() for generic platforms.
//
// Filters data_in with the given coefficients and keeps every factor-th
// output sample, starting at input position delay. Exactly data_out_length
// samples are written to data_out.
//
// Returns 0 on success, or -1 when data_out_length or coefficients_length is
// 0, or when data_in_length is smaller than the last input position needed.
int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
                              size_t data_in_length,
                              int16_t* data_out,
                              size_t data_out_length,
                              const int16_t* __restrict coefficients,
                              size_t coefficients_length,
                              int factor,
                              size_t delay) {
  int16_t* const original_data_out = data_out;
  size_t i = 0;
  size_t j = 0;
  int32_t out_s32 = 0;
  // One past the last input position visited by the outer loop.
  size_t endpos = delay + factor * (data_out_length - 1) + 1;

  // Return error if any of the running conditions doesn't meet.
  if (data_out_length == 0 || coefficients_length == 0
                           || data_in_length < endpos) {
    return -1;
  }

  rtc_MsanCheckInitialized(coefficients, sizeof(coefficients[0]),
                           coefficients_length);

  for (i = delay; i < endpos; i += factor) {
    out_s32 = 2048;  // Round value, 0.5 in Q12.

    for (j = 0; j < coefficients_length; j++) {
      // Negative overflow is permitted here, because this is
      // auto-regressive filters, and the state for each batch run is
      // stored in the "negative" positions of the output vector.
      rtc_MsanCheckInitialized(&data_in[(ptrdiff_t) i - (ptrdiff_t) j],
          sizeof(data_in[0]), 1);
      // out_s32 is in Q12 domain.
      out_s32 += coefficients[j] * data_in[(ptrdiff_t) i - (ptrdiff_t) j];
    }

    out_s32 >>= 12;  // Q0.

    // Saturate and store the output.
    *data_out++ = WebRtcSpl_SatW32ToW16(out_s32);
  }

  // The loop must have produced exactly data_out_length samples.
  RTC_DCHECK_EQ(original_data_out + data_out_length, data_out);
  rtc_MsanCheckInitialized(original_data_out, sizeof(original_data_out[0]),
                           data_out_length);

  return 0;
}
66 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/energy.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 |
12 | /*
13 | * This file contains the function WebRtcSpl_Energy().
14 | * The description header can be found in signal_processing_library.h
15 | *
16 | */
17 |
18 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
19 |
/* Sums the squares of vector[0 .. vector_length), right-shifting every square
 * by the amount returned from WebRtcSpl_GetScalingSquare(). That shift is
 * also written to *scale_factor so the caller can undo the scaling.
 */
int32_t WebRtcSpl_Energy(int16_t* vector,
                         size_t vector_length,
                         int* scale_factor)
{
    int shift = WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length);
    int32_t total = 0;
    size_t k;

    for (k = 0; k < vector_length; ++k)
    {
        total += (vector[k] * vector[k]) >> shift;
    }

    *scale_factor = shift;
    return total;
}
40 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/get_scaling_square.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 |
12 | /*
13 | * This file contains the function WebRtcSpl_GetScalingSquare().
14 | * The description header can be found in signal_processing_library.h
15 | *
16 | */
17 |
18 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
19 |
/* Returns the right-shift amount to apply to each squared sample.
 *
 * The result combines the bit size of times (WebRtcSpl_GetSizeInBits) with
 * the normalization headroom of the largest squared magnitude in in_vector
 * (WebRtcSpl_NormW32). It is 0 when the largest magnitude is 0 or when the
 * headroom exceeds the bit size of times.
 */
int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector,
                                   size_t in_vector_length,
                                   size_t times)
{
    int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t)times);
    size_t i;
    int16_t smax = -1;
    int16_t sabs;
    int16_t *sptr = in_vector;
    int16_t t;
    size_t looptimes = in_vector_length;

    /* Find the largest absolute sample value. */
    for (i = looptimes; i > 0; i--)
    {
        sabs = (*sptr > 0 ? *sptr++ : -*sptr++);
        smax = (sabs > smax ? sabs : smax);
    }
    t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax));

    if (smax == 0)
    {
        return 0; // Since norm(0) returns 0
    } else
    {
        return (t > nbits) ? 0 : nbits - t;
    }
}
47 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/include/real_fft.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
12 | #define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
13 |
14 | #include
15 |
16 | // For ComplexFFT(), the maximum fft order is 10;
17 | // WebRTC APM uses orders of only 7 and 8.
18 | enum { kMaxFFTOrder = 10 };
19 |
20 | struct RealFFT;
21 |
22 | #ifdef __cplusplus
23 | extern "C" {
24 | #endif
25 |
26 | struct RealFFT* WebRtcSpl_CreateRealFFT(int order);
27 | void WebRtcSpl_FreeRealFFT(struct RealFFT* self);
28 |
// Compute an FFT for a real-valued signal of length of 2^order,
// where 1 < order <= kMaxFFTOrder. Transform length is determined by the
// specification structure, which must be initialized prior to calling the FFT
// function with WebRtcSpl_CreateRealFFT().
33 | // The relationship between the input and output sequences can
34 | // be expressed in terms of the DFT, i.e.:
35 | // x[n] = (2^(-scalefactor)/N) . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N)
36 | // n=0,1,2,...N-1
37 | // N=2^order.
38 | // The conjugate-symmetric output sequence is represented using a CCS vector,
39 | // which is of length N+2, and is organized as follows:
40 | // Index: 0 1 2 3 4 5 . . . N-2 N-1 N N+1
41 | // Component: R0 0 R1 I1 R2 I2 . . . R[N/2-1] I[N/2-1] R[N/2] 0
42 | // where R[n] and I[n], respectively, denote the real and imaginary components
43 | // for FFT bin 'n'. Bins are numbered from 0 to N/2, where N is the FFT length.
44 | // Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to
45 | // the foldover frequency.
46 | //
47 | // Input Arguments:
48 | // self - pointer to preallocated and initialized FFT specification structure.
49 | // real_data_in - the input signal. For an ARM Neon platform, it must be
50 | // aligned on a 32-byte boundary.
51 | //
52 | // Output Arguments:
53 | // complex_data_out - the output complex signal with (2^order + 2) 16-bit
54 | // elements. For an ARM Neon platform, it must be different
55 | // from real_data_in, and aligned on a 32-byte boundary.
56 | //
57 | // Return Value:
58 | // 0 - FFT calculation is successful.
59 | // -1 - Error with bad arguments (null pointers).
60 | int WebRtcSpl_RealForwardFFT(struct RealFFT* self,
61 | const int16_t* real_data_in,
62 | int16_t* complex_data_out);
63 |
// Compute the inverse FFT for a conjugate-symmetric input sequence of length of
// 2^order, where 1 < order <= kMaxFFTOrder. Transform length is determined by
// the specification structure, which must be initialized prior to calling the
// FFT function with WebRtcSpl_CreateRealFFT().
// For a transform of length M, the input sequence is represented using a packed
// CCS vector of length M+2, which is explained in the comments for
// WebRtcSpl_RealForwardFFT above.
71 | //
72 | // Input Arguments:
73 | // self - pointer to preallocated and initialized FFT specification structure.
74 | // complex_data_in - the input complex signal with (2^order + 2) 16-bit
75 | // elements. For an ARM Neon platform, it must be aligned on
76 | // a 32-byte boundary.
77 | //
78 | // Output Arguments:
79 | // real_data_out - the output real signal. For an ARM Neon platform, it must
80 | // be different to complex_data_in, and aligned on a 32-byte
81 | // boundary.
82 | //
83 | // Return Value:
84 | // 0 or a positive number - a value that the elements in the |real_data_out|
85 | // should be shifted left with in order to get
86 | // correct physical values.
87 | // -1 - Error with bad arguments (null pointers).
88 | int WebRtcSpl_RealInverseFFT(struct RealFFT* self,
89 | const int16_t* complex_data_in,
90 | int16_t* real_data_out);
91 |
92 | #ifdef __cplusplus
93 | }
94 | #endif
95 |
96 | #endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
97 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/include/spl_inl.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | // This header file includes the inline functions in
12 | // the fix point signal processing library.
13 |
14 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
15 | #define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
16 |
17 | #include "webrtc/rtc_base/compile_assert_c.h"
18 |
19 | extern const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64];
20 |
// Don't call this directly except in tests!
// Portable count of leading zero bits in n, using a multiply-and-lookup on
// kWebRtcSpl_CountLeadingZeros32_Table instead of a compiler builtin.
static __inline int WebRtcSpl_CountLeadingZeros32_NotBuiltin(uint32_t n) {
  // Normalize n by rounding up to the nearest number that is a sequence of 0
  // bits followed by a sequence of 1 bits. This number has the same number of
  // leading zeros as the original n. There are exactly 33 such values.
  n |= n >> 1;
  n |= n >> 2;
  n |= n >> 4;
  n |= n >> 8;
  n |= n >> 16;

  // Multiply the modified n with a constant selected (by exhaustive search)
  // such that each of the 33 possible values of n give a product whose 6 most
  // significant bits are unique. Then look up the answer in the table.
  return kWebRtcSpl_CountLeadingZeros32_Table[(n * 0x8c0b2891) >> 26];
}
37 |
38 | // 64-bit portable fallback. Don't call this directly except in tests!
39 | static __inline int WebRtcSpl_CountLeadingZeros64_NotBuiltin(uint64_t n) {
40 |   const uint32_t hi = (uint32_t)(n >> 32);  // Upper word; 0 => count the lower.
41 |   return hi != 0 ? WebRtcSpl_CountLeadingZeros32_NotBuiltin(hi)
42 |                  : 32 + WebRtcSpl_CountLeadingZeros32_NotBuiltin((uint32_t)n);
43 | }
44 |
45 | // Counts the zero bits above the most significant set bit of n (32 for 0).
46 | static __inline int WebRtcSpl_CountLeadingZeros32(uint32_t n) {
47 | #ifndef __GNUC__
48 |   return WebRtcSpl_CountLeadingZeros32_NotBuiltin(n);
49 | #else
50 |   RTC_COMPILE_ASSERT(sizeof(unsigned int) == sizeof(uint32_t));
51 |   return n != 0 ? __builtin_clz(n) : 32;  // __builtin_clz(0) is undefined.
52 | #endif
53 | }
54 |
55 | // Counts the zero bits above the most significant set bit of n (64 for 0).
56 | static __inline int WebRtcSpl_CountLeadingZeros64(uint64_t n) {
57 | #ifndef __GNUC__
58 |   return WebRtcSpl_CountLeadingZeros64_NotBuiltin(n);
59 | #else
60 |   RTC_COMPILE_ASSERT(sizeof(unsigned long long) == sizeof(uint64_t));  // NOLINT
61 |   return n != 0 ? __builtin_clzll(n) : 64;  // __builtin_clzll(0) is undefined.
62 | #endif
63 | }
64 |
65 | #ifdef WEBRTC_ARCH_ARM_V7
66 | #include "webrtc/common_audio/signal_processing/include/spl_inl_armv7.h"
67 | #else
68 |
69 | #if defined(MIPS32_LE)
70 | #include "webrtc/common_audio/signal_processing/include/spl_inl_mips.h"
71 | #endif
72 |
73 | #if !defined(MIPS_DSP_R1_LE)
74 | static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
75 |   // Clamp to the int16_t range [-32768, 32767] before narrowing.
76 |   if (value32 > 32767) {
77 |     return 32767;
78 |   }
79 |   if (value32 < -32768) {
80 |     return -32768;
81 |   }
82 |   return (int16_t)value32;
83 | }
84 |
85 | static __inline int32_t WebRtcSpl_AddSatW32(int32_t a, int32_t b) {
86 |   // Signed overflow is undefined behavior, so add as unsigned numbers and
87 |   // convert the wrapped result back.
88 |   const int32_t sum = (int32_t)((uint32_t)a + (uint32_t)b);
89 |   const int a_neg = a < 0;
90 |
91 |   // Operands of different signs can never overflow. With equal signs the
92 |   // sum keeps that sign iff no overflow happened.
93 |   if (a_neg != (b < 0) || a_neg == (sum < 0)) {
94 |     return sum;
95 |   }
96 |   return a_neg ? INT32_MIN : INT32_MAX;  // Saturate in the overflow direction.
97 | }
98 |
99 | static __inline int32_t WebRtcSpl_SubSatW32(int32_t a, int32_t b) {
100 |   // Signed overflow is undefined behavior, so subtract as unsigned numbers
101 |   // and convert the wrapped result back.
102 |   const int32_t diff = (int32_t)((uint32_t)a - (uint32_t)b);
103 |   const int a_neg = a < 0;
104 |
105 |   // Operands of equal sign can never overflow. With different signs the
106 |   // difference keeps the sign of a iff no overflow happened.
107 |   if (a_neg == (b < 0) || a_neg == (diff < 0)) {
108 |     return diff;
109 |   }
110 |   return a_neg ? INT32_MIN : INT32_MAX;  // Saturate in the overflow direction.
111 | }
112 |
113 | static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
114 |   return WebRtcSpl_SatW32ToW16((int32_t)a + b);  // Widen so the sum is exact.
115 | }
116 |
117 | static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
118 |   return WebRtcSpl_SatW32ToW16((int32_t)var1 - var2);  // Exact in 32 bits.
119 | }
120 | #endif // #if !defined(MIPS_DSP_R1_LE)
121 |
122 | #if !defined(MIPS32_LE)
123 | static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
124 |   return (int16_t)(32 - WebRtcSpl_CountLeadingZeros32(n));  // Bits used by n.
125 | }
126 |
127 | // Number of left shifts that normalize a without overflowing (0 for a == 0).
128 | static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
129 |   if (a == 0) return 0;
130 |   return WebRtcSpl_CountLeadingZeros32(a >= 0 ? a : ~a) - 1;  // Skip sign bit.
131 | }
132 |
133 | // Number of left shifts that bring the top set bit of a to bit 31 (0 for
134 | // a == 0).
135 | static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
136 |   return a != 0 ? WebRtcSpl_CountLeadingZeros32(a) : 0;
137 | }
138 |
139 | // Number of left shifts that normalize a without overflowing the 16-bit range
140 | // (0 for a == 0).
141 | static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
142 |   const int32_t magnitude = a < 0 ? ~(int32_t)a : (int32_t)a;
143 |   return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(magnitude) - 17;
144 | }
145 |
146 | static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
147 |   return c + a * b;  // a and b are promoted to int before the multiplication.
148 | }
149 | #endif // #if !defined(MIPS32_LE)
150 |
151 | #endif // WEBRTC_ARCH_ARM_V7
152 |
153 | #endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
154 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/min_max_operations.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | /*
12 | * This file contains the implementation of functions
13 | * WebRtcSpl_MaxAbsValueW16C()
14 | * WebRtcSpl_MaxAbsValueW32C()
15 | * WebRtcSpl_MaxValueW16C()
16 | * WebRtcSpl_MaxValueW32C()
17 | * WebRtcSpl_MinValueW16C()
18 | * WebRtcSpl_MinValueW32C()
19 | * WebRtcSpl_MaxAbsIndexW16()
20 | * WebRtcSpl_MaxIndexW16()
21 | * WebRtcSpl_MaxIndexW32()
22 | * WebRtcSpl_MinIndexW16()
23 | * WebRtcSpl_MinIndexW32()
24 | *
25 | */
26 |
27 | #include <stdlib.h>
28 |
29 | #include "webrtc/rtc_base/checks.h"
30 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
31 |
32 | // TODO(bjorn/kma): Consolidate function pairs (e.g. combine
33 | // WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.)
34 | // TODO(kma): Move the next six functions into min_max_operations_c.c.
35 |
36 | // Maximum absolute value of a word16 vector, saturated to
37 | // WEBRTC_SPL_WORD16_MAX. C version for generic platforms.
38 | int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length) {
39 |   const int16_t* cursor = vector;
40 |   const int16_t* const end = vector + length;
41 |   int peak = 0;  // int rather than int16_t, so that abs(-32768) fits.
42 |
43 |   RTC_DCHECK_GT(length, 0);
44 |
45 |   while (cursor != end) {
46 |     const int magnitude = abs((int)*cursor);
47 |     if (magnitude > peak) {
48 |       peak = magnitude;
49 |     }
50 |     ++cursor;
51 |   }
52 |   // Guard the case for abs(-32768), which exceeds the int16_t range.
53 |   if (peak > WEBRTC_SPL_WORD16_MAX) {
54 |     peak = WEBRTC_SPL_WORD16_MAX;
55 |   }
56 |   return (int16_t)peak;
57 | }
58 |
59 | // Maximum absolute value of word32 vector. C version for generic platforms.
60 | int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length) {
61 |   // Use uint32_t for the local variables, to accommodate the magnitude of
62 |   // 0x80000000, which is 0x80000000 and does not fit in an int32_t.
63 |   uint32_t absolute = 0, maximum = 0;
64 |   size_t i = 0;
65 |
66 |   RTC_DCHECK_GT(length, 0);
67 |
68 |   for (i = 0; i < length; i++) {
69 |     // abs() of the most negative int is undefined behavior, so negate in
70 |     // unsigned arithmetic instead, which is well defined for every input.
71 |     absolute = vector[i] < 0 ? 0u - (uint32_t)vector[i] : (uint32_t)vector[i];
72 |     if (absolute > maximum) {
73 |       maximum = absolute;
74 |     }
75 |   }
76 |   // Saturate, so that the result is representable as an int32_t.
77 |   maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
78 |   return (int32_t)maximum;
79 | }
80 |
81 | // Largest element of a word16 vector. C version for generic platforms.
82 | int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length) {
83 |   const int16_t* const end = vector + length;
84 |   int16_t best = WEBRTC_SPL_WORD16_MIN;  // Running maximum.
85 |
86 |   RTC_DCHECK_GT(length, 0);
87 |
88 |   for (; vector != end; ++vector) {
89 |     best = *vector > best ? *vector : best;
90 |   }
91 |
92 |   return best;
93 | }
94 |
95 | // Largest element of a word32 vector. C version for generic platforms.
96 | int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length) {
97 |   const int32_t* const end = vector + length;
98 |   int32_t best = WEBRTC_SPL_WORD32_MIN;  // Running maximum.
99 |
100 |   RTC_DCHECK_GT(length, 0);
101 |
102 |   for (; vector != end; ++vector) {
103 |     best = *vector > best ? *vector : best;
104 |   }
105 |
106 |   return best;
107 | }
108 |
109 | // Smallest element of a word16 vector. C version for generic platforms.
110 | int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length) {
111 |   const int16_t* const end = vector + length;
112 |   int16_t best = WEBRTC_SPL_WORD16_MAX;  // Running minimum.
113 |
114 |   RTC_DCHECK_GT(length, 0);
115 |
116 |   for (; vector != end; ++vector) {
117 |     best = *vector < best ? *vector : best;
118 |   }
119 |
120 |   return best;
121 | }
122 |
123 | // Smallest element of a word32 vector. C version for generic platforms.
124 | int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length) {
125 |   const int32_t* const end = vector + length;
126 |   int32_t best = WEBRTC_SPL_WORD32_MAX;  // Running minimum.
127 |
128 |   RTC_DCHECK_GT(length, 0);
129 |
130 |   for (; vector != end; ++vector) {
131 |     best = *vector < best ? *vector : best;
132 |   }
133 |
134 |   return best;
135 | }
136 |
137 | // Index of the first element with the maximum absolute value in a word16
138 | // vector.
139 | size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length) {
140 |   size_t pos = 0, best_pos = 0;
141 |   // Use type int, to accommodate the value of abs(-32768).
142 |   int best_abs = 0;
143 |
144 |   RTC_DCHECK_GT(length, 0);
145 |
146 |   while (pos < length) {
147 |     const int magnitude = abs((int)vector[pos]);
148 |     // Strict comparison, so that ties keep the earliest index.
149 |     if (magnitude > best_abs) {
150 |       best_abs = magnitude;
151 |       best_pos = pos;
152 |     }
153 |     ++pos;
154 |   }
155 |   return best_pos;
156 | }
157 |
158 | // Index of the first occurrence of the maximum value in a word16 vector.
159 | size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length) {
160 |   size_t pos = 0, best_pos = 0;
161 |   int16_t best = WEBRTC_SPL_WORD16_MIN;
162 |
163 |   RTC_DCHECK_GT(length, 0);
164 |
165 |   while (pos < length) {
166 |     if (vector[pos] > best) {  // Strict compare: ties keep the first index.
167 |       best = vector[pos];
168 |       best_pos = pos;
169 |     }
170 |     ++pos;
171 |   }
172 |   return best_pos;
173 | }
174 |
175 | // Index of the first occurrence of the maximum value in a word32 vector.
176 | size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length) {
177 |   size_t pos = 0, best_pos = 0;
178 |   int32_t best = WEBRTC_SPL_WORD32_MIN;
179 |
180 |   RTC_DCHECK_GT(length, 0);
181 |
182 |   while (pos < length) {
183 |     if (vector[pos] > best) {  // Strict compare: ties keep the first index.
184 |       best = vector[pos];
185 |       best_pos = pos;
186 |     }
187 |     ++pos;
188 |   }
189 |   return best_pos;
190 | }
191 |
192 | // Index of the first occurrence of the minimum value in a word16 vector.
193 | size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length) {
194 |   size_t pos = 0, best_pos = 0;
195 |   int16_t best = WEBRTC_SPL_WORD16_MAX;
196 |
197 |   RTC_DCHECK_GT(length, 0);
198 |
199 |   while (pos < length) {
200 |     if (vector[pos] < best) {  // Strict compare: ties keep the first index.
201 |       best = vector[pos];
202 |       best_pos = pos;
203 |     }
204 |     ++pos;
205 |   }
206 |   return best_pos;
207 | }
208 |
209 | // Index of the first occurrence of the minimum value in a word32 vector.
210 | size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) {
211 |   size_t pos = 0, best_pos = 0;
212 |   int32_t best = WEBRTC_SPL_WORD32_MAX;
213 |
214 |   RTC_DCHECK_GT(length, 0);
215 |
216 |   while (pos < length) {
217 |     if (vector[pos] < best) {  // Strict compare: ties keep the first index.
218 |       best = vector[pos];
219 |       best_pos = pos;
220 |     }
221 |     ++pos;
222 |   }
223 |   return best_pos;
224 | }
225 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/resample_48khz.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 |
12 | /*
13 | * This file contains resampling functions between 48 kHz and nb/wb.
14 | * The description header can be found in signal_processing_library.h
15 | *
16 | */
17 |
18 | #include <string.h>
19 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
20 | #include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
21 |
22 | ////////////////////////////
23 | ///// 48 kHz -> 16 kHz /////
24 | ////////////////////////////
25 |
26 | // 48 -> 16 resampler
27 | void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out,
28 |                                     WebRtcSpl_State48khzTo16khz* state, int32_t* tmpmem)
29 | {
30 |     // Scratch layout: tmpmem[8..15] holds the state saved by the previous
31 |     // call, directly followed by the lowpassed input in tmpmem[16..495].
32 |     int32_t* const history = tmpmem + 8;
33 |     int32_t* const lowpassed = tmpmem + 16;
34 |
35 |     ///// Stage 1: 48 --> 48(LP) /////
36 |     // int16_t in[480] --> int32_t lowpassed[480]
37 |     WebRtcSpl_LPBy2ShortToInt(in, 480, lowpassed, state->S_48_48);
38 |
39 |     ///// Stage 2: 48 --> 32 /////
40 |     // int32_t lowpassed[480] --> int32_t tmpmem[320]
41 |     // Put the saved state in front of the input, and keep the last 8
42 |     // lowpassed samples (tmpmem[488..495]) as the state of the next call.
43 |     memcpy(history, state->S_48_32, 8 * sizeof(int32_t));
44 |     memcpy(state->S_48_32, lowpassed + 472, 8 * sizeof(int32_t));
45 |     WebRtcSpl_Resample48khzTo32khz(history, tmpmem, 160);
46 |
47 |     ///// Stage 3: 32 --> 16 /////
48 |     // int32_t tmpmem[320] --> int16_t out[160]
49 |     WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16);
50 | }
51 |
52 | // Clears the filter states of the 48 -> 16 resampler.
53 | void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state)
54 | {
55 |     memset(state->S_48_48, 0, sizeof(int32_t) * 16);
56 |     memset(state->S_48_32, 0, sizeof(int32_t) * 8);
57 |     memset(state->S_32_16, 0, sizeof(int32_t) * 8);
58 | }
59 |
60 | ////////////////////////////
61 | ///// 16 kHz -> 48 kHz /////
62 | ////////////////////////////
63 |
64 | // 16 -> 48 resampler
65 | void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out,
66 |                                     WebRtcSpl_State16khzTo48khz* state, int32_t* tmpmem)
67 | {
68 |     // Scratch layout: tmpmem[8..15] holds the state saved by the previous
69 |     // call, directly followed by the upsampled input in tmpmem[16..335].
70 |     int32_t* const history = tmpmem + 8;
71 |     int32_t* const upsampled = tmpmem + 16;
72 |
73 |     ///// Stage 1: 16 --> 32 /////
74 |     // int16_t in[160] --> int32_t upsampled[320]
75 |     WebRtcSpl_UpBy2ShortToInt(in, 160, upsampled, state->S_16_32);
76 |
77 |     ///// Stage 2: 32 --> 24 /////
78 |     // int32_t upsampled[320] --> int32_t tmpmem[240]
79 |     // Put the saved state in front of the input, and keep the last 8
80 |     // upsampled samples (tmpmem[328..335]) as the state of the next call.
81 |     memcpy(history, state->S_32_24, 8 * sizeof(int32_t));
82 |     memcpy(state->S_32_24, upsampled + 312, 8 * sizeof(int32_t));
83 |     WebRtcSpl_Resample32khzTo24khz(history, tmpmem, 80);
84 |
85 |     ///// Stage 3: 24 --> 48 /////
86 |     // int32_t tmpmem[240] --> int16_t out[480]
87 |     WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
88 | }
89 |
90 | // Clears the filter states of the 16 -> 48 resampler.
91 | void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state)
92 | {
93 |     memset(state->S_16_32, 0, sizeof(int32_t) * 8);
94 |     memset(state->S_32_24, 0, sizeof(int32_t) * 8);
95 |     memset(state->S_24_48, 0, sizeof(int32_t) * 8);
96 | }
97 |
98 | ////////////////////////////
99 | ///// 48 kHz -> 8 kHz /////
100 | ////////////////////////////
101 |
102 | // 48 -> 8 resampler
103 | void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out,
104 |                                    WebRtcSpl_State48khzTo8khz* state, int32_t* tmpmem)
105 | {
106 |     // Scratch layout: tmpmem[8..15] holds the state saved by the previous
107 |     // call, directly followed by the lowpassed signal in tmpmem[16..255].
108 |     // The output of the first stage is kept above that, from tmpmem[256].
109 |     int32_t* const history = tmpmem + 8;
110 |     int32_t* const lowpassed = tmpmem + 16;
111 |     int32_t* const halved = tmpmem + 256;
112 |
113 |     ///// Stage 1: 48 --> 24 /////
114 |     // int16_t in[480] --> int32_t halved[240]
115 |     WebRtcSpl_DownBy2ShortToInt(in, 480, halved, state->S_48_24);
116 |
117 |     ///// Stage 2: 24 --> 24(LP) /////
118 |     // int32_t halved[240] --> int32_t lowpassed[240]
119 |     WebRtcSpl_LPBy2IntToInt(halved, 240, lowpassed, state->S_24_24);
120 |
121 |     ///// Stage 3: 24 --> 16 /////
122 |     // int32_t lowpassed[240] --> int32_t tmpmem[160]
123 |     // Put the saved state in front of the input, and keep the last 8
124 |     // lowpassed samples (tmpmem[248..255]) as the state of the next call.
125 |     memcpy(history, state->S_24_16, 8 * sizeof(int32_t));
126 |     memcpy(state->S_24_16, lowpassed + 232, 8 * sizeof(int32_t));
127 |     WebRtcSpl_Resample48khzTo32khz(history, tmpmem, 80);
128 |
129 |     ///// Stage 4: 16 --> 8 /////
130 |     // int32_t tmpmem[160] --> int16_t out[80]
131 |     WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8);
132 | }
133 |
134 | // Clears the filter states of the 48 -> 8 resampler.
135 | void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state)
136 | {
137 |     memset(state->S_48_24, 0, sizeof(int32_t) * 8);
138 |     memset(state->S_24_24, 0, sizeof(int32_t) * 16);
139 |     memset(state->S_24_16, 0, sizeof(int32_t) * 8);
140 |     memset(state->S_16_8, 0, sizeof(int32_t) * 8);
141 | }
142 |
143 | ////////////////////////////
144 | ///// 8 kHz -> 48 kHz /////
145 | ////////////////////////////
146 |
147 | // 8 -> 48 resampler
148 | void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out,
149 |                                    WebRtcSpl_State8khzTo48khz* state, int32_t* tmpmem)
150 | {
151 |     // Scratch layout: tmpmem[256..263] holds the state saved by the previous
152 |     // call, directly followed by the upsampled input in tmpmem[264..423].
153 |     // The output of the second stage is written from tmpmem[240].
154 |     int32_t* const resampled = tmpmem + 240;
155 |     int32_t* const history = tmpmem + 256;
156 |     int32_t* const upsampled = tmpmem + 264;
157 |
158 |     ///// Stage 1: 8 --> 16 /////
159 |     // int16_t in[80] --> int32_t upsampled[160]
160 |     WebRtcSpl_UpBy2ShortToInt(in, 80, upsampled, state->S_8_16);
161 |
162 |     ///// Stage 2: 16 --> 12 /////
163 |     // int32_t upsampled[160] --> int32_t resampled[120]
164 |     // Put the saved state in front of the input, and keep the last 8
165 |     // upsampled samples (tmpmem[416..423]) as the state of the next call.
166 |     memcpy(history, state->S_16_12, 8 * sizeof(int32_t));
167 |     memcpy(state->S_16_12, upsampled + 152, 8 * sizeof(int32_t));
168 |     WebRtcSpl_Resample32khzTo24khz(history, resampled, 40);
169 |
170 |     ///// Stage 3: 12 --> 24 /////
171 |     // int32_t resampled[120] --> int32_t tmpmem[240]
172 |     WebRtcSpl_UpBy2IntToInt(resampled, 120, tmpmem, state->S_12_24);
173 |
174 |     ///// Stage 4: 24 --> 48 /////
175 |     // int32_t tmpmem[240] --> int16_t out[480]
176 |     WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
177 | }
178 |
179 | // Clears the filter states of the 8 -> 48 resampler.
180 | void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state)
181 | {
182 |     memset(state->S_8_16, 0, sizeof(int32_t) * 8);
183 |     memset(state->S_16_12, 0, sizeof(int32_t) * 8);
184 |     memset(state->S_12_24, 0, sizeof(int32_t) * 8);
185 |     memset(state->S_24_48, 0, sizeof(int32_t) * 8);
186 | }
187 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/resample_by_2_internal.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | /*
12 | * This header file contains some internal resampling functions.
13 | *
14 | */
15 |
16 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_
17 | #define COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_
18 |
19 | #include <stdint.h>
20 |
21 | /*******************************************************************
22 | * resample_by_2_internal.c
23 | * Functions for internal use in the other resample functions
24 | ******************************************************************/
25 | void WebRtcSpl_DownBy2IntToShort(int32_t* in,
26 | int32_t len,
27 | int16_t* out,
28 | int32_t* state);
29 |
30 | void WebRtcSpl_DownBy2ShortToInt(const int16_t* in,
31 | int32_t len,
32 | int32_t* out,
33 | int32_t* state);
34 |
35 | void WebRtcSpl_UpBy2ShortToInt(const int16_t* in,
36 | int32_t len,
37 | int32_t* out,
38 | int32_t* state);
39 |
40 | void WebRtcSpl_UpBy2IntToInt(const int32_t* in,
41 | int32_t len,
42 | int32_t* out,
43 | int32_t* state);
44 |
45 | void WebRtcSpl_UpBy2IntToShort(const int32_t* in,
46 | int32_t len,
47 | int16_t* out,
48 | int32_t* state);
49 |
50 | void WebRtcSpl_LPBy2ShortToInt(const int16_t* in,
51 | int32_t len,
52 | int32_t* out,
53 | int32_t* state);
54 |
55 | void WebRtcSpl_LPBy2IntToInt(const int32_t* in,
56 | int32_t len,
57 | int32_t* out,
58 | int32_t* state);
59 |
60 | #endif // COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_
61 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/spl_init.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | /* The global function contained in this file initializes SPL function
12 | * pointers to the Neon, MIPS or generic C implementations.
13 | *
14 | * Some code came from common/rtcd.c in the WebM project.
15 | */
16 |
17 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
18 | #include "webrtc/system_wrappers/include/cpu_features_wrapper.h"
19 |
20 | /* Function pointers of the SPL library; WebRtcSpl_Init() assigns them. */
21 | MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
22 | MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
23 | MaxValueW16 WebRtcSpl_MaxValueW16;
24 | MaxValueW32 WebRtcSpl_MaxValueW32;
25 | MinValueW16 WebRtcSpl_MinValueW16;
26 | MinValueW32 WebRtcSpl_MinValueW32;
27 | CrossCorrelation WebRtcSpl_CrossCorrelation;
28 | DownsampleFast WebRtcSpl_DownsampleFast;
29 | ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
30 |
31 | #if (!defined(WEBRTC_HAS_NEON)) && !defined(MIPS32_LE)
32 | /* Point every SPL function pointer at the generic C implementation. */
33 | static void InitPointersToC(void) {
34 |   WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C;
35 |   WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C;
36 |   WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C;
37 |   WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
38 |   WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C;
39 |   WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C;
40 |   WebRtcSpl_ScaleAndAddVectorsWithRound =
41 |       WebRtcSpl_ScaleAndAddVectorsWithRoundC;
42 |   WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC;
43 |   WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC;
44 | }
45 | #endif
46 |
47 | #if defined(WEBRTC_HAS_NEON)
48 | /* Point the SPL function pointers at the Neon implementations. */
49 | static void InitPointersToNeon(void) {
50 |   WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon;
51 |   WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon;
52 |   WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon;
53 |   WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon;
54 |   WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon;
55 |   WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon;
56 |   WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationNeon;
57 |   WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon;
58 |   WebRtcSpl_ScaleAndAddVectorsWithRound =
59 |       WebRtcSpl_ScaleAndAddVectorsWithRoundC; /* The generic C version. */
60 | }
61 | #endif
62 |
63 | #if defined(MIPS32_LE)
64 | /* Point the SPL function pointers at the MIPS versions (C where missing). */
65 | static void InitPointersToMIPS(void) {
66 | #if !defined(MIPS_DSP_R1_LE)
67 |   WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
68 |   WebRtcSpl_ScaleAndAddVectorsWithRound =
69 |       WebRtcSpl_ScaleAndAddVectorsWithRoundC;
70 | #else
71 |   WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips;
72 |   WebRtcSpl_ScaleAndAddVectorsWithRound =
73 |       WebRtcSpl_ScaleAndAddVectorsWithRound_mips;
74 | #endif
75 |   WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips;
76 |   WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips;
77 |   WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips;
78 |   WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips;
79 |   WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips;
80 |   WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelation_mips;
81 |   WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips;
82 | }
83 | #endif
84 |
85 | static void InitFunctionPointers(void) {
86 | #if !defined(WEBRTC_HAS_NEON) && !defined(MIPS32_LE)
87 |   InitPointersToC();
88 | #elif defined(WEBRTC_HAS_NEON)
89 |   InitPointersToNeon();
90 | #else
91 |   InitPointersToMIPS();
92 | #endif /* Selection of the platform specific initializer. */
93 | }
94 |
95 | #if defined(WEBRTC_POSIX)
96 | #include <pthread.h>
97 |
98 | static void once(void (*func)(void)) {
99 |   static pthread_once_t once_control = PTHREAD_ONCE_INIT;  // Shared by all calls.
100 |   (void)pthread_once(&once_control, func);  // Runs func on the first call only.
101 | }
102 |
103 | #elif defined(_WIN32)
104 | #include <windows.h>
105 |
106 | static void once(void (*func)(void)) {
107 | /* Runs func on the first call only. The lock is initialized statically,
108 | * since there's no race-free context in which to execute
109 | * InitializeCriticalSection().
110 | * TODO(kma): Change to different implementation (e.g.
111 | * InterlockedCompareExchangePointer) to avoid issues similar to
112 | * http://code.google.com/p/webm/issues/detail?id=467.
113 | */
114 | static CRITICAL_SECTION lock = {(void *)((size_t)-1), -1, 0, 0, 0, 0};
115 | static int done = 0; /* Set once func has been executed. */
116 |
117 | EnterCriticalSection(&lock);
118 | if (!done) {
119 | func();
120 | done = 1;
121 | }
122 | LeaveCriticalSection(&lock);
123 | }
124 |
125 | /* There's no fallback version as an #else block here to ensure thread safety.
126 | * In case of neither pthread for WEBRTC_POSIX nor _WIN32 is present, build
127 | * system should pick it up.
128 | */
129 | #endif /* WEBRTC_POSIX */
130 |
131 | void WebRtcSpl_Init(void) {
132 |   once(&InitFunctionPointers);  /* The pointers are set up a single time. */
133 | }
134 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/spl_inl.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | #include <stdint.h>
12 |
13 | #include "webrtc/common_audio/signal_processing/include/spl_inl.h"
14 |
15 | // Table used by WebRtcSpl_CountLeadingZeros32_NotBuiltin. For each uint32_t n
16 | // that's a sequence of 0 bits followed by a sequence of 1 bits, the entry at
17 | // index (n * 0x8c0b2891) >> 26 gives the number of leading zero bits in n.
18 | // The 33 values 0..32 fill 33 slots; the remaining slots hold -1.
19 | const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64] = {
20 | 32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18,
21 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24,
22 | 4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9,
23 | -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12,
24 | };
25 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/spl_sqrt.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 |
12 | /*
13 | * This file contains the function WebRtcSpl_Sqrt().
14 | * The description header can be found in signal_processing_library.h
15 | *
16 | */
17 |
18 | #include "webrtc/rtc_base/checks.h"
19 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
20 |
21 | int32_t WebRtcSpl_SqrtLocal(int32_t in);
22 | // Evaluates the polynomial described in the block comment below.
23 | int32_t WebRtcSpl_SqrtLocal(int32_t in)
24 | {
25 |
26 | int16_t x_half, t16;
27 | int32_t A, B, x2;
28 |
29 | /* The following block performs:
30 | y=in/2
31 | x=y-2^30
32 | x_half=x/2^31
33 | t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
34 | + 0.875*((x_half)^5)
35 | */
36 |
37 | B = in / 2;
38 |
39 | B = B - ((int32_t)0x40000000); // B = in/2 - 1/2
40 | x_half = (int16_t)(B >> 16); // x_half = x/2 = (in-1)/2
41 | B = B + ((int32_t)0x40000000); // B = 1 + x/2
42 | B = B + ((int32_t)0x40000000); // Add 0.5 twice (since 1.0 does not exist in Q31)
43 |
44 | x2 = ((int32_t)x_half) * ((int32_t)x_half) * 2; // x2 = (x/2)^2
45 | A = -x2; // A = -(x/2)^2
46 | B = B + (A >> 1); // B = 1 + x/2 - 0.5*(x/2)^2
47 |
48 | A >>= 16; // Keep the upper 16 bits of -(x/2)^2
49 | A = A * A * 2; // A = (x/2)^4
50 | t16 = (int16_t)(A >> 16); // t16 = upper 16 bits of (x/2)^4
51 | B += -20480 * t16 * 2; // B = B - 0.625*A (20480/32768 = 0.625)
52 | // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4
53 |
54 | A = x_half * t16 * 2; // A = (x/2)^5
55 | t16 = (int16_t)(A >> 16); // t16 = upper 16 bits of (x/2)^5
56 | B += 28672 * t16 * 2; // B = B + 0.875*A (28672/32768 = 0.875)
57 | // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + 0.875*(x/2)^5
58 |
59 | t16 = (int16_t)(x2 >> 16); // t16 = upper 16 bits of (x/2)^2
60 | A = x_half * t16 * 2; // A = (x/2)^3
61 |
62 | B = B + (A >> 1); // B = B + 0.5*A
63 | // After this, B = 1 + x/2 - 0.5*(x/2)^2 + 0.5*(x/2)^3 - 0.625*(x/2)^4 + 0.875*(x/2)^5
64 |
65 | B = B + ((int32_t)32768); // Round off bit
66 |
67 | return B;
68 | }
69 |
70 | int32_t WebRtcSpl_Sqrt(int32_t value)
71 | {
72 | /* Returns an integer approximation of sqrt(abs(value)); 0 for value == 0.
73 | Algorithm:
74 |
75 | Six term Taylor Series is used here to compute the square root of a number
76 | y^0.5 = (1+x)^0.5 where x = y-1
77 | = 1+(x/2)-0.5*((x/2)^2)+0.5*((x/2)^3)-0.625*((x/2)^4)+0.875*((x/2)^5)
78 | 0.5 <= y < 1
79 |
80 | Example of how the algorithm works, with ut=sqrt(in), and
81 | with in=73632 and ut=271 (even shift value case):
82 |
83 | in=73632
84 | y= in/131072
85 | x=y-1
86 | t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
87 | ut=t*(1/sqrt(2))*512
88 |
89 | or:
90 |
91 | in=73632
92 | in2=73632*2^14
93 | y= in2/2^31
94 | x=y-1
95 | t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
96 | ut=t*(1/sqrt(2))
97 | ut2=ut*2^9
98 |
99 | which gives:
100 |
101 | in = 73632
102 | in2 = 1206386688
103 | y = 0.56176757812500
104 | x = -0.43823242187500
105 | t = 0.74973506527313
106 | ut = 0.53014274874797
107 | ut2 = 2.714330873589594e+002
108 |
109 | or:
110 |
111 | in=73632
112 | in2=73632*2^14
113 | y=in2/2
114 | x=y-2^30
115 | x_half=x/2^31
116 | t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
117 | + 0.875*((x_half)^5)
118 | ut=t*(1/sqrt(2))
119 | ut2=ut*2^9
120 |
121 | which gives:
122 |
123 | in = 73632
124 | in2 = 1206386688
125 | y = 603193344
126 | x = -470548480
127 | x_half = -0.21911621093750
128 | t = 0.74973506527313
129 | ut = 0.53014274874797
130 | ut2 = 2.714330873589594e+002
131 |
132 | */
133 |
134 | int16_t x_norm, nshift, t16, sh;
135 | int32_t A;
136 |
137 | int16_t k_sqrt_2 = 23170; // 1/sqrt2 scaled by 2^15 (==0x5a82)
138 |
139 | A = value;
140 |
141 | // The convention in this function is to calculate sqrt(abs(A)). Negate the
142 | // input if it is negative.
143 | if (A < 0) {
144 | if (A == WEBRTC_SPL_WORD32_MIN) {
145 | // This number cannot be held in an int32_t after negating.
146 | // Map it to the maximum positive value.
147 | A = WEBRTC_SPL_WORD32_MAX;
148 | } else {
149 | A = -A;
150 | }
151 | } else if (A == 0) {
152 | return 0; // sqrt(0) = 0
153 | }
154 |
155 | sh = WebRtcSpl_NormW32(A); // # shifts to normalize A
156 | A = WEBRTC_SPL_LSHIFT_W32(A, sh); // Normalize A
157 | if (A < (WEBRTC_SPL_WORD32_MAX - 32767))
158 | {
159 | A = A + ((int32_t)32768); // Round off bit
160 | } else
161 | {
162 | A = WEBRTC_SPL_WORD32_MAX;
163 | }
164 |
165 | x_norm = (int16_t)(A >> 16); // x_norm = AH
166 |
167 | nshift = (sh / 2);
168 | RTC_DCHECK_GE(nshift, 0);
169 |
170 | A = (int32_t)WEBRTC_SPL_LSHIFT_W32((int32_t)x_norm, 16);
171 | A = WEBRTC_SPL_ABS_W32(A); // A = abs(x_norm<<16)
172 | A = WebRtcSpl_SqrtLocal(A); // A = sqrt(A)
173 |
174 | if (2 * nshift == sh) {
175 | // Even shift value case
176 |
177 | t16 = (int16_t)(A >> 16); // t16 = AH
178 |
179 | A = k_sqrt_2 * t16 * 2; // A = 1/sqrt(2)*t16
180 | A = A + ((int32_t)32768); // Round off
181 | A = A & ((int32_t)0x7fff0000); // Round off
182 |
183 | A >>= 15; // A = A>>15
184 |
185 | } else
186 | {
187 | A >>= 16; // A = A>>16
188 | }
189 |
190 | A = A & ((int32_t)0x0000ffff);
191 | A >>= nshift; // De-normalize the result.
192 |
193 | return A;
194 | }
195 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/signal_processing/vector_scaling_operations.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 |
12 | /*
13 | * This file contains implementations of the functions
14 | * WebRtcSpl_VectorBitShiftW16()
15 | * WebRtcSpl_VectorBitShiftW32()
16 | * WebRtcSpl_VectorBitShiftW32ToW16()
17 | * WebRtcSpl_ScaleVector()
18 | * WebRtcSpl_ScaleVectorWithSat()
19 | * WebRtcSpl_ScaleAndAddVectors()
20 | * WebRtcSpl_ScaleAndAddVectorsWithRoundC()
21 | */
22 |
23 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
24 |
/*
 * Bit-shifts each of the |length| samples in |in| and writes them to |res|.
 *
 * A positive |right_shifts| shifts every sample right by that many bits.
 * Zero or a negative |right_shifts| multiplies every sample by
 * 2^(-right_shifts) instead; the product is truncated to 16 bits on store.
 */
void WebRtcSpl_VectorBitShiftW16(int16_t *res, size_t length,
                                 const int16_t *in, int16_t right_shifts)
{
    size_t idx;

    if (right_shifts > 0)
    {
        for (idx = 0; idx < length; idx++)
        {
            res[idx] = (in[idx] >> right_shifts);
        }
        return;
    }

    /* Scale up by multiplication rather than by shifting the sample. */
    for (idx = 0; idx < length; idx++)
    {
        res[idx] = (in[idx] * (1 << (-right_shifts)));
    }
}
44 |
/*
 * Bit-shifts each of the |vector_length| 32-bit values in |in_vector| and
 * writes them to |out_vector|.
 *
 * A positive |right_shifts| shifts right by that many bits. Zero or a
 * negative |right_shifts| shifts left by -|right_shifts| bits.
 */
void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector,
                                 size_t vector_length,
                                 const int32_t *in_vector,
                                 int16_t right_shifts)
{
    size_t i;

    if (right_shifts > 0)
    {
        for (i = 0; i < vector_length; i++)
        {
            out_vector[i] = (in_vector[i] >> right_shifts);
        }
    } else
    {
        const int left_shifts = -right_shifts;

        for (i = 0; i < vector_length; i++)
        {
            /* Left-shifting a negative signed value is undefined behaviour in
             * C, so shift the unsigned bit pattern and convert back. */
            out_vector[i] = (int32_t)((uint32_t)in_vector[i] << left_shifts);
        }
    }
}
66 |
// Bit-shifts each of the |length| 32-bit values in |in| and stores the result,
// passed through WebRtcSpl_SatW32ToW16(), in |out|.
//
// A non-negative |right_shifts| shifts right by that many bits; a negative
// value shifts left by -|right_shifts| bits.
void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length,
                                      const int32_t* in, int right_shifts) {
  size_t i;
  int32_t tmp_w32;

  if (right_shifts >= 0) {
    for (i = 0; i < length; i++) {
      tmp_w32 = in[i] >> right_shifts;
      out[i] = WebRtcSpl_SatW32ToW16(tmp_w32);
    }
  } else {
    const int left_shifts = -right_shifts;
    for (i = 0; i < length; i++) {
      // Left-shifting a negative signed value is undefined behaviour in C, so
      // shift the unsigned bit pattern and convert back.
      tmp_w32 = (int32_t)((uint32_t)in[i] << left_shifts);
      out[i] = WebRtcSpl_SatW32ToW16(tmp_w32);
    }
  }
}
85 |
/*
 * Scales a vector: out_vector[k] = (gain * in_vector[k]) >> right_shifts for
 * each of the |in_vector_length| samples. The result is truncated to 16 bits
 * without saturation.
 */
void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector,
                           int16_t gain, size_t in_vector_length,
                           int16_t right_shifts)
{
    size_t k = 0;

    while (k < in_vector_length)
    {
        const int scaled = in_vector[k] * gain;

        out_vector[k] = (int16_t)(scaled >> right_shifts);
        k++;
    }
}
103 |
/*
 * Scales a vector: out_vector[k] = (gain * in_vector[k]) >> right_shifts for
 * each of the |in_vector_length| samples, passing every result through
 * WebRtcSpl_SatW32ToW16() before it is stored.
 */
void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector,
                                  int16_t gain, size_t in_vector_length,
                                  int16_t right_shifts)
{
    size_t k = 0;

    while (k < in_vector_length)
    {
        const int scaled = in_vector[k] * gain;

        out_vector[k] = WebRtcSpl_SatW32ToW16(scaled >> right_shifts);
        k++;
    }
}
120 |
/*
 * Weighted sum of two vectors:
 *   out[k] = ((gain1 * in1[k]) >> shift1) + ((gain2 * in2[k]) >> shift2)
 * for each of the |vector_length| samples. Each scaled term is truncated to
 * 16 bits before the addition, and the sum is truncated to 16 bits on store.
 */
void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1,
                                  const int16_t *in2, int16_t gain2, int shift2,
                                  int16_t *out, size_t vector_length)
{
    size_t k;

    for (k = 0; k < vector_length; k++)
    {
        const int16_t first_term = (int16_t)((gain1 * in1[k]) >> shift1);
        const int16_t second_term = (int16_t)((gain2 * in2[k]) >> shift2);

        out[k] = first_term + second_term;
    }
}
141 |
// C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms.
//
// Computes, for each of the |length| samples,
//   out_vector[i] = (in_vector1[i] * in_vector1_scale +
//                    in_vector2[i] * in_vector2_scale +
//                    round_value) >> right_shifts
// where round_value = (1 << right_shifts) >> 1, truncated to 16 bits.
//
// Returns 0 on success, or -1 if any pointer is NULL, |length| is 0 or
// |right_shifts| is negative. Nothing is written on failure.
int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
                                           int16_t in_vector1_scale,
                                           const int16_t* in_vector2,
                                           int16_t in_vector2_scale,
                                           int right_shifts,
                                           int16_t* out_vector,
                                           size_t length) {
  size_t i = 0;
  int round_value = 0;

  if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
      length == 0 || right_shifts < 0) {
    return -1;
  }

  // Computed only after validation: shifting by a negative count is undefined
  // behaviour in C.
  round_value = (1 << right_shifts) >> 1;

  for (i = 0; i < length; i++) {
    out_vector[i] = (int16_t)((
        in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale +
        round_value) >> right_shifts);
  }

  return 0;
}
166 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Written by Wilco Dijkstra, 1996. The following email exchange establishes the
3 | * license.
4 | *
5 | * From: Wilco Dijkstra
6 | * Date: Fri, Jun 24, 2011 at 3:20 AM
7 | * Subject: Re: sqrt routine
8 | * To: Kevin Ma
9 | * Hi Kevin,
10 | * Thanks for asking. Those routines are public domain (originally posted to
11 | * comp.sys.arm a long time ago), so you can use them freely for any purpose.
12 | * Cheers,
13 | * Wilco
14 | *
15 | * ----- Original Message -----
16 | * From: "Kevin Ma"
17 | * To:
18 | * Sent: Thursday, June 23, 2011 11:44 PM
19 | * Subject: Fwd: sqrt routine
20 | * Hi Wilco,
21 | * I saw your sqrt routine from several web sites, including
22 | * http://www.finesse.demon.co.uk/steven/sqrt.html.
23 | * Just wonder if there's any copyright information with your Successive
24 | * approximation routines, or if I can freely use it for any purpose.
25 | * Thanks.
26 | * Kevin
27 | */
28 |
29 | // Minor modifications in code style for WebRTC, 2012.
30 |
31 | #include "webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h"
32 |
33 | /*
34 | * Algorithm:
35 | * Successive approximation of the equation (root + delta) ^ 2 = N
36 | * until delta < 1. If delta < 1 we have the integer part of SQRT (N).
37 | * Use delta = 2^i for i = 15 .. 0.
38 | *
39 | * Output precision is 16 bits. Note for large input values (close to
40 | * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
41 | * contains the MSB information (a non-sign value). Do with caution
42 | * if you need to cast the output to int16_t type.
43 | *
44 | * If the input value is negative, it returns 0.
45 | */
46 |
/*
 * Integer square root, rounded down, by successive approximation.
 *
 * One pass is made per result bit, from bit 15 down to bit 0. |root| holds
 * twice the root found so far; a bit is accepted when the remaining |value|
 * is at least (root + 2^bit) << bit, which is then subtracted from |value|.
 * Non-positive inputs never satisfy that test, so they yield 0.
 */
int32_t WebRtcSpl_SqrtFloor(int32_t value)
{
    int32_t root = 0;
    int bit;

    for (bit = 15; bit >= 0; bit--)
    {
        const int32_t trial = root + (1 << bit);

        if (value >= (trial << bit))
        {
            value -= (trial << bit);
            root |= (2 << bit);
        }
    }

    /* |root| was accumulated doubled; halve it for the final answer. */
    return root >> 1;
}
78 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | #include <stdint.h>
12 |
13 | //
14 | // WebRtcSpl_SqrtFloor(...)
15 | //
16 | // Returns the square root of the input value |value|. The precision of this
17 | // function is rounding down integer precision, i.e., sqrt(8) gives 2 as answer.
18 | // If |value| is a negative number then 0 is returned.
19 | //
20 | // Algorithm:
21 | //
22 | // An iterative 4 cycle/bit routine
23 | //
24 | // Input:
25 | // - value : Value to calculate sqrt of
26 | //
27 | // Return value : Result of the sqrt calculation
28 | //
29 | int32_t WebRtcSpl_SqrtFloor(int32_t value);
30 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/vad/include/webrtc_vad.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | /*
12 | * This header file includes the VAD API calls. Specific function calls are
13 | * given below.
14 | */
15 |
16 | #ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT
17 | #define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_
18 |
19 | #include <stddef.h>
20 | #include <stdint.h>
21 |
22 | typedef struct WebRtcVadInst VadInst;
23 |
24 | #ifdef __cplusplus
25 | extern "C" {
26 | #endif
27 |
28 | // Creates an instance to the VAD structure.
29 | VadInst* WebRtcVad_Create(void);
30 |
31 | // Frees the dynamic memory of a specified VAD instance.
32 | //
33 | // - handle [i] : Pointer to VAD instance that should be freed.
34 | void WebRtcVad_Free(VadInst* handle);
35 |
36 | // Initializes a VAD instance.
37 | //
38 | // - handle [i/o] : Instance that should be initialized.
39 | //
40 | // returns : 0 - (OK),
41 | // -1 - (null pointer or Default mode could not be set).
42 | int WebRtcVad_Init(VadInst* handle);
43 |
44 | // Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
45 | // restrictive in reporting speech. Put in other words the probability of being
46 | // speech when the VAD returns 1 is increased with increasing mode. As a
47 | // consequence also the missed detection rate goes up.
48 | //
49 | // - handle [i/o] : VAD instance.
50 | // - mode [i] : Aggressiveness mode (0, 1, 2, or 3).
51 | //
52 | // returns : 0 - (OK),
53 | // -1 - (null pointer, mode could not be set or the VAD instance
54 | // has not been initialized).
55 | int WebRtcVad_set_mode(VadInst* handle, int mode);
56 |
57 | // Calculates a VAD decision for the |audio_frame|. For valid sampling rates
58 | // and frame lengths, see the description of WebRtcVad_ValidRateAndFrameLength().
59 | //
60 | // - handle [i/o] : VAD Instance. Needs to be initialized by
61 | // WebRtcVad_Init() before call.
62 | // - fs [i] : Sampling frequency (Hz): 8000, 16000, or 32000
63 | // - audio_frame [i] : Audio frame buffer.
64 | // - frame_length [i] : Length of audio frame buffer in number of samples.
65 | //
66 | // returns : 1 - (Active Voice),
67 | // 0 - (Non-active Voice),
68 | // -1 - (Error)
69 | int WebRtcVad_Process(VadInst* handle,
70 | int fs,
71 | const int16_t* audio_frame,
72 | size_t frame_length);
73 |
74 | // Checks for valid combinations of |rate| and |frame_length|. We support 10,
75 | // 20 and 30 ms frames and the rates 8000, 16000, 32000 and 48000 Hz.
76 | //
77 | // - rate [i] : Sampling frequency (Hz).
78 | // - frame_length [i] : Speech frame buffer length in number of samples.
79 | //
80 | // returns : 0 - (valid combination), -1 - (invalid combination)
81 | int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length);
82 |
83 | #ifdef __cplusplus
84 | }
85 | #endif
86 |
87 | #endif // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT
88 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/vad/vad_core.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | /*
12 | * This header file includes the descriptions of the core VAD calls.
13 | */
14 |
15 | #ifndef COMMON_AUDIO_VAD_VAD_CORE_H_
16 | #define COMMON_AUDIO_VAD_VAD_CORE_H_
17 |
18 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
19 |
enum { kNumChannels = 6 };  // Number of frequency bands (named channels).
enum { kNumGaussians = 2 };  // Number of Gaussians per channel in the GMM.
// One table entry per (channel, Gaussian) pair.
enum { kTableSize = kNumChannels * kNumGaussians };
enum { kMinEnergy = 10 };  // Minimum energy required to trigger audio signal.

// State of one core VAD instance. Allocated by WebRtcVad_Create() and passed
// as |self| / |inst| to the core VAD functions declared below.
typedef struct VadInstT_ {
  int vad;
  int32_t downsampling_filter_states[4];
  WebRtcSpl_State48khzTo8khz state_48_to_8;
  // GMM parameter tables, |kTableSize| entries each.
  int16_t noise_means[kTableSize];
  int16_t speech_means[kTableSize];
  int16_t noise_stds[kTableSize];
  int16_t speech_stds[kTableSize];
  // TODO(bjornv): Change to |frame_count|.
  // Read by WebRtcVad_FindMinimum(): while it is 0 the default median is used.
  int32_t frame_counter;
  int16_t over_hang;  // Over Hang
  int16_t num_of_speech;
  // TODO(bjornv): Change to |age_vector|.
  // 16 entries per channel; WebRtcVad_FindMinimum() uses them as the age of
  // the corresponding entry in |low_value_vector|.
  int16_t index_vector[16 * kNumChannels];
  // 16 entries per channel; the smallest feature values tracked by
  // WebRtcVad_FindMinimum().
  int16_t low_value_vector[16 * kNumChannels];
  // TODO(bjornv): Change to |median|.
  // Smoothed value per channel, updated and returned by
  // WebRtcVad_FindMinimum().
  int16_t mean_value[kNumChannels];
  int16_t upper_state[5];
  int16_t lower_state[5];
  int16_t hp_filter_state[4];
  int16_t over_hang_max_1[3];
  int16_t over_hang_max_2[3];
  int16_t individual[3];
  int16_t total[3];

  // Set to 0 by WebRtcVad_Create().
  int init_flag;
} VadInstT;
52 |
53 | // Initializes the core VAD component. The default aggressiveness mode is
54 | // controlled by |kDefaultMode| in vad_core.c.
55 | //
56 | // - self [i/o] : Instance that should be initialized
57 | //
58 | // returns : 0 (OK), -1 (null pointer in or if the default mode can't be
59 | // set)
60 | int WebRtcVad_InitCore(VadInstT* self);
61 |
62 | /****************************************************************************
63 | * WebRtcVad_set_mode_core(...)
64 | *
65 | * This function changes the VAD settings
66 | *
67 | * Input:
68 | * - inst : VAD instance
69 | * - mode : Aggressiveness degree
70 | * 0 (High quality) - 3 (Highly aggressive)
71 | *
72 | * Output:
73 | * - inst : Changed instance
74 | *
75 | * Return value : 0 - Ok
76 | * -1 - Error
77 | */
78 |
79 | int WebRtcVad_set_mode_core(VadInstT* self, int mode);
80 |
81 | /****************************************************************************
82 | * WebRtcVad_CalcVad48khz(...)
83 | * WebRtcVad_CalcVad32khz(...)
84 | * WebRtcVad_CalcVad16khz(...)
85 | * WebRtcVad_CalcVad8khz(...)
86 | *
87 | * Calculate probability for active speech and make VAD decision.
88 | *
89 | * Input:
90 | * - inst : Instance that should be initialized
91 | * - speech_frame : Input speech frame
92 | * - frame_length : Number of input samples
93 | *
94 | * Output:
95 | * - inst : Updated filter states etc.
96 | *
97 | * Return value : VAD decision
98 | * 0 - No active speech
99 | * 1-6 - Active speech
100 | */
101 | int WebRtcVad_CalcVad48khz(VadInstT* inst,
102 | const int16_t* speech_frame,
103 | size_t frame_length);
104 | int WebRtcVad_CalcVad32khz(VadInstT* inst,
105 | const int16_t* speech_frame,
106 | size_t frame_length);
107 | int WebRtcVad_CalcVad16khz(VadInstT* inst,
108 | const int16_t* speech_frame,
109 | size_t frame_length);
110 | int WebRtcVad_CalcVad8khz(VadInstT* inst,
111 | const int16_t* speech_frame,
112 | size_t frame_length);
113 |
114 | #endif // COMMON_AUDIO_VAD_VAD_CORE_H_
115 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/vad/vad_filterbank.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | /*
12 | * This file includes feature calculating functionality used in vad_core.c.
13 | */
14 |
15 | #ifndef COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
16 | #define COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
17 |
18 | #include "webrtc/common_audio/vad/vad_core.h"
19 |
20 | // Takes |data_length| samples of |data_in| and calculates the logarithm of the
21 | // energy of each of the |kNumChannels| = 6 frequency bands used by the VAD:
22 | // 80 Hz - 250 Hz
23 | // 250 Hz - 500 Hz
24 | // 500 Hz - 1000 Hz
25 | // 1000 Hz - 2000 Hz
26 | // 2000 Hz - 3000 Hz
27 | // 3000 Hz - 4000 Hz
28 | //
29 | // The values are given in Q4 and written to |features|. Further, an approximate
30 | // overall energy is returned. The return value is used in
31 | // WebRtcVad_GmmProbability() as a signal indicator, hence it is arbitrary above
32 | // the threshold |kMinEnergy|.
33 | //
34 | // - self [i/o] : State information of the VAD.
35 | // - data_in [i] : Input audio data, for feature extraction.
36 | // - data_length [i] : Audio data size, in number of samples.
37 | // - features [o] : 10 * log10(energy in each frequency band), Q4.
38 | // - returns : Total energy of the signal (NOTE! This value is not
39 | // exact. It is only used in a comparison.)
40 | int16_t WebRtcVad_CalculateFeatures(VadInstT* self,
41 | const int16_t* data_in,
42 | size_t data_length,
43 | int16_t* features);
44 |
45 | #endif // COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
46 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/vad/vad_gmm.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | #include "webrtc/common_audio/vad/vad_gmm.h"
12 |
13 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
14 |
static const int32_t kCompVar = 22005;
static const int16_t kLog2Exp = 5909;  // log2(exp(1)) in Q12.

// Evaluates a normal distribution at |input| and returns the result in Q20:
//
//   1 / s * exp(-(x - m)^2 / (2 * s^2))
//
// with the operands in these Q domains:
//   m = |mean|  (Q7)
//   s = |std|   (Q7)
//   x = |input| (Q4)
// |delta| (Q11) is written as a side product; it is used when updating the
// noise/speech model.
int32_t WebRtcVad_GaussianProbability(int16_t input,
                                      int16_t mean,
                                      int16_t std,
                                      int16_t* delta) {
  int16_t inv_std;      // 1 / s, Q10.
  int16_t inv_std_q8;   // 1 / s, Q8.
  int16_t inv_var;      // 1 / s^2, Q14.
  int16_t deviation;    // x - m, Q7.
  int16_t shift;        // Scratch value for the exp2() approximation.
  int16_t exp_q10;      // exp(-exponent), Q10.
  int32_t numerator;
  int32_t exponent;     // (x - m)^2 / (2 * s^2), Q10.

  // 1 / s in Q10: 131072 is 1 in Q17, and adding (|std| >> 1) rounds the
  // quotient instead of truncating it. Q17 / Q7 = Q10.
  numerator = (int32_t) 131072 + (int32_t) (std >> 1);
  inv_std = (int16_t) WebRtcSpl_DivW32W16(numerator, std);

  // 1 / s^2 in Q14: (Q8 * Q8) >> 2 = Q14.
  inv_std_q8 = (inv_std >> 2);
  inv_var = (int16_t)((inv_std_q8 * inv_std_q8) >> 2);
  // TODO(bjornv): Investigate if changing to
  // inv_var = (int16_t)((inv_std * inv_std) >> 6);
  // gives better accuracy.

  // x - m in Q7 (|input| is first moved from Q4 to Q7).
  deviation = (input << 3);
  deviation = deviation - mean;

  // |delta| = (x - m) / s^2 in Q11: (Q14 * Q7) >> 10 = Q11.
  *delta = (int16_t)((inv_var * deviation) >> 10);

  // (Q11 * Q7) >> 8 = Q10; the ninth shift is the division by two.
  exponent = (*delta * deviation) >> 9;

  // Exponent too large: the probability is treated as zero.
  if (!(exponent < kCompVar)) {
    return 0;
  }

  // exp(-exponent) ~= exp2(-log2(exp(1)) * exponent).
  // (Q12 * Q10) >> 12 = Q10.
  shift = (int16_t)((kLog2Exp * exponent) >> 12);
  shift = -shift;
  // Low 10 bits with the 0x0400 bit set form the value to be scaled down.
  exp_q10 = (0x0400 | (shift & 0x03FF));
  // Derive the right-shift count from the remaining upper bits.
  shift ^= 0xFFFF;
  shift >>= 10;
  shift += 1;
  exp_q10 >>= shift;

  // (1 / s) * exp(...): Q10 * Q10 = Q20.
  return inv_std * exp_q10;
}
83 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/vad/vad_gmm.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | // Gaussian probability calculations internally used in vad_core.c.
12 |
13 | #ifndef COMMON_AUDIO_VAD_VAD_GMM_H_
14 | #define COMMON_AUDIO_VAD_VAD_GMM_H_
15 |
16 | #include <stdint.h>
17 |
18 | // Calculates the probability for |input|, given that |input| comes from a
19 | // normal distribution with mean and standard deviation (|mean|, |std|).
20 | //
21 | // Inputs:
22 | // - input : input sample in Q4.
23 | // - mean : mean input in the statistical model, Q7.
24 | // - std : standard deviation, Q7.
25 | //
26 | // Output:
27 | //
28 | // - delta : input used when updating the model, Q11.
29 | // |delta| = (|input| - |mean|) / |std|^2.
30 | //
31 | // Return:
32 | // (probability for |input|) =
33 | // 1 / |std| * exp(-(|input| - |mean|)^2 / (2 * |std|^2));
34 | int32_t WebRtcVad_GaussianProbability(int16_t input,
35 | int16_t mean,
36 | int16_t std,
37 | int16_t* delta);
38 |
39 | #endif // COMMON_AUDIO_VAD_VAD_GMM_H_
40 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/vad/vad_sp.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | #include "webrtc/common_audio/vad/vad_sp.h"
12 |
13 | #include "webrtc/rtc_base/checks.h"
14 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
15 | #include "webrtc/common_audio/vad/vad_core.h"
16 |
// Allpass filter coefficients, upper and lower, in Q13.
// Upper: 0.64, Lower: 0.17.
static const int16_t kAllPassCoefsQ13[2] = { 5243, 1392 };  // Q13.
static const int16_t kSmoothingDown = 6553;  // 0.2 in Q15.
static const int16_t kSmoothingUp = 32439;  // 0.99 in Q15.

// TODO(bjornv): Move this function to vad_filterbank.c.
// Downsamples |signal_in| by a factor of two into |signal_out|. Even-indexed
// input samples feed the upper all-pass branch, odd-indexed samples feed the
// lower branch, and each output sample is the sum of both branch outputs.
// |filter_state[0]| and |filter_state[1]| carry the branch states between
// calls and are updated before returning.
void WebRtcVad_Downsampling(const int16_t* signal_in,
                            int16_t* signal_out,
                            int32_t* filter_state,
                            size_t in_length) {
  int32_t upper_state = filter_state[0];
  int32_t lower_state = filter_state[1];
  int16_t upper_out = 0;
  int16_t lower_out = 0;
  // Downsampling by 2 gives half length.
  const size_t out_length = (in_length >> 1);
  size_t k;

  // Filter coefficients in Q13, filter state in Q0.
  for (k = 0; k < out_length; k++) {
    const size_t even = 2 * k;
    const size_t odd = even + 1;

    // All-pass filtering upper branch.
    upper_out = (int16_t) ((upper_state >> 1) +
        ((kAllPassCoefsQ13[0] * signal_in[even]) >> 14));
    signal_out[k] = upper_out;
    upper_state = (int32_t)(signal_in[even]) -
        ((kAllPassCoefsQ13[0] * upper_out) >> 12);

    // All-pass filtering lower branch.
    lower_out = (int16_t) ((lower_state >> 1) +
        ((kAllPassCoefsQ13[1] * signal_in[odd]) >> 14));
    signal_out[k] += lower_out;
    lower_state = (int32_t)(signal_in[odd]) -
        ((kAllPassCoefsQ13[1] * lower_out) >> 12);
  }

  // Store the filter states.
  filter_state[0] = upper_state;
  filter_state[1] = lower_state;
}
54 |
55 | // Inserts |feature_value| into |low_value_vector|, if it is one of the 16
56 | // smallest values the last 100 frames. Then calculates and returns the median
57 | // of the five smallest values.
58 | int16_t WebRtcVad_FindMinimum(VadInstT* self,
59 | int16_t feature_value,
60 | int channel) {
61 | int i = 0, j = 0;
62 | int position = -1;
63 | // Offset to beginning of the 16 minimum values in memory.
64 | const int offset = (channel << 4);
65 | int16_t current_median = 1600;
66 | int16_t alpha = 0;
67 | int32_t tmp32 = 0;
68 | // Pointer to memory for the 16 minimum values and the age of each value of
69 | // the |channel|.
70 | int16_t* age = &self->index_vector[offset];
71 | int16_t* smallest_values = &self->low_value_vector[offset];
72 |
73 | RTC_DCHECK_LT(channel, kNumChannels);
74 |
75 | // Each value in |smallest_values| is getting 1 loop older. Update |age|, and
76 | // remove old values.
77 | for (i = 0; i < 16; i++) {
78 | if (age[i] != 100) {
79 | age[i]++;
80 | } else {
81 | // Too old value. Remove from memory and shift larger values downwards.
82 | for (j = i; j < 15; j++) {
83 | smallest_values[j] = smallest_values[j + 1];
84 | age[j] = age[j + 1];
85 | }
86 | age[15] = 101;
87 | smallest_values[15] = 10000;
88 | }
89 | }
90 |
91 | // Check if |feature_value| is smaller than any of the values in
92 | // |smallest_values|. If so, find the |position| where to insert the new value
93 | // (|feature_value|).
94 | if (feature_value < smallest_values[7]) {
95 | if (feature_value < smallest_values[3]) {
96 | if (feature_value < smallest_values[1]) {
97 | if (feature_value < smallest_values[0]) {
98 | position = 0;
99 | } else {
100 | position = 1;
101 | }
102 | } else if (feature_value < smallest_values[2]) {
103 | position = 2;
104 | } else {
105 | position = 3;
106 | }
107 | } else if (feature_value < smallest_values[5]) {
108 | if (feature_value < smallest_values[4]) {
109 | position = 4;
110 | } else {
111 | position = 5;
112 | }
113 | } else if (feature_value < smallest_values[6]) {
114 | position = 6;
115 | } else {
116 | position = 7;
117 | }
118 | } else if (feature_value < smallest_values[15]) {
119 | if (feature_value < smallest_values[11]) {
120 | if (feature_value < smallest_values[9]) {
121 | if (feature_value < smallest_values[8]) {
122 | position = 8;
123 | } else {
124 | position = 9;
125 | }
126 | } else if (feature_value < smallest_values[10]) {
127 | position = 10;
128 | } else {
129 | position = 11;
130 | }
131 | } else if (feature_value < smallest_values[13]) {
132 | if (feature_value < smallest_values[12]) {
133 | position = 12;
134 | } else {
135 | position = 13;
136 | }
137 | } else if (feature_value < smallest_values[14]) {
138 | position = 14;
139 | } else {
140 | position = 15;
141 | }
142 | }
143 |
144 | // If we have detected a new small value, insert it at the correct position
145 | // and shift larger values up.
146 | if (position > -1) {
147 | for (i = 15; i > position; i--) {
148 | smallest_values[i] = smallest_values[i - 1];
149 | age[i] = age[i - 1];
150 | }
151 | smallest_values[position] = feature_value;
152 | age[position] = 1;
153 | }
154 |
155 | // Get |current_median|.
156 | if (self->frame_counter > 2) {
157 | current_median = smallest_values[2];
158 | } else if (self->frame_counter > 0) {
159 | current_median = smallest_values[0];
160 | }
161 |
162 | // Smooth the median value.
163 | if (self->frame_counter > 0) {
164 | if (current_median < self->mean_value[channel]) {
165 | alpha = kSmoothingDown; // 0.2 in Q15.
166 | } else {
167 | alpha = kSmoothingUp; // 0.99 in Q15.
168 | }
169 | }
170 | tmp32 = (alpha + 1) * self->mean_value[channel];
171 | tmp32 += (WEBRTC_SPL_WORD16_MAX - alpha) * current_median;
172 | tmp32 += 16384;
173 | self->mean_value[channel] = (int16_t) (tmp32 >> 15);
174 |
175 | return self->mean_value[channel];
176 | }
177 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/vad/vad_sp.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | // This file includes specific signal processing tools used in vad_core.c.
12 |
13 | #ifndef COMMON_AUDIO_VAD_VAD_SP_H_
14 | #define COMMON_AUDIO_VAD_VAD_SP_H_
15 |
16 | #include "webrtc/common_audio/vad/vad_core.h"
17 |
18 | // Downsamples the signal by a factor 2, eg. 32->16 or 16->8.
19 | //
20 | // Inputs:
21 | // - signal_in : Input signal.
22 | // - in_length : Length of input signal in samples.
23 | //
24 | // Input & Output:
25 | // - filter_state : Current filter states of the two all-pass filters. The
26 | // |filter_state| is updated after all samples have been
27 | // processed.
28 | //
29 | // Output:
30 | // - signal_out : Downsampled signal (of length |in_length| / 2).
31 | void WebRtcVad_Downsampling(const int16_t* signal_in,
32 | int16_t* signal_out,
33 | int32_t* filter_state,
34 | size_t in_length);
35 |
36 | // Updates and returns the smoothed feature minimum. The minimum is taken to be
37 | // the median of the five smallest feature values in a 100-frame window.
38 | // As long as |handle->frame_counter| is zero, that is, we haven't received any
39 | // "valid" data, WebRtcVad_FindMinimum() outputs the default value of 1600.
40 | //
41 | // Inputs:
42 | // - feature_value : New feature value to update with.
43 | // - channel : Channel number.
44 | //
45 | // Input & Output:
46 | // - handle : State information of the VAD.
47 | //
48 | // Returns:
49 | // : Smoothed minimum value for a moving window.
50 | int16_t WebRtcVad_FindMinimum(VadInstT* handle,
51 | int16_t feature_value,
52 | int channel);
53 |
54 | #endif // COMMON_AUDIO_VAD_VAD_SP_H_
55 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/common_audio/vad/webrtc_vad.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | #include "webrtc/common_audio/vad/include/webrtc_vad.h"
12 |
13 | #include <stdlib.h>
14 | #include <string.h>
15 |
16 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
17 | #include "webrtc/common_audio/vad/vad_core.h"
18 |
19 | static const int kInitCheck = 42;  // Value |init_flag| must hold before set_mode/Process accept a handle.
20 | static const int kValidRates[] = { 8000, 16000, 32000, 48000 };  // Rates accepted by WebRtcVad_ValidRateAndFrameLength().
21 | static const size_t kRatesSize = sizeof(kValidRates) / sizeof(*kValidRates);  // Number of entries in |kValidRates|.
22 | static const int kMaxFrameLengthMs = 30;  // Longest accepted frame; durations are tried in 10 ms steps up to this.
23 |
24 | VadInst* WebRtcVad_Create() {  // Returns a new handle, or NULL if malloc() fails.
25 |   VadInstT* self = (VadInstT*)malloc(sizeof(VadInstT));
26 |   // WebRtcSpl_Init() takes no handle, so it runs regardless of the allocation.
27 |   WebRtcSpl_Init();
28 |   if (self != NULL) self->init_flag = 0;  // 0 != kInitCheck: handle not usable yet.
29 |   // On allocation failure |self| is NULL and is handed back as-is.
30 |   return (VadInst*)self;
31 | }
32 |
33 | void WebRtcVad_Free(VadInst* handle) {  // Releases |handle| with free(); no other cleanup happens here.
34 | free(handle);
35 | }
36 |
37 | // TODO(bjornv): Move WebRtcVad_InitCore() code here.
38 | int WebRtcVad_Init(VadInst* handle) {
39 | // Delegates to WebRtcVad_InitCore() and returns its result; |handle| is not checked for NULL here.
40 | return WebRtcVad_InitCore((VadInstT*) handle);
41 | }
42 |
43 | // TODO(bjornv): Move WebRtcVad_set_mode_core() code here.
44 | int WebRtcVad_set_mode(VadInst* handle, int mode) {
45 |   // Forwards |mode| to the core; -1 for a NULL or uninitialized handle.
46 |   VadInstT* const vad_state = (VadInstT*) handle;
47 |   const int usable = (vad_state != NULL) &&
48 |                      (vad_state->init_flag == kInitCheck);
49 |
50 |   if (!usable) {
51 |     return -1;
52 |   }
53 |
54 |   return WebRtcVad_set_mode_core(vad_state, mode);
55 | }
56 |
57 | int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame,
58 |                       size_t frame_length) {
59 |   // Runs the rate-specific VAD routine on one frame. Returns 1 when that
60 |   // routine reports a positive value, its result unchanged otherwise, and -1
61 |   // for a NULL/uninitialized handle, NULL audio or a rejected rate/length.
62 |   VadInstT* const vad_state = (VadInstT*) handle;
63 |   int decision = -1;
64 |
65 |   if (vad_state == NULL || vad_state->init_flag != kInitCheck ||
66 |       audio_frame == NULL) {
67 |     return -1;
68 |   }
69 |   if (WebRtcVad_ValidRateAndFrameLength(fs, frame_length) != 0) {
70 |     return -1;
71 |   }
72 |
73 |   switch (fs) {
74 |     case 48000:
75 |       decision = WebRtcVad_CalcVad48khz(vad_state, audio_frame, frame_length);
76 |       break;
77 |     case 32000:
78 |       decision = WebRtcVad_CalcVad32khz(vad_state, audio_frame, frame_length);
79 |       break;
80 |     case 16000:
81 |       decision = WebRtcVad_CalcVad16khz(vad_state, audio_frame, frame_length);
82 |       break;
83 |     case 8000:
84 |       decision = WebRtcVad_CalcVad8khz(vad_state, audio_frame, frame_length);
85 |       break;
86 |     default:
87 |       break;  // Any other rate leaves |decision| at -1.
88 |   }
89 |   return (decision > 0) ? 1 : decision;
90 | }
91 |
92 | int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length) {
93 |   // Returns 0 when |rate| is listed in kValidRates and |frame_length| equals
94 |   // 10, 20 or 30 ms worth of samples at that rate; returns -1 otherwise.
95 |   size_t rate_idx = 0;
96 |   int duration_ms;
97 |
98 |   // Locate |rate| in the table of supported sampling rates.
99 |   while (rate_idx < kRatesSize && kValidRates[rate_idx] != rate) {
100 |     rate_idx++;
101 |   }
102 |   if (rate_idx == kRatesSize) {
103 |     return -1;
104 |   }
105 |
106 |   // Compare against every allowed duration, in steps of 10 ms.
107 |   for (duration_ms = 10; duration_ms <= kMaxFrameLengthMs;
108 |        duration_ms += 10) {
109 |     if (frame_length == (size_t)(kValidRates[rate_idx] / 1000 * duration_ms)) {
110 |       return 0;
111 |     }
112 |   }
113 |
114 |   return -1;
115 | }
116 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/rtc_base/checks.cc:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2006 The WebRTC Project Authors. All rights reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | // Most of this was borrowed (with minor modifications) from V8's and Chromium's
12 | // src/base/logging.cc.
13 |
14 | #include <cstdarg>
15 | #include <cstdio>
16 | #include <cstdlib>
17 |
18 | #if defined(WEBRTC_ANDROID)
19 | #define RTC_LOG_TAG_ANDROID "rtc"
20 | #include <android/log.h>  // NOLINT
21 | #endif
22 |
23 | #if defined(WEBRTC_WIN)
24 | #include <windows.h>
25 | #endif
26 | // LAST_SYSTEM_ERROR expands to the platform's most recent error code.
27 | #if defined(WEBRTC_WIN)
28 | #define LAST_SYSTEM_ERROR (::GetLastError())
29 | #elif defined(__native_client__) && __native_client__
30 | #define LAST_SYSTEM_ERROR (0)
31 | #elif defined(WEBRTC_POSIX)
32 | #include <errno.h>
33 | #define LAST_SYSTEM_ERROR (errno)
34 | #endif  // WEBRTC_WIN
35 |
36 | #include "webrtc/rtc_base/checks.h"
37 |
38 | namespace {
39 | #if defined(__GNUC__)
40 | __attribute__((__format__(__printf__, 2, 3)))
41 | #endif
42 | void AppendFormat(std::string* s, const char* fmt, ...) {
43 |   // Appends the printf-style expansion of |fmt| to |*s|: measure, then write.
44 |   va_list args;
45 |   va_start(args, fmt);
46 |   va_list measure_args;
47 |   va_copy(measure_args, args);
48 |   const int needed = std::vsnprintf(nullptr, 0, fmt, measure_args);
49 |   va_end(measure_args);
50 |   if (needed > 0) {
51 |     const size_t old_size = s->size();
52 |     s->resize(old_size + needed);  // vsnprintf gets "+ 1" for the '\0'.
53 |     std::vsnprintf(&((*s)[old_size]), needed + 1, fmt, args);
54 |   }
55 |   va_end(args);
56 | }
57 | }  // namespace
58 |
59 | namespace rtc {
60 | namespace webrtc_checks_impl {
61 |
62 | // Reads one argument from args, appends its text form to s and advances fmt.
63 | // Returns true iff an argument was successfully parsed (false at kEnd or for an unknown type).
64 | bool ParseArg(va_list* args, const CheckArgType** fmt, std::string* s) {
65 | if (**fmt == CheckArgType::kEnd)
66 | return false;
67 | // Each case reads the C type that matches the tag from |args|.
68 | switch (**fmt) {
69 | case CheckArgType::kInt:
70 | AppendFormat(s, "%d", va_arg(*args, int));
71 | break;
72 | case CheckArgType::kLong:
73 | AppendFormat(s, "%ld", va_arg(*args, long));
74 | break;
75 | case CheckArgType::kLongLong:
76 | AppendFormat(s, "%lld", va_arg(*args, long long));
77 | break;
78 | case CheckArgType::kUInt:
79 | AppendFormat(s, "%u", va_arg(*args, unsigned));
80 | break;
81 | case CheckArgType::kULong:
82 | AppendFormat(s, "%lu", va_arg(*args, unsigned long));
83 | break;
84 | case CheckArgType::kULongLong:
85 | AppendFormat(s, "%llu", va_arg(*args, unsigned long long));
86 | break;
87 | case CheckArgType::kDouble:
88 | AppendFormat(s, "%g", va_arg(*args, double));
89 | break;
90 | case CheckArgType::kLongDouble:
91 | AppendFormat(s, "%Lg", va_arg(*args, long double));
92 | break;
93 | case CheckArgType::kCharP:
94 | s->append(va_arg(*args, const char*));
95 | break;
96 | case CheckArgType::kStdString:
97 | s->append(*va_arg(*args, const std::string*));
98 | break;
99 | case CheckArgType::kVoidP:
100 | AppendFormat(s, "%p", va_arg(*args, const void*));
101 | break;
102 | default:
103 | s->append("[Invalid CheckArgType]");
104 | return false;
105 | }
106 | (*fmt)++;  // Only reached when an argument was consumed.
107 | return true;
108 | }
109 |
110 | RTC_NORETURN void FatalLog(const char* file,
111 |                            int line,
112 |                            const char* message,
113 |                            const CheckArgType* fmt,
114 |                            ...) {
115 |   va_list args;
116 |   va_start(args, fmt);
117 |   // Builds the message, prints it to stderr (and the Android log), then aborts.
118 |   std::string s;
119 |   AppendFormat(&s,
120 |                "\n\n"
121 |                "#\n"
122 |                "# Fatal error in: %s, line %d\n"
123 |                "# last system error: %u\n"
124 |                "# Check failed: %s",
125 |                file, line, static_cast<unsigned>(LAST_SYSTEM_ERROR), message);
126 |   // The cast above makes the argument match %u on every platform.
127 |   if (*fmt == CheckArgType::kCheckOp) {
128 |     // This log message was generated by RTC_CHECK_OP, so we have to complete
129 |     // the error message using the operands that have been passed as the first
130 |     // two arguments.
131 |     fmt++;
132 |
133 |     std::string s1, s2;
134 |     if (ParseArg(&args, &fmt, &s1) && ParseArg(&args, &fmt, &s2))
135 |       AppendFormat(&s, " (%s vs. %s)\n# ", s1.c_str(), s2.c_str());
136 |   } else {
137 |     s.append("\n# ");
138 |   }
139 |
140 |   // Append all the user-supplied arguments to the message.
141 |   while (ParseArg(&args, &fmt, &s))
142 |     ;
143 |
144 |   va_end(args);
145 |
146 |   const char* output = s.c_str();
147 |
148 | #if defined(WEBRTC_ANDROID)
149 |   __android_log_print(ANDROID_LOG_ERROR, RTC_LOG_TAG_ANDROID, "%s\n", output);
150 | #endif
151 |   // Flush stdout first so earlier output is not lost behind the report.
152 |   fflush(stdout);
153 |   fprintf(stderr, "%s", output);
154 |   fflush(stderr);
155 |   abort();
156 | }
157 |
158 | } // namespace webrtc_checks_impl
159 | } // namespace rtc
160 |
161 | // Entry point for the C flavour of RTC_CHECK / RTC_DCHECK: reports |msg| via FatalLog().
162 | RTC_NORETURN void rtc_FatalMessage(const char* file, int line,
163 |                                    const char* msg) {
164 |   using rtc::webrtc_checks_impl::CheckArgType;  // No extra arguments: the list is just kEnd.
165 |   static constexpr CheckArgType kNoArgs[] = {CheckArgType::kEnd};
166 |   rtc::webrtc_checks_impl::FatalLog(file, line, msg, kNoArgs);
167 | }
168 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/rtc_base/compile_assert_c.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | #ifndef RTC_BASE_COMPILE_ASSERT_C_H_
12 | #define RTC_BASE_COMPILE_ASSERT_C_H_
13 |
14 | // Use this macro to verify at compile time that certain restrictions are met.
15 | // The argument is the boolean expression to evaluate. When it is false (0)
16 | // the switch ends up with two "case 0" labels, which does not compile. Example:
17 | // RTC_COMPILE_ASSERT(sizeof(foo) < 128);
18 | // Expands to a switch statement. Note: In C++, use static_assert instead!
19 | #define RTC_COMPILE_ASSERT(expression) \
20 | switch (0) { \
21 | case 0: \
22 | case expression:; \
23 | }
24 |
25 | #endif // RTC_BASE_COMPILE_ASSERT_C_H_
26 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/rtc_base/sanitizer.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 The WebRTC Project Authors. All rights reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | #ifndef RTC_BASE_SANITIZER_H_
12 | #define RTC_BASE_SANITIZER_H_
13 |
14 | #include <stddef.h>  // For size_t.
15 |
16 | #ifdef __cplusplus
17 | #include <type_traits>
18 | #endif
19 | // RTC_HAS_ASAN / RTC_HAS_MSAN: 1 when __has_feature reports the sanitizer, else 0.
20 | #if defined(__has_feature)
21 | #if __has_feature(address_sanitizer)
22 | #define RTC_HAS_ASAN 1
23 | #endif
24 | #if __has_feature(memory_sanitizer)
25 | #define RTC_HAS_MSAN 1
26 | #endif
27 | #endif
28 | #ifndef RTC_HAS_ASAN
29 | #define RTC_HAS_ASAN 0
30 | #endif
31 | #ifndef RTC_HAS_MSAN
32 | #define RTC_HAS_MSAN 0
33 | #endif
34 | // The sanitizer interface headers are only included when the sanitizer is on.
35 | #if RTC_HAS_ASAN
36 | #include <sanitizer/asan_interface.h>
37 | #endif
38 | #if RTC_HAS_MSAN
39 | #include <sanitizer/msan_interface.h>
40 | #endif
41 | // RTC_NO_SANITIZE(what): the no_sanitize attribute when available, else nothing.
42 | #ifdef __has_attribute
43 | #if __has_attribute(no_sanitize)
44 | #define RTC_NO_SANITIZE(what) __attribute__((no_sanitize(what)))
45 | #endif
46 | #endif
47 | #ifndef RTC_NO_SANITIZE
48 | #define RTC_NO_SANITIZE(what)
49 | #endif
50 |
51 | // Ask ASan to mark the memory range [ptr, ptr + element_size * num_elements)
52 | // as unaddressable, so that reads and writes are not allowed. ASan may narrow
53 | // the range to the nearest alignment boundaries. No-op unless RTC_HAS_ASAN.
54 | static inline void rtc_AsanPoison(const volatile void* ptr,
55 | size_t element_size,
56 | size_t num_elements) {
57 | #if RTC_HAS_ASAN
58 | ASAN_POISON_MEMORY_REGION(ptr, element_size * num_elements);
59 | #endif
60 | }
61 |
62 | // Ask ASan to mark the memory range [ptr, ptr + element_size * num_elements)
63 | // as addressable, so that reads and writes are allowed. ASan may widen the
64 | // range to the nearest alignment boundaries. No-op unless RTC_HAS_ASAN.
65 | static inline void rtc_AsanUnpoison(const volatile void* ptr,
66 | size_t element_size,
67 | size_t num_elements) {
68 | #if RTC_HAS_ASAN
69 | ASAN_UNPOISON_MEMORY_REGION(ptr, element_size * num_elements);
70 | #endif
71 | }
72 |
73 | // Ask MSan to mark the memory range [ptr, ptr + element_size * num_elements)
74 | // as being uninitialized. No-op unless RTC_HAS_MSAN.
75 | static inline void rtc_MsanMarkUninitialized(const volatile void* ptr,
76 | size_t element_size,
77 | size_t num_elements) {
78 | #if RTC_HAS_MSAN
79 | __msan_poison(ptr, element_size * num_elements);
80 | #endif
81 | }
82 |
83 | // Force an MSan check: if any bits in the memory range [ptr, ptr +
84 | // element_size * num_elements) are uninitialized, the call will crash with an
85 | // MSan report. No-op unless RTC_HAS_MSAN.
86 | static inline void rtc_MsanCheckInitialized(const volatile void* ptr,
87 | size_t element_size,
88 | size_t num_elements) {
89 | #if RTC_HAS_MSAN
90 | __msan_check_mem_is_initialized(ptr, element_size * num_elements);
91 | #endif
92 | }
93 |
94 | #ifdef __cplusplus
95 |
96 | namespace rtc {
97 | namespace sanitizer_impl {
98 | // True when T is trivially copy-constructible and destructible, and its copy assignment is trivial or absent.
99 | template <typename T>
100 | constexpr bool IsTriviallyCopyable() {
101 |   return static_cast<bool>(std::is_trivially_copy_constructible<T>::value &&
102 |                            (std::is_trivially_copy_assignable<T>::value ||
103 |                             !std::is_copy_assignable<T>::value) &&
104 |                            std::is_trivially_destructible<T>::value);
105 | }
106 |
107 | }  // namespace sanitizer_impl
108 | // Container overloads: |mem| must provide data() and size().
109 | template <typename T>
110 | inline void AsanPoison(const T& mem) {
111 |   rtc_AsanPoison(mem.data(), sizeof(mem.data()[0]), mem.size());
112 | }
113 |
114 | template <typename T>
115 | inline void AsanUnpoison(const T& mem) {
116 |   rtc_AsanUnpoison(mem.data(), sizeof(mem.data()[0]), mem.size());
117 | }
118 |
119 | template <typename T>
120 | inline void MsanMarkUninitialized(const T& mem) {
121 |   rtc_MsanMarkUninitialized(mem.data(), sizeof(mem.data()[0]), mem.size());
122 | }
123 | // Marks the by-value copy |t| as uninitialized for MSan and returns it.
124 | template <typename T>
125 | inline T MsanUninitialized(T t) {
126 | #if RTC_HAS_MSAN
127 |   // TODO(bugs.webrtc.org/8762): Switch to std::is_trivially_copyable when it
128 |   // becomes available in downstream projects.
129 |   static_assert(sanitizer_impl::IsTriviallyCopyable<T>(), "");
130 | #endif
131 |   rtc_MsanMarkUninitialized(&t, sizeof(T), 1);
132 |   return t;
133 | }
134 |
135 | template <typename T>
136 | inline void MsanCheckInitialized(const T& mem) {
137 |   rtc_MsanCheckInitialized(mem.data(), sizeof(mem.data()[0]), mem.size());
138 | }
139 |
140 | }  // namespace rtc
141 |
142 | #endif // __cplusplus
143 |
144 | #endif // RTC_BASE_SANITIZER_H_
145 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/rtc_base/system/arch.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | // This file contains platform-specific typedefs and defines.
12 | // Much of it is derived from Chromium's build/build_config.h.
13 |
14 | #ifndef RTC_BASE_SYSTEM_ARCH_H_
15 | #define RTC_BASE_SYSTEM_ARCH_H_
16 |
17 | // Processor architecture detection. For more info on what's defined, see:
18 | // http://msdn.microsoft.com/en-us/library/b0084kay.aspx
19 | // http://www.agner.org/optimize/calling_conventions.pdf
20 | // or with gcc, run: "echo | gcc -E -dM -"
21 | #if defined(_M_X64) || defined(__x86_64__)
22 | #define WEBRTC_ARCH_X86_FAMILY
23 | #define WEBRTC_ARCH_X86_64
24 | #define WEBRTC_ARCH_64_BITS
25 | #define WEBRTC_ARCH_LITTLE_ENDIAN
26 | #elif defined(__aarch64__)
27 | #define WEBRTC_ARCH_ARM_FAMILY
28 | #define WEBRTC_ARCH_64_BITS
29 | #define WEBRTC_ARCH_LITTLE_ENDIAN
30 | #elif defined(_M_IX86) || defined(__i386__)
31 | #define WEBRTC_ARCH_X86_FAMILY
32 | #define WEBRTC_ARCH_X86
33 | #define WEBRTC_ARCH_32_BITS
34 | #define WEBRTC_ARCH_LITTLE_ENDIAN
35 | #elif defined(__ARMEL__)
36 | #define WEBRTC_ARCH_ARM_FAMILY
37 | #define WEBRTC_ARCH_32_BITS
38 | #define WEBRTC_ARCH_LITTLE_ENDIAN
39 | #elif defined(__MIPSEL__)
40 | #define WEBRTC_ARCH_MIPS_FAMILY
41 | #if defined(__LP64__)
42 | #define WEBRTC_ARCH_64_BITS
43 | #else
44 | #define WEBRTC_ARCH_32_BITS
45 | #endif
46 | #define WEBRTC_ARCH_LITTLE_ENDIAN
47 | #elif defined(__pnacl__)
48 | #define WEBRTC_ARCH_32_BITS
49 | #define WEBRTC_ARCH_LITTLE_ENDIAN
50 | #else
51 | #error Please add support for your architecture in typedefs.h
52 | #endif
53 |
54 | #if !(defined(WEBRTC_ARCH_LITTLE_ENDIAN) ^ defined(WEBRTC_ARCH_BIG_ENDIAN))
55 | #error Define either WEBRTC_ARCH_LITTLE_ENDIAN or WEBRTC_ARCH_BIG_ENDIAN
56 | #endif
57 |
58 | #endif // RTC_BASE_SYSTEM_ARCH_H_
59 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/rtc_base/system/inline.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | #ifndef RTC_BASE_SYSTEM_INLINE_H_
12 | #define RTC_BASE_SYSTEM_INLINE_H_
13 |
14 | #if defined(_MSC_VER)
15 | // MSVC spellings.
16 | #define RTC_FORCE_INLINE __forceinline
17 | #define RTC_NO_INLINE __declspec(noinline)
18 |
19 | #elif defined(__GNUC__)
20 | // GCC-style attribute spellings.
21 | #define RTC_FORCE_INLINE __attribute__((__always_inline__))
22 | #define RTC_NO_INLINE __attribute__((__noinline__))
23 |
24 | #else
25 | // Any other compiler: both macros expand to nothing.
26 | #define RTC_FORCE_INLINE
27 | #define RTC_NO_INLINE
28 |
29 | #endif
30 |
31 | #endif // RTC_BASE_SYSTEM_INLINE_H_
32 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/rtc_base/type_traits.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2016 The WebRTC Project Authors. All rights reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | #ifndef RTC_BASE_TYPE_TRAITS_H_
12 | #define RTC_BASE_TYPE_TRAITS_H_
13 |
14 | #include <cstddef>
15 | #include <type_traits>
16 |
17 | namespace rtc {
18 |
19 | // Determines if the given class has zero-argument .data() and .size() methods
20 | // whose return values are convertible to T* and size_t, respectively.
21 | template <typename DS, typename T>
22 | class HasDataAndSize {
23 |  private:
24 |   template <
25 |       typename C,
26 |       typename std::enable_if<
27 |           std::is_convertible<decltype(std::declval<C>().data()), T*>::value &&
28 |           std::is_convertible<decltype(std::declval<C>().size()),
29 |                               std::size_t>::value>::type* = nullptr>
30 |   static int Test(int);
31 |
32 |   template <typename>
33 |   static char Test(...);
34 |
35 |  public:
36 |   static constexpr bool value = std::is_same<decltype(Test<DS>(0)), int>::value;
37 | };
38 |
39 | namespace test_has_data_and_size {
40 |
41 | template <typename DR, typename SR>
42 | struct Test1 {
43 |   DR data();
44 |   SR size();
45 | };
46 | static_assert(HasDataAndSize<Test1<int*, int>, int>::value, "");
47 | static_assert(HasDataAndSize<Test1<int*, int>, const int>::value, "");
48 | static_assert(HasDataAndSize<Test1<const int*, int>, const int>::value, "");
49 | static_assert(!HasDataAndSize<Test1<const int*, int>, int>::value,
50 |               "implicit cast of const int* to int*");
51 | static_assert(!HasDataAndSize<Test1<char*, size_t>, int>::value,
52 |               "implicit cast of char* to int*");
53 |
54 | struct Test2 {
55 |   int* data;
56 |   size_t size;
57 | };
58 | static_assert(!HasDataAndSize<Test2, int>::value,
59 |               ".data and .size aren't functions");
60 |
61 | struct Test3 {
62 |   int* data();
63 | };
64 | static_assert(!HasDataAndSize<Test3, int>::value, ".size() is missing");
65 |
66 | class Test4 {
67 |   int* data();
68 |   size_t size();
69 | };
70 | static_assert(!HasDataAndSize<Test4, int>::value,
71 |               ".data() and .size() are private");
72 |
73 | }  // namespace test_has_data_and_size
74 |
75 | namespace type_traits_impl {
76 |
77 | // Determines if the given type is an enum that converts implicitly to
78 | // an integral type.
79 | template <typename T>
80 | struct IsIntEnum {
81 |  private:
82 |   // This overload is used if the type is an enum, and unary plus
83 |   // compiles and turns it into an integral type.
84 |   template <typename X,
85 |             typename std::enable_if<
86 |                 std::is_enum<X>::value &&
87 |                 std::is_integral<decltype(+std::declval<X>())>::value>::type* =
88 |                 nullptr>
89 |   static int Test(int);
90 |
91 |   // Otherwise, this overload is used.
92 |   template <typename>
93 |   static char Test(...);
94 |
95 |  public:
96 |   static constexpr bool value =
97 |       std::is_same<decltype(Test<typename std::remove_reference<T>::type>(0)),
98 |                    int>::value;
99 | };
100 |
101 | }  // namespace type_traits_impl
102 |
103 | // Determines if the given type is integral, or an enum that
104 | // converts implicitly to an integral type.
105 | template <typename T>
106 | struct IsIntlike {
107 |  private:
108 |   using X = typename std::remove_reference<T>::type;
109 |
110 |  public:
111 |   static constexpr bool value =
112 |       std::is_integral<X>::value || type_traits_impl::IsIntEnum<X>::value;
113 | };
114 |
115 | namespace test_enum_intlike {
116 |
117 | enum E1 { e1 };
118 | enum { e2 };
119 | enum class E3 { e3 };
120 | struct S {};
121 |
122 | static_assert(type_traits_impl::IsIntEnum<E1>::value, "");
123 | static_assert(type_traits_impl::IsIntEnum<decltype(e2)>::value, "");
124 | static_assert(!type_traits_impl::IsIntEnum<E3>::value, "");
125 | static_assert(!type_traits_impl::IsIntEnum<int>::value, "");
126 | static_assert(!type_traits_impl::IsIntEnum<float>::value, "");
127 | static_assert(!type_traits_impl::IsIntEnum<S>::value, "");
128 |
129 | static_assert(IsIntlike<E1>::value, "");
130 | static_assert(IsIntlike<decltype(e2)>::value, "");
131 | static_assert(!IsIntlike<E3>::value, "");
132 | static_assert(IsIntlike<int>::value, "");
133 | static_assert(!IsIntlike<float>::value, "");
134 | static_assert(!IsIntlike<S>::value, "");
135 |
136 | }  // namespace test_enum_intlike
137 |
138 | }  // namespace rtc
139 |
140 | #endif // RTC_BASE_TYPE_TRAITS_H_
141 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/system_wrappers/include/cpu_features_wrapper.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | #ifndef SYSTEM_WRAPPERS_INCLUDE_CPU_FEATURES_WRAPPER_H_
12 | #define SYSTEM_WRAPPERS_INCLUDE_CPU_FEATURES_WRAPPER_H_
13 |
14 | #include <stdint.h>
15 |
16 | #if defined(__cplusplus) || defined(c_plusplus)
17 | extern "C" {
18 | #endif
19 |
20 | // x86 features that can be queried through a WebRtc_CPUInfo function.
21 | typedef enum { kSSE2, kSSE3 } CPUFeature;
22 |
23 | // ARM features; each value is a distinct bit so they can be combined in a mask.
24 | enum {
25 | kCPUFeatureARMv7 = (1 << 0),
26 | kCPUFeatureVFPv3 = (1 << 1),
27 | kCPUFeatureNEON = (1 << 2),
28 | kCPUFeatureLDREXSTREX = (1 << 3)
29 | };
30 | // Pointer type for a feature query: takes a CPUFeature and returns an int.
31 | typedef int (*WebRtc_CPUInfo)(CPUFeature feature);
32 |
33 | // Returns true if the CPU supports the feature.
34 | extern WebRtc_CPUInfo WebRtc_GetCPUInfo;
35 |
36 | // No CPU feature is available => straight C path.
37 | extern WebRtc_CPUInfo WebRtc_GetCPUInfoNoASM;
38 |
39 | // Returns the features of an ARM device.
40 | // It detects the features in the hardware platform, and returns the supported
41 | // values from the ARM enum above as a bitmask.
42 | extern uint64_t WebRtc_GetCPUFeaturesARM(void);
43 |
44 | #if defined(__cplusplus) || defined(c_plusplus)
45 | } // extern "C"
46 | #endif
47 |
48 | #endif // SYSTEM_WRAPPERS_INCLUDE_CPU_FEATURES_WRAPPER_H_
49 |
--------------------------------------------------------------------------------
/cpp_onnx/third_party/webrtc/typedefs.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 | *
4 | * Use of this source code is governed by a BSD-style license
5 | * that can be found in the LICENSE file in the root of the source
6 | * tree. An additional intellectual property rights grant can be found
7 | * in the file PATENTS. All contributing project authors may
8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
10 |
11 | // This file contains platform-specific typedefs and defines.
12 | // Much of it is derived from Chromium's build/build_config.h.
13 |
14 | #ifndef WEBRTC_TYPEDEFS_H_
15 | #define WEBRTC_TYPEDEFS_H_
16 |
17 | // Processor architecture detection. For more info on what's defined, see:
18 | // http://msdn.microsoft.com/en-us/library/b0084kay.aspx
19 | // http://www.agner.org/optimize/calling_conventions.pdf
20 | // or with gcc, run: "echo | gcc -E -dM -"
21 | #if defined(_M_X64) || defined(__x86_64__)
22 | #define WEBRTC_ARCH_X86_FAMILY
23 | #define WEBRTC_ARCH_X86_64
24 | #define WEBRTC_ARCH_64_BITS
25 | #define WEBRTC_ARCH_LITTLE_ENDIAN
26 | #elif defined(__aarch64__)  // NOTE(review): no WEBRTC_ARCH_ARM_FAMILY is defined on this branch; confirm that is intended.
27 | #define WEBRTC_ARCH_64_BITS
28 | #define WEBRTC_ARCH_LITTLE_ENDIAN
29 | #elif defined(_M_IX86) || defined(__i386__)
30 | #define WEBRTC_ARCH_X86_FAMILY
31 | #define WEBRTC_ARCH_X86
32 | #define WEBRTC_ARCH_32_BITS
33 | #define WEBRTC_ARCH_LITTLE_ENDIAN
34 | #elif defined(__ARMEL__)
35 | // TODO(ajm): We'd prefer to control platform defines here, but this is
36 | // currently provided by the Android makefiles. Commented to avoid duplicate
37 | // definition warnings.
38 | //#define WEBRTC_ARCH_ARM
39 | // TODO(ajm): Chromium uses the following two defines. Should we switch?
40 | //#define WEBRTC_ARCH_ARM_FAMILY
41 | //#define WEBRTC_ARCH_ARMEL
42 | #define WEBRTC_ARCH_32_BITS
43 | #define WEBRTC_ARCH_LITTLE_ENDIAN
44 | #elif defined(__MIPSEL__)
45 | #define WEBRTC_ARCH_32_BITS
46 | #define WEBRTC_ARCH_LITTLE_ENDIAN
47 | #elif defined(__pnacl__)
48 | #define WEBRTC_ARCH_32_BITS
49 | #define WEBRTC_ARCH_LITTLE_ENDIAN
50 | #elif defined(__PPC__)
51 | #if defined(__PPC64__)
52 | #define WEBRTC_ARCH_64_BITS
53 | #else
54 | #define WEBRTC_ARCH_32_BITS
55 | #endif
56 | #define WEBRTC_ARCH_BIG_ENDIAN
57 | #else
58 | #error Please add support for your architecture in typedefs.h
59 | #endif
60 | // Exactly one of the two endianness macros must be defined.
61 | #if !(defined(WEBRTC_ARCH_LITTLE_ENDIAN) ^ defined(WEBRTC_ARCH_BIG_ENDIAN))
62 | #error Define either WEBRTC_ARCH_LITTLE_ENDIAN or WEBRTC_ARCH_BIG_ENDIAN
63 | #endif
64 | // WEBRTC_CPU_DETECTION is set when x86 builds lack __SSE2__ or ARMv7 builds lack the NEON macro.
65 | #if (defined(WEBRTC_ARCH_X86_FAMILY) && !defined(__SSE2__)) || \
66 | (defined(WEBRTC_ARCH_ARM_V7) && !defined(WEBRTC_ARCH_ARM_NEON))
67 | #define WEBRTC_CPU_DETECTION
68 | #endif
69 |
70 | #if !defined(_MSC_VER)
71 | #include <stdint.h>
72 | #else
73 | // Define C99 equivalent types, since pre-2010 MSVC doesn't provide stdint.h.
74 | typedef signed char int8_t;
75 | typedef signed short int16_t;
76 | typedef signed int int32_t;
77 | typedef __int64 int64_t;
78 | typedef unsigned char uint8_t;
79 | typedef unsigned short uint16_t;
80 | typedef unsigned int uint32_t;
81 | typedef unsigned __int64 uint64_t;
82 | #endif
83 |
84 | // Borrowed from Chromium's base/compiler_specific.h.
85 | // Annotate a virtual method indicating it must be overriding a virtual
86 | // method in the parent class. Expands to "override" or to nothing.
87 | // Use like:
88 | // virtual void foo() OVERRIDE;
89 | #if defined(_MSC_VER)
90 | #define OVERRIDE override
91 | #elif defined(__clang__)
92 | // Clang defaults to C++03 and warns about using override. Squelch that.
93 | // Intentionally no push/pop here so all users of OVERRIDE ignore the warning
94 | // too. This is like passing -Wno-c++11-extensions, except that GCC won't die
95 | // (because it won't see this pragma).
96 | #pragma clang diagnostic ignored "-Wc++11-extensions"
97 | #define OVERRIDE override
98 | #elif defined(__GNUC__) && __cplusplus >= 201103 && \
99 | (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40700
100 | // GCC 4.7 supports explicit virtual overrides when C++11 support is enabled.
101 | #define OVERRIDE override
102 | #else  // Any other compiler or language mode: the annotation is dropped.
103 | #define OVERRIDE
104 | #endif
105 |
106 | // Annotate a function indicating the caller must examine the return value.
107 | // Use like:
108 | // int foo() WARN_UNUSED_RESULT;
109 | // TODO(ajm): Hack to avoid multiple definitions until the base/ of webrtc and
110 | // libjingle are merged.
111 | #if !defined(WARN_UNUSED_RESULT)
112 | #if defined(__GNUC__)
113 | #define WARN_UNUSED_RESULT __attribute__((warn_unused_result))
114 | #else  // Compilers without __GNUC__: expands to nothing.
115 | #define WARN_UNUSED_RESULT
116 | #endif
117 | #endif // WARN_UNUSED_RESULT
118 |
119 | // Put after a variable that might not be used, to prevent compiler warnings:
120 | // int result ATTRIBUTE_UNUSED = DoSomething();
121 | // assert(result == 17);
122 | #ifndef ATTRIBUTE_UNUSED
123 | #if defined(__GNUC__) || defined(__clang__)
124 | #define ATTRIBUTE_UNUSED __attribute__((unused))
125 | #else  // Other compilers: expands to nothing.
126 | #define ATTRIBUTE_UNUSED
127 | #endif
128 | #endif
129 |
130 | // Macro to be used for switch-case fallthrough (required for enabling
131 | // -Wimplicit-fallthrough warning on Clang). Write it as "FALLTHROUGH();".
132 | #ifndef FALLTHROUGH
133 | #if defined(__clang__)
134 | #define FALLTHROUGH() [[clang::fallthrough]]
135 | #else  // Non-Clang: an empty statement.
136 | #define FALLTHROUGH() do { } while (0)
137 | #endif
138 | #endif
139 |
140 | // Annotate a function that will not return control flow to the caller.
141 | #if defined(_MSC_VER)
142 | #define NO_RETURN __declspec(noreturn)
143 | #elif defined(__GNUC__)
144 | #define NO_RETURN __attribute__((noreturn))
145 | #else  // Other compilers: expands to nothing.
146 | #define NO_RETURN
147 | #endif
148 |
149 | #endif // WEBRTC_TYPEDEFS_H_
150 |
--------------------------------------------------------------------------------
/cpp_onnx/wave/asr_example.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/wave/asr_example.wav
--------------------------------------------------------------------------------
/cpp_onnx/wave/long.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/wave/long.wav
--------------------------------------------------------------------------------
/cpp_onnx/wave/short.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/wave/short.wav
--------------------------------------------------------------------------------
/cpp_onnx/wave/test.pcm.bytes:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/wave/test.pcm.bytes
--------------------------------------------------------------------------------
/cpp_onnx/wave/test.pcm.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/wave/test.pcm.wav
--------------------------------------------------------------------------------
/cpp_onnx/win/bin/x64/libfftw3-3.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x64/libfftw3-3.dll
--------------------------------------------------------------------------------
/cpp_onnx/win/bin/x64/libfftw3f-3.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x64/libfftw3f-3.dll
--------------------------------------------------------------------------------
/cpp_onnx/win/bin/x64/libfftw3l-3.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x64/libfftw3l-3.dll
--------------------------------------------------------------------------------
/cpp_onnx/win/bin/x64/onnxruntime.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x64/onnxruntime.dll
--------------------------------------------------------------------------------
/cpp_onnx/win/bin/x86/libfftw3-3.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x86/libfftw3-3.dll
--------------------------------------------------------------------------------
/cpp_onnx/win/bin/x86/libfftw3f-3.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x86/libfftw3f-3.dll
--------------------------------------------------------------------------------
/cpp_onnx/win/bin/x86/libfftw3l-3.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x86/libfftw3l-3.dll
--------------------------------------------------------------------------------
/cpp_onnx/win/bin/x86/onnxruntime.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/bin/x86/onnxruntime.dll
--------------------------------------------------------------------------------
/cpp_onnx/win/images/sample.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/images/sample.png
--------------------------------------------------------------------------------
/cpp_onnx/win/include/cpu_provider_factory.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation. All rights reserved.
2 | // Licensed under the MIT License.
3 |
4 | #include "onnxruntime_c_api.h"
5 |
6 | #ifdef __cplusplus
7 | extern "C" {
8 | #endif
9 |
10 | /**
11 | * \param use_arena zero: false. non-zero: true.
12 | */
13 | ORT_EXPORT
14 | ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_CPU, _In_ OrtSessionOptions* options, int use_arena)
15 | ORT_ALL_ARGS_NONNULL;
16 |
17 | #ifdef __cplusplus
18 | }
19 | #endif
20 |
--------------------------------------------------------------------------------
/cpp_onnx/win/include/onnxruntime_run_options_config_keys.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation. All rights reserved.
2 | // Licensed under the MIT License.
3 |
4 | #pragma once
5 |
6 | /*
7 | * This file defines RunOptions Config Keys and format of the Config Values.
8 | *
9 | * The Naming Convention for a RunOptions Config Key,
10 | * "[Area][.[SubArea1].[SubArea2]...].[Keyname]"
11 | * Such as "ep.cuda.use_arena"
12 | * The Config Key cannot be empty
13 | * The maximum length of the Config Key is 128
14 | *
15 | * The string format of a RunOptions Config Value is defined individually for each Config.
16 | * The maximum length of the Config Value is 1024
17 | */
18 |
19 | // Key for enabling shrinkages of user listed device memory arenas.
20 | // Expects a list of semi-colon separated key value pairs separated by colon in the following format:
21 | // "device_0:device_id_0;device_1:device_id_1"
22 | // No white-spaces allowed in the provided list string.
23 | // Currently, the only supported devices are : "cpu", "gpu" (case sensitive).
24 | // If "cpu" is included in the list, DisableCpuMemArena() API must not be called (i.e.) arena for cpu should be enabled.
25 | // Example usage: "cpu:0;gpu:0" (or) "gpu:0"
26 | // By default, the value for this key is empty (i.e.) no memory arenas are shrunk
27 | static const char* const kOrtRunOptionsConfigEnableMemoryArenaShrinkage = "memory.enable_memory_arena_shrinkage";
28 |
29 | // Set to '1' to not synchronize execution providers with CPU at the end of session run.
30 | // By default it is set to '0'.
31 | // Taking the CUDA EP as an example, it omits triggering cudaStreamSynchronize on the compute stream.
32 | static const char* const kOrtRunOptionsConfigDisableSynchronizeExecutionProviders = "disable_synchronize_execution_providers";
33 |
--------------------------------------------------------------------------------
/cpp_onnx/win/include/provider_options.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation. All rights reserved.
2 | // Licensed under the MIT License.
3 |
4 | #pragma once
5 |
#include <string>
#include <unordered_map>
#include <vector>
9 |
namespace onnxruntime {

// data types for execution provider options

// String-keyed, string-valued option table.
using ProviderOptions = std::unordered_map<std::string, std::string>;
// Ordered collection of option tables.
using ProviderOptionsVector = std::vector<ProviderOptions>;
// Option tables looked up by a string key (presumably the execution provider
// name -- confirm against the callers).
using ProviderOptionsMap = std::unordered_map<std::string, ProviderOptions>;

}  // namespace onnxruntime
19 |
--------------------------------------------------------------------------------
/cpp_onnx/win/include/tensorrt_provider_factory.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Microsoft Corporation. All rights reserved.
2 | // Licensed under the MIT License.
3 |
4 | #include "onnxruntime_c_api.h"
5 |
6 | #ifdef __cplusplus
7 | extern "C" {
8 | #endif
9 |
10 | ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id);
11 |
12 | #ifdef __cplusplus
13 | }
14 | #endif
15 |
--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x64/libfftw3-3.exp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/libfftw3-3.exp
--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x64/libfftw3-3.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/libfftw3-3.lib
--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x64/libfftw3f-3.exp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/libfftw3f-3.exp
--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x64/libfftw3f-3.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/libfftw3f-3.lib
--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x64/libfftw3l-3.exp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/libfftw3l-3.exp
--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x64/libfftw3l-3.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/libfftw3l-3.lib
--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x64/onnxruntime.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x64/onnxruntime.lib
--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x86/libfftw3-3.exp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/libfftw3-3.exp
--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x86/libfftw3-3.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/libfftw3-3.lib
--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x86/libfftw3f-3.exp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/libfftw3f-3.exp
--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x86/libfftw3f-3.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/libfftw3f-3.lib
--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x86/libfftw3l-3.exp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/libfftw3l-3.exp
--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x86/libfftw3l-3.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/libfftw3l-3.lib
--------------------------------------------------------------------------------
/cpp_onnx/win/lib/x86/onnxruntime.lib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/cpp_onnx/win/lib/x86/onnxruntime.lib
--------------------------------------------------------------------------------
/cpp_onnx/win/readme.md:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/python/.gitattributes:
--------------------------------------------------------------------------------
1 | *.7z filter=lfs diff=lfs merge=lfs -text
2 | *.arrow filter=lfs diff=lfs merge=lfs -text
3 | *.bin filter=lfs diff=lfs merge=lfs -text
4 | *.bz2 filter=lfs diff=lfs merge=lfs -text
5 | *.ckpt filter=lfs diff=lfs merge=lfs -text
6 | *.ftz filter=lfs diff=lfs merge=lfs -text
7 | *.gz filter=lfs diff=lfs merge=lfs -text
8 | *.h5 filter=lfs diff=lfs merge=lfs -text
9 | *.joblib filter=lfs diff=lfs merge=lfs -text
10 | *.lfs.* filter=lfs diff=lfs merge=lfs -text
11 | *.mlmodel filter=lfs diff=lfs merge=lfs -text
12 | *.model filter=lfs diff=lfs merge=lfs -text
13 | *.msgpack filter=lfs diff=lfs merge=lfs -text
14 | *.npy filter=lfs diff=lfs merge=lfs -text
15 | *.npz filter=lfs diff=lfs merge=lfs -text
16 | *.onnx filter=lfs diff=lfs merge=lfs -text
17 | *.ot filter=lfs diff=lfs merge=lfs -text
18 | *.parquet filter=lfs diff=lfs merge=lfs -text
19 | *.pb filter=lfs diff=lfs merge=lfs -text
20 | *.pickle filter=lfs diff=lfs merge=lfs -text
21 | *.pkl filter=lfs diff=lfs merge=lfs -text
22 | *.pt filter=lfs diff=lfs merge=lfs -text
23 | *.pth filter=lfs diff=lfs merge=lfs -text
24 | *.rar filter=lfs diff=lfs merge=lfs -text
25 | *.safetensors filter=lfs diff=lfs merge=lfs -text
26 | saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27 | *.tar.* filter=lfs diff=lfs merge=lfs -text
28 | *.tar filter=lfs diff=lfs merge=lfs -text
29 | *.tflite filter=lfs diff=lfs merge=lfs -text
30 | *.tgz filter=lfs diff=lfs merge=lfs -text
31 | *.wasm filter=lfs diff=lfs merge=lfs -text
32 | *.xz filter=lfs diff=lfs merge=lfs -text
33 | *.zip filter=lfs diff=lfs merge=lfs -text
34 | *.zst filter=lfs diff=lfs merge=lfs -text
35 | *tfevents* filter=lfs diff=lfs merge=lfs -text
36 |
--------------------------------------------------------------------------------
/python/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://gitee.com/SWHL/autoflake
3 | rev: v2.1.1
4 | hooks:
5 | - id: autoflake
6 | args:
7 | [
8 | "--recursive",
9 | "--in-place",
10 | "--remove-all-unused-imports",
11 | "--remove-unused-variable",
12 | "--ignore-init-module-imports",
13 | ]
14 | files: \.py$
15 | - repo: https://gitee.com/SWHL/black
16 | rev: 23.1.0
17 | hooks:
18 | - id: black
19 | files: \.py$
--------------------------------------------------------------------------------
/python/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
Rapid Paraformer
4 |
5 |
6 |

7 |

8 |

9 |

10 |

11 |

12 |
13 |
14 | ## 简介
15 | rapid_paraformer是一个基于阿里达摩院[Paraformer语音识别-中文-通用-16k-离线-large-pytorch](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary)的语音识别工具。
16 |
17 | 🎉该项目核心代码已经并入[FunASR](https://github.com/alibaba-damo-academy/FunASR)
18 |
19 | 本仓库仅对模型做了转换,只采用ONNXRuntime推理引擎
20 |
21 | ## TODO
22 | - [ ] 整合vad + asr + pun三个模型,打造可部署使用的方案
23 |
24 | ## 安装
25 | ```bash
26 | pip install rapid_paraformer
27 | ```
28 |
29 | ## 模型下载
30 | 方法一:从Hugging Face上下载([link](https://huggingface.co/SWHL/RapidParaformer))
31 | ```python
32 | from rapid_paraformer import download_hf_model
33 |
34 | download_hf_model(repo_id="SWHL/RapidParaformer", save_dir=".")
35 | ```
36 |
37 | 方法二:([Google Drive](https://drive.google.com/drive/folders/1RVQtMe0eB_k6G5TJlmXwPELx4VtF2oCw?usp=sharing) | [百度网盘](https://pan.baidu.com/s/1zf8Ta6QxFHY3Z75fHNYKrQ?pwd=6ekq))
38 | ```bash
39 | resources
40 | ├── [ 700] config.yaml
41 | └── [4.0K] models
42 | ├── [ 11K] am.mvn
43 | ├── [824M] asr_paraformerv2.onnx
44 | └── [ 50K] token_list.pkl
45 | ```
46 |
47 | ## 模型转换
48 | 基于modescope下的notebook环境自助转换:
49 | 1. 打开[快速体验](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary)
50 | 2. 打开notebook → Cell中输入以下命令, 执行即可。
51 | ```bash
52 | !python -m funasr.export.export_model --model-name 'damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch' --export-dir "./export"
53 | ```
54 |
55 | ## 使用
56 | ```python
57 | from rapid_paraformer import RapidParaformer
58 |
59 | config_path = "resources/config.yaml"
60 |
61 | paraformer = RapidParaformer(config_path)
62 |
63 | wav_path = [
64 | "test_wavs/0478_00017.wav",
65 | "test_wavs/asr_example_zh.wav",
66 | ]
67 |
68 | result = paraformer(wav_path)
69 | print(result)
70 | # ['y', '欢迎大家来体验达摩院推出的语音识别模型']
71 | ```
72 |
--------------------------------------------------------------------------------
/python/demo.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from rapid_paraformer import RapidParaformer, download_hf_model
5 |
# Download the "SWHL/RapidParaformer" Hugging Face repo, using "." as save_dir.
download_hf_model(repo_id="SWHL/RapidParaformer", save_dir=".")

# Build the recognizer from the YAML configuration.
paraformer = RapidParaformer("resources/config.yaml")

wav_path = ["test_wavs/0478_00017.wav", "test_wavs/asr_example_zh.wav"]
print(wav_path)

# A single call handles the whole list of wav paths.
print(paraformer(wav_path))
20 |
--------------------------------------------------------------------------------
/python/docs/doc_whl.md:
--------------------------------------------------------------------------------
1 | See [link](https://github.com/RapidAI/RapidASR/tree/main/python) for details.
--------------------------------------------------------------------------------
/python/rapid_paraformer/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from .main import RapidParaformer
5 | from .utils import download_hf_model
6 |
--------------------------------------------------------------------------------
/python/rapid_paraformer/kaldifeat/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | from .feature import compute_fbank_feats, compute_mfcc_feats, apply_cmvn_sliding
3 | from .ivector import compute_vad
4 |
--------------------------------------------------------------------------------
/python/rapid_paraformer/kaldifeat/ivector.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from .feature import sliding_window
4 |
5 |
6 | # ---------- compute-vad ----------
7 |
def compute_vad(log_energy, energy_mean_scale=0.5, energy_threshold=0.5, frames_context=0, proportion_threshold=0.6):
    """Energy-based voice activity detection over per-frame log energies.

    The decision threshold is ``energy_threshold + energy_mean_scale * mean(log_energy)``.

    :param log_energy: 1-D array of per-frame log energies.
    :param energy_mean_scale: Weight ``s`` of the mean log-energy ``m`` of the input in the
        threshold ``s * m + energy_threshold``; must be >= 0 (float, default = 0.5)
    :param energy_threshold: Constant term of the threshold (float, default = 0.5)
    :param frames_context: Number of context frames on each side of the central frame in the
        window over which energy is monitored; must be >= 0 (int, default = 0)
    :param proportion_threshold: Fraction of frames in the window that must exceed the
        threshold for the central frame to count as voiced; must lie strictly between
        0 and 1 (float, default = 0.6)
    :return: Boolean array with one entry per frame, True where the frame is judged voiced.
    """
    assert len(log_energy.shape) == 1
    assert energy_mean_scale >= 0
    assert frames_context >= 0
    assert 0 < proportion_threshold < 1

    threshold = energy_threshold + energy_mean_scale * log_energy.mean()

    # Without context every frame is judged on its own energy.
    if not frames_context > 0:
        return log_energy > threshold

    elem_dtype = log_energy.dtype
    total_frames = len(log_energy)
    span = 2 * frames_context + 1

    # Zero-pad both ends so that each frame gets a full-size window.
    edge_pad = np.zeros(frames_context, dtype=elem_dtype)
    padded = np.concatenate([edge_pad, log_energy, edge_pad])
    windows = sliding_window(padded, span, 1)
    above = np.count_nonzero(windows > threshold, axis=1)

    # Windows near either end contain fewer real frames; shrink the denominator
    # there so that the zero padding is not counted.
    denom = np.ones(total_frames, dtype=elem_dtype) * span
    edge_sizes = np.arange(frames_context + 1, min(span, total_frames) + 1, dtype=elem_dtype)
    denom[:-(frames_context + 2):-1] = edge_sizes
    head = slice(None, frames_context + 1)
    denom[head] = np.min([denom[head], edge_sizes], axis=0)

    return above / denom >= proportion_threshold
42 |
43 | # ---------- compute-vad ----------
44 |
--------------------------------------------------------------------------------
/python/rapid_paraformer/main.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | from pathlib import Path
5 | from typing import List, Tuple, Union
6 |
7 | import librosa
8 | import numpy as np
9 |
10 | from .utils import (
11 | CharTokenizer,
12 | Hypothesis,
13 | ONNXRuntimeError,
14 | OrtInferSession,
15 | TokenIDConverter,
16 | WavFrontend,
17 | get_logger,
18 | read_yaml,
19 | )
20 |
21 | logging = get_logger()
22 |
23 |
class RapidParaformer:
    """Speech recognizer that runs a Paraformer ONNX model configured by a YAML file.

    Calling an instance with wav path(s) or waveform array(s) runs
    loading -> feature extraction -> ONNX inference -> greedy decoding in
    batches of ``batch_size`` (read from the ``Model`` section of the config).
    """

    def __init__(self, config_path: Union[str, Path]) -> None:
        """Build tokenizer, frontend and inference session from ``config_path``.

        Raises:
            FileNotFoundError: if ``config_path`` does not exist.
        """
        if not Path(config_path).exists():
            raise FileNotFoundError(f"{config_path} does not exist.")

        config = read_yaml(config_path)

        self.converter = TokenIDConverter(**config["TokenIDConverter"])
        self.tokenizer = CharTokenizer(**config["CharTokenizer"])
        self.frontend = WavFrontend(
            cmvn_file=config["WavFrontend"]["cmvn_file"],
            **config["WavFrontend"]["frontend_conf"],
        )
        self.ort_infer = OrtInferSession(config["Model"])
        self.batch_size = config["Model"]["batch_size"]

    def __call__(
        self,
        wav_content: Union[
            str, Path, np.ndarray, List[Union[str, Path, np.ndarray]]
        ],
    ) -> List:
        """Transcribe one or more inputs.

        Args:
            wav_content: anything accepted by :meth:`load_data`.

        Returns:
            The decoded results of all batches, concatenated in input order.
            A batch whose inference raises ``ONNXRuntimeError`` only logs a
            warning and contributes no entries, so the result may be shorter
            than the input.
        """
        waveform_list = self.load_data(wav_content)
        waveform_nums = len(waveform_list)

        asr_res = []
        for beg_idx in range(0, waveform_nums, self.batch_size):
            end_idx = min(waveform_nums, beg_idx + self.batch_size)

            feats, feats_len = self.extract_feat(waveform_list[beg_idx:end_idx])

            try:
                am_scores, valid_token_lens = self.infer(feats, feats_len)
            except ONNXRuntimeError:
                logging.warning("input wav is silence or noise")
                preds = []
            else:
                preds = self.decode(am_scores, valid_token_lens)

            asr_res.extend(preds)
        return asr_res

    def load_data(
        self,
        wav_content: Union[
            str, Path, np.ndarray, List[Union[str, Path, np.ndarray]]
        ],
    ) -> List[np.ndarray]:
        """Normalize the supported input forms into a list of waveform arrays.

        Args:
            wav_content: a wav path (``str`` or ``pathlib.Path``), an already
                loaded waveform array (returned unchanged), or a list/tuple
                mixing paths and arrays.

        Returns:
            One array per input. Arrays read from disk get a leading axis of
            length 1 and keep the file's native sample rate (``sr=None``).

        Raises:
            TypeError: if ``wav_content`` (or an element of the list) has an
                unsupported type.
        """

        def load_wav(path: Union[str, Path]) -> np.ndarray:
            waveform, _ = librosa.load(str(path), sr=None)
            return waveform[None, ...]

        def load_one(item: Union[str, Path, np.ndarray]) -> np.ndarray:
            if isinstance(item, np.ndarray):
                return item
            if isinstance(item, (str, Path)):
                return load_wav(item)
            raise TypeError(f"The type of {item} is not in [str, np.ndarray, list]")

        if isinstance(wav_content, (np.ndarray, str, Path)):
            return [load_one(wav_content)]

        if isinstance(wav_content, (list, tuple)):
            return [load_one(item) for item in wav_content]

        raise TypeError(f"The type of {wav_content} is not in [str, np.ndarray, list]")

    def extract_feat(
        self, waveform_list: List[np.ndarray]
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Compute frontend features for a batch of waveforms.

        Each waveform goes through ``frontend.fbank`` and then
        ``frontend.lfr_cmvn``; the per-item features are zero-padded to the
        longest feature length in the batch.

        Returns:
            ``(feats, feats_len)``: the stacked float32 features and the int32
            array of unpadded lengths.
        """
        feats, feats_len = [], []
        for waveform in waveform_list:
            speech, _ = self.frontend.fbank(waveform)
            feat, feat_len = self.frontend.lfr_cmvn(speech)
            feats.append(feat)
            feats_len.append(feat_len)

        feats = self.pad_feats(feats, np.max(feats_len))
        feats_len = np.array(feats_len).astype(np.int32)
        return feats, feats_len

    @staticmethod
    def pad_feats(feats: List[np.ndarray], max_feat_len: int) -> np.ndarray:
        """Zero-pad every 2-D feature along axis 0 to ``max_feat_len`` and stack as float32."""

        def pad_feat(feat: np.ndarray, cur_len: int) -> np.ndarray:
            # Pad only at the end of the first axis; the second axis is untouched.
            pad_width = ((0, max_feat_len - cur_len), (0, 0))
            return np.pad(feat, pad_width, "constant", constant_values=0)

        feat_res = [pad_feat(feat, feat.shape[0]) for feat in feats]
        return np.array(feat_res).astype(np.float32)

    def infer(
        self, feats: np.ndarray, feats_len: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Run the ONNX session on ``[feats, feats_len]`` and return its two outputs."""
        am_scores, token_nums = self.ort_infer([feats, feats_len])
        return am_scores, token_nums

    def decode(self, am_scores: np.ndarray, token_nums: np.ndarray) -> List[str]:
        """Decode each item of the batch with :meth:`decode_one`.

        ``am_scores`` and ``token_nums`` are iterated in lockstep, one entry
        per batch item.
        """
        return [
            self.decode_one(am_score, token_num)
            for am_score, token_num in zip(am_scores, token_nums)
        ]

    def decode_one(self, am_score: np.ndarray, valid_token_num: int) -> List[str]:
        """Greedy-decode one item: argmax ids -> tokens -> text.

        The result of ``tokenizer.tokens2text`` is cut to its first
        ``valid_token_num - 1`` elements.
        NOTE(review): the annotated return type is kept from the original; the
        concrete type is whatever ``tokens2text`` returns -- confirm in utils.
        """
        yseq = am_score.argmax(axis=-1)
        score = am_score.max(axis=-1)
        score = np.sum(score, axis=-1)

        # pad with mask tokens to ensure compatibility with sos/eos tokens
        # asr_model.sos:1  asr_model.eos:2
        yseq = np.array([1] + yseq.tolist() + [2])
        hyp = Hypothesis(yseq=yseq, score=score)

        # remove sos/eos and get results
        last_pos = -1
        token_int = hyp.yseq[1:last_pos].tolist()

        # remove blank symbol id, which is assumed to be 0 (id 2 is dropped too)
        token_int = list(filter(lambda x: x not in (0, 2), token_int))

        # Change integer-ids to tokens
        token = self.converter.ids2tokens(token_int)
        text = self.tokenizer.tokens2text(token)
        return text[: valid_token_num - 1]
135 |
def main() -> None:
    """Command-line entry point (referenced by setup.py as ``rapid_paraformer.main:main``).

    With no arguments it behaves like the former ``__main__`` block: it loads
    ``resources/config.yaml`` from the project directory and transcribes
    ``0478_00017.wav`` 1000 times, printing each result.
    """
    import argparse

    project_dir = Path(__file__).resolve().parent.parent

    parser = argparse.ArgumentParser(description="Paraformer speech recognition.")
    parser.add_argument(
        "wav_files",
        nargs="*",
        default=["0478_00017.wav"],
        help="wav file(s) to transcribe",
    )
    parser.add_argument(
        "-c",
        "--config",
        default=str(project_dir / "resources" / "config.yaml"),
        help="path to the YAML configuration",
    )
    parser.add_argument(
        "-n",
        "--repeat",
        type=int,
        default=1000,
        help="how many times to run the recognition (default kept for compatibility)",
    )
    args = parser.parse_args()

    paraformer = RapidParaformer(args.config)
    wav_files = list(args.wav_files)
    for _ in range(args.repeat):
        print(paraformer(wav_files))


if __name__ == "__main__":
    main()
145 |
--------------------------------------------------------------------------------
/python/requirements.txt:
--------------------------------------------------------------------------------
1 | librosa
2 | numpy
3 | onnxruntime
4 | typeguard==2.13.3
5 | huggingface_hub
--------------------------------------------------------------------------------
/python/setup.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import sys
5 | from pathlib import Path
6 | from typing import List, Union
7 |
8 | import setuptools
9 | from get_pypi_latest_version import GetPyPiLatestVersion
10 |
11 |
def read_txt(txt_path: Union[Path, str]) -> List[str]:
    """Read a UTF-8 text file and return its lines without the trailing newline."""
    lines: List[str] = []
    with open(txt_path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            lines.append(raw_line.rstrip("\n"))
    return lines
16 |
17 |
def get_readme() -> str:
    """Return the UTF-8 text of ``docs/doc_whl.md`` located next to this file."""
    doc_file = Path(__file__).resolve().parent / "docs" / "doc_whl.md"
    return doc_file.read_text(encoding="utf-8")
24 |
25 |
MODULE_NAME = "rapid_paraformer"

# Resolve data files against this file, not the current working directory,
# matching how get_readme() locates the long description.
ROOT_DIR = Path(__file__).resolve().parent

# The next version is derived from the latest one published on PyPI; when the
# lookup raises ValueError, "0.0.1" is used as the base.
obtainer = GetPyPiLatestVersion()
try:
    latest_version = obtainer(MODULE_NAME)
except ValueError:
    latest_version = "0.0.1"

VERSION_NUM = obtainer.version_add_one(latest_version)

# Extra command-line arguments (after the setup command) may carry an explicit
# version; they are consumed here and removed before setuptools parses argv.
if len(sys.argv) > 2:
    match_str = " ".join(sys.argv[2:])
    matched_versions = obtainer.extract_version(match_str)
    if matched_versions:
        VERSION_NUM = matched_versions
sys.argv = sys.argv[:2]

setuptools.setup(
    name=MODULE_NAME,
    version=VERSION_NUM,
    platforms="Any",
    description="Tool of speech recognition.",
    long_description=get_readme(),
    long_description_content_type="text/markdown",
    author="SWHL",
    author_email="liekkaskono@163.com",
    url="https://github.com/RapidAI/RapidASR",
    license="Apache-2.0",
    include_package_data=True,
    install_requires=read_txt(ROOT_DIR / "requirements.txt"),
    # Package names are dotted module paths, not filesystem paths.
    packages=[MODULE_NAME, f"{MODULE_NAME}.kaldifeat"],
    package_data={"": ["*.md", "LICENSE"]},
    keywords=["asr", "paraformer", "wenet"],
    classifiers=[
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3.12",
    ],
    python_requires=">=3.6,<3.13",
    entry_points={
        "console_scripts": [f"{MODULE_NAME}={MODULE_NAME}.main:main"],
    },
)
73 |
--------------------------------------------------------------------------------
/python/test_wavs/0478_00017.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/python/test_wavs/0478_00017.wav
--------------------------------------------------------------------------------
/python/test_wavs/asr_example_zh.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RapidAI/RapidASR/c4f76bbb5d40a9554308ea1b533a90acbc7efc20/python/test_wavs/asr_example_zh.wav
--------------------------------------------------------------------------------
/python/tests/test_infer.py:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 | # @Author: SWHL
3 | # @Contact: liekkaskono@163.com
4 | import os
5 | from pathlib import Path
6 |
7 | import pytest
8 | import librosa
9 |
# Directory two levels above this test file (the parent of tests/).
project_dir = Path(__file__).resolve().parent.parent
# Append it to sys.path before the import below so that `rapid_paraformer`
# can be imported from the source tree.
os.sys.path.append(str(project_dir))

from rapid_paraformer import RapidParaformer


# One recognizer instance, built at import time and shared by every test in this module.
cfg_path = project_dir / 'resources' / 'config.yaml'
paraformer = RapidParaformer(cfg_path)
18 |
19 |
def test_input_by_path():
    """A single wav path given as ``str`` is transcribed; only the first 5 characters are checked."""
    transcripts = paraformer('test_wavs/0478_00017.wav')
    first_five = transcripts[0][:5]
    assert first_five == '呃说不配合'
24 |
25 |
def test_input_by_ndarray():
    """An already-loaded waveform array is accepted and transcribed."""
    wav_file = 'test_wavs/0478_00017.wav'
    # sr=None keeps the file's native sample rate, exactly as
    # RapidParaformer.load_data does for paths; librosa's default would
    # resample to 22050 Hz and feed the model different audio than the
    # path-based test.
    waveform, _ = librosa.load(wav_file, sr=None)
    # Add the leading axis that load_data also adds to waveforms read from disk.
    result = paraformer(waveform[None, ...])
    assert result[0][:5] == '呃说不配合'
31 |
32 |
def test_input_by_str_list():
    """A list of wav paths is accepted; only the first transcript is checked."""
    first_wav = 'test_wavs/0478_00017.wav'
    second_wav = 'test_wavs/asr_example_zh.wav'
    transcripts = paraformer([first_wav, second_wav])
    assert transcripts[0][:5] == '呃说不配合'
40 |
41 |
def test_empty():
    """Passing ``None`` must be rejected with ``TypeError``."""
    wav_file = None
    # Only the call under test lives inside the context manager, so the test
    # fails if paraformer() itself does not raise.
    with pytest.raises(TypeError) as exc_info:
        paraformer(wav_file)
    assert exc_info.type is TypeError
48 |
--------------------------------------------------------------------------------