├── .gitignore ├── LICENSE ├── mk ├── Android │ ├── Android.mk │ ├── build_for_android.sh │ └── webrtc_vad.h └── iOS │ └── webrtcvad.xcodeproj │ ├── project.pbxproj │ └── project.xcworkspace │ └── contents.xcworkspacedata └── webrtc ├── common_audio ├── signal_processing │ ├── complex_bit_reverse.c │ ├── complex_fft.c │ ├── complex_fft_tables.h │ ├── cross_correlation.c │ ├── division_operations.c │ ├── downsample_fast.c │ ├── energy.c │ ├── get_scaling_square.c │ ├── include │ │ ├── real_fft.h │ │ ├── signal_processing_library.h │ │ └── spl_inl.h │ ├── min_max_operations.c │ ├── real_fft.c │ ├── resample_48khz.c │ ├── resample_by_2_internal.c │ ├── resample_by_2_internal.h │ ├── resample_fractional.c │ ├── spl_init.c │ └── vector_scaling_operations.c └── vad │ ├── include │ └── webrtc_vad.h │ ├── vad_core.c │ ├── vad_core.h │ ├── vad_filterbank.c │ ├── vad_filterbank.h │ ├── vad_gmm.c │ ├── vad_gmm.h │ ├── vad_sp.c │ ├── vad_sp.h │ └── webrtc_vad.c ├── system_wrappers └── interface │ └── cpu_features_wrapper.h └── typedefs.h /.gitignore: -------------------------------------------------------------------------------- 1 | # Xcode 2 | # 3 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 4 | 5 | ## Build generated 6 | build/ 7 | DerivedData/ 8 | 9 | ## Various settings 10 | *.pbxuser 11 | !default.pbxuser 12 | *.mode1v3 13 | !default.mode1v3 14 | *.mode2v3 15 | !default.mode2v3 16 | *.perspectivev3 17 | !default.perspectivev3 18 | xcuserdata/ 19 | 20 | ## Other 21 | *.moved-aside 22 | *.xccheckout 23 | *.xcscmblueprint 24 | 25 | ## Obj-C/Swift specific 26 | *.hmap 27 | *.ipa 28 | *.dSYM.zip 29 | *.dSYM 30 | 31 | # CocoaPods 32 | # 33 | # We recommend against adding the Pods directory to your .gitignore. 
However 34 | # you should judge for yourself, the pros and cons are mentioned at: 35 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control 36 | # 37 | # Pods/ 38 | 39 | # Carthage 40 | # 41 | # Add this line if you want to avoid checking in source code from Carthage dependencies. 42 | # Carthage/Checkouts 43 | 44 | Carthage/Build 45 | 46 | # fastlane 47 | # 48 | # It is recommended to not store the screenshots in the git repo. Instead, use fastlane to re-generate the 49 | # screenshots whenever they are needed. 50 | # For more information about the recommended setup visit: 51 | # https://docs.fastlane.tools/best-practices/source-control/#source-control 52 | 53 | fastlane/report.xml 54 | fastlane/Preview.html 55 | fastlane/screenshots 56 | fastlane/test_output 57 | 58 | # Code Injection 59 | # 60 | # After new code Injection tools there's a generated folder /iOSInjectionProject 61 | # https://github.com/johnno1962/injectionforxcode 62 | 63 | iOSInjectionProject/ 64 | 65 | output 66 | *.o 67 | *.a 68 | *.obj 69 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /mk/Android/Android.mk: -------------------------------------------------------------------------------- 1 | #################################################################################### 2 | # Author : yanyongjie 3 | # Date : 2017/07/13 4 | #################################################################################### 5 | 6 | MY_DIR_PATH := $(call my-dir) 7 | MCOM_PATH := $(MY_DIR_PATH) 8 | LOCAL_PATH := $(MCOM_PATH)/../../ 9 | 10 | include $(CLEAR_VARS) 11 | 12 | LOCAL_MODULE := libwebrtc_vad 13 | LOCAL_MODULE_FILENAME := libwebrtc_vad 14 | LOCAL_CFLAGS := -D_ANDROID 15 | LOCAL_LDLIBS := -llog -lm -lz 16 | 17 | LOCAL_LDFLAGS := -Wl,--no-warn-mismatch -fPIE -fPIC -Wl,--fix-cortex-a8 18 | LOCAL_ALLOW_UNDEFINED_SYMBOLS := true 19 | 20 | INC_DIRS = \ 21 | -I$(LOCAL_PATH)/../../webrtc\ 22 | -I$(LOCAL_PATH)/../../webrtc/common_audio/signal_processing/\ 23 | -I$(LOCAL_PATH)/../../webrtc/common_audio/signal_processing/include\ 24 | -I$(LOCAL_PATH)/../../webrtc/common_audio/vad\ 25 | -I$(LOCAL_PATH)/../../webrtc/common_audio/vad/include\ 
26 | 27 | LOCAL_CFLAGS += $(INC_DIRS) 28 | 29 | LOCAL_SRC_FILES := \ 30 | webrtc/common_audio/vad/vad_core.c\ 31 | webrtc/common_audio/vad/vad_filterbank.c\ 32 | webrtc/common_audio/vad/vad_gmm.c\ 33 | webrtc/common_audio/vad/vad_sp.c\ 34 | webrtc/common_audio/vad/webrtc_vad.c\ 35 | webrtc/common_audio/signal_processing/complex_bit_reverse.c\ 36 | webrtc/common_audio/signal_processing/complex_fft.c\ 37 | webrtc/common_audio/signal_processing/cross_correlation.c\ 38 | webrtc/common_audio/signal_processing/division_operations.c\ 39 | webrtc/common_audio/signal_processing/downsample_fast.c\ 40 | webrtc/common_audio/signal_processing/energy.c\ 41 | webrtc/common_audio/signal_processing/get_scaling_square.c\ 42 | webrtc/common_audio/signal_processing/min_max_operations.c\ 43 | webrtc/common_audio/signal_processing/real_fft.c\ 44 | webrtc/common_audio/signal_processing/resample_48khz.c\ 45 | webrtc/common_audio/signal_processing/resample_by_2_internal.c\ 46 | webrtc/common_audio/signal_processing/resample_fractional.c\ 47 | webrtc/common_audio/signal_processing/spl_init.c\ 48 | webrtc/common_audio/signal_processing/vector_scaling_operations.c\ 49 | 50 | 51 | include $(BUILD_STATIC_LIBRARY) 52 | -------------------------------------------------------------------------------- /mk/Android/build_for_android.sh: -------------------------------------------------------------------------------- 1 | rm -rf obj 2 | rm -rf ../../output 3 | ndk-build NDK_PROJECT_PATH=. 
APP_BUILD_SCRIPT=./Android.mk 4 | 5 | mkdir ../../output 6 | mv obj/local/* ../../output 7 | 8 | mkdir ../../output/include 9 | cp ../../webrtc/typedefs.h ../../output/include 10 | cp ./webrtc_vad.h ../../output/include 11 | 12 | rm -rf obj 13 | 14 | cd ../../output 15 | 16 | echo "Install Success, output path is `pwd`" 17 | -------------------------------------------------------------------------------- /mk/Android/webrtc_vad.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This header file includes the VAD API calls. Specific function calls are given below. 14 | */ 15 | 16 | #ifndef WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT 17 | #define WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ 18 | 19 | #include "typedefs.h" 20 | 21 | typedef struct WebRtcVadInst VadInst; 22 | 23 | #ifdef __cplusplus 24 | extern "C" { 25 | #endif 26 | 27 | // Creates an instance to the VAD structure. 28 | // 29 | // - handle [o] : Pointer to the VAD instance that should be created. 30 | // 31 | // returns : 0 - (OK), -1 - (Error) 32 | int WebRtcVad_Create(VadInst** handle); 33 | 34 | // Frees the dynamic memory of a specified VAD instance. 35 | // 36 | // - handle [i] : Pointer to VAD instance that should be freed. 37 | void WebRtcVad_Free(VadInst* handle); 38 | 39 | // Initializes a VAD instance. 40 | // 41 | // - handle [i/o] : Instance that should be initialized. 42 | // 43 | // returns : 0 - (OK), 44 | // -1 - (NULL pointer or Default mode could not be set). 
45 | int WebRtcVad_Init(VadInst* handle); 46 | 47 | // Sets the VAD operating mode. A more aggressive (higher mode) VAD is more 48 | // restrictive in reporting speech. Put in other words the probability of being 49 | // speech when the VAD returns 1 is increased with increasing mode. As a 50 | // consequence also the missed detection rate goes up. 51 | // 52 | // - handle [i/o] : VAD instance. 53 | // - mode [i] : Aggressiveness mode (0, 1, 2, or 3). 54 | // 55 | // returns : 0 - (OK), 56 | // -1 - (NULL pointer, mode could not be set or the VAD instance 57 | // has not been initialized). 58 | int WebRtcVad_set_mode(VadInst* handle, int mode); 59 | 60 | // Calculates a VAD decision for the |audio_frame|. For valid sampling rates 61 | // frame lengths, see the description of WebRtcVad_ValidRatesAndFrameLengths(). 62 | // 63 | // - handle [i/o] : VAD Instance. Needs to be initialized by 64 | // WebRtcVad_Init() before call. 65 | // - fs [i] : Sampling frequency (Hz): 8000, 16000, or 32000 66 | // - audio_frame [i] : Audio frame buffer. 67 | // - frame_length [i] : Length of audio frame buffer in number of samples. 68 | // 69 | // returns : 1 - (Active Voice), 70 | // 0 - (Non-active Voice), 71 | // -1 - (Error) 72 | int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame, 73 | int frame_length); 74 | 75 | // Checks for valid combinations of |rate| and |frame_length|. We support 10, 76 | // 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz. 77 | // 78 | // - rate [i] : Sampling frequency (Hz). 79 | // - frame_length [i] : Speech frame buffer length in number of samples. 
80 | // 81 | // returns : 0 - (valid combination), -1 - (invalid combination) 82 | int WebRtcVad_ValidRateAndFrameLength(int rate, int frame_length); 83 | 84 | #ifdef __cplusplus 85 | } 86 | #endif 87 | 88 | #endif // WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT 89 | -------------------------------------------------------------------------------- /mk/iOS/webrtcvad.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /webrtc/common_audio/signal_processing/complex_bit_reverse.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 12 | 13 | /* Tables for data buffer indexes that are bit reversed and thus need to be 14 | * swapped. Note that, index_7[{0, 2, 4, ...}] are for the left side of the swap 15 | * operations, while index_7[{1, 3, 5, ...}] are for the right side of the 16 | * operation. Same for index_8. 17 | */ 18 | 19 | /* Indexes for the case of stages == 7. 
*/ 20 | static const int16_t index_7[112] = { 21 | 1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104, 22 | 12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52, 23 | 23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98, 24 | 37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70, 25 | 51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69, 26 | 81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125, 27 | 103, 115, 111, 123 28 | }; 29 | 30 | /* Indexes for the case of stages == 8. */ 31 | static const int16_t index_8[240] = { 32 | 1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80, 33 | 11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20, 34 | 40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184, 35 | 30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41, 36 | 148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76, 37 | 51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62, 38 | 124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82, 39 | 75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87, 40 | 234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101, 41 | 166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142, 42 | 115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131, 43 | 193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201, 44 | 149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171, 45 | 213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227, 46 | 203, 211, 207, 243, 215, 235, 223, 251, 239, 247 47 | }; 48 | 49 | void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) { 50 | /* For any specific value of stages, we know exactly the indexes that are 
51 | * bit reversed. Currently (Feb. 2012) in WebRTC the only possible values of 52 | * stages are 7 and 8, so we use tables to save unnecessary iterations and 53 | * calculations for these two cases. 54 | */ 55 | if (stages == 7 || stages == 8) { 56 | int m = 0; 57 | int length = 112; 58 | const int16_t* index = index_7; 59 | 60 | if (stages == 8) { 61 | length = 240; 62 | index = index_8; 63 | } 64 | 65 | /* Decimation in time. Swap the elements with bit-reversed indexes. */ 66 | for (m = 0; m < length; m += 2) { 67 | /* We declare a int32_t* type pointer, to load both the 16-bit real 68 | * and imaginary elements from complex_data in one instruction, reducing 69 | * complexity. 70 | */ 71 | int32_t* complex_data_ptr = (int32_t*)complex_data; 72 | int32_t temp = 0; 73 | 74 | temp = complex_data_ptr[index[m]]; /* Real and imaginary */ 75 | complex_data_ptr[index[m]] = complex_data_ptr[index[m + 1]]; 76 | complex_data_ptr[index[m + 1]] = temp; 77 | } 78 | } 79 | else { 80 | int m = 0, mr = 0, l = 0; 81 | int n = 1 << stages; 82 | int nn = n - 1; 83 | 84 | /* Decimation in time - re-order data */ 85 | for (m = 1; m <= nn; ++m) { 86 | int32_t* complex_data_ptr = (int32_t*)complex_data; 87 | int32_t temp = 0; 88 | 89 | /* Find out indexes that are bit-reversed. */ 90 | l = n; 91 | do { 92 | l >>= 1; 93 | } while (l > nn - mr); 94 | mr = (mr & (l - 1)) + l; 95 | 96 | if (mr <= m) { 97 | continue; 98 | } 99 | 100 | /* Swap the elements with bit-reversed indexes. 101 | * This is similar to the loop in the stages == 7 or 8 cases. 
102 | */ 103 | temp = complex_data_ptr[m]; /* Real and imaginary */ 104 | complex_data_ptr[m] = complex_data_ptr[mr]; 105 | complex_data_ptr[mr] = temp; 106 | } 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /webrtc/common_audio/signal_processing/complex_fft.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains the function WebRtcSpl_ComplexFFT(). 14 | * The description header can be found in signal_processing_library.h 15 | * 16 | */ 17 | 18 | #include "webrtc/common_audio/signal_processing/complex_fft_tables.h" 19 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 20 | 21 | #define CFFTSFT 14 22 | #define CFFTRND 1 23 | #define CFFTRND2 16384 24 | 25 | #define CIFFTSFT 14 26 | #define CIFFTRND 1 27 | 28 | 29 | int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) 30 | { 31 | int i, j, l, k, istep, n, m; 32 | int16_t wr, wi; 33 | int32_t tr32, ti32, qr32, qi32; 34 | 35 | /* The 1024-value is a constant given from the size of kSinTable1024[], 36 | * and should not be changed depending on the input parameter 'stages' 37 | */ 38 | n = 1 << stages; 39 | if (n > 1024) 40 | return -1; 41 | 42 | l = 1; 43 | k = 10 - 1; /* Constant for given kSinTable1024[]. 
Do not change 44 | depending on the input parameter 'stages' */ 45 | 46 | if (mode == 0) 47 | { 48 | // mode==0: Low-complexity and Low-accuracy mode 49 | while (l < n) 50 | { 51 | istep = l << 1; 52 | 53 | for (m = 0; m < l; ++m) 54 | { 55 | j = m << k; 56 | 57 | /* The 256-value is a constant given as 1/4 of the size of 58 | * kSinTable1024[], and should not be changed depending on the input 59 | * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 60 | */ 61 | wr = kSinTable1024[j + 256]; 62 | wi = -kSinTable1024[j]; 63 | 64 | for (i = m; i < n; i += istep) 65 | { 66 | j = i + l; 67 | 68 | tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15; 69 | 70 | ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15; 71 | 72 | qr32 = (int32_t)frfi[2 * i]; 73 | qi32 = (int32_t)frfi[2 * i + 1]; 74 | frfi[2 * j] = (int16_t)((qr32 - tr32) >> 1); 75 | frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> 1); 76 | frfi[2 * i] = (int16_t)((qr32 + tr32) >> 1); 77 | frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> 1); 78 | } 79 | } 80 | 81 | --k; 82 | l = istep; 83 | 84 | } 85 | 86 | } else 87 | { 88 | // mode==1: High-complexity and High-accuracy mode 89 | while (l < n) 90 | { 91 | istep = l << 1; 92 | 93 | for (m = 0; m < l; ++m) 94 | { 95 | j = m << k; 96 | 97 | /* The 256-value is a constant given as 1/4 of the size of 98 | * kSinTable1024[], and should not be changed depending on the input 99 | * parameter 'stages'. 
/* In-place radix-2 inverse FFT of the interleaved complex array frfi
 * (re/im pairs, 2^stages points, int16). Returns the total number of
 * right-shifts applied for overflow protection (so the caller can restore
 * magnitudes), or -1 if 'stages' requests more than 1024 points.
 * mode == 0 selects a low-complexity/low-accuracy butterfly, mode == 1 a
 * high-accuracy one (with an ARMv7 fast path); both read twiddle factors
 * from the shared kSinTable1024[].
 */
int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode)
{
    int i, j, l, k, istep, n, m, scale, shift;
    int16_t wr, wi; /* Twiddle factors: wr from the +256 (quarter-period)
                     * table offset, wi the raw sine entry. */
    int32_t tr32, ti32, qr32, qi32;
    int32_t tmp32, round2;

    /* The 1024-value is a constant given from the size of kSinTable1024[],
     * and should not be changed depending on the input parameter 'stages'
     */
    n = 1 << stages;
    if (n > 1024)
        return -1;

    scale = 0;

    l = 1;
    k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
         depending on the input parameter 'stages' */

    while (l < n)
    {
        // variable scaling, depending upon data
        shift = 0;
        round2 = 8192;

        /* 13573 is approximately (sqrt(2)-1) in Q15 and 27146 twice that;
         * presumably these thresholds guard against the up-to-sqrt(2)
         * magnitude growth per butterfly stage -- TODO(review): confirm. */
        tmp32 = (int32_t)WebRtcSpl_MaxAbsValueW16(frfi, 2 * n);
        if (tmp32 > 13573)
        {
            shift++;
            scale++;
            round2 <<= 1;
        }
        if (tmp32 > 27146)
        {
            shift++;
            scale++;
            round2 <<= 1;
        }

        istep = l << 1;

        if (mode == 0)
        {
            // mode==0: Low-complexity and Low-accuracy mode
            for (m = 0; m < l; ++m)
            {
                j = m << k;

                /* The 256-value is a constant given as 1/4 of the size of
                 * kSinTable1024[], and should not be changed depending on the input
                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
                 */
                wr = kSinTable1024[j + 256];
                wi = kSinTable1024[j];

                for (i = m; i < n; i += istep)
                {
                    j = i + l;

                    /* Complex multiply (wr + j*wi) * frfi[j]; Q15 products
                     * truncated straight back down (no rounding in this mode). */
                    tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;

                    ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;

                    /* Butterfly: frfi[i] +/- t, scaled down by 'shift'. */
                    qr32 = (int32_t)frfi[2 * i];
                    qi32 = (int32_t)frfi[2 * i + 1];
                    frfi[2 * j] = (int16_t)((qr32 - tr32) >> shift);
                    frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> shift);
                    frfi[2 * i] = (int16_t)((qr32 + tr32) >> shift);
                    frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> shift);
                }
            }
        } else
        {
            // mode==1: High-complexity and High-accuracy mode

            for (m = 0; m < l; ++m)
            {
                j = m << k;

                /* The 256-value is a constant given as 1/4 of the size of
                 * kSinTable1024[], and should not be changed depending on the input
                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
                 */
                wr = kSinTable1024[j + 256];
                wi = kSinTable1024[j];

#ifdef WEBRTC_ARCH_ARM_V7
                /* Pack wr (low half) and wi (high half) into one register for
                 * the dual 16-bit multiply-accumulate instructions below. */
                int32_t wri = 0;
                __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
                    "r"((int32_t)wr), "r"((int32_t)wi));
#endif

                for (i = m; i < n; i += istep)
                {
                    j = i + l;

#ifdef WEBRTC_ARCH_ARM_V7
                    register int32_t frfi_r;
                    __asm __volatile(
                        "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t"
                        "smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
                        "smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
                        :[frfi_r]"=&r"(frfi_r),
                         [tr32]"=&r"(tr32),
                         [ti32]"=r"(ti32)
                        :[frfi_even]"r"((int32_t)frfi[2*j]),
                         [frfi_odd]"r"((int32_t)frfi[2*j +1]),
                         [wri]"r"(wri),
                         [cifftrnd]"r"(CIFFTRND)
                    );
#else

                    /* Same complex multiply as mode 0, but with CIFFTRND
                     * rounding and CIFFTSFT extra fraction bits retained
                     * (both constants defined earlier in this file). */
                    tr32 = WEBRTC_SPL_MUL_16_16(wr, frfi[2 * j])
                            - WEBRTC_SPL_MUL_16_16(wi, frfi[2 * j + 1]) + CIFFTRND;

                    ti32 = WEBRTC_SPL_MUL_16_16(wr, frfi[2 * j + 1])
                            + WEBRTC_SPL_MUL_16_16(wi, frfi[2 * j]) + CIFFTRND;
#endif
                    tr32 >>= 15 - CIFFTSFT;
                    ti32 >>= 15 - CIFFTSFT;

                    qr32 = ((int32_t)frfi[2 * i]) << CIFFTSFT;
                    qi32 = ((int32_t)frfi[2 * i + 1]) << CIFFTSFT;

                    /* Rounded, scaled butterfly; round2 matches 'shift'. */
                    frfi[2 * j] = (int16_t)(
                        (qr32 - tr32 + round2) >> (shift + CIFFTSFT));
                    frfi[2 * j + 1] = (int16_t)(
                        (qi32 - ti32 + round2) >> (shift + CIFFTSFT));
                    frfi[2 * i] = (int16_t)(
                        (qr32 + tr32 + round2) >> (shift + CIFFTSFT));
                    frfi[2 * i + 1] = (int16_t)(
                        (qi32 + ti32 + round2) >> (shift + CIFFTSFT));
                }
            }

        }
        --k;
        l = istep;
    }
    return scale;
}
// One full period of a 1024-point sine wave in Q15:
// kSinTable1024[k] corresponds to sin(2*pi*k/1024) scaled by 32767.
// The FFT/IFFT kernels read the cosine via a +256 (quarter-period) offset,
// so indices up to 255+256 are accessed directly from this table.
static const int16_t kSinTable1024[] = {
  0, 201, 402, 603, 804, 1005, 1206, 1406,
  1607, 1808, 2009, 2209, 2410, 2610, 2811, 3011,
  3211, 3411, 3611, 3811, 4011, 4210, 4409, 4608,
  4807, 5006, 5205, 5403, 5601, 5799, 5997, 6195,
  6392, 6589, 6786, 6982, 7179, 7375, 7571, 7766,
  7961, 8156, 8351, 8545, 8739, 8932, 9126, 9319,
  9511, 9703, 9895, 10087, 10278, 10469, 10659, 10849,
  11038, 11227, 11416, 11604, 11792, 11980, 12166, 12353,
  12539, 12724, 12909, 13094, 13278, 13462, 13645, 13827,
  14009, 14191, 14372, 14552, 14732, 14911, 15090, 15268,
  15446, 15623, 15799, 15975, 16150, 16325, 16499, 16672,
  16845, 17017, 17189, 17360, 17530, 17699, 17868, 18036,
  18204, 18371, 18537, 18702, 18867, 19031, 19194, 19357,
  19519, 19680, 19840, 20000, 20159, 20317, 20474, 20631,
  20787, 20942, 21096, 21249, 21402, 21554, 21705, 21855,
  22004, 22153, 22301, 22448, 22594, 22739, 22883, 23027,
  23169, 23311, 23452, 23592, 23731, 23869, 24006, 24143,
  24278, 24413, 24546, 24679, 24811, 24942, 25072, 25201,
  25329, 25456, 25582, 25707, 25831, 25954, 26077, 26198,
  26318, 26437, 26556, 26673, 26789, 26905, 27019, 27132,
  27244, 27355, 27466, 27575, 27683, 27790, 27896, 28001,
  28105, 28208, 28309, 28410, 28510, 28608, 28706, 28802,
  28897, 28992, 29085, 29177, 29268, 29358, 29446, 29534,
  29621, 29706, 29790, 29873, 29955, 30036, 30116, 30195,
  30272, 30349, 30424, 30498, 30571, 30643, 30713, 30783,
  30851, 30918, 30984, 31049, 31113, 31175, 31236, 31297,
  31356, 31413, 31470, 31525, 31580, 31633, 31684, 31735,
  31785, 31833, 31880, 31926, 31970, 32014, 32056, 32097,
  32137, 32176, 32213, 32249, 32284, 32318, 32350, 32382,
  32412, 32441, 32468, 32495, 32520, 32544, 32567, 32588,
  32609, 32628, 32646, 32662, 32678, 32692, 32705, 32717,
  32727, 32736, 32744, 32751, 32757, 32761, 32764, 32766,
  32767, 32766, 32764, 32761, 32757, 32751, 32744, 32736,
  32727, 32717, 32705, 32692, 32678, 32662, 32646, 32628,
  32609, 32588, 32567, 32544, 32520, 32495, 32468, 32441,
  32412, 32382, 32350, 32318, 32284, 32249, 32213, 32176,
  32137, 32097, 32056, 32014, 31970, 31926, 31880, 31833,
  31785, 31735, 31684, 31633, 31580, 31525, 31470, 31413,
  31356, 31297, 31236, 31175, 31113, 31049, 30984, 30918,
  30851, 30783, 30713, 30643, 30571, 30498, 30424, 30349,
  30272, 30195, 30116, 30036, 29955, 29873, 29790, 29706,
  29621, 29534, 29446, 29358, 29268, 29177, 29085, 28992,
  28897, 28802, 28706, 28608, 28510, 28410, 28309, 28208,
  28105, 28001, 27896, 27790, 27683, 27575, 27466, 27355,
  27244, 27132, 27019, 26905, 26789, 26673, 26556, 26437,
  26318, 26198, 26077, 25954, 25831, 25707, 25582, 25456,
  25329, 25201, 25072, 24942, 24811, 24679, 24546, 24413,
  24278, 24143, 24006, 23869, 23731, 23592, 23452, 23311,
  23169, 23027, 22883, 22739, 22594, 22448, 22301, 22153,
  22004, 21855, 21705, 21554, 21402, 21249, 21096, 20942,
  20787, 20631, 20474, 20317, 20159, 20000, 19840, 19680,
  19519, 19357, 19194, 19031, 18867, 18702, 18537, 18371,
  18204, 18036, 17868, 17699, 17530, 17360, 17189, 17017,
  16845, 16672, 16499, 16325, 16150, 15975, 15799, 15623,
  15446, 15268, 15090, 14911, 14732, 14552, 14372, 14191,
  14009, 13827, 13645, 13462, 13278, 13094, 12909, 12724,
  12539, 12353, 12166, 11980, 11792, 11604, 11416, 11227,
  11038, 10849, 10659, 10469, 10278, 10087, 9895, 9703,
  9511, 9319, 9126, 8932, 8739, 8545, 8351, 8156,
  7961, 7766, 7571, 7375, 7179, 6982, 6786, 6589,
  6392, 6195, 5997, 5799, 5601, 5403, 5205, 5006,
  4807, 4608, 4409, 4210, 4011, 3811, 3611, 3411,
  3211, 3011, 2811, 2610, 2410, 2209, 2009, 1808,
  1607, 1406, 1206, 1005, 804, 603, 402, 201,
  0, -201, -402, -603, -804, -1005, -1206, -1406,
  -1607, -1808, -2009, -2209, -2410, -2610, -2811, -3011,
  -3211, -3411, -3611, -3811, -4011, -4210, -4409, -4608,
  -4807, -5006, -5205, -5403, -5601, -5799, -5997, -6195,
  -6392, -6589, -6786, -6982, -7179, -7375, -7571, -7766,
  -7961, -8156, -8351, -8545, -8739, -8932, -9126, -9319,
  -9511, -9703, -9895, -10087, -10278, -10469, -10659, -10849,
  -11038, -11227, -11416, -11604, -11792, -11980, -12166, -12353,
  -12539, -12724, -12909, -13094, -13278, -13462, -13645, -13827,
  -14009, -14191, -14372, -14552, -14732, -14911, -15090, -15268,
  -15446, -15623, -15799, -15975, -16150, -16325, -16499, -16672,
  -16845, -17017, -17189, -17360, -17530, -17699, -17868, -18036,
  -18204, -18371, -18537, -18702, -18867, -19031, -19194, -19357,
  -19519, -19680, -19840, -20000, -20159, -20317, -20474, -20631,
  -20787, -20942, -21096, -21249, -21402, -21554, -21705, -21855,
  -22004, -22153, -22301, -22448, -22594, -22739, -22883, -23027,
  -23169, -23311, -23452, -23592, -23731, -23869, -24006, -24143,
  -24278, -24413, -24546, -24679, -24811, -24942, -25072, -25201,
  -25329, -25456, -25582, -25707, -25831, -25954, -26077, -26198,
  -26318, -26437, -26556, -26673, -26789, -26905, -27019, -27132,
  -27244, -27355, -27466, -27575, -27683, -27790, -27896, -28001,
  -28105, -28208, -28309, -28410, -28510, -28608, -28706, -28802,
  -28897, -28992, -29085, -29177, -29268, -29358, -29446, -29534,
  -29621, -29706, -29790, -29873, -29955, -30036, -30116, -30195,
  -30272, -30349, -30424, -30498, -30571, -30643, -30713, -30783,
  -30851, -30918, -30984, -31049, -31113, -31175, -31236, -31297,
  -31356, -31413, -31470, -31525, -31580, -31633, -31684, -31735,
  -31785, -31833, -31880, -31926, -31970, -32014, -32056, -32097,
  -32137, -32176, -32213, -32249, -32284, -32318, -32350, -32382,
  -32412, -32441, -32468, -32495, -32520, -32544, -32567, -32588,
  -32609, -32628, -32646, -32662, -32678, -32692, -32705, -32717,
  -32727, -32736, -32744, -32751, -32757, -32761, -32764, -32766,
  -32767, -32766, -32764, -32761, -32757, -32751, -32744, -32736,
  -32727, -32717, -32705, -32692, -32678, -32662, -32646, -32628,
  -32609, -32588, -32567, -32544, -32520, -32495, -32468, -32441,
  -32412, -32382, -32350, -32318, -32284, -32249, -32213, -32176,
  -32137, -32097, -32056, -32014, -31970, -31926, -31880, -31833,
  -31785, -31735, -31684, -31633, -31580, -31525, -31470, -31413,
  -31356, -31297, -31236, -31175, -31113, -31049, -30984, -30918,
  -30851, -30783, -30713, -30643, -30571, -30498, -30424, -30349,
  -30272, -30195, -30116, -30036, -29955, -29873, -29790, -29706,
  -29621, -29534, -29446, -29358, -29268, -29177, -29085, -28992,
  -28897, -28802, -28706, -28608, -28510, -28410, -28309, -28208,
  -28105, -28001, -27896, -27790, -27683, -27575, -27466, -27355,
  -27244, -27132, -27019, -26905, -26789, -26673, -26556, -26437,
  -26318, -26198, -26077, -25954, -25831, -25707, -25582, -25456,
  -25329, -25201, -25072, -24942, -24811, -24679, -24546, -24413,
  -24278, -24143, -24006, -23869, -23731, -23592, -23452, -23311,
  -23169, -23027, -22883, -22739, -22594, -22448, -22301, -22153,
  -22004, -21855, -21705, -21554, -21402, -21249, -21096, -20942,
  -20787, -20631, -20474, -20317, -20159, -20000, -19840, -19680,
  -19519, -19357, -19194, -19031, -18867, -18702, -18537, -18371,
  -18204, -18036, -17868, -17699, -17530, -17360, -17189, -17017,
  -16845, -16672, -16499, -16325, -16150, -15975, -15799, -15623,
  -15446, -15268, -15090, -14911, -14732, -14552, -14372, -14191,
  -14009, -13827, -13645, -13462, -13278, -13094, -12909, -12724,
  -12539, -12353, -12166, -11980, -11792, -11604, -11416, -11227,
  -11038, -10849, -10659, -10469, -10278, -10087, -9895, -9703,
  -9511, -9319, -9126, -8932, -8739, -8545, -8351, -8156,
  -7961, -7766, -7571, -7375, -7179, -6982, -6786, -6589,
  -6392, -6195, -5997, -5799, -5601, -5403, -5205, -5006,
  -4807, -4608, -4409, -4210, -4011, -3811, -3611, -3411,
  -3211, -3011, -2811, -2610, -2410, -2209, -2009, -1808,
  -1607, -1406, -1206, -1005, -804, -603, -402, -201
};
/* C version of WebRtcSpl_CrossCorrelation() for generic platforms.
 * Computes |dim_cross_correlation| lags: for each lag i,
 * cross_correlation[i] = sum_j (seq1[j] * seq2[step_seq2 * i + j]) >> right_shifts.
 */
void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
                                 const int16_t* seq1,
                                 const int16_t* seq2,
                                 int16_t dim_seq,
                                 int16_t dim_cross_correlation,
                                 int16_t right_shifts,
                                 int16_t step_seq2) {
  int lag;

  for (lag = 0; lag < dim_cross_correlation; lag++) {
    // seq2 is advanced by step_seq2 samples for each successive lag.
    const int16_t* shifted_seq2 = &seq2[step_seq2 * lag];
    int32_t sum = 0;
    int n;

    /* Unrolling doesn't seem to improve performance. */
    for (n = 0; n < dim_seq; n++) {
      // Each product is shifted down before accumulation.
      sum += (seq1[n] * shifted_seq2[n]) >> right_shifts;
    }
    cross_correlation[lag] = sum;
  }
}
// Unsigned 32-bit by 16-bit division.
// Returns 0xFFFFFFFF when dividing by zero.
uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den)
{
    // Guard against division with 0.
    if (den == 0)
    {
        return (uint32_t)0xFFFFFFFF;
    }
    return num / den;
}

// Signed 32-bit by 16-bit division (truncating, like C '/').
// Returns 0x7FFFFFFF when dividing by zero.
int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den)
{
    // Guard against division with 0.
    if (den == 0)
    {
        return (int32_t)0x7FFFFFFF;
    }
    return num / den;
}

// Signed 32-bit by 16-bit division with a 16-bit result; the caller is
// expected to keep the quotient within 16 bits.
// Returns 0x7FFF when dividing by zero.
int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den)
{
    // Guard against division with 0.
    if (den == 0)
    {
        return (int16_t)0x7FFF;
    }
    return (int16_t)(num / den);
}
// Divides |num| by |den| and returns the quotient in Q31, i.e. roughly
// (num << 31) / den; intended for |num| < |den| so the result fits.
// The sign of the result follows ordinary division rules.
//
// Fix vs. the original: the long division is now performed on unsigned
// 32-bit values. The original shifted a signed accumulator left --
// undefined behavior on overflow (C11 6.5.7) -- and then compared it
// *signed*, so for |den| > 2^30 the doubled remainder could wrap negative
// and produce wrong quotient bits. Unsigned arithmetic is well defined
// and keeps every intermediate value exact; results are unchanged for
// all inputs where the original was well defined.
int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den)
{
    uint32_t remainder, divisor;
    uint32_t quotient = 0;
    int k = 31;
    int negate = 0;

    if (num == 0)
        return 0;

    // Work on magnitudes; 0u - (uint32_t)x is well defined even for
    // x == INT32_MIN (unlike -x, which would overflow).
    if (num < 0)
    {
        negate ^= 1;
        remainder = 0u - (uint32_t)num;
    } else
    {
        remainder = (uint32_t)num;
    }
    if (den < 0)
    {
        negate ^= 1;
        divisor = 0u - (uint32_t)den;
    } else
    {
        divisor = (uint32_t)den;
    }

    // Restoring long division: one quotient bit per iteration, 31 bits.
    while (k--)
    {
        quotient <<= 1;
        remainder <<= 1;
        if (remainder >= divisor)
        {
            remainder -= divisor;
            quotient++;
        }
    }

    // Negate only when exactly one operand was negative (as the original's
    // change_sign == 1 test did).
    return negate ? -(int32_t)quotient : (int32_t)quotient;
}
// TODO(Bjornv): Change the function parameter order to WebRTC code style.
// C version of WebRtcSpl_DownsampleFast() for generic platforms.
// FIR-filters (coefficients in Q12, applied backwards from each read
// position) and decimates |data_in| by |factor|, starting at |delay|.
// Returns 0 on success, -1 on bad arguments.
int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
                              int data_in_length,
                              int16_t* data_out,
                              int data_out_length,
                              const int16_t* __restrict coefficients,
                              int coefficients_length,
                              int factor,
                              int delay) {
  int pos;      // Read position in |data_in| for the current output sample.
  int out_idx;  // Write position in |data_out|.
  int tap;
  const int endpos = delay + factor * (data_out_length - 1) + 1;

  // Return error if any of the running conditions doesn't meet.
  if (data_out_length <= 0 || coefficients_length <= 0
      || data_in_length < endpos) {
    return -1;
  }

  for (pos = delay, out_idx = 0; pos < endpos; pos += factor, out_idx++) {
    int32_t acc = 2048;  // Round value, 0.5 in Q12.

    for (tap = 0; tap < coefficients_length; tap++) {
      acc += coefficients[tap] * data_in[pos - tap];  // Q12.
    }

    acc >>= 12;  // Q0.

    // Saturate and store the output.
    data_out[out_idx] = WebRtcSpl_SatW32ToW16(acc);
  }

  return 0;
}
// Sum of squared samples of |vector|, right-shifted per sample so the
// accumulator fits in 32 bits; the shift used is reported through
// |scale_factor| so the caller can undo the scaling.
int32_t WebRtcSpl_Energy(int16_t* vector, int vector_length, int* scale_factor)
{
    // Shift amount chosen from the largest sample magnitude.
    int scaling = WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length);
    int32_t energy = 0;
    int idx;

    for (idx = 0; idx < vector_length; idx++)
    {
        energy += WEBRTC_SPL_MUL_16_16_RSFT(vector[idx], vector[idx], scaling);
    }

    *scale_factor = scaling;
    return energy;
}
// Returns the number of right-shifts each squared sample needs so that
// summing |times| such squares cannot overflow 32 bits; 0 when no scaling
// is required (or the vector is all zeros).
int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector,
                                   int in_vector_length,
                                   int times)
{
    int16_t bits_needed = WebRtcSpl_GetSizeInBits(times);
    int16_t max_abs = -1;
    int16_t norm_bits;
    int idx;

    // Find the largest absolute sample value.
    for (idx = 0; idx < in_vector_length; idx++)
    {
        int16_t magnitude = (in_vector[idx] > 0) ? in_vector[idx]
                                                 : -in_vector[idx];
        if (magnitude > max_abs)
        {
            max_abs = magnitude;
        }
    }
    // Headroom left in the squared maximum.
    norm_bits = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(max_abs, max_abs));

    if (max_abs == 0)
    {
        return 0;  // Since norm(0) returns 0.
    }
    return (norm_bits > bits_needed) ? 0 : (int16_t)(bits_needed - norm_bits);
}
9 | */ 10 | 11 | #ifndef WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_ 12 | #define WEBRTC_COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_ 13 | 14 | #include "webrtc/typedefs.h" 15 | 16 | // For ComplexFFT(), the maximum fft order is 10; 17 | // for OpenMax FFT in ARM, it is 12; 18 | // WebRTC APM uses orders of only 7 and 8. 19 | enum {kMaxFFTOrder = 10}; 20 | 21 | struct RealFFT; 22 | 23 | #ifdef __cplusplus 24 | extern "C" { 25 | #endif 26 | 27 | struct RealFFT* WebRtcSpl_CreateRealFFT(int order); 28 | void WebRtcSpl_FreeRealFFT(struct RealFFT* self); 29 | 30 | // Compute an FFT for a real-valued signal of length of 2^order, 31 | // where 1 < order <= MAX_FFT_ORDER. Transform length is determined by the 32 | // specification structure, which must be initialized prior to calling the FFT 33 | // function with WebRtcSpl_CreateRealFFT(). 34 | // The relationship between the input and output sequences can 35 | // be expressed in terms of the DFT, i.e.: 36 | // x[n] = (2^(-scalefactor)/N) . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N) 37 | // n=0,1,2,...N-1 38 | // N=2^order. 39 | // The conjugate-symmetric output sequence is represented using a CCS vector, 40 | // which is of length N+2, and is organized as follows: 41 | // Index: 0 1 2 3 4 5 . . . N-2 N-1 N N+1 42 | // Component: R0 0 R1 I1 R2 I2 . . . R[N/2-1] I[N/2-1] R[N/2] 0 43 | // where R[n] and I[n], respectively, denote the real and imaginary components 44 | // for FFT bin 'n'. Bins are numbered from 0 to N/2, where N is the FFT length. 45 | // Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to 46 | // the foldover frequency. 47 | // 48 | // Input Arguments: 49 | // self - pointer to preallocated and initialized FFT specification structure. 50 | // real_data_in - the input signal. For an ARM Neon platform, it must be 51 | // aligned on a 32-byte boundary. 
52 | // 53 | // Output Arguments: 54 | // complex_data_out - the output complex signal with (2^order + 2) 16-bit 55 | // elements. For an ARM Neon platform, it must be different 56 | // from real_data_in, and aligned on a 32-byte boundary. 57 | // 58 | // Return Value: 59 | // 0 - FFT calculation is successful. 60 | // -1 - Error with bad arguments (NULL pointers). 61 | int WebRtcSpl_RealForwardFFT(struct RealFFT* self, 62 | const int16_t* real_data_in, 63 | int16_t* complex_data_out); 64 | 65 | // Compute the inverse FFT for a conjugate-symmetric input sequence of length of 66 | // 2^order, where 1 < order <= MAX_FFT_ORDER. Transform length is determined by 67 | // the specification structure, which must be initialized prior to calling the 68 | // FFT function with WebRtcSpl_CreateRealFFT(). 69 | // For a transform of length M, the input sequence is represented using a packed 70 | // CCS vector of length M+2, which is explained in the comments for 71 | // WebRtcSpl_RealForwardFFTC above. 72 | // 73 | // Input Arguments: 74 | // self - pointer to preallocated and initialized FFT specification structure. 75 | // complex_data_in - the input complex signal with (2^order + 2) 16-bit 76 | // elements. For an ARM Neon platform, it must be aligned on 77 | // a 32-byte boundary. 78 | // 79 | // Output Arguments: 80 | // real_data_out - the output real signal. For an ARM Neon platform, it must 81 | // be different to complex_data_in, and aligned on a 32-byte 82 | // boundary. 83 | // 84 | // Return Value: 85 | // 0 or a positive number - a value that the elements in the |real_data_out| 86 | // should be shifted left with in order to get 87 | // correct physical values. 88 | // -1 - Error with bad arguments (NULL pointers). 
// Saturate a 32-bit value to the 16-bit range [-32768, 32767].
static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
  int16_t out16 = (int16_t) value32;

  if (value32 > 32767)
    out16 = 32767;
  else if (value32 < -32768)
    out16 = -32768;

  return out16;
}

// Saturating 32-bit addition.
//
// Fix vs. the original: the old code performed the signed addition first
// and inspected the (already wrapped) sum afterwards -- signed overflow is
// undefined behavior (C11 6.5). The addition is now done in unsigned
// arithmetic, which is well defined, and overflow is detected from the
// operand signs.
static __inline int32_t WebRtcSpl_AddSatW32(int32_t a, int32_t b) {
  // Unsigned wraparound is defined; converting back to int32_t yields the
  // two's-complement result.
  const int32_t sum = (int32_t)((uint32_t)a + (uint32_t)b);

  // a + b can't overflow if a and b have different signs; if they have the
  // same sign, a + b also has that sign unless overflow occurred.
  if ((a < 0) == (b < 0) && (a < 0) != (sum < 0)) {
    return a < 0 ? (int32_t)0x80000000 : (int32_t)0x7FFFFFFF;
  }
  return sum;
}

// Saturating 32-bit subtraction (same UB fix as WebRtcSpl_AddSatW32).
static __inline int32_t WebRtcSpl_SubSatW32(int32_t a, int32_t b) {
  const int32_t diff = (int32_t)((uint32_t)a - (uint32_t)b);

  // a - b can't overflow if a and b have the same sign; if they differ,
  // the result must have the sign of a unless overflow occurred.
  if ((a < 0) != (b < 0) && (a < 0) != (diff < 0)) {
    return a < 0 ? (int32_t)0x80000000 : (int32_t)0x7FFFFFFF;
  }
  return diff;
}

// Saturating 16-bit addition; the 32-bit intermediate cannot overflow.
static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
  return WebRtcSpl_SatW32ToW16((int32_t) a + (int32_t) b);
}

// Saturating 16-bit subtraction; the 32-bit intermediate cannot overflow.
static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
  return WebRtcSpl_SatW32ToW16((int32_t) var1 - (int32_t) var2);
}
// Position of the highest set bit of |n|, counted from 1
// (i.e. bit length of n; returns 0 for n == 0).
static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
  // Binary search over halves, then bytes, nibbles and bits.
  int16_t bits = (0xFFFF0000 & n) ? 16 : 0;
  if (0x0000FF00 & (n >> bits)) bits += 8;
  if (0x000000F0 & (n >> bits)) bits += 4;
  if (0x0000000C & (n >> bits)) bits += 2;
  if (0x00000002 & (n >> bits)) bits += 1;
  if (0x00000001 & (n >> bits)) bits += 1;
  return bits;
}

// Number of left-shifts needed to normalize |a| so its magnitude occupies
// the top bits of a 32-bit word; returns 0 for a == 0.
static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
  int16_t zeros;

  if (a == 0) {
    return 0;
  }
  if (a < 0) {
    a = ~a;  // One's complement maps negatives onto the non-negative range.
  }

  // Binary search for the leading redundant sign bits.
  zeros = (0xFFFF8000 & a) ? 0 : 16;
  if (!(0xFF800000 & (a << zeros))) zeros += 8;
  if (!(0xF8000000 & (a << zeros))) zeros += 4;
  if (!(0xE0000000 & (a << zeros))) zeros += 2;
  if (!(0xC0000000 & (a << zeros))) zeros += 1;

  return zeros;
}
// Count of leading zero bits in |a|; returns 0 for a == 0.
static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
  int16_t zeros;

  if (a == 0) return 0;

  // Binary search for the highest set bit.
  zeros = (0xFFFF0000 & a) ? 0 : 16;
  if (!(0xFF000000 & (a << zeros))) zeros += 8;
  if (!(0xF0000000 & (a << zeros))) zeros += 4;
  if (!(0xC0000000 & (a << zeros))) zeros += 2;
  if (!(0x80000000 & (a << zeros))) zeros += 1;

  return zeros;
}

// 16-bit counterpart of WebRtcSpl_NormW32: left-shifts needed to
// normalize |a| within 16 bits; returns 0 for a == 0.
static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
  int16_t zeros;

  if (a == 0) {
    return 0;
  }
  if (a < 0) {
    a = ~a;  // Map negatives onto the non-negative range.
  }

  zeros = (0xFF80 & a) ? 0 : 8;
  if (!(0xF800 & (a << zeros))) zeros += 4;
  if (!(0xE000 & (a << zeros))) zeros += 2;
  if (!(0xC000 & (a << zeros))) zeros += 1;

  return zeros;
}

// Multiply-accumulate: a * b + c in 32-bit arithmetic.
static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
  return (a * b + c);
}
9 | */ 10 | 11 | /* 12 | * This file contains the implementation of functions 13 | * WebRtcSpl_MaxAbsValueW16C() 14 | * WebRtcSpl_MaxAbsValueW32C() 15 | * WebRtcSpl_MaxValueW16C() 16 | * WebRtcSpl_MaxValueW32C() 17 | * WebRtcSpl_MinValueW16C() 18 | * WebRtcSpl_MinValueW32C() 19 | * WebRtcSpl_MaxAbsIndexW16() 20 | * WebRtcSpl_MaxIndexW16() 21 | * WebRtcSpl_MaxIndexW32() 22 | * WebRtcSpl_MinIndexW16() 23 | * WebRtcSpl_MinIndexW32() 24 | * 25 | */ 26 | 27 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 28 | 29 | #include 30 | 31 | // TODO(bjorn/kma): Consolidate function pairs (e.g. combine 32 | // WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.) 33 | // TODO(kma): Move the next six functions into min_max_operations_c.c. 34 | 35 | // Maximum absolute value of word16 vector. C version for generic platforms. 36 | int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, int length) { 37 | int i = 0, absolute = 0, maximum = 0; 38 | 39 | if (vector == NULL || length <= 0) { 40 | return -1; 41 | } 42 | 43 | for (i = 0; i < length; i++) { 44 | absolute = abs((int)vector[i]); 45 | 46 | if (absolute > maximum) { 47 | maximum = absolute; 48 | } 49 | } 50 | 51 | // Guard the case for abs(-32768). 52 | if (maximum > WEBRTC_SPL_WORD16_MAX) { 53 | maximum = WEBRTC_SPL_WORD16_MAX; 54 | } 55 | 56 | return (int16_t)maximum; 57 | } 58 | 59 | // Maximum absolute value of word32 vector. C version for generic platforms. 60 | int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, int length) { 61 | // Use uint32_t for the local variables, to accommodate the return value 62 | // of abs(0x80000000), which is 0x80000000. 
63 | 64 | uint32_t absolute = 0, maximum = 0; 65 | int i = 0; 66 | 67 | if (vector == NULL || length <= 0) { 68 | return -1; 69 | } 70 | 71 | for (i = 0; i < length; i++) { 72 | absolute = abs((int)vector[i]); 73 | if (absolute > maximum) { 74 | maximum = absolute; 75 | } 76 | } 77 | 78 | maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX); 79 | 80 | return (int32_t)maximum; 81 | } 82 | 83 | // Maximum value of word16 vector. C version for generic platforms. 84 | int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, int length) { 85 | int16_t maximum = WEBRTC_SPL_WORD16_MIN; 86 | int i = 0; 87 | 88 | if (vector == NULL || length <= 0) { 89 | return maximum; 90 | } 91 | 92 | for (i = 0; i < length; i++) { 93 | if (vector[i] > maximum) 94 | maximum = vector[i]; 95 | } 96 | return maximum; 97 | } 98 | 99 | // Maximum value of word32 vector. C version for generic platforms. 100 | int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, int length) { 101 | int32_t maximum = WEBRTC_SPL_WORD32_MIN; 102 | int i = 0; 103 | 104 | if (vector == NULL || length <= 0) { 105 | return maximum; 106 | } 107 | 108 | for (i = 0; i < length; i++) { 109 | if (vector[i] > maximum) 110 | maximum = vector[i]; 111 | } 112 | return maximum; 113 | } 114 | 115 | // Minimum value of word16 vector. C version for generic platforms. 116 | int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, int length) { 117 | int16_t minimum = WEBRTC_SPL_WORD16_MAX; 118 | int i = 0; 119 | 120 | if (vector == NULL || length <= 0) { 121 | return minimum; 122 | } 123 | 124 | for (i = 0; i < length; i++) { 125 | if (vector[i] < minimum) 126 | minimum = vector[i]; 127 | } 128 | return minimum; 129 | } 130 | 131 | // Minimum value of word32 vector. C version for generic platforms. 
132 | int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, int length) { 133 | int32_t minimum = WEBRTC_SPL_WORD32_MAX; 134 | int i = 0; 135 | 136 | if (vector == NULL || length <= 0) { 137 | return minimum; 138 | } 139 | 140 | for (i = 0; i < length; i++) { 141 | if (vector[i] < minimum) 142 | minimum = vector[i]; 143 | } 144 | return minimum; 145 | } 146 | 147 | // Index of maximum absolute value in a word16 vector. 148 | int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length) { 149 | // Use type int for local variables, to accomodate the value of abs(-32768). 150 | 151 | int i = 0, absolute = 0, maximum = 0, index = 0; 152 | 153 | if (vector == NULL || length <= 0) { 154 | return -1; 155 | } 156 | 157 | for (i = 0; i < length; i++) { 158 | absolute = abs((int)vector[i]); 159 | 160 | if (absolute > maximum) { 161 | maximum = absolute; 162 | index = i; 163 | } 164 | } 165 | 166 | return index; 167 | } 168 | 169 | // Index of maximum value in a word16 vector. 170 | int WebRtcSpl_MaxIndexW16(const int16_t* vector, int length) { 171 | int i = 0, index = 0; 172 | int16_t maximum = WEBRTC_SPL_WORD16_MIN; 173 | 174 | if (vector == NULL || length <= 0) { 175 | return -1; 176 | } 177 | 178 | for (i = 0; i < length; i++) { 179 | if (vector[i] > maximum) { 180 | maximum = vector[i]; 181 | index = i; 182 | } 183 | } 184 | 185 | return index; 186 | } 187 | 188 | // Index of maximum value in a word32 vector. 189 | int WebRtcSpl_MaxIndexW32(const int32_t* vector, int length) { 190 | int i = 0, index = 0; 191 | int32_t maximum = WEBRTC_SPL_WORD32_MIN; 192 | 193 | if (vector == NULL || length <= 0) { 194 | return -1; 195 | } 196 | 197 | for (i = 0; i < length; i++) { 198 | if (vector[i] > maximum) { 199 | maximum = vector[i]; 200 | index = i; 201 | } 202 | } 203 | 204 | return index; 205 | } 206 | 207 | // Index of minimum value in a word16 vector. 
208 | int WebRtcSpl_MinIndexW16(const int16_t* vector, int length) { 209 | int i = 0, index = 0; 210 | int16_t minimum = WEBRTC_SPL_WORD16_MAX; 211 | 212 | if (vector == NULL || length <= 0) { 213 | return -1; 214 | } 215 | 216 | for (i = 0; i < length; i++) { 217 | if (vector[i] < minimum) { 218 | minimum = vector[i]; 219 | index = i; 220 | } 221 | } 222 | 223 | return index; 224 | } 225 | 226 | // Index of minimum value in a word32 vector. 227 | int WebRtcSpl_MinIndexW32(const int32_t* vector, int length) { 228 | int i = 0, index = 0; 229 | int32_t minimum = WEBRTC_SPL_WORD32_MAX; 230 | 231 | if (vector == NULL || length <= 0) { 232 | return -1; 233 | } 234 | 235 | for (i = 0; i < length; i++) { 236 | if (vector[i] < minimum) { 237 | minimum = vector[i]; 238 | index = i; 239 | } 240 | } 241 | 242 | return index; 243 | } 244 | -------------------------------------------------------------------------------- /webrtc/common_audio/signal_processing/real_fft.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #include "webrtc/common_audio/signal_processing/include/real_fft.h" 12 | 13 | #include 14 | 15 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 16 | 17 | struct RealFFT { 18 | int order; 19 | }; 20 | 21 | struct RealFFT* WebRtcSpl_CreateRealFFT(int order) { 22 | struct RealFFT* self = NULL; 23 | 24 | if (order > kMaxFFTOrder || order < 0) { 25 | return NULL; 26 | } 27 | 28 | self = malloc(sizeof(struct RealFFT)); 29 | if (self == NULL) { 30 | return NULL; 31 | } 32 | self->order = order; 33 | 34 | return self; 35 | } 36 | 37 | void WebRtcSpl_FreeRealFFT(struct RealFFT* self) { 38 | if (self != NULL) { 39 | free(self); 40 | } 41 | } 42 | 43 | // The C version FFT functions (i.e. WebRtcSpl_RealForwardFFT and 44 | // WebRtcSpl_RealInverseFFT) are real-valued FFT wrappers for complex-valued 45 | // FFT implementation in SPL. 46 | 47 | int WebRtcSpl_RealForwardFFT(struct RealFFT* self, 48 | const int16_t* real_data_in, 49 | int16_t* complex_data_out) { 50 | int i = 0; 51 | int j = 0; 52 | int result = 0; 53 | int n = 1 << self->order; 54 | // The complex-value FFT implementation needs a buffer to hold 2^order 55 | // 16-bit COMPLEX numbers, for both time and frequency data. 56 | int16_t complex_buffer[2 << kMaxFFTOrder]; 57 | 58 | // Insert zeros to the imaginary parts for complex forward FFT input. 59 | for (i = 0, j = 0; i < n; i += 1, j += 2) { 60 | complex_buffer[j] = real_data_in[i]; 61 | complex_buffer[j + 1] = 0; 62 | }; 63 | 64 | WebRtcSpl_ComplexBitReverse(complex_buffer, self->order); 65 | result = WebRtcSpl_ComplexFFT(complex_buffer, self->order, 1); 66 | 67 | // For real FFT output, use only the first N + 2 elements from 68 | // complex forward FFT. 
69 | memcpy(complex_data_out, complex_buffer, sizeof(int16_t) * (n + 2)); 70 | 71 | return result; 72 | } 73 | 74 | int WebRtcSpl_RealInverseFFT(struct RealFFT* self, 75 | const int16_t* complex_data_in, 76 | int16_t* real_data_out) { 77 | int i = 0; 78 | int j = 0; 79 | int result = 0; 80 | int n = 1 << self->order; 81 | // Create the buffer specific to complex-valued FFT implementation. 82 | int16_t complex_buffer[2 << kMaxFFTOrder]; 83 | 84 | // For n-point FFT, first copy the first n + 2 elements into complex 85 | // FFT, then construct the remaining n - 2 elements by real FFT's 86 | // conjugate-symmetric properties. 87 | memcpy(complex_buffer, complex_data_in, sizeof(int16_t) * (n + 2)); 88 | for (i = n + 2; i < 2 * n; i += 2) { 89 | complex_buffer[i] = complex_data_in[2 * n - i]; 90 | complex_buffer[i + 1] = -complex_data_in[2 * n - i + 1]; 91 | } 92 | 93 | WebRtcSpl_ComplexBitReverse(complex_buffer, self->order); 94 | result = WebRtcSpl_ComplexIFFT(complex_buffer, self->order, 1); 95 | 96 | // Strip out the imaginary parts of the complex inverse FFT output. 97 | for (i = 0, j = 0; i < n; i += 1, j += 2) { 98 | real_data_out[i] = complex_buffer[j]; 99 | } 100 | 101 | return result; 102 | } 103 | -------------------------------------------------------------------------------- /webrtc/common_audio/signal_processing/resample_48khz.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains resampling functions between 48 kHz and nb/wb. 
 * The description header can be found in signal_processing_library.h
 *
 */

#include <string.h>
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"

////////////////////////////
///// 48 kHz -> 16 kHz /////
////////////////////////////

// 48 -> 16 resampler
// in:     480 samples at 48 kHz; out: 160 samples at 16 kHz.
// tmpmem: scratch buffer of at least 496 int32_t (largest offset written
//         below is tmpmem[495]).
void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out,
                                    WebRtcSpl_State48khzTo16khz* state, int32_t* tmpmem)
{
    ///// 48 --> 48(LP) /////
    // int16_t  in[480]
    // int32_t out[480]
    /////
    WebRtcSpl_LPBy2ShortToInt(in, 480, tmpmem + 16, state->S_48_48);

    ///// 48 --> 32 /////
    // int32_t  in[480]
    // int32_t out[320]
    /////
    // copy state to and from input array: the 8 saved history samples are
    // placed just before the input block, and the last 8 samples of this
    // block are saved as history for the next call.
    memcpy(tmpmem + 8, state->S_48_32, 8 * sizeof(int32_t));
    memcpy(state->S_48_32, tmpmem + 488, 8 * sizeof(int32_t));
    WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 160);

    ///// 32 --> 16 /////
    // int32_t  in[320]
    // int16_t out[160]
    /////
    WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16);
}

// initialize state of 48 -> 16 resampler
void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state)
{
    memset(state->S_48_48, 0, 16 * sizeof(int32_t));
    memset(state->S_48_32, 0, 8 * sizeof(int32_t));
    memset(state->S_32_16, 0, 8 * sizeof(int32_t));
}

////////////////////////////
///// 16 kHz -> 48 kHz /////
////////////////////////////

// 16 -> 48 resampler
// in:     160 samples at 16 kHz; out: 480 samples at 48 kHz.
// tmpmem: scratch buffer of at least 336 int32_t (largest offset written
//         below is tmpmem[335]).
void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out,
                                    WebRtcSpl_State16khzTo48khz* state, int32_t* tmpmem)
{
    ///// 16 --> 32 /////
    // int16_t  in[160]
    // int32_t out[320]
    /////
    WebRtcSpl_UpBy2ShortToInt(in, 160, tmpmem + 16, state->S_16_32);

    ///// 32 --> 24 /////
    // int32_t  in[320]
    // int32_t out[240]
    // copy state to and from input array
    /////
    memcpy(tmpmem + 8, state->S_32_24, 8 * sizeof(int32_t));
    memcpy(state->S_32_24, tmpmem + 328, 8 * sizeof(int32_t));
    WebRtcSpl_Resample32khzTo24khz(tmpmem + 8, tmpmem, 80);

    ///// 24 --> 48 /////
    // int32_t  in[240]
    // int16_t out[480]
    /////
    WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
}

// initialize state of 16 -> 48 resampler
void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state)
{
    memset(state->S_16_32, 0, 8 * sizeof(int32_t));
    memset(state->S_32_24, 0, 8 * sizeof(int32_t));
    memset(state->S_24_48, 0, 8 * sizeof(int32_t));
}

////////////////////////////
///// 48 kHz ->  8 kHz /////
////////////////////////////

// 48 -> 8 resampler
// in:     480 samples at 48 kHz; out: 80 samples at 8 kHz.
// tmpmem: scratch buffer of at least 496 int32_t (largest offset written
//         below is tmpmem[495]).
void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out,
                                   WebRtcSpl_State48khzTo8khz* state, int32_t* tmpmem)
{
    ///// 48 --> 24 /////
    // int16_t  in[480]
    // int32_t out[240]
    /////
    WebRtcSpl_DownBy2ShortToInt(in, 480, tmpmem + 256, state->S_48_24);

    ///// 24 --> 24(LP) /////
    // int32_t  in[240]
    // int32_t out[240]
    /////
    WebRtcSpl_LPBy2IntToInt(tmpmem + 256, 240, tmpmem + 16, state->S_24_24);

    ///// 24 --> 16 /////
    // int32_t  in[240]
    // int32_t out[160]
    /////
    // copy state to and from input array
    memcpy(tmpmem + 8, state->S_24_16, 8 * sizeof(int32_t));
    memcpy(state->S_24_16, tmpmem + 248, 8 * sizeof(int32_t));
    WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 80);

    ///// 16 --> 8 /////
    // int32_t  in[160]
    // int16_t out[80]
    /////
    WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8);
}

// initialize state of 48 -> 8 resampler
void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state)
{
    memset(state->S_48_24, 0, 8 * sizeof(int32_t));
    memset(state->S_24_24, 0, 16 * sizeof(int32_t));
    memset(state->S_24_16, 0, 8 * sizeof(int32_t));
    memset(state->S_16_8, 0, 8 * sizeof(int32_t));
}

////////////////////////////
/////  8 kHz -> 48 kHz /////
////////////////////////////

// 8 -> 48 resampler
// in:     80 samples at 8 kHz; out: 480 samples at 48 kHz.
// tmpmem: scratch buffer of at least 424 int32_t (largest offset accessed
//         below is tmpmem[423]).
void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out,
                                   WebRtcSpl_State8khzTo48khz* state, int32_t* tmpmem)
{
    ///// 8 --> 16 /////
    // int16_t  in[80]
    // int32_t out[160]
    /////
    WebRtcSpl_UpBy2ShortToInt(in, 80, tmpmem + 264, state->S_8_16);

    ///// 16 --> 12 /////
    // int32_t  in[160]
    // int32_t out[120]
    /////
    // copy state to and from input array
    memcpy(tmpmem + 256, state->S_16_12, 8 * sizeof(int32_t));
    memcpy(state->S_16_12, tmpmem + 416, 8 * sizeof(int32_t));
    WebRtcSpl_Resample32khzTo24khz(tmpmem + 256, tmpmem + 240, 40);

    ///// 12 --> 24 /////
    // int32_t  in[120]
    // int16_t out[240]
    /////
    WebRtcSpl_UpBy2IntToInt(tmpmem + 240, 120, tmpmem, state->S_12_24);

    ///// 24 --> 48 /////
    // int32_t  in[240]
    // int16_t out[480]
    /////
    WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
}

// initialize state of 8 -> 48 resampler
void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state)
{
    memset(state->S_8_16, 0, 8 * sizeof(int32_t));
    memset(state->S_16_12, 0, 8 * sizeof(int32_t));
    memset(state->S_12_24, 0, 8 * sizeof(int32_t));
    memset(state->S_24_48, 0, 8 * sizeof(int32_t));
}
--------------------------------------------------------------------------------
/webrtc/common_audio/signal_processing/resample_by_2_internal.c:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2011 The
WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


/*
 * This file contains some internal resampling functions: polyphase allpass
 * decimators, interpolators and half-band lowpass filters operating on
 * 16-bit samples carried in 32-bit fixed-point words.
 *
 */

#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"

// allpass filter coefficients (Q14).
// Row 0 drives the "upper" branch (odd samples), row 1 the "lower" branch.
static const int16_t kResampleAllpass[2][3] = {
        {821, 6110, 12382},
        {3050, 9368, 15063}
};

//
//   decimator
// input:  int32_t (shifted 15 positions to the left, + offset 16384) OVERWRITTEN!
// output: int16_t (saturated) (of length len/2)
// state:  filter state array; length = 8
// NOTE(review): the combine loop below advances two output samples per
// iteration, so len/2 is assumed to be even — confirm against callers
// (they pass 160/320, which satisfies this).
void WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out,
                                 int32_t *state)
{
    int32_t tmp0, tmp1, diff;
    int32_t i;

    len >>= 1;

    // lower allpass filter (operates on even input samples)
    for (i = 0; i < len; i++)
    {
        tmp0 = in[i << 1];
        diff = tmp0 - state[1];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[0] + diff * kResampleAllpass[1][0];
        state[0] = tmp0;
        diff = tmp1 - state[2];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[1] + diff * kResampleAllpass[1][1];
        state[1] = tmp1;
        diff = tmp0 - state[3];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[3] = state[2] + diff * kResampleAllpass[1][2];
        state[2] = tmp0;

        // divide by two and store temporarily
        in[i << 1] = (state[3] >> 1);
    }

    in++;

    // upper allpass filter (operates on odd input samples)
    for (i = 0; i < len; i++)
    {
        tmp0 = in[i << 1];
        diff = tmp0 - state[5];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[4] + diff * kResampleAllpass[0][0];
        state[4] = tmp0;
        diff = tmp1 - state[6];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[5] + diff * kResampleAllpass[0][1];
        state[5] = tmp1;
        diff = tmp0 - state[7];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[7] = state[6] + diff * kResampleAllpass[0][2];
        state[6] = tmp0;

        // divide by two and store temporarily
        in[i << 1] = (state[7] >> 1);
    }

    in--;

    // combine allpass outputs
    for (i = 0; i < len; i += 2)
    {
        // divide by two, add both allpass outputs and round
        tmp0 = (in[i << 1] + in[(i << 1) + 1]) >> 15;
        tmp1 = (in[(i << 1) + 2] + in[(i << 1) + 3]) >> 15;
        if (tmp0 > (int32_t)0x00007FFF)
            tmp0 = 0x00007FFF;
        if (tmp0 < (int32_t)0xFFFF8000)
            // NOTE(review): assigning 0xFFFF8000 to int32_t relies on
            // implementation-defined narrowing; yields -32768 on the
            // two's-complement platforms this library targets.
            tmp0 = 0xFFFF8000;
        out[i] = (int16_t)tmp0;
        if (tmp1 > (int32_t)0x00007FFF)
            tmp1 = 0x00007FFF;
        if (tmp1 < (int32_t)0xFFFF8000)
            tmp1 = 0xFFFF8000;
        out[i + 1] = (int16_t)tmp1;
    }
}

//
//   decimator
// input:  int16_t
// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len/2)
// state:  filter state array; length = 8
// The lower branch writes out[i] and the upper branch accumulates into it,
// so the two allpass outputs are summed (not yet scaled/saturated).
void WebRtcSpl_DownBy2ShortToInt(const int16_t *in,
                                 int32_t len,
                                 int32_t *out,
                                 int32_t *state)
{
    int32_t tmp0, tmp1, diff;
    int32_t i;

    len >>= 1;

    // lower allpass filter (operates on even input samples)
    for (i = 0; i < len; i++)
    {
        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
        diff = tmp0 - state[1];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[0] + diff * kResampleAllpass[1][0];
        state[0] = tmp0;
        diff = tmp1 - state[2];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[1] + diff * kResampleAllpass[1][1];
        state[1] = tmp1;
        diff = tmp0 - state[3];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[3] = state[2] + diff * kResampleAllpass[1][2];
        state[2] = tmp0;

        // divide by two and store temporarily
        out[i] = (state[3] >> 1);
    }

    in++;

    // upper allpass filter (operates on odd input samples)
    for (i = 0; i < len; i++)
    {
        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
        diff = tmp0 - state[5];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[4] + diff * kResampleAllpass[0][0];
        state[4] = tmp0;
        diff = tmp1 - state[6];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[5] + diff * kResampleAllpass[0][1];
        state[5] = tmp1;
        diff = tmp0 - state[7];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[7] = state[6] + diff * kResampleAllpass[0][2];
        state[6] = tmp0;

        // divide by two and store temporarily
        out[i] += (state[7] >> 1);
    }

    in--;
}

//
//   interpolator
// input:  int16_t
// output: int32_t (normalized, not saturated) (of length len*2)
// state:  filter state array; length = 8
void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len, int32_t *out,
                               int32_t *state)
{
    int32_t tmp0, tmp1, diff;
    int32_t i;

    // upper allpass filter (generates odd output samples)
    for (i = 0; i < len; i++)
    {
        tmp0 = ((int32_t)in[i] << 15) + (1 << 14);
        diff = tmp0 - state[5];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[4] + diff * kResampleAllpass[0][0];
        state[4] = tmp0;
        diff = tmp1 - state[6];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[5] + diff * kResampleAllpass[0][1];
        state[5] = tmp1;
        diff = tmp0 - state[7];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[7] = state[6] + diff * kResampleAllpass[0][2];
        state[6] = tmp0;

        // scale down, round and store
        out[i << 1] = state[7] >> 15;
    }

    out++;

    // lower allpass filter (generates even output samples)
    for (i = 0; i < len; i++)
    {
        tmp0 = ((int32_t)in[i] << 15) + (1 << 14);
        diff = tmp0 - state[1];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[0] + diff * kResampleAllpass[1][0];
        state[0] = tmp0;
        diff = tmp1 - state[2];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[1] + diff * kResampleAllpass[1][1];
        state[1] = tmp1;
        diff = tmp0 - state[3];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[3] = state[2] + diff * kResampleAllpass[1][2];
        state[2] = tmp0;

        // scale down, round and store
        out[i << 1] = state[3] >> 15;
    }
}

//
//   interpolator
// input:  int32_t (shifted 15 positions to the left, + offset 16384)
// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len*2)
// state:  filter state array; length = 8
void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out,
                             int32_t *state)
{
    int32_t tmp0, tmp1, diff;
    int32_t i;

    // upper allpass filter (generates odd output samples)
    for (i = 0; i < len; i++)
    {
        tmp0 = in[i];
        diff = tmp0 - state[5];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[4] + diff * kResampleAllpass[0][0];
        state[4] = tmp0;
        diff = tmp1 - state[6];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[5] + diff * kResampleAllpass[0][1];
        state[5] = tmp1;
        diff = tmp0 - state[7];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[7] = state[6] + diff * kResampleAllpass[0][2];
        state[6] = tmp0;

        // store, still in the shifted/offset domain (no scaling here)
        out[i << 1] = state[7];
    }

    out++;

    // lower allpass filter (generates even output samples)
    for (i = 0; i < len; i++)
    {
        tmp0 = in[i];
        diff = tmp0 - state[1];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[0] + diff * kResampleAllpass[1][0];
        state[0] = tmp0;
        diff = tmp1 - state[2];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[1] + diff * kResampleAllpass[1][1];
        state[1] = tmp1;
        diff = tmp0 - state[3];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[3] = state[2] + diff * kResampleAllpass[1][2];
        state[2] = tmp0;

        // store, still in the shifted/offset domain (no scaling here)
        out[i << 1] = state[3];
    }
}

//
//   interpolator
// input:  int32_t (shifted 15 positions to the left, + offset 16384)
// output: int16_t (saturated) (of length len*2)
// state:  filter state array; length = 8
void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len, int16_t *out,
                               int32_t *state)
{
    int32_t tmp0, tmp1, diff;
    int32_t i;

    // upper allpass filter (generates odd output samples)
    for (i = 0; i < len; i++)
    {
        tmp0 = in[i];
        diff = tmp0 - state[5];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[4] + diff * kResampleAllpass[0][0];
        state[4] = tmp0;
        diff = tmp1 - state[6];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[5] + diff * kResampleAllpass[0][1];
        state[5] = tmp1;
        diff = tmp0 - state[7];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[7] = state[6] + diff * kResampleAllpass[0][2];
        state[6] = tmp0;

        // scale down, saturate and store
        tmp1 = state[7] >> 15;
        if (tmp1 > (int32_t)0x00007FFF)
            tmp1 = 0x00007FFF;
        if (tmp1 < (int32_t)0xFFFF8000)
            tmp1 = 0xFFFF8000;
        out[i << 1] = (int16_t)tmp1;
    }

    out++;

    // lower allpass filter (generates even output samples)
    for (i = 0; i < len; i++)
    {
        tmp0 = in[i];
        diff = tmp0 - state[1];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[0] + diff * kResampleAllpass[1][0];
        state[0] = tmp0;
        diff = tmp1 - state[2];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[1] + diff * kResampleAllpass[1][1];
        state[1] = tmp1;
        diff = tmp0 - state[3];
        // scale down and truncate (toward zero)
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[3] = state[2] + diff * kResampleAllpass[1][2];
        state[2] = tmp0;

        // scale down, saturate and store
        tmp1 = state[3] >> 15;
        if (tmp1 > (int32_t)0x00007FFF)
            tmp1 = 0x00007FFF;
        if (tmp1 < (int32_t)0xFFFF8000)
            tmp1 = 0xFFFF8000;
        out[i << 1] = (int16_t)tmp1;
    }
}

// lowpass filter
// input:  int16_t
// output: int32_t (normalized, not saturated)
// state:  filter state array; length = 8
void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out,
                               int32_t* state)
{
    int32_t tmp0, tmp1, diff;
    int32_t i;

    // each polyphase branch consumes every second input sample
    len >>= 1;

    // NOTE(review): despite the "length = 8" header comment, this routine
    // uses state[0..15] — two three-section allpass cascades per output
    // phase plus two polyphase delay elements.

    // lower allpass filter: odd input -> even output samples
    in++;
    // initial state of polyphase delay element
    tmp0 = state[12];
    for (i = 0; i < len; i++)
    {
        diff = tmp0 - state[1];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[0] + diff * kResampleAllpass[1][0];
        state[0] = tmp0;
        diff = tmp1 - state[2];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[1] + diff * kResampleAllpass[1][1];
        state[1] = tmp1;
        diff = tmp0 - state[3];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[3] = state[2] + diff * kResampleAllpass[1][2];
        state[2] = tmp0;

        // scale down, round and store
        out[i << 1] = state[3] >> 1;
        // fetch next odd input sample: shift to Q15 and add rounding offset
        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
    }
    in--;

    // upper allpass filter: even input -> even output samples
    for (i = 0; i < len; i++)
    {
        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
        diff = tmp0 - state[5];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[4] + diff * kResampleAllpass[0][0];
        state[4] = tmp0;
        diff = tmp1 - state[6];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[5] + diff * kResampleAllpass[0][1];
        state[5] = tmp1;
        diff = tmp0 - state[7];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[7] = state[6] + diff * kResampleAllpass[0][2];
        state[6] = tmp0;

        // average the two allpass outputs, scale down and store
        out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15;
    }

    // switch to odd output samples
    out++;

    // lower allpass filter: even input -> odd output samples
    for (i = 0; i < len; i++)
    {
        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
        diff = tmp0 - state[9];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[8] + diff * kResampleAllpass[1][0];
        state[8] = tmp0;
        diff = tmp1 - state[10];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[9] + diff * kResampleAllpass[1][1];
        state[9] = tmp1;
        diff = tmp0 - state[11];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[11] = state[10] + diff * kResampleAllpass[1][2];
        state[10] = tmp0;

        // scale down, round and store
        out[i << 1] = state[11] >> 1;
    }

    // upper allpass filter: odd input -> odd output samples
    in++;
    for (i = 0; i < len; i++)
    {
        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
        diff = tmp0 - state[13];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[12] + diff * kResampleAllpass[0][0];
        state[12] = tmp0;
        diff = tmp1 - state[14];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[13] + diff * kResampleAllpass[0][1];
        state[13] = tmp1;
        diff = tmp0 - state[15];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[15] = state[14] + diff * kResampleAllpass[0][2];
        state[14] = tmp0;

        // average the two allpass outputs, scale down and store
        out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15;
    }
}

// lowpass filter (half-band, decimating by 2)
// input: int32_t (shifted 15 positions to the left, + offset 16384)
// output: int32_t (normalized, not saturated)
// state: filter
// state array; length = 16 (NOTE(review): the original comment said 8, but
// the function below reads and writes state[0..15])
void WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out,
                             int32_t* state)
{
    int32_t tmp0, tmp1, diff;
    int32_t i;

    // each polyphase branch consumes every second input sample
    len >>= 1;

    // lower allpass filter: odd input -> even output samples
    in++;
    // initial state of polyphase delay element
    tmp0 = state[12];
    for (i = 0; i < len; i++)
    {
        diff = tmp0 - state[1];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[0] + diff * kResampleAllpass[1][0];
        state[0] = tmp0;
        diff = tmp1 - state[2];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[1] + diff * kResampleAllpass[1][1];
        state[1] = tmp1;
        diff = tmp0 - state[3];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[3] = state[2] + diff * kResampleAllpass[1][2];
        state[2] = tmp0;

        // scale down, round and store
        out[i << 1] = state[3] >> 1;
        // input is already in Q15 + offset form, so no shift needed here
        tmp0 = in[i << 1];
    }
    in--;

    // upper allpass filter: even input -> even output samples
    for (i = 0; i < len; i++)
    {
        tmp0 = in[i << 1];
        diff = tmp0 - state[5];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[4] + diff * kResampleAllpass[0][0];
        state[4] = tmp0;
        diff = tmp1 - state[6];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[5] + diff * kResampleAllpass[0][1];
        state[5] = tmp1;
        diff = tmp0 - state[7];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[7] = state[6] + diff * kResampleAllpass[0][2];
        state[6] = tmp0;

        // average the two allpass outputs, scale down and store
        out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15;
    }

    // switch to odd output samples
    out++;

    // lower allpass filter: even input -> odd output samples
    for (i = 0; i < len; i++)
    {
        tmp0 = in[i << 1];
        diff = tmp0 - state[9];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[8] + diff * kResampleAllpass[1][0];
        state[8] = tmp0;
        diff = tmp1 - state[10];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[9] + diff * kResampleAllpass[1][1];
        state[9] = tmp1;
        diff = tmp0 - state[11];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[11] = state[10] + diff * kResampleAllpass[1][2];
        state[10] = tmp0;

        // scale down, round and store
        out[i << 1] = state[11] >> 1;
    }

    // upper allpass filter: odd input -> odd output samples
    in++;
    for (i = 0; i < len; i++)
    {
        tmp0 = in[i << 1];
        diff = tmp0 - state[13];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[12] + diff * kResampleAllpass[0][0];
        state[12] = tmp0;
        diff = tmp1 - state[14];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[13] + diff * kResampleAllpass[0][1];
        state[13] = tmp1;
        diff = tmp0 - state[15];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[15] = state[14] + diff * kResampleAllpass[0][2];
        state[14] = tmp0;

        // average the two allpass outputs, scale down and store
        out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15;
    }
}
--------------------------------------------------------------------------------
/webrtc/common_audio/signal_processing/resample_by_2_internal.h:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2011 The
WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This header file contains some internal resampling functions. 14 | * 15 | */ 16 | 17 | #ifndef WEBRTC_SPL_RESAMPLE_BY_2_INTERNAL_H_ 18 | #define WEBRTC_SPL_RESAMPLE_BY_2_INTERNAL_H_ 19 | 20 | #include "webrtc/typedefs.h" 21 | 22 | /******************************************************************* 23 | * resample_by_2_fast.c 24 | * Functions for internal use in the other resample functions 25 | ******************************************************************/ 26 | void WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out, 27 | int32_t *state); 28 | 29 | void WebRtcSpl_DownBy2ShortToInt(const int16_t *in, int32_t len, 30 | int32_t *out, int32_t *state); 31 | 32 | void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len, 33 | int32_t *out, int32_t *state); 34 | 35 | void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out, 36 | int32_t *state); 37 | 38 | void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len, 39 | int16_t *out, int32_t *state); 40 | 41 | void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, 42 | int32_t* out, int32_t* state); 43 | 44 | void WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out, 45 | int32_t* state); 46 | 47 | #endif // WEBRTC_SPL_RESAMPLE_BY_2_INTERNAL_H_ 48 | -------------------------------------------------------------------------------- /webrtc/common_audio/signal_processing/resample_fractional.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. 
All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


/*
 * This file contains the resampling functions between 48, 44, 32 and 24 kHz.
 * The description headers can be found in signal_processing_library.h
 *
 */

#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"

// interpolation coefficients (Q15 polyphase FIR taps; each row is one
// output phase)
static const int16_t kCoefficients48To32[2][8] = {
        {778, -2050, 1087, 23285, 12903, -3783, 441, 222},
        {222, 441, -3783, 12903, 23285, 1087, -2050, 778}
};

static const int16_t kCoefficients32To24[3][8] = {
        {767, -2362, 2434, 24406, 10620, -3838, 721, 90},
        {386, -381, -2646, 19062, 19062, -2646, -381, 386},
        {90, 721, -3838, 10620, 24406, 2434, -2362, 767}
};

static const int16_t kCoefficients44To32[4][9] = {
        {117, -669, 2245, -6183, 26267, 13529, -3245, 845, -138},
        {-101, 612, -2283, 8532, 29790, -5138, 1789, -524, 91},
        {50, -292, 1016, -3064, 32010, 3933, -1147, 315, -53},
        {-156, 974, -3863, 18603, 21691, -6246, 2353, -712, 126}
};

// Resampling ratio: 2/3
// input:  int32_t (normalized, not saturated) :: size 3 * K
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 2 * K
// K: number of blocks

void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out,
                                    int32_t K)
{
    /////////////////////////////////////////////////////////////
    // Filter operation:
    //
    // Perform resampling (3 input samples -> 2 output samples);
    // process in sub blocks of size 3 samples.
    int32_t tmp;
    int32_t m;

    for (m = 0; m < K; m++)
    {
        // first output phase: FIR over In[0..7] with rounding offset 1<<14
        tmp = 1 << 14;
        tmp += kCoefficients48To32[0][0] * In[0];
        tmp += kCoefficients48To32[0][1] * In[1];
        tmp += kCoefficients48To32[0][2] * In[2];
        tmp += kCoefficients48To32[0][3] * In[3];
        tmp += kCoefficients48To32[0][4] * In[4];
        tmp += kCoefficients48To32[0][5] * In[5];
        tmp += kCoefficients48To32[0][6] * In[6];
        tmp += kCoefficients48To32[0][7] * In[7];
        Out[0] = tmp;

        // second output phase: same taps mirrored, shifted one input sample
        tmp = 1 << 14;
        tmp += kCoefficients48To32[1][0] * In[1];
        tmp += kCoefficients48To32[1][1] * In[2];
        tmp += kCoefficients48To32[1][2] * In[3];
        tmp += kCoefficients48To32[1][3] * In[4];
        tmp += kCoefficients48To32[1][4] * In[5];
        tmp += kCoefficients48To32[1][5] * In[6];
        tmp += kCoefficients48To32[1][6] * In[7];
        tmp += kCoefficients48To32[1][7] * In[8];
        Out[1] = tmp;

        // update pointers
        In += 3;
        Out += 2;
    }
}

// Resampling ratio: 3/4
// input:  int32_t (normalized, not saturated) :: size 4 * K
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 3 * K
// K: number of blocks

void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out,
                                    int32_t K)
{
    /////////////////////////////////////////////////////////////
    // Filter operation:
    //
    // Perform resampling (4 input samples -> 3 output samples);
    // process in sub blocks of size 4 samples.
// (continuation of WebRtcSpl_Resample32khzTo24khz)
    int32_t m;
    int32_t tmp;

    for (m = 0; m < K; m++)
    {
        // output phase 0: FIR over In[0..7] with rounding offset 1<<14
        tmp = 1 << 14;
        tmp += kCoefficients32To24[0][0] * In[0];
        tmp += kCoefficients32To24[0][1] * In[1];
        tmp += kCoefficients32To24[0][2] * In[2];
        tmp += kCoefficients32To24[0][3] * In[3];
        tmp += kCoefficients32To24[0][4] * In[4];
        tmp += kCoefficients32To24[0][5] * In[5];
        tmp += kCoefficients32To24[0][6] * In[6];
        tmp += kCoefficients32To24[0][7] * In[7];
        Out[0] = tmp;

        // output phase 1: symmetric taps over In[1..8]
        tmp = 1 << 14;
        tmp += kCoefficients32To24[1][0] * In[1];
        tmp += kCoefficients32To24[1][1] * In[2];
        tmp += kCoefficients32To24[1][2] * In[3];
        tmp += kCoefficients32To24[1][3] * In[4];
        tmp += kCoefficients32To24[1][4] * In[5];
        tmp += kCoefficients32To24[1][5] * In[6];
        tmp += kCoefficients32To24[1][6] * In[7];
        tmp += kCoefficients32To24[1][7] * In[8];
        Out[1] = tmp;

        // output phase 2: mirrored taps over In[2..9]
        tmp = 1 << 14;
        tmp += kCoefficients32To24[2][0] * In[2];
        tmp += kCoefficients32To24[2][1] * In[3];
        tmp += kCoefficients32To24[2][2] * In[4];
        tmp += kCoefficients32To24[2][3] * In[5];
        tmp += kCoefficients32To24[2][4] * In[6];
        tmp += kCoefficients32To24[2][5] * In[7];
        tmp += kCoefficients32To24[2][6] * In[8];
        tmp += kCoefficients32To24[2][7] * In[9];
        Out[2] = tmp;

        // update pointers
        In += 4;
        Out += 3;
    }
}

//
// fractional resampling filters
//   Fout = 11/16 * Fin
//   Fout = 8/11 * Fin
//

// Compute two 9-tap inner products at once and store them to the output
// array: one walking forward from in1, one walking backward from in2
// (exploits the mirror symmetry of the coefficient rows).  Both results
// carry the rounding offset 16384 (1 << 14).
static void WebRtcSpl_ResampDotProduct(const int32_t *in1, const int32_t *in2,
                                       const int16_t *coef_ptr, int32_t *out1,
                                       int32_t *out2)
{
    int32_t tmp1 = 16384;
    int32_t tmp2 = 16384;
    int16_t coef;

    coef = coef_ptr[0];
    tmp1 += coef * in1[0];
    tmp2 += coef * in2[-0];  // in2[-0] == in2[0]; kept for visual symmetry

    coef = coef_ptr[1];
    tmp1 += coef * in1[1];
    tmp2 += coef * in2[-1];

    coef = coef_ptr[2];
    tmp1 += coef * in1[2];
    tmp2 += coef * in2[-2];

    coef = coef_ptr[3];
    tmp1 += coef * in1[3];
    tmp2 += coef * in2[-3];

    coef = coef_ptr[4];
    tmp1 += coef * in1[4];
    tmp2 += coef * in2[-4];

    coef = coef_ptr[5];
    tmp1 += coef * in1[5];
    tmp2 += coef * in2[-5];

    coef = coef_ptr[6];
    tmp1 += coef * in1[6];
    tmp2 += coef * in2[-6];

    coef = coef_ptr[7];
    tmp1 += coef * in1[7];
    tmp2 += coef * in2[-7];

    coef = coef_ptr[8];
    *out1 = tmp1 + coef * in1[8];
    *out2 = tmp2 + coef * in2[-8];
}

// Resampling ratio: 8/11
// input:  int32_t (normalized, not saturated) :: size 11 * K
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 8 * K
// K: number of blocks

void WebRtcSpl_Resample44khzTo32khz(const int32_t *In, int32_t *Out,
                                    int32_t K)
{
    /////////////////////////////////////////////////////////////
    // Filter operation:
    //
    // Perform resampling (11 input samples -> 8 output samples);
    // process in sub blocks of size 11 samples.
    int32_t tmp;
    int32_t m;

    for (m = 0; m < K; m++)
    {
        tmp = 1 << 14;

        // first output sample coincides with input sample 3 (pass-through,
        // shifted to the Q15 + offset output format)
        Out[0] = ((int32_t)In[3] << 15) + tmp;

        // middle output sample: direct 9-tap FIR (coefficient row 3 has no
        // mirror partner)
        tmp += kCoefficients44To32[3][0] * In[5];
        tmp += kCoefficients44To32[3][1] * In[6];
        tmp += kCoefficients44To32[3][2] * In[7];
        tmp += kCoefficients44To32[3][3] * In[8];
        tmp += kCoefficients44To32[3][4] * In[9];
        tmp += kCoefficients44To32[3][5] * In[10];
        tmp += kCoefficients44To32[3][6] * In[11];
        tmp += kCoefficients44To32[3][7] * In[12];
        tmp += kCoefficients44To32[3][8] * In[13];
        Out[4] = tmp;

        // remaining samples come in mirrored pairs; each call produces two
        // outputs from one coefficient row
        WebRtcSpl_ResampDotProduct(&In[0], &In[17], kCoefficients44To32[0], &Out[1], &Out[7]);

        WebRtcSpl_ResampDotProduct(&In[2], &In[15], kCoefficients44To32[1], &Out[2], &Out[6]);

        WebRtcSpl_ResampDotProduct(&In[3], &In[14], kCoefficients44To32[2], &Out[3], &Out[5]);

        // update pointers
        In += 11;
        Out += 8;
    }
}
--------------------------------------------------------------------------------
/webrtc/common_audio/signal_processing/spl_init.c:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
9 | */ 10 | 11 | /* The global function contained in this file initializes SPL function 12 | * pointers, currently only for ARM platforms. 13 | * 14 | * Some code came from common/rtcd.c in the WebM project. 15 | */ 16 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 17 | #include "webrtc/system_wrappers/interface/cpu_features_wrapper.h" 18 | 19 | /* Declare function pointers. */ 20 | MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16; 21 | MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32; 22 | MaxValueW16 WebRtcSpl_MaxValueW16; 23 | MaxValueW32 WebRtcSpl_MaxValueW32; 24 | MinValueW16 WebRtcSpl_MinValueW16; 25 | MinValueW32 WebRtcSpl_MinValueW32; 26 | CrossCorrelation WebRtcSpl_CrossCorrelation; 27 | DownsampleFast WebRtcSpl_DownsampleFast; 28 | ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound; 29 | 30 | #if (defined(WEBRTC_DETECT_ARM_NEON) || !defined(WEBRTC_ARCH_ARM_NEON)) && \ 31 | !defined(MIPS32_LE) 32 | /* Initialize function pointers to the generic C version. */ 33 | static void InitPointersToC() { 34 | WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C; 35 | WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C; 36 | WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C; 37 | WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C; 38 | WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C; 39 | WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C; 40 | WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC; 41 | WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC; 42 | WebRtcSpl_ScaleAndAddVectorsWithRound = 43 | WebRtcSpl_ScaleAndAddVectorsWithRoundC; 44 | } 45 | #endif 46 | 47 | #if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON) 48 | /* Initialize function pointers to the Neon version. 
*/ 49 | static void InitPointersToNeon() { 50 | WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon; 51 | WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon; 52 | WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon; 53 | WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon; 54 | WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon; 55 | WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon; 56 | WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationNeon; 57 | WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon; 58 | /* TODO(henrik.lundin): re-enable NEON when the crash from bug 3243 is 59 | understood. */ 60 | WebRtcSpl_ScaleAndAddVectorsWithRound = 61 | WebRtcSpl_ScaleAndAddVectorsWithRoundC; 62 | } 63 | #endif 64 | 65 | #if defined(MIPS32_LE) 66 | /* Initialize function pointers to the MIPS version. */ 67 | static void InitPointersToMIPS() { 68 | WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips; 69 | WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips; 70 | WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips; 71 | WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips; 72 | WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips; 73 | WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelation_mips; 74 | WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips; 75 | #if defined(MIPS_DSP_R1_LE) 76 | WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips; 77 | WebRtcSpl_ScaleAndAddVectorsWithRound = 78 | WebRtcSpl_ScaleAndAddVectorsWithRound_mips; 79 | #else 80 | WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C; 81 | WebRtcSpl_ScaleAndAddVectorsWithRound = 82 | WebRtcSpl_ScaleAndAddVectorsWithRoundC; 83 | #endif 84 | } 85 | #endif 86 | 87 | static void InitFunctionPointers(void) { 88 | #if defined(WEBRTC_DETECT_ARM_NEON) 89 | if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) { 90 | InitPointersToNeon(); 91 | } else { 92 | InitPointersToC(); 93 | } 94 | #elif defined(WEBRTC_ARCH_ARM_NEON) 95 | InitPointersToNeon(); 96 | #elif defined(MIPS32_LE) 97 | 
InitPointersToMIPS(); 98 | #else 99 | InitPointersToC(); 100 | #endif /* WEBRTC_DETECT_ARM_NEON */ 101 | } 102 | 103 | #define WEBRTC_POSIX 104 | 105 | #if defined(WEBRTC_POSIX) 106 | #include 107 | 108 | static void once(void (*func)(void)) { 109 | static pthread_once_t lock = PTHREAD_ONCE_INIT; 110 | pthread_once(&lock, func); 111 | } 112 | 113 | #elif defined(_WIN32) 114 | #include 115 | 116 | static void once(void (*func)(void)) { 117 | /* Didn't use InitializeCriticalSection() since there's no race-free context 118 | * in which to execute it. 119 | * 120 | * TODO(kma): Change to different implementation (e.g. 121 | * InterlockedCompareExchangePointer) to avoid issues similar to 122 | * http://code.google.com/p/webm/issues/detail?id=467. 123 | */ 124 | static CRITICAL_SECTION lock = {(void *)((size_t)-1), -1, 0, 0, 0, 0}; 125 | static int done = 0; 126 | 127 | EnterCriticalSection(&lock); 128 | if (!done) { 129 | func(); 130 | done = 1; 131 | } 132 | LeaveCriticalSection(&lock); 133 | } 134 | 135 | /* There's no fallback version as an #else block here to ensure thread safety. 136 | * In case of neither pthread for WEBRTC_POSIX nor _WIN32 is present, build 137 | * system should pick it up. 138 | */ 139 | #endif /* WEBRTC_POSIX */ 140 | 141 | 142 | void WebRtcSpl_Init() { 143 | once(InitFunctionPointers); 144 | } 145 | -------------------------------------------------------------------------------- /webrtc/common_audio/signal_processing/vector_scaling_operations.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
 */


/*
 * This file contains implementations of the functions
 * WebRtcSpl_VectorBitShiftW16()
 * WebRtcSpl_VectorBitShiftW32()
 * WebRtcSpl_VectorBitShiftW32ToW16()
 * WebRtcSpl_ScaleVector()
 * WebRtcSpl_ScaleVectorWithSat()
 * WebRtcSpl_ScaleAndAddVectors()
 * WebRtcSpl_ScaleAndAddVectorsWithRoundC()
 */

#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"

// Shifts each element of |in| by |right_shifts| (negative = left shift)
// and writes the result to |res|.  No saturation is applied.
// NOTE(review): left-shifting a negative int16_t is undefined behavior in
// standard C; callers are presumably expected to pass non-negative values
// when right_shifts < 0 — confirm before relying on it.
void WebRtcSpl_VectorBitShiftW16(int16_t *res, int16_t length,
                                 const int16_t *in, int16_t right_shifts)
{
    int i;

    if (right_shifts > 0)
    {
        for (i = length; i > 0; i--)
        {
            (*res++) = ((*in++) >> right_shifts);
        }
    } else
    {
        for (i = length; i > 0; i--)
        {
            (*res++) = ((*in++) << (-right_shifts));
        }
    }
}

// 32-bit variant of the element-wise shift above; same sign convention,
// same no-saturation / negative-left-shift caveat.
void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector,
                                 int16_t vector_length,
                                 const int32_t *in_vector,
                                 int16_t right_shifts)
{
    int i;

    if (right_shifts > 0)
    {
        for (i = vector_length; i > 0; i--)
        {
            (*out_vector++) = ((*in_vector++) >> right_shifts);
        }
    } else
    {
        for (i = vector_length; i > 0; i--)
        {
            (*out_vector++) = ((*in_vector++) << (-right_shifts));
        }
    }
}

// Shifts each 32-bit element and saturates the result into the int16_t
// output via WebRtcSpl_SatW32ToW16().
void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, int length,
                                      const int32_t* in, int right_shifts) {
  int i;
  int32_t tmp_w32;

  if (right_shifts >= 0) {
    for (i = length; i > 0; i--) {
      tmp_w32 = (*in++) >> right_shifts;
      (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32);
    }
  } else {
    int left_shifts = -right_shifts;
    for (i = length; i > 0; i--) {
      tmp_w32 = (*in++) << left_shifts;
      (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32);
    }
  }
}

void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector,
                           int16_t gain, int16_t in_vector_length,
                           int16_t right_shifts)
{
    // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts
    // (truncating; see WebRtcSpl_ScaleVectorWithSat for the saturating form)
    int i;
    const int16_t *inptr;
    int16_t *outptr;

    inptr = in_vector;
    outptr = out_vector;

    for (i = 0; i < in_vector_length; i++)
    {
        (*outptr++) = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(*inptr++, gain, right_shifts);
    }
}

void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector,
                                  int16_t gain, int16_t in_vector_length,
                                  int16_t right_shifts)
{
    // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts
    // with saturation of the 32-bit intermediate into int16_t range.
    int i;
    int32_t tmpW32;
    const int16_t *inptr;
    int16_t *outptr;

    inptr = in_vector;
    outptr = out_vector;

    for (i = 0; i < in_vector_length; i++)
    {
        tmpW32 = WEBRTC_SPL_MUL_16_16_RSFT(*inptr++, gain, right_shifts);
        (*outptr++) = WebRtcSpl_SatW32ToW16(tmpW32);
    }
}

void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1,
                                  const int16_t *in2, int16_t gain2, int shift2,
                                  int16_t *out, int vector_length)
{
    // Performs vector operation: out = (gain1*in1)>>shift1 + (gain2*in2)>>shift2
    // (each term is truncated to int16_t before the addition; no saturation)
    int i;
    const int16_t *in1ptr;
    const int16_t *in2ptr;
    int16_t *outptr;

    in1ptr = in1;
    in2ptr = in2;
    outptr = out;

    for (i = 0; i < vector_length; i++)
    {
        (*outptr++) = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(gain1, *in1ptr++, shift1)
                + (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(gain2, *in2ptr++, shift2);
    }
}

// C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms.
// Computes out[i] = (in1[i]*scale1 + in2[i]*scale2 + round) >> right_shifts
// with round-to-nearest; returns 0 on success, -1 on bad arguments.
int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
                                           int16_t in_vector1_scale,
                                           const int16_t* in_vector2,
                                           int16_t in_vector2_scale,
                                           int right_shifts,
                                           int16_t* out_vector,
                                           int length) {
  int i = 0;
  // half of the divisor, used for round-to-nearest; 0 when right_shifts == 0
  int round_value = (1 << right_shifts) >> 1;

  if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
      length <= 0 || right_shifts < 0) {
    return -1;
  }

  for (i = 0; i < length; i++) {
    out_vector[i] = (int16_t)((
        WEBRTC_SPL_MUL_16_16(in_vector1[i], in_vector1_scale)
        + WEBRTC_SPL_MUL_16_16(in_vector2[i], in_vector2_scale)
        + round_value) >> right_shifts);
  }

  return 0;
}
--------------------------------------------------------------------------------
/webrtc/common_audio/vad/include/webrtc_vad.h:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


/*
 * This header file includes the VAD API calls. Specific function calls are given below.
 */

#ifndef WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
#define WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_

#include "webrtc/typedefs.h"

typedef struct WebRtcVadInst VadInst;

#ifdef __cplusplus
extern "C" {
#endif

// Creates an instance to the VAD structure.
//
// - handle [o] : Pointer to the VAD instance that should be created.
//
// returns      : 0 - (OK), -1 - (Error)
int WebRtcVad_Create(VadInst** handle);

// Frees the dynamic memory of a specified VAD instance.
//
// - handle [i] : Pointer to VAD instance that should be freed.
void WebRtcVad_Free(VadInst* handle);

// Initializes a VAD instance.
//
// - handle [i/o] : Instance that should be initialized.
//
// returns        : 0 - (OK),
//                  -1 - (NULL pointer or Default mode could not be set).
int WebRtcVad_Init(VadInst* handle);

// Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
// restrictive in reporting speech. Put in other words the probability of being
// speech when the VAD returns 1 is increased with increasing mode. As a
// consequence also the missed detection rate goes up.
//
// - handle [i/o] : VAD instance.
// - mode   [i]   : Aggressiveness mode (0, 1, 2, or 3).
//
// returns        : 0 - (OK),
//                  -1 - (NULL pointer, mode could not be set or the VAD
//                       instance has not been initialized).
int WebRtcVad_set_mode(VadInst* handle, int mode);

// Calculates a VAD decision for the |audio_frame|. For valid sampling rates
// and frame lengths, see the description of
// WebRtcVad_ValidRatesAndFrameLengths().
//
// - handle       [i/o] : VAD Instance. Needs to be initialized by
//                        WebRtcVad_Init() before call.
// - fs           [i]   : Sampling frequency (Hz): 8000, 16000, or 32000
// - audio_frame  [i]   : Audio frame buffer.
// - frame_length [i]   : Length of audio frame buffer in number of samples.
//
// returns              : 1 - (Active Voice),
//                        0 - (Non-active Voice),
//                       -1 - (Error)
int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame,
                      int frame_length);

// Checks for valid combinations of |rate| and |frame_length|. We support 10,
// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz.
//
// - rate         [i] : Sampling frequency (Hz).
// - frame_length [i] : Speech frame buffer length in number of samples.
//
// returns            : 0 - (valid combination), -1 - (invalid combination)
int WebRtcVad_ValidRateAndFrameLength(int rate, int frame_length);

#ifdef __cplusplus
}
#endif

#endif  // WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
--------------------------------------------------------------------------------
/webrtc/common_audio/vad/vad_core.c:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
9 | */ 10 | 11 | #include "webrtc/common_audio/vad/vad_core.h" 12 | 13 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 14 | #include "webrtc/common_audio/vad/vad_filterbank.h" 15 | #include "webrtc/common_audio/vad/vad_gmm.h" 16 | #include "webrtc/common_audio/vad/vad_sp.h" 17 | #include "webrtc/typedefs.h" 18 | 19 | // Spectrum Weighting 20 | static const int16_t kSpectrumWeight[kNumChannels] = { 6, 8, 10, 12, 14, 16 }; 21 | static const int16_t kNoiseUpdateConst = 655; // Q15 22 | static const int16_t kSpeechUpdateConst = 6554; // Q15 23 | static const int16_t kBackEta = 154; // Q8 24 | // Minimum difference between the two models, Q5 25 | static const int16_t kMinimumDifference[kNumChannels] = { 26 | 544, 544, 576, 576, 576, 576 }; 27 | // Upper limit of mean value for speech model, Q7 28 | static const int16_t kMaximumSpeech[kNumChannels] = { 29 | 11392, 11392, 11520, 11520, 11520, 11520 }; 30 | // Minimum value for mean value 31 | static const int16_t kMinimumMean[kNumGaussians] = { 640, 768 }; 32 | // Upper limit of mean value for noise model, Q7 33 | static const int16_t kMaximumNoise[kNumChannels] = { 34 | 9216, 9088, 8960, 8832, 8704, 8576 }; 35 | // Start values for the Gaussian models, Q7 36 | // Weights for the two Gaussians for the six channels (noise) 37 | static const int16_t kNoiseDataWeights[kTableSize] = { 38 | 34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103 }; 39 | // Weights for the two Gaussians for the six channels (speech) 40 | static const int16_t kSpeechDataWeights[kTableSize] = { 41 | 48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81 }; 42 | // Means for the two Gaussians for the six channels (noise) 43 | static const int16_t kNoiseDataMeans[kTableSize] = { 44 | 6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863, 7820, 7266, 5020, 4362 }; 45 | // Means for the two Gaussians for the six channels (speech) 46 | static const int16_t kSpeechDataMeans[kTableSize] = { 47 | 8306, 10085, 10078, 11823, 11843, 6309, 
9473, 9571, 10879, 7581, 8180, 7483 48 | }; 49 | // Stds for the two Gaussians for the six channels (noise) 50 | static const int16_t kNoiseDataStds[kTableSize] = { 51 | 378, 1064, 493, 582, 688, 593, 474, 697, 475, 688, 421, 455 }; 52 | // Stds for the two Gaussians for the six channels (speech) 53 | static const int16_t kSpeechDataStds[kTableSize] = { 54 | 555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540, 1079, 850 }; 55 | 56 | // Constants used in GmmProbability(). 57 | // 58 | // Maximum number of counted speech (VAD = 1) frames in a row. 59 | static const int16_t kMaxSpeechFrames = 6; 60 | // Minimum standard deviation for both speech and noise. 61 | static const int16_t kMinStd = 384; 62 | 63 | // Constants in WebRtcVad_InitCore(). 64 | // Default aggressiveness mode. 65 | static const short kDefaultMode = 0; 66 | static const int kInitCheck = 42; 67 | 68 | // Constants used in WebRtcVad_set_mode_core(). 69 | // 70 | // Thresholds for different frame lengths (10 ms, 20 ms and 30 ms). 71 | // 72 | // Mode 0, Quality. 73 | static const int16_t kOverHangMax1Q[3] = { 8, 4, 3 }; 74 | static const int16_t kOverHangMax2Q[3] = { 14, 7, 5 }; 75 | static const int16_t kLocalThresholdQ[3] = { 24, 21, 24 }; 76 | static const int16_t kGlobalThresholdQ[3] = { 57, 48, 57 }; 77 | // Mode 1, Low bitrate. 78 | static const int16_t kOverHangMax1LBR[3] = { 8, 4, 3 }; 79 | static const int16_t kOverHangMax2LBR[3] = { 14, 7, 5 }; 80 | static const int16_t kLocalThresholdLBR[3] = { 37, 32, 37 }; 81 | static const int16_t kGlobalThresholdLBR[3] = { 100, 80, 100 }; 82 | // Mode 2, Aggressive. 83 | static const int16_t kOverHangMax1AGG[3] = { 6, 3, 2 }; 84 | static const int16_t kOverHangMax2AGG[3] = { 9, 5, 3 }; 85 | static const int16_t kLocalThresholdAGG[3] = { 82, 78, 82 }; 86 | static const int16_t kGlobalThresholdAGG[3] = { 285, 260, 285 }; 87 | // Mode 3, Very aggressive. 
88 | static const int16_t kOverHangMax1VAG[3] = { 6, 3, 2 }; 89 | static const int16_t kOverHangMax2VAG[3] = { 9, 5, 3 }; 90 | static const int16_t kLocalThresholdVAG[3] = { 94, 94, 94 }; 91 | static const int16_t kGlobalThresholdVAG[3] = { 1100, 1050, 1100 }; 92 | 93 | // Calculates the weighted average w.r.t. number of Gaussians. The |data| are 94 | // updated with an |offset| before averaging. 95 | // 96 | // - data [i/o] : Data to average. 97 | // - offset [i] : An offset added to |data|. 98 | // - weights [i] : Weights used for averaging. 99 | // 100 | // returns : The weighted average. 101 | static int32_t WeightedAverage(int16_t* data, int16_t offset, 102 | const int16_t* weights) { 103 | int k; 104 | int32_t weighted_average = 0; 105 | 106 | for (k = 0; k < kNumGaussians; k++) { 107 | data[k * kNumChannels] += offset; 108 | weighted_average += data[k * kNumChannels] * weights[k * kNumChannels]; 109 | } 110 | return weighted_average; 111 | } 112 | 113 | // Calculates the probabilities for both speech and background noise using 114 | // Gaussian Mixture Models (GMM). A hypothesis-test is performed to decide which 115 | // type of signal is most probable. 116 | // 117 | // - self [i/o] : Pointer to VAD instance 118 | // - features [i] : Feature vector of length |kNumChannels| 119 | // = log10(energy in frequency band) 120 | // - total_power [i] : Total power in audio frame. 121 | // - frame_length [i] : Number of input samples 122 | // 123 | // - returns : the VAD decision (0 - noise, 1 - speech). 
124 | static int16_t GmmProbability(VadInstT* self, int16_t* features, 125 | int16_t total_power, int frame_length) { 126 | int channel, k; 127 | int16_t feature_minimum; 128 | int16_t h0, h1; 129 | int16_t log_likelihood_ratio; 130 | int16_t vadflag = 0; 131 | int16_t shifts_h0, shifts_h1; 132 | int16_t tmp_s16, tmp1_s16, tmp2_s16; 133 | int16_t diff; 134 | int gaussian; 135 | int16_t nmk, nmk2, nmk3, smk, smk2, nsk, ssk; 136 | int16_t delt, ndelt; 137 | int16_t maxspe, maxmu; 138 | int16_t deltaN[kTableSize], deltaS[kTableSize]; 139 | int16_t ngprvec[kTableSize] = { 0 }; // Conditional probability = 0. 140 | int16_t sgprvec[kTableSize] = { 0 }; // Conditional probability = 0. 141 | int32_t h0_test, h1_test; 142 | int32_t tmp1_s32, tmp2_s32; 143 | int32_t sum_log_likelihood_ratios = 0; 144 | int32_t noise_global_mean, speech_global_mean; 145 | int32_t noise_probability[kNumGaussians], speech_probability[kNumGaussians]; 146 | int16_t overhead1, overhead2, individualTest, totalTest; 147 | 148 | // Set various thresholds based on frame lengths (80, 160 or 240 samples). 149 | if (frame_length == 80) { 150 | overhead1 = self->over_hang_max_1[0]; 151 | overhead2 = self->over_hang_max_2[0]; 152 | individualTest = self->individual[0]; 153 | totalTest = self->total[0]; 154 | } else if (frame_length == 160) { 155 | overhead1 = self->over_hang_max_1[1]; 156 | overhead2 = self->over_hang_max_2[1]; 157 | individualTest = self->individual[1]; 158 | totalTest = self->total[1]; 159 | } else { 160 | overhead1 = self->over_hang_max_1[2]; 161 | overhead2 = self->over_hang_max_2[2]; 162 | individualTest = self->individual[2]; 163 | totalTest = self->total[2]; 164 | } 165 | 166 | if (total_power > kMinEnergy) { 167 | // The signal power of current frame is large enough for processing. The 168 | // processing consists of two parts: 169 | // 1) Calculating the likelihood of speech and thereby a VAD decision. 170 | // 2) Updating the underlying model, w.r.t., the decision made. 
171 | 172 | // The detection scheme is an LRT with hypothesis 173 | // H0: Noise 174 | // H1: Speech 175 | // 176 | // We combine a global LRT with local tests, for each frequency sub-band, 177 | // here defined as |channel|. 178 | for (channel = 0; channel < kNumChannels; channel++) { 179 | // For each channel we model the probability with a GMM consisting of 180 | // |kNumGaussians|, with different means and standard deviations depending 181 | // on H0 or H1. 182 | h0_test = 0; 183 | h1_test = 0; 184 | for (k = 0; k < kNumGaussians; k++) { 185 | gaussian = channel + k * kNumChannels; 186 | // Probability under H0, that is, probability of frame being noise. 187 | // Value given in Q27 = Q7 * Q20. 188 | tmp1_s32 = WebRtcVad_GaussianProbability(features[channel], 189 | self->noise_means[gaussian], 190 | self->noise_stds[gaussian], 191 | &deltaN[gaussian]); 192 | noise_probability[k] = kNoiseDataWeights[gaussian] * tmp1_s32; 193 | h0_test += noise_probability[k]; // Q27 194 | 195 | // Probability under H1, that is, probability of frame being speech. 196 | // Value given in Q27 = Q7 * Q20. 197 | tmp1_s32 = WebRtcVad_GaussianProbability(features[channel], 198 | self->speech_means[gaussian], 199 | self->speech_stds[gaussian], 200 | &deltaS[gaussian]); 201 | speech_probability[k] = kSpeechDataWeights[gaussian] * tmp1_s32; 202 | h1_test += speech_probability[k]; // Q27 203 | } 204 | 205 | // Calculate the log likelihood ratio: log2(Pr{X|H1} / Pr{X|H1}). 206 | // Approximation: 207 | // log2(Pr{X|H1} / Pr{X|H1}) = log2(Pr{X|H1}*2^Q) - log2(Pr{X|H1}*2^Q) 208 | // = log2(h1_test) - log2(h0_test) 209 | // = log2(2^(31-shifts_h1)*(1+b1)) 210 | // - log2(2^(31-shifts_h0)*(1+b0)) 211 | // = shifts_h0 - shifts_h1 212 | // + log2(1+b1) - log2(1+b0) 213 | // ~= shifts_h0 - shifts_h1 214 | // 215 | // Note that b0 and b1 are values less than 1, hence, 0 <= log2(1+b0) < 1. 216 | // Further, b0 and b1 are independent and on the average the two terms 217 | // cancel. 
218 | shifts_h0 = WebRtcSpl_NormW32(h0_test); 219 | shifts_h1 = WebRtcSpl_NormW32(h1_test); 220 | if (h0_test == 0) { 221 | shifts_h0 = 31; 222 | } 223 | if (h1_test == 0) { 224 | shifts_h1 = 31; 225 | } 226 | log_likelihood_ratio = shifts_h0 - shifts_h1; 227 | 228 | // Update |sum_log_likelihood_ratios| with spectrum weighting. This is 229 | // used for the global VAD decision. 230 | sum_log_likelihood_ratios += 231 | (int32_t) (log_likelihood_ratio * kSpectrumWeight[channel]); 232 | 233 | // Local VAD decision. 234 | if ((log_likelihood_ratio << 2) > individualTest) { 235 | vadflag = 1; 236 | } 237 | 238 | // TODO(bjornv): The conditional probabilities below are applied on the 239 | // hard coded number of Gaussians set to two. Find a way to generalize. 240 | // Calculate local noise probabilities used later when updating the GMM. 241 | h0 = (int16_t) (h0_test >> 12); // Q15 242 | if (h0 > 0) { 243 | // High probability of noise. Assign conditional probabilities for each 244 | // Gaussian in the GMM. 245 | tmp1_s32 = (noise_probability[0] & 0xFFFFF000) << 2; // Q29 246 | ngprvec[channel] = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, h0); // Q14 247 | ngprvec[channel + kNumChannels] = 16384 - ngprvec[channel]; 248 | } else { 249 | // Low noise probability. Assign conditional probability 1 to the first 250 | // Gaussian and 0 to the rest (which is already set at initialization). 251 | ngprvec[channel] = 16384; 252 | } 253 | 254 | // Calculate local speech probabilities used later when updating the GMM. 255 | h1 = (int16_t) (h1_test >> 12); // Q15 256 | if (h1 > 0) { 257 | // High probability of speech. Assign conditional probabilities for each 258 | // Gaussian in the GMM. Otherwise use the initialized values, i.e., 0. 
259 | tmp1_s32 = (speech_probability[0] & 0xFFFFF000) << 2; // Q29 260 | sgprvec[channel] = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, h1); // Q14 261 | sgprvec[channel + kNumChannels] = 16384 - sgprvec[channel]; 262 | } 263 | } 264 | 265 | // Make a global VAD decision. 266 | vadflag |= (sum_log_likelihood_ratios >= totalTest); 267 | 268 | // Update the model parameters. 269 | maxspe = 12800; 270 | for (channel = 0; channel < kNumChannels; channel++) { 271 | 272 | // Get minimum value in past which is used for long term correction in Q4. 273 | feature_minimum = WebRtcVad_FindMinimum(self, features[channel], channel); 274 | 275 | // Compute the "global" mean, that is the sum of the two means weighted. 276 | noise_global_mean = WeightedAverage(&self->noise_means[channel], 0, 277 | &kNoiseDataWeights[channel]); 278 | tmp1_s16 = (int16_t) (noise_global_mean >> 6); // Q8 279 | 280 | for (k = 0; k < kNumGaussians; k++) { 281 | gaussian = channel + k * kNumChannels; 282 | 283 | nmk = self->noise_means[gaussian]; 284 | smk = self->speech_means[gaussian]; 285 | nsk = self->noise_stds[gaussian]; 286 | ssk = self->speech_stds[gaussian]; 287 | 288 | // Update noise mean vector if the frame consists of noise only. 289 | nmk2 = nmk; 290 | if (!vadflag) { 291 | // deltaN = (x-mu)/sigma^2 292 | // ngprvec[k] = |noise_probability[k]| / 293 | // (|noise_probability[0]| + |noise_probability[1]|) 294 | 295 | // (Q14 * Q11 >> 11) = Q14. 296 | delt = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(ngprvec[gaussian], 297 | deltaN[gaussian], 298 | 11); 299 | // Q7 + (Q14 * Q15 >> 22) = Q7. 300 | nmk2 = nmk + (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(delt, 301 | kNoiseUpdateConst, 302 | 22); 303 | } 304 | 305 | // Long term correction of the noise mean. 306 | // Q8 - Q8 = Q8. 307 | ndelt = (feature_minimum << 4) - tmp1_s16; 308 | // Q7 + (Q8 * Q8) >> 9 = Q7. 309 | nmk3 = nmk2 + (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(ndelt, kBackEta, 9); 310 | 311 | // Control that the noise mean does not drift to much. 
312 | tmp_s16 = (int16_t) ((k + 5) << 7); 313 | if (nmk3 < tmp_s16) { 314 | nmk3 = tmp_s16; 315 | } 316 | tmp_s16 = (int16_t) ((72 + k - channel) << 7); 317 | if (nmk3 > tmp_s16) { 318 | nmk3 = tmp_s16; 319 | } 320 | self->noise_means[gaussian] = nmk3; 321 | 322 | if (vadflag) { 323 | // Update speech mean vector: 324 | // |deltaS| = (x-mu)/sigma^2 325 | // sgprvec[k] = |speech_probability[k]| / 326 | // (|speech_probability[0]| + |speech_probability[1]|) 327 | 328 | // (Q14 * Q11) >> 11 = Q14. 329 | delt = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(sgprvec[gaussian], 330 | deltaS[gaussian], 331 | 11); 332 | // Q14 * Q15 >> 21 = Q8. 333 | tmp_s16 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(delt, 334 | kSpeechUpdateConst, 335 | 21); 336 | // Q7 + (Q8 >> 1) = Q7. With rounding. 337 | smk2 = smk + ((tmp_s16 + 1) >> 1); 338 | 339 | // Control that the speech mean does not drift to much. 340 | maxmu = maxspe + 640; 341 | if (smk2 < kMinimumMean[k]) { 342 | smk2 = kMinimumMean[k]; 343 | } 344 | if (smk2 > maxmu) { 345 | smk2 = maxmu; 346 | } 347 | self->speech_means[gaussian] = smk2; // Q7. 348 | 349 | // (Q7 >> 3) = Q4. With rounding. 350 | tmp_s16 = ((smk + 4) >> 3); 351 | 352 | tmp_s16 = features[channel] - tmp_s16; // Q4 353 | // (Q11 * Q4 >> 3) = Q12. 354 | tmp1_s32 = WEBRTC_SPL_MUL_16_16_RSFT(deltaS[gaussian], tmp_s16, 3); 355 | tmp2_s32 = tmp1_s32 - 4096; 356 | tmp_s16 = sgprvec[gaussian] >> 2; 357 | // (Q14 >> 2) * Q12 = Q24. 358 | tmp1_s32 = tmp_s16 * tmp2_s32; 359 | 360 | tmp2_s32 = tmp1_s32 >> 4; // Q20 361 | 362 | // 0.1 * Q20 / Q7 = Q13. 363 | if (tmp2_s32 > 0) { 364 | tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(tmp2_s32, ssk * 10); 365 | } else { 366 | tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(-tmp2_s32, ssk * 10); 367 | tmp_s16 = -tmp_s16; 368 | } 369 | // Divide by 4 giving an update factor of 0.025 (= 0.1 / 4). 370 | // Note that division by 4 equals shift by 2, hence, 371 | // (Q13 >> 8) = (Q13 >> 6) / 4 = Q7. 372 | tmp_s16 += 128; // Rounding. 
373 | ssk += (tmp_s16 >> 8); 374 | if (ssk < kMinStd) { 375 | ssk = kMinStd; 376 | } 377 | self->speech_stds[gaussian] = ssk; 378 | } else { 379 | // Update GMM variance vectors. 380 | // deltaN * (features[channel] - nmk) - 1 381 | // Q4 - (Q7 >> 3) = Q4. 382 | tmp_s16 = features[channel] - (nmk >> 3); 383 | // (Q11 * Q4 >> 3) = Q12. 384 | tmp1_s32 = WEBRTC_SPL_MUL_16_16_RSFT(deltaN[gaussian], tmp_s16, 3); 385 | tmp1_s32 -= 4096; 386 | 387 | // (Q14 >> 2) * Q12 = Q24. 388 | tmp_s16 = (ngprvec[gaussian] + 2) >> 2; 389 | tmp2_s32 = tmp_s16 * tmp1_s32; 390 | // Q20 * approx 0.001 (2^-10=0.0009766), hence, 391 | // (Q24 >> 14) = (Q24 >> 4) / 2^10 = Q20. 392 | tmp1_s32 = tmp2_s32 >> 14; 393 | 394 | // Q20 / Q7 = Q13. 395 | if (tmp1_s32 > 0) { 396 | tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, nsk); 397 | } else { 398 | tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(-tmp1_s32, nsk); 399 | tmp_s16 = -tmp_s16; 400 | } 401 | tmp_s16 += 32; // Rounding 402 | nsk += tmp_s16 >> 6; // Q13 >> 6 = Q7. 403 | if (nsk < kMinStd) { 404 | nsk = kMinStd; 405 | } 406 | self->noise_stds[gaussian] = nsk; 407 | } 408 | } 409 | 410 | // Separate models if they are too close. 411 | // |noise_global_mean| in Q14 (= Q7 * Q7). 412 | noise_global_mean = WeightedAverage(&self->noise_means[channel], 0, 413 | &kNoiseDataWeights[channel]); 414 | 415 | // |speech_global_mean| in Q14 (= Q7 * Q7). 416 | speech_global_mean = WeightedAverage(&self->speech_means[channel], 0, 417 | &kSpeechDataWeights[channel]); 418 | 419 | // |diff| = "global" speech mean - "global" noise mean. 420 | // (Q14 >> 9) - (Q14 >> 9) = Q5. 421 | diff = (int16_t) (speech_global_mean >> 9) - 422 | (int16_t) (noise_global_mean >> 9); 423 | if (diff < kMinimumDifference[channel]) { 424 | tmp_s16 = kMinimumDifference[channel] - diff; 425 | 426 | // |tmp1_s16| = ~0.8 * (kMinimumDifference - diff) in Q7. 427 | // |tmp2_s16| = ~0.2 * (kMinimumDifference - diff) in Q7. 
428 | tmp1_s16 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(13, tmp_s16, 2); 429 | tmp2_s16 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(3, tmp_s16, 2); 430 | 431 | // Move Gaussian means for speech model by |tmp1_s16| and update 432 | // |speech_global_mean|. Note that |self->speech_means[channel]| is 433 | // changed after the call. 434 | speech_global_mean = WeightedAverage(&self->speech_means[channel], 435 | tmp1_s16, 436 | &kSpeechDataWeights[channel]); 437 | 438 | // Move Gaussian means for noise model by -|tmp2_s16| and update 439 | // |noise_global_mean|. Note that |self->noise_means[channel]| is 440 | // changed after the call. 441 | noise_global_mean = WeightedAverage(&self->noise_means[channel], 442 | -tmp2_s16, 443 | &kNoiseDataWeights[channel]); 444 | } 445 | 446 | // Control that the speech & noise means do not drift to much. 447 | maxspe = kMaximumSpeech[channel]; 448 | tmp2_s16 = (int16_t) (speech_global_mean >> 7); 449 | if (tmp2_s16 > maxspe) { 450 | // Upper limit of speech model. 451 | tmp2_s16 -= maxspe; 452 | 453 | for (k = 0; k < kNumGaussians; k++) { 454 | self->speech_means[channel + k * kNumChannels] -= tmp2_s16; 455 | } 456 | } 457 | 458 | tmp2_s16 = (int16_t) (noise_global_mean >> 7); 459 | if (tmp2_s16 > kMaximumNoise[channel]) { 460 | tmp2_s16 -= kMaximumNoise[channel]; 461 | 462 | for (k = 0; k < kNumGaussians; k++) { 463 | self->noise_means[channel + k * kNumChannels] -= tmp2_s16; 464 | } 465 | } 466 | } 467 | self->frame_counter++; 468 | } 469 | 470 | // Smooth with respect to transition hysteresis. 
471 | if (!vadflag) { 472 | if (self->over_hang > 0) { 473 | vadflag = 2 + self->over_hang; 474 | self->over_hang--; 475 | } 476 | self->num_of_speech = 0; 477 | } else { 478 | self->num_of_speech++; 479 | if (self->num_of_speech > kMaxSpeechFrames) { 480 | self->num_of_speech = kMaxSpeechFrames; 481 | self->over_hang = overhead2; 482 | } else { 483 | self->over_hang = overhead1; 484 | } 485 | } 486 | return vadflag; 487 | } 488 | 489 | // Initialize the VAD. Set aggressiveness mode to default value. 490 | int WebRtcVad_InitCore(VadInstT* self) { 491 | int i; 492 | 493 | if (self == NULL) { 494 | return -1; 495 | } 496 | 497 | // Initialization of general struct variables. 498 | self->vad = 1; // Speech active (=1). 499 | self->frame_counter = 0; 500 | self->over_hang = 0; 501 | self->num_of_speech = 0; 502 | 503 | // Initialization of downsampling filter state. 504 | memset(self->downsampling_filter_states, 0, 505 | sizeof(self->downsampling_filter_states)); 506 | 507 | // Initialization of 48 to 8 kHz downsampling. 508 | WebRtcSpl_ResetResample48khzTo8khz(&self->state_48_to_8); 509 | 510 | // Read initial PDF parameters. 511 | for (i = 0; i < kTableSize; i++) { 512 | self->noise_means[i] = kNoiseDataMeans[i]; 513 | self->speech_means[i] = kSpeechDataMeans[i]; 514 | self->noise_stds[i] = kNoiseDataStds[i]; 515 | self->speech_stds[i] = kSpeechDataStds[i]; 516 | } 517 | 518 | // Initialize Index and Minimum value vectors. 519 | for (i = 0; i < 16 * kNumChannels; i++) { 520 | self->low_value_vector[i] = 10000; 521 | self->index_vector[i] = 0; 522 | } 523 | 524 | // Initialize splitting filter states. 525 | memset(self->upper_state, 0, sizeof(self->upper_state)); 526 | memset(self->lower_state, 0, sizeof(self->lower_state)); 527 | 528 | // Initialize high pass filter states. 529 | memset(self->hp_filter_state, 0, sizeof(self->hp_filter_state)); 530 | 531 | // Initialize mean value memory, for WebRtcVad_FindMinimum(). 
532 | for (i = 0; i < kNumChannels; i++) { 533 | self->mean_value[i] = 1600; 534 | } 535 | 536 | // Set aggressiveness mode to default (=|kDefaultMode|). 537 | if (WebRtcVad_set_mode_core(self, kDefaultMode) != 0) { 538 | return -1; 539 | } 540 | 541 | self->init_flag = kInitCheck; 542 | 543 | return 0; 544 | } 545 | 546 | // Set aggressiveness mode 547 | int WebRtcVad_set_mode_core(VadInstT* self, int mode) { 548 | int return_value = 0; 549 | 550 | switch (mode) { 551 | case 0: 552 | // Quality mode. 553 | memcpy(self->over_hang_max_1, kOverHangMax1Q, 554 | sizeof(self->over_hang_max_1)); 555 | memcpy(self->over_hang_max_2, kOverHangMax2Q, 556 | sizeof(self->over_hang_max_2)); 557 | memcpy(self->individual, kLocalThresholdQ, 558 | sizeof(self->individual)); 559 | memcpy(self->total, kGlobalThresholdQ, 560 | sizeof(self->total)); 561 | break; 562 | case 1: 563 | // Low bitrate mode. 564 | memcpy(self->over_hang_max_1, kOverHangMax1LBR, 565 | sizeof(self->over_hang_max_1)); 566 | memcpy(self->over_hang_max_2, kOverHangMax2LBR, 567 | sizeof(self->over_hang_max_2)); 568 | memcpy(self->individual, kLocalThresholdLBR, 569 | sizeof(self->individual)); 570 | memcpy(self->total, kGlobalThresholdLBR, 571 | sizeof(self->total)); 572 | break; 573 | case 2: 574 | // Aggressive mode. 575 | memcpy(self->over_hang_max_1, kOverHangMax1AGG, 576 | sizeof(self->over_hang_max_1)); 577 | memcpy(self->over_hang_max_2, kOverHangMax2AGG, 578 | sizeof(self->over_hang_max_2)); 579 | memcpy(self->individual, kLocalThresholdAGG, 580 | sizeof(self->individual)); 581 | memcpy(self->total, kGlobalThresholdAGG, 582 | sizeof(self->total)); 583 | break; 584 | case 3: 585 | // Very aggressive mode. 
586 | memcpy(self->over_hang_max_1, kOverHangMax1VAG, 587 | sizeof(self->over_hang_max_1)); 588 | memcpy(self->over_hang_max_2, kOverHangMax2VAG, 589 | sizeof(self->over_hang_max_2)); 590 | memcpy(self->individual, kLocalThresholdVAG, 591 | sizeof(self->individual)); 592 | memcpy(self->total, kGlobalThresholdVAG, 593 | sizeof(self->total)); 594 | break; 595 | default: 596 | return_value = -1; 597 | break; 598 | } 599 | 600 | return return_value; 601 | } 602 | 603 | // Calculate VAD decision by first extracting feature values and then calculate 604 | // probability for both speech and background noise. 605 | 606 | int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame, 607 | int frame_length) { 608 | int vad; 609 | int i; 610 | int16_t speech_nb[240]; // 30 ms in 8 kHz. 611 | // |tmp_mem| is a temporary memory used by resample function, length is 612 | // frame length in 10 ms (480 samples) + 256 extra. 613 | int32_t tmp_mem[480 + 256] = { 0 }; 614 | const int kFrameLen10ms48khz = 480; 615 | const int kFrameLen10ms8khz = 80; 616 | int num_10ms_frames = frame_length / kFrameLen10ms48khz; 617 | 618 | for (i = 0; i < num_10ms_frames; i++) { 619 | WebRtcSpl_Resample48khzTo8khz(speech_frame, 620 | &speech_nb[i * kFrameLen10ms8khz], 621 | &inst->state_48_to_8, 622 | tmp_mem); 623 | } 624 | 625 | // Do VAD on an 8 kHz signal 626 | vad = WebRtcVad_CalcVad8khz(inst, speech_nb, frame_length / 6); 627 | 628 | return vad; 629 | } 630 | 631 | int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame, 632 | int frame_length) 633 | { 634 | int len, vad; 635 | int16_t speechWB[480]; // Downsampled speech frame: 960 samples (30ms in SWB) 636 | int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) 637 | 638 | 639 | // Downsample signal 32->16->8 before doing VAD 640 | WebRtcVad_Downsampling(speech_frame, speechWB, &(inst->downsampling_filter_states[2]), 641 | frame_length); 642 | len = frame_length / 2; 643 | 644 | 
WebRtcVad_Downsampling(speechWB, speechNB, inst->downsampling_filter_states, len); 645 | len /= 2; 646 | 647 | // Do VAD on an 8 kHz signal 648 | vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); 649 | 650 | return vad; 651 | } 652 | 653 | int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame, 654 | int frame_length) 655 | { 656 | int len, vad; 657 | int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) 658 | 659 | // Wideband: Downsample signal before doing VAD 660 | WebRtcVad_Downsampling(speech_frame, speechNB, inst->downsampling_filter_states, 661 | frame_length); 662 | 663 | len = frame_length / 2; 664 | vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); 665 | 666 | return vad; 667 | } 668 | 669 | int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame, 670 | int frame_length) 671 | { 672 | int16_t feature_vector[kNumChannels], total_power; 673 | 674 | // Get power in the bands 675 | total_power = WebRtcVad_CalculateFeatures(inst, speech_frame, frame_length, 676 | feature_vector); 677 | 678 | // Make a VAD 679 | inst->vad = GmmProbability(inst, feature_vector, total_power, frame_length); 680 | 681 | return inst->vad; 682 | } 683 | -------------------------------------------------------------------------------- /webrtc/common_audio/vad/vad_core.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This header file includes the descriptions of the core VAD calls. 
14 | */ 15 | 16 | #ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_ 17 | #define WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_ 18 | 19 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 20 | #include "webrtc/typedefs.h" 21 | 22 | enum { kNumChannels = 6 }; // Number of frequency bands (named channels). 23 | enum { kNumGaussians = 2 }; // Number of Gaussians per channel in the GMM. 24 | enum { kTableSize = kNumChannels * kNumGaussians }; 25 | enum { kMinEnergy = 10 }; // Minimum energy required to trigger audio signal. 26 | 27 | typedef struct VadInstT_ 28 | { 29 | 30 | int vad; 31 | int32_t downsampling_filter_states[4]; 32 | WebRtcSpl_State48khzTo8khz state_48_to_8; 33 | int16_t noise_means[kTableSize]; 34 | int16_t speech_means[kTableSize]; 35 | int16_t noise_stds[kTableSize]; 36 | int16_t speech_stds[kTableSize]; 37 | // TODO(bjornv): Change to |frame_count|. 38 | int32_t frame_counter; 39 | int16_t over_hang; // Over Hang 40 | int16_t num_of_speech; 41 | // TODO(bjornv): Change to |age_vector|. 42 | int16_t index_vector[16 * kNumChannels]; 43 | int16_t low_value_vector[16 * kNumChannels]; 44 | // TODO(bjornv): Change to |median|. 45 | int16_t mean_value[kNumChannels]; 46 | int16_t upper_state[5]; 47 | int16_t lower_state[5]; 48 | int16_t hp_filter_state[4]; 49 | int16_t over_hang_max_1[3]; 50 | int16_t over_hang_max_2[3]; 51 | int16_t individual[3]; 52 | int16_t total[3]; 53 | 54 | int init_flag; 55 | 56 | } VadInstT; 57 | 58 | // Initializes the core VAD component. The default aggressiveness mode is 59 | // controlled by |kDefaultMode| in vad_core.c. 60 | // 61 | // - self [i/o] : Instance that should be initialized 62 | // 63 | // returns : 0 (OK), -1 (NULL pointer in or if the default mode can't be 64 | // set) 65 | int WebRtcVad_InitCore(VadInstT* self); 66 | 67 | /**************************************************************************** 68 | * WebRtcVad_set_mode_core(...) 
69 | * 70 | * This function changes the VAD settings 71 | * 72 | * Input: 73 | * - inst : VAD instance 74 | * - mode : Aggressiveness degree 75 | * 0 (High quality) - 3 (Highly aggressive) 76 | * 77 | * Output: 78 | * - inst : Changed instance 79 | * 80 | * Return value : 0 - Ok 81 | * -1 - Error 82 | */ 83 | 84 | int WebRtcVad_set_mode_core(VadInstT* self, int mode); 85 | 86 | /**************************************************************************** 87 | * WebRtcVad_CalcVad48khz(...) 88 | * WebRtcVad_CalcVad32khz(...) 89 | * WebRtcVad_CalcVad16khz(...) 90 | * WebRtcVad_CalcVad8khz(...) 91 | * 92 | * Calculate probability for active speech and make VAD decision. 93 | * 94 | * Input: 95 | * - inst : Instance that should be initialized 96 | * - speech_frame : Input speech frame 97 | * - frame_length : Number of input samples 98 | * 99 | * Output: 100 | * - inst : Updated filter states etc. 101 | * 102 | * Return value : VAD decision 103 | * 0 - No active speech 104 | * 1-6 - Active speech 105 | */ 106 | int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame, 107 | int frame_length); 108 | int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame, 109 | int frame_length); 110 | int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame, 111 | int frame_length); 112 | int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame, 113 | int frame_length); 114 | 115 | #endif // WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_ 116 | -------------------------------------------------------------------------------- /webrtc/common_audio/vad/vad_filterbank.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. 
An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "webrtc/common_audio/vad/vad_filterbank.h" 12 | 13 | #include 14 | 15 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 16 | #include "webrtc/typedefs.h" 17 | 18 | // Constants used in LogOfEnergy(). 19 | static const int16_t kLogConst = 24660; // 160*log10(2) in Q9. 20 | static const int16_t kLogEnergyIntPart = 14336; // 14 in Q10 21 | 22 | // Coefficients used by HighPassFilter, Q14. 23 | static const int16_t kHpZeroCoefs[3] = { 6631, -13262, 6631 }; 24 | static const int16_t kHpPoleCoefs[3] = { 16384, -7756, 5620 }; 25 | 26 | // Allpass filter coefficients, upper and lower, in Q15. 27 | // Upper: 0.64, Lower: 0.17 28 | static const int16_t kAllPassCoefsQ15[2] = { 20972, 5571 }; 29 | 30 | // Adjustment for division with two in SplitFilter. 31 | static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 }; 32 | 33 | // High pass filtering, with a cut-off frequency at 80 Hz, if the |data_in| is 34 | // sampled at 500 Hz. 35 | // 36 | // - data_in [i] : Input audio data sampled at 500 Hz. 37 | // - data_length [i] : Length of input and output data. 38 | // - filter_state [i/o] : State of the filter. 39 | // - data_out [o] : Output audio data in the frequency interval 40 | // 80 - 250 Hz. 
41 | static void HighPassFilter(const int16_t* data_in, int data_length, 42 | int16_t* filter_state, int16_t* data_out) { 43 | int i; 44 | const int16_t* in_ptr = data_in; 45 | int16_t* out_ptr = data_out; 46 | int32_t tmp32 = 0; 47 | 48 | 49 | // The sum of the absolute values of the impulse response: 50 | // The zero/pole-filter has a max amplification of a single sample of: 1.4546 51 | // Impulse response: 0.4047 -0.6179 -0.0266 0.1993 0.1035 -0.0194 52 | // The all-zero section has a max amplification of a single sample of: 1.6189 53 | // Impulse response: 0.4047 -0.8094 0.4047 0 0 0 54 | // The all-pole section has a max amplification of a single sample of: 1.9931 55 | // Impulse response: 1.0000 0.4734 -0.1189 -0.2187 -0.0627 0.04532 56 | 57 | for (i = 0; i < data_length; i++) { 58 | // All-zero section (filter coefficients in Q14). 59 | tmp32 = kHpZeroCoefs[0] * *in_ptr; 60 | tmp32 += kHpZeroCoefs[1] * filter_state[0]; 61 | tmp32 += kHpZeroCoefs[2] * filter_state[1]; 62 | filter_state[1] = filter_state[0]; 63 | filter_state[0] = *in_ptr++; 64 | 65 | // All-pole section (filter coefficients in Q14). 66 | tmp32 -= kHpPoleCoefs[1] * filter_state[2]; 67 | tmp32 -= kHpPoleCoefs[2] * filter_state[3]; 68 | filter_state[3] = filter_state[2]; 69 | filter_state[2] = (int16_t) (tmp32 >> 14); 70 | *out_ptr++ = filter_state[2]; 71 | } 72 | } 73 | 74 | // All pass filtering of |data_in|, used before splitting the signal into two 75 | // frequency bands (low pass vs high pass). 76 | // Note that |data_in| and |data_out| can NOT correspond to the same address. 77 | // 78 | // - data_in [i] : Input audio signal given in Q0. 79 | // - data_length [i] : Length of input and output data. 80 | // - filter_coefficient [i] : Given in Q15. 81 | // - filter_state [i/o] : State of the filter given in Q(-1). 82 | // - data_out [o] : Output audio signal given in Q(-1). 
83 | static void AllPassFilter(const int16_t* data_in, int data_length, 84 | int16_t filter_coefficient, int16_t* filter_state, 85 | int16_t* data_out) { 86 | // The filter can only cause overflow (in the w16 output variable) 87 | // if more than 4 consecutive input numbers are of maximum value and 88 | // has the the same sign as the impulse responses first taps. 89 | // First 6 taps of the impulse response: 90 | // 0.6399 0.5905 -0.3779 0.2418 -0.1547 0.0990 91 | 92 | int i; 93 | int16_t tmp16 = 0; 94 | int32_t tmp32 = 0; 95 | int32_t state32 = ((int32_t) (*filter_state) << 16); // Q15 96 | 97 | for (i = 0; i < data_length; i++) { 98 | tmp32 = state32 + WEBRTC_SPL_MUL_16_16(filter_coefficient, *data_in); 99 | tmp16 = (int16_t) (tmp32 >> 16); // Q(-1) 100 | *data_out++ = tmp16; 101 | state32 = (((int32_t) (*data_in)) << 14); // Q14 102 | state32 -= WEBRTC_SPL_MUL_16_16(filter_coefficient, tmp16); // Q14 103 | state32 <<= 1; // Q15. 104 | data_in += 2; 105 | } 106 | 107 | *filter_state = (int16_t) (state32 >> 16); // Q(-1) 108 | } 109 | 110 | // Splits |data_in| into |hp_data_out| and |lp_data_out| corresponding to 111 | // an upper (high pass) part and a lower (low pass) part respectively. 112 | // 113 | // - data_in [i] : Input audio data to be split into two frequency bands. 114 | // - data_length [i] : Length of |data_in|. 115 | // - upper_state [i/o] : State of the upper filter, given in Q(-1). 116 | // - lower_state [i/o] : State of the lower filter, given in Q(-1). 117 | // - hp_data_out [o] : Output audio data of the upper half of the spectrum. 118 | // The length is |data_length| / 2. 119 | // - lp_data_out [o] : Output audio data of the lower half of the spectrum. 120 | // The length is |data_length| / 2. 121 | static void SplitFilter(const int16_t* data_in, int data_length, 122 | int16_t* upper_state, int16_t* lower_state, 123 | int16_t* hp_data_out, int16_t* lp_data_out) { 124 | int i; 125 | int half_length = data_length >> 1; // Downsampling by 2. 
126 | int16_t tmp_out; 127 | 128 | // All-pass filtering upper branch. 129 | AllPassFilter(&data_in[0], half_length, kAllPassCoefsQ15[0], upper_state, 130 | hp_data_out); 131 | 132 | // All-pass filtering lower branch. 133 | AllPassFilter(&data_in[1], half_length, kAllPassCoefsQ15[1], lower_state, 134 | lp_data_out); 135 | 136 | // Make LP and HP signals. 137 | for (i = 0; i < half_length; i++) { 138 | tmp_out = *hp_data_out; 139 | *hp_data_out++ -= *lp_data_out; 140 | *lp_data_out++ += tmp_out; 141 | } 142 | } 143 | 144 | // Calculates the energy of |data_in| in dB, and also updates an overall 145 | // |total_energy| if necessary. 146 | // 147 | // - data_in [i] : Input audio data for energy calculation. 148 | // - data_length [i] : Length of input data. 149 | // - offset [i] : Offset value added to |log_energy|. 150 | // - total_energy [i/o] : An external energy updated with the energy of 151 | // |data_in|. 152 | // NOTE: |total_energy| is only updated if 153 | // |total_energy| <= |kMinEnergy|. 154 | // - log_energy [o] : 10 * log10("energy of |data_in|") given in Q4. 155 | static void LogOfEnergy(const int16_t* data_in, int data_length, 156 | int16_t offset, int16_t* total_energy, 157 | int16_t* log_energy) { 158 | // |tot_rshifts| accumulates the number of right shifts performed on |energy|. 159 | int tot_rshifts = 0; 160 | // The |energy| will be normalized to 15 bits. We use unsigned integer because 161 | // we eventually will mask out the fractional part. 162 | uint32_t energy = 0; 163 | 164 | assert(data_in != NULL); 165 | assert(data_length > 0); 166 | 167 | energy = (uint32_t) WebRtcSpl_Energy((int16_t*) data_in, data_length, 168 | &tot_rshifts); 169 | 170 | if (energy != 0) { 171 | // By construction, normalizing to 15 bits is equivalent with 17 leading 172 | // zeros of an unsigned 32 bit value. 173 | int normalizing_rshifts = 17 - WebRtcSpl_NormU32(energy); 174 | // In a 15 bit representation the leading bit is 2^14. 
log2(2^14) in Q10 is 175 | // (14 << 10), which is what we initialize |log2_energy| with. For a more 176 | // detailed derivations, see below. 177 | int16_t log2_energy = kLogEnergyIntPart; 178 | 179 | tot_rshifts += normalizing_rshifts; 180 | // Normalize |energy| to 15 bits. 181 | // |tot_rshifts| is now the total number of right shifts performed on 182 | // |energy| after normalization. This means that |energy| is in 183 | // Q(-tot_rshifts). 184 | if (normalizing_rshifts < 0) { 185 | energy <<= -normalizing_rshifts; 186 | } else { 187 | energy >>= normalizing_rshifts; 188 | } 189 | 190 | // Calculate the energy of |data_in| in dB, in Q4. 191 | // 192 | // 10 * log10("true energy") in Q4 = 2^4 * 10 * log10("true energy") = 193 | // 160 * log10(|energy| * 2^|tot_rshifts|) = 194 | // 160 * log10(2) * log2(|energy| * 2^|tot_rshifts|) = 195 | // 160 * log10(2) * (log2(|energy|) + log2(2^|tot_rshifts|)) = 196 | // (160 * log10(2)) * (log2(|energy|) + |tot_rshifts|) = 197 | // |kLogConst| * (|log2_energy| + |tot_rshifts|) 198 | // 199 | // We know by construction that |energy| is normalized to 15 bits. Hence, 200 | // |energy| = 2^14 + frac_Q15, where frac_Q15 is a fractional part in Q15. 201 | // Further, we'd like |log2_energy| in Q10 202 | // log2(|energy|) in Q10 = 2^10 * log2(2^14 + frac_Q15) = 203 | // 2^10 * log2(2^14 * (1 + frac_Q15 * 2^-14)) = 204 | // 2^10 * (14 + log2(1 + frac_Q15 * 2^-14)) ~= 205 | // (14 << 10) + 2^10 * (frac_Q15 * 2^-14) = 206 | // (14 << 10) + (frac_Q15 * 2^-4) = (14 << 10) + (frac_Q15 >> 4) 207 | // 208 | // Note that frac_Q15 = (|energy| & 0x00003FFF) 209 | 210 | // Calculate and add the fractional part to |log2_energy|. 211 | log2_energy += (int16_t) ((energy & 0x00003FFF) >> 4); 212 | 213 | // |kLogConst| is in Q9, |log2_energy| in Q10 and |tot_rshifts| in Q0. 214 | // Note that we in our derivation above have accounted for an output in Q4. 
215 | *log_energy = (int16_t) (WEBRTC_SPL_MUL_16_16_RSFT( 216 | kLogConst, log2_energy, 19) + 217 | WEBRTC_SPL_MUL_16_16_RSFT(tot_rshifts, kLogConst, 9)); 218 | 219 | if (*log_energy < 0) { 220 | *log_energy = 0; 221 | } 222 | } else { 223 | *log_energy = offset; 224 | return; 225 | } 226 | 227 | *log_energy += offset; 228 | 229 | // Update the approximate |total_energy| with the energy of |data_in|, if 230 | // |total_energy| has not exceeded |kMinEnergy|. |total_energy| is used as an 231 | // energy indicator in WebRtcVad_GmmProbability() in vad_core.c. 232 | if (*total_energy <= kMinEnergy) { 233 | if (tot_rshifts >= 0) { 234 | // We know by construction that the |energy| > |kMinEnergy| in Q0, so add 235 | // an arbitrary value such that |total_energy| exceeds |kMinEnergy|. 236 | *total_energy += kMinEnergy + 1; 237 | } else { 238 | // By construction |energy| is represented by 15 bits, hence any number of 239 | // right shifted |energy| will fit in an int16_t. In addition, adding the 240 | // value to |total_energy| is wrap around safe as long as 241 | // |kMinEnergy| < 8192. 242 | *total_energy += (int16_t) (energy >> -tot_rshifts); // Q0. 243 | } 244 | } 245 | } 246 | 247 | int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in, 248 | int data_length, int16_t* features) { 249 | int16_t total_energy = 0; 250 | // We expect |data_length| to be 80, 160 or 240 samples, which corresponds to 251 | // 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will 252 | // have at most 120 samples after the first split and at most 60 samples after 253 | // the second split. 254 | int16_t hp_120[120], lp_120[120]; 255 | int16_t hp_60[60], lp_60[60]; 256 | const int half_data_length = data_length >> 1; 257 | int length = half_data_length; // |data_length| / 2, corresponds to 258 | // bandwidth = 2000 Hz after downsampling. 259 | 260 | // Initialize variables for the first SplitFilter(). 
261 | int frequency_band = 0; 262 | const int16_t* in_ptr = data_in; // [0 - 4000] Hz. 263 | int16_t* hp_out_ptr = hp_120; // [2000 - 4000] Hz. 264 | int16_t* lp_out_ptr = lp_120; // [0 - 2000] Hz. 265 | 266 | assert(data_length >= 0); 267 | assert(data_length <= 240); 268 | assert(4 < kNumChannels - 1); // Checking maximum |frequency_band|. 269 | 270 | // Split at 2000 Hz and downsample. 271 | SplitFilter(in_ptr, data_length, &self->upper_state[frequency_band], 272 | &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); 273 | 274 | // For the upper band (2000 Hz - 4000 Hz) split at 3000 Hz and downsample. 275 | frequency_band = 1; 276 | in_ptr = hp_120; // [2000 - 4000] Hz. 277 | hp_out_ptr = hp_60; // [3000 - 4000] Hz. 278 | lp_out_ptr = lp_60; // [2000 - 3000] Hz. 279 | SplitFilter(in_ptr, length, &self->upper_state[frequency_band], 280 | &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); 281 | 282 | // Energy in 3000 Hz - 4000 Hz. 283 | length >>= 1; // |data_length| / 4 <=> bandwidth = 1000 Hz. 284 | 285 | LogOfEnergy(hp_60, length, kOffsetVector[5], &total_energy, &features[5]); 286 | 287 | // Energy in 2000 Hz - 3000 Hz. 288 | LogOfEnergy(lp_60, length, kOffsetVector[4], &total_energy, &features[4]); 289 | 290 | // For the lower band (0 Hz - 2000 Hz) split at 1000 Hz and downsample. 291 | frequency_band = 2; 292 | in_ptr = lp_120; // [0 - 2000] Hz. 293 | hp_out_ptr = hp_60; // [1000 - 2000] Hz. 294 | lp_out_ptr = lp_60; // [0 - 1000] Hz. 295 | length = half_data_length; // |data_length| / 2 <=> bandwidth = 2000 Hz. 296 | SplitFilter(in_ptr, length, &self->upper_state[frequency_band], 297 | &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); 298 | 299 | // Energy in 1000 Hz - 2000 Hz. 300 | length >>= 1; // |data_length| / 4 <=> bandwidth = 1000 Hz. 301 | LogOfEnergy(hp_60, length, kOffsetVector[3], &total_energy, &features[3]); 302 | 303 | // For the lower band (0 Hz - 1000 Hz) split at 500 Hz and downsample. 
304 | frequency_band = 3; 305 | in_ptr = lp_60; // [0 - 1000] Hz. 306 | hp_out_ptr = hp_120; // [500 - 1000] Hz. 307 | lp_out_ptr = lp_120; // [0 - 500] Hz. 308 | SplitFilter(in_ptr, length, &self->upper_state[frequency_band], 309 | &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); 310 | 311 | // Energy in 500 Hz - 1000 Hz. 312 | length >>= 1; // |data_length| / 8 <=> bandwidth = 500 Hz. 313 | LogOfEnergy(hp_120, length, kOffsetVector[2], &total_energy, &features[2]); 314 | 315 | // For the lower band (0 Hz - 500 Hz) split at 250 Hz and downsample. 316 | frequency_band = 4; 317 | in_ptr = lp_120; // [0 - 500] Hz. 318 | hp_out_ptr = hp_60; // [250 - 500] Hz. 319 | lp_out_ptr = lp_60; // [0 - 250] Hz. 320 | SplitFilter(in_ptr, length, &self->upper_state[frequency_band], 321 | &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); 322 | 323 | // Energy in 250 Hz - 500 Hz. 324 | length >>= 1; // |data_length| / 16 <=> bandwidth = 250 Hz. 325 | LogOfEnergy(hp_60, length, kOffsetVector[1], &total_energy, &features[1]); 326 | 327 | // Remove 0 Hz - 80 Hz, by high pass filtering the lower band. 328 | HighPassFilter(lp_60, length, self->hp_filter_state, hp_120); 329 | 330 | // Energy in 80 Hz - 250 Hz. 331 | LogOfEnergy(hp_120, length, kOffsetVector[0], &total_energy, &features[0]); 332 | 333 | return total_energy; 334 | } 335 | -------------------------------------------------------------------------------- /webrtc/common_audio/vad/vad_filterbank.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | /* 12 | * This file includes feature calculating functionality used in vad_core.c. 13 | */ 14 | 15 | #ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ 16 | #define WEBRTC_COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ 17 | 18 | #include "webrtc/common_audio/vad/vad_core.h" 19 | #include "webrtc/typedefs.h" 20 | 21 | // Takes |data_length| samples of |data_in| and calculates the logarithm of the 22 | // energy of each of the |kNumChannels| = 6 frequency bands used by the VAD: 23 | // 80 Hz - 250 Hz 24 | // 250 Hz - 500 Hz 25 | // 500 Hz - 1000 Hz 26 | // 1000 Hz - 2000 Hz 27 | // 2000 Hz - 3000 Hz 28 | // 3000 Hz - 4000 Hz 29 | // 30 | // The values are given in Q4 and written to |features|. Further, an approximate 31 | // overall energy is returned. The return value is used in 32 | // WebRtcVad_GmmProbability() as a signal indicator, hence it is arbitrary above 33 | // the threshold |kMinEnergy|. 34 | // 35 | // - self [i/o] : State information of the VAD. 36 | // - data_in [i] : Input audio data, for feature extraction. 37 | // - data_length [i] : Audio data size, in number of samples. 38 | // - features [o] : 10 * log10(energy in each frequency band), Q4. 39 | // - returns : Total energy of the signal (NOTE! This value is not 40 | // exact. It is only used in a comparison.) 41 | int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in, 42 | int data_length, int16_t* features); 43 | 44 | #endif // WEBRTC_COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ 45 | -------------------------------------------------------------------------------- /webrtc/common_audio/vad/vad_gmm.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. 
An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/common_audio/vad/vad_gmm.h"

#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/typedefs.h"

// Threshold (in Q10) on the exponent below; larger exponents make the Gaussian
// probability round to zero, so the exp() evaluation is skipped entirely.
static const int32_t kCompVar = 22005;
static const int16_t kLog2Exp = 5909;  // log2(exp(1)) in Q12.

// For a normal distribution, the probability of |input| is calculated and
// returned (in Q20). The formula for normal distributed probability is
//
// 1 / s * exp(-(x - m)^2 / (2 * s^2))
//
// where the parameters are given in the following Q domains:
// m = |mean| (Q7)
// s = |std| (Q7)
// x = |input| (Q4)
// in addition to the probability we output |delta| (in Q11) used when updating
// the noise/speech model.
int32_t WebRtcVad_GaussianProbability(int16_t input,
                                      int16_t mean,
                                      int16_t std,
                                      int16_t* delta) {
  int16_t tmp16, inv_std, inv_std2, exp_value = 0;
  int32_t tmp32;

  // Calculate |inv_std| = 1 / s, in Q10.
  // 131072 = 1 in Q17, and (|std| >> 1) is for rounding instead of truncation.
  // Q-domain: Q17 / Q7 = Q10.
  tmp32 = (int32_t) 131072 + (int32_t) (std >> 1);
  inv_std = (int16_t) WebRtcSpl_DivW32W16(tmp32, std);

  // Calculate |inv_std2| = 1 / s^2, in Q14.
  tmp16 = (inv_std >> 2);  // Q10 -> Q8.
  // Q-domain: (Q8 * Q8) >> 2 = Q14.
  inv_std2 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(tmp16, tmp16, 2);
  // TODO(bjornv): Investigate if changing to
  // |inv_std2| = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(|inv_std|, |inv_std|, 6);
  // gives better accuracy.

  tmp16 = (input << 3);  // Q4 -> Q7
  tmp16 = tmp16 - mean;  // Q7 - Q7 = Q7

  // To be used later, when updating noise/speech model.
  // |delta| = (x - m) / s^2, in Q11.
  // Q-domain: (Q14 * Q7) >> 10 = Q11.
  *delta = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(inv_std2, tmp16, 10);

  // Calculate the exponent |tmp32| = (x - m)^2 / (2 * s^2), in Q10. Replacing
  // division by two with one shift (which is why the shift below is 9 and not
  // the 8 a plain (Q11 * Q7) -> Q10 conversion would use).
  // Q-domain: (Q11 * Q7) >> 8 = Q10.
  tmp32 = WEBRTC_SPL_MUL_16_16_RSFT(*delta, tmp16, 9);

  // If the exponent is small enough to give a non-zero probability we calculate
  // |exp_value| ~= exp(-(x - m)^2 / (2 * s^2))
  //             ~= exp2(-log2(exp(1)) * |tmp32|).
  if (tmp32 < kCompVar) {
    // Calculate |tmp16| = log2(exp(1)) * |tmp32|, in Q10.
    // Q-domain: (Q12 * Q10) >> 12 = Q10.
    tmp16 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(kLog2Exp, (int16_t) tmp32, 12);
    tmp16 = -tmp16;
    // exp2 is split into an integer and a fractional part:
    // the fractional Q10 bits form a mantissa in [1, 2) ...
    exp_value = (0x0400 | (tmp16 & 0x03FF));
    // ... and the (negative) integer part becomes a right-shift count.
    tmp16 ^= 0xFFFF;
    tmp16 >>= 10;
    tmp16 += 1;
    // Get |exp_value| = exp(-|tmp32|) in Q10.
    exp_value >>= tmp16;
  }

  // Calculate and return (1 / s) * exp(-(x - m)^2 / (2 * s^2)), in Q20.
  // Q-domain: Q10 * Q10 = Q20.
  return inv_std * exp_value;
}
--------------------------------------------------------------------------------
/webrtc/common_audio/vad/vad_gmm.h:
--------------------------------------------------------------------------------
/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

// Gaussian probability calculations internally used in vad_core.c.
12 | 13 | #ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_GMM_H_ 14 | #define WEBRTC_COMMON_AUDIO_VAD_VAD_GMM_H_ 15 | 16 | #include "webrtc/typedefs.h" 17 | 18 | // Calculates the probability for |input|, given that |input| comes from a 19 | // normal distribution with mean and standard deviation (|mean|, |std|). 20 | // 21 | // Inputs: 22 | // - input : input sample in Q4. 23 | // - mean : mean input in the statistical model, Q7. 24 | // - std : standard deviation, Q7. 25 | // 26 | // Output: 27 | // 28 | // - delta : input used when updating the model, Q11. 29 | // |delta| = (|input| - |mean|) / |std|^2. 30 | // 31 | // Return: 32 | // (probability for |input|) = 33 | // 1 / |std| * exp(-(|input| - |mean|)^2 / (2 * |std|^2)); 34 | int32_t WebRtcVad_GaussianProbability(int16_t input, 35 | int16_t mean, 36 | int16_t std, 37 | int16_t* delta); 38 | 39 | #endif // WEBRTC_COMMON_AUDIO_VAD_VAD_GMM_H_ 40 | -------------------------------------------------------------------------------- /webrtc/common_audio/vad/vad_sp.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "webrtc/common_audio/vad/vad_sp.h" 12 | 13 | #include 14 | 15 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 16 | #include "webrtc/common_audio/vad/vad_core.h" 17 | #include "webrtc/typedefs.h" 18 | 19 | // Allpass filter coefficients, upper and lower, in Q13. 20 | // Upper: 0.64, Lower: 0.17. 21 | static const int16_t kAllPassCoefsQ13[2] = { 5243, 1392 }; // Q13. 
22 | static const int16_t kSmoothingDown = 6553; // 0.2 in Q15. 23 | static const int16_t kSmoothingUp = 32439; // 0.99 in Q15. 24 | 25 | // TODO(bjornv): Move this function to vad_filterbank.c. 26 | // Downsampling filter based on splitting filter and allpass functions. 27 | void WebRtcVad_Downsampling(const int16_t* signal_in, 28 | int16_t* signal_out, 29 | int32_t* filter_state, 30 | int in_length) { 31 | int16_t tmp16_1 = 0, tmp16_2 = 0; 32 | int32_t tmp32_1 = filter_state[0]; 33 | int32_t tmp32_2 = filter_state[1]; 34 | int n = 0; 35 | int half_length = (in_length >> 1); // Downsampling by 2 gives half length. 36 | 37 | // Filter coefficients in Q13, filter state in Q0. 38 | for (n = 0; n < half_length; n++) { 39 | // All-pass filtering upper branch. 40 | tmp16_1 = (int16_t) ((tmp32_1 >> 1) + 41 | WEBRTC_SPL_MUL_16_16_RSFT(kAllPassCoefsQ13[0], *signal_in, 14)); 42 | *signal_out = tmp16_1; 43 | tmp32_1 = (int32_t) (*signal_in++) - 44 | WEBRTC_SPL_MUL_16_16_RSFT(kAllPassCoefsQ13[0], tmp16_1, 12); 45 | 46 | // All-pass filtering lower branch. 47 | tmp16_2 = (int16_t) ((tmp32_2 >> 1) + 48 | WEBRTC_SPL_MUL_16_16_RSFT(kAllPassCoefsQ13[1], *signal_in, 14)); 49 | *signal_out++ += tmp16_2; 50 | tmp32_2 = (int32_t) (*signal_in++) - 51 | WEBRTC_SPL_MUL_16_16_RSFT(kAllPassCoefsQ13[1], tmp16_2, 12); 52 | } 53 | // Store the filter states. 54 | filter_state[0] = tmp32_1; 55 | filter_state[1] = tmp32_2; 56 | } 57 | 58 | // Inserts |feature_value| into |low_value_vector|, if it is one of the 16 59 | // smallest values the last 100 frames. Then calculates and returns the median 60 | // of the five smallest values. 61 | int16_t WebRtcVad_FindMinimum(VadInstT* self, 62 | int16_t feature_value, 63 | int channel) { 64 | int i = 0, j = 0; 65 | int position = -1; 66 | // Offset to beginning of the 16 minimum values in memory. 
67 | const int offset = (channel << 4); 68 | int16_t current_median = 1600; 69 | int16_t alpha = 0; 70 | int32_t tmp32 = 0; 71 | // Pointer to memory for the 16 minimum values and the age of each value of 72 | // the |channel|. 73 | int16_t* age = &self->index_vector[offset]; 74 | int16_t* smallest_values = &self->low_value_vector[offset]; 75 | 76 | assert(channel < kNumChannels); 77 | 78 | // Each value in |smallest_values| is getting 1 loop older. Update |age|, and 79 | // remove old values. 80 | for (i = 0; i < 16; i++) { 81 | if (age[i] != 100) { 82 | age[i]++; 83 | } else { 84 | // Too old value. Remove from memory and shift larger values downwards. 85 | for (j = i; j < 16; j++) { 86 | smallest_values[j] = smallest_values[j + 1]; 87 | age[j] = age[j + 1]; 88 | } 89 | age[15] = 101; 90 | smallest_values[15] = 10000; 91 | } 92 | } 93 | 94 | // Check if |feature_value| is smaller than any of the values in 95 | // |smallest_values|. If so, find the |position| where to insert the new value 96 | // (|feature_value|). 
97 | if (feature_value < smallest_values[7]) { 98 | if (feature_value < smallest_values[3]) { 99 | if (feature_value < smallest_values[1]) { 100 | if (feature_value < smallest_values[0]) { 101 | position = 0; 102 | } else { 103 | position = 1; 104 | } 105 | } else if (feature_value < smallest_values[2]) { 106 | position = 2; 107 | } else { 108 | position = 3; 109 | } 110 | } else if (feature_value < smallest_values[5]) { 111 | if (feature_value < smallest_values[4]) { 112 | position = 4; 113 | } else { 114 | position = 5; 115 | } 116 | } else if (feature_value < smallest_values[6]) { 117 | position = 6; 118 | } else { 119 | position = 7; 120 | } 121 | } else if (feature_value < smallest_values[15]) { 122 | if (feature_value < smallest_values[11]) { 123 | if (feature_value < smallest_values[9]) { 124 | if (feature_value < smallest_values[8]) { 125 | position = 8; 126 | } else { 127 | position = 9; 128 | } 129 | } else if (feature_value < smallest_values[10]) { 130 | position = 10; 131 | } else { 132 | position = 11; 133 | } 134 | } else if (feature_value < smallest_values[13]) { 135 | if (feature_value < smallest_values[12]) { 136 | position = 12; 137 | } else { 138 | position = 13; 139 | } 140 | } else if (feature_value < smallest_values[14]) { 141 | position = 14; 142 | } else { 143 | position = 15; 144 | } 145 | } 146 | 147 | // If we have detected a new small value, insert it at the correct position 148 | // and shift larger values up. 149 | if (position > -1) { 150 | for (i = 15; i > position; i--) { 151 | smallest_values[i] = smallest_values[i - 1]; 152 | age[i] = age[i - 1]; 153 | } 154 | smallest_values[position] = feature_value; 155 | age[position] = 1; 156 | } 157 | 158 | // Get |current_median|. 159 | if (self->frame_counter > 2) { 160 | current_median = smallest_values[2]; 161 | } else if (self->frame_counter > 0) { 162 | current_median = smallest_values[0]; 163 | } 164 | 165 | // Smooth the median value. 
166 | if (self->frame_counter > 0) { 167 | if (current_median < self->mean_value[channel]) { 168 | alpha = kSmoothingDown; // 0.2 in Q15. 169 | } else { 170 | alpha = kSmoothingUp; // 0.99 in Q15. 171 | } 172 | } 173 | tmp32 = (alpha + 1) * self->mean_value[channel]; 174 | tmp32 += (WEBRTC_SPL_WORD16_MAX - alpha) * current_median; 175 | tmp32 += 16384; 176 | self->mean_value[channel] = (int16_t) (tmp32 >> 15); 177 | 178 | return self->mean_value[channel]; 179 | } 180 | -------------------------------------------------------------------------------- /webrtc/common_audio/vad/vad_sp.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | // This file includes specific signal processing tools used in vad_core.c. 13 | 14 | #ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_SP_H_ 15 | #define WEBRTC_COMMON_AUDIO_VAD_VAD_SP_H_ 16 | 17 | #include "webrtc/common_audio/vad/vad_core.h" 18 | #include "webrtc/typedefs.h" 19 | 20 | // Downsamples the signal by a factor 2, eg. 32->16 or 16->8. 21 | // 22 | // Inputs: 23 | // - signal_in : Input signal. 24 | // - in_length : Length of input signal in samples. 25 | // 26 | // Input & Output: 27 | // - filter_state : Current filter states of the two all-pass filters. The 28 | // |filter_state| is updated after all samples have been 29 | // processed. 30 | // 31 | // Output: 32 | // - signal_out : Downsampled signal (of length |in_length| / 2). 
33 | void WebRtcVad_Downsampling(const int16_t* signal_in, 34 | int16_t* signal_out, 35 | int32_t* filter_state, 36 | int in_length); 37 | 38 | // Updates and returns the smoothed feature minimum. As minimum we use the 39 | // median of the five smallest feature values in a 100 frames long window. 40 | // As long as |handle->frame_counter| is zero, that is, we haven't received any 41 | // "valid" data, FindMinimum() outputs the default value of 1600. 42 | // 43 | // Inputs: 44 | // - feature_value : New feature value to update with. 45 | // - channel : Channel number. 46 | // 47 | // Input & Output: 48 | // - handle : State information of the VAD. 49 | // 50 | // Returns: 51 | // : Smoothed minimum value for a moving window. 52 | int16_t WebRtcVad_FindMinimum(VadInstT* handle, 53 | int16_t feature_value, 54 | int channel); 55 | 56 | #endif // WEBRTC_COMMON_AUDIO_VAD_VAD_SP_H_ 57 | -------------------------------------------------------------------------------- /webrtc/common_audio/vad/webrtc_vad.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #include "webrtc/common_audio/vad/include/webrtc_vad.h" 12 | 13 | #include 14 | #include 15 | 16 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 17 | #include "webrtc/common_audio/vad/vad_core.h" 18 | #include "webrtc/typedefs.h" 19 | 20 | static const int kInitCheck = 42; 21 | static const int kValidRates[] = { 8000, 16000, 32000, 48000 }; 22 | static const size_t kRatesSize = sizeof(kValidRates) / sizeof(*kValidRates); 23 | static const int kMaxFrameLengthMs = 30; 24 | 25 | int WebRtcVad_Create(VadInst** handle) { 26 | VadInstT* self = NULL; 27 | 28 | if (handle == NULL) { 29 | return -1; 30 | } 31 | 32 | *handle = NULL; 33 | self = (VadInstT*) malloc(sizeof(VadInstT)); 34 | *handle = (VadInst*) self; 35 | 36 | if (self == NULL) { 37 | return -1; 38 | } 39 | 40 | WebRtcSpl_Init(); 41 | 42 | self->init_flag = 0; 43 | 44 | return 0; 45 | } 46 | 47 | void WebRtcVad_Free(VadInst* handle) { 48 | free(handle); 49 | } 50 | 51 | // TODO(bjornv): Move WebRtcVad_InitCore() code here. 52 | int WebRtcVad_Init(VadInst* handle) { 53 | // Initialize the core VAD component. 54 | return WebRtcVad_InitCore((VadInstT*) handle); 55 | } 56 | 57 | // TODO(bjornv): Move WebRtcVad_set_mode_core() code here. 
58 | int WebRtcVad_set_mode(VadInst* handle, int mode) { 59 | VadInstT* self = (VadInstT*) handle; 60 | 61 | if (handle == NULL) { 62 | return -1; 63 | } 64 | if (self->init_flag != kInitCheck) { 65 | return -1; 66 | } 67 | 68 | return WebRtcVad_set_mode_core(self, mode); 69 | } 70 | 71 | int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame, 72 | int frame_length) { 73 | int vad = -1; 74 | VadInstT* self = (VadInstT*) handle; 75 | 76 | if (handle == NULL) { 77 | return -1; 78 | } 79 | 80 | if (self->init_flag != kInitCheck) { 81 | return -1; 82 | } 83 | if (audio_frame == NULL) { 84 | return -1; 85 | } 86 | if (WebRtcVad_ValidRateAndFrameLength(fs, frame_length) != 0) { 87 | return -1; 88 | } 89 | 90 | if (fs == 48000) { 91 | vad = WebRtcVad_CalcVad48khz(self, audio_frame, frame_length); 92 | } else if (fs == 32000) { 93 | vad = WebRtcVad_CalcVad32khz(self, audio_frame, frame_length); 94 | } else if (fs == 16000) { 95 | vad = WebRtcVad_CalcVad16khz(self, audio_frame, frame_length); 96 | } else if (fs == 8000) { 97 | vad = WebRtcVad_CalcVad8khz(self, audio_frame, frame_length); 98 | } 99 | 100 | if (vad > 0) { 101 | vad = 1; 102 | } 103 | return vad; 104 | } 105 | 106 | int WebRtcVad_ValidRateAndFrameLength(int rate, int frame_length) { 107 | int return_value = -1; 108 | size_t i; 109 | int valid_length_ms; 110 | int valid_length; 111 | 112 | // We only allow 10, 20 or 30 ms frames. Loop through valid frame rates and 113 | // see if we have a matching pair. 
114 | for (i = 0; i < kRatesSize; i++) { 115 | if (kValidRates[i] == rate) { 116 | for (valid_length_ms = 10; valid_length_ms <= kMaxFrameLengthMs; 117 | valid_length_ms += 10) { 118 | valid_length = (kValidRates[i] / 1000 * valid_length_ms); 119 | if (frame_length == valid_length) { 120 | return_value = 0; 121 | break; 122 | } 123 | } 124 | break; 125 | } 126 | } 127 | 128 | return return_value; 129 | } 130 | -------------------------------------------------------------------------------- /webrtc/system_wrappers/interface/cpu_features_wrapper.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_CPU_FEATURES_WRAPPER_H_ 12 | #define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_CPU_FEATURES_WRAPPER_H_ 13 | 14 | #if defined(__cplusplus) || defined(c_plusplus) 15 | extern "C" { 16 | #endif 17 | 18 | #include "webrtc/typedefs.h" 19 | 20 | // List of features in x86. 21 | typedef enum { 22 | kSSE2, 23 | kSSE3 24 | } CPUFeature; 25 | 26 | // List of features in ARM. 27 | enum { 28 | kCPUFeatureARMv7 = (1 << 0), 29 | kCPUFeatureVFPv3 = (1 << 1), 30 | kCPUFeatureNEON = (1 << 2), 31 | kCPUFeatureLDREXSTREX = (1 << 3) 32 | }; 33 | 34 | typedef int (*WebRtc_CPUInfo)(CPUFeature feature); 35 | 36 | // Returns true if the CPU supports the feature. 37 | extern WebRtc_CPUInfo WebRtc_GetCPUInfo; 38 | 39 | // No CPU feature is available => straight C path. 40 | extern WebRtc_CPUInfo WebRtc_GetCPUInfoNoASM; 41 | 42 | // Return the features in an ARM device. 
// It detects the features in the hardware platform, and returns supported
// values in the above enum definition as a bitmask.
extern uint64_t WebRtc_GetCPUFeaturesARM(void);

#if defined(__cplusplus) || defined(c_plusplus)
}  // extern "C"
#endif

#endif  // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_CPU_FEATURES_WRAPPER_H_
--------------------------------------------------------------------------------
/webrtc/typedefs.h:
--------------------------------------------------------------------------------
/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

// This file contains platform-specific typedefs and defines.
// Much of it is derived from Chromium's build/build_config.h.

#ifndef WEBRTC_TYPEDEFS_H_
#define WEBRTC_TYPEDEFS_H_

// Processor architecture detection.
For more info on what's defined, see: 18 | // http://msdn.microsoft.com/en-us/library/b0084kay.aspx 19 | // http://www.agner.org/optimize/calling_conventions.pdf 20 | // or with gcc, run: "echo | gcc -E -dM -" 21 | #if defined(_M_X64) || defined(__x86_64__) 22 | #define WEBRTC_ARCH_X86_FAMILY 23 | #define WEBRTC_ARCH_X86_64 24 | #define WEBRTC_ARCH_64_BITS 25 | #define WEBRTC_ARCH_LITTLE_ENDIAN 26 | #elif defined(__aarch64__) 27 | #define WEBRTC_ARCH_64_BITS 28 | #define WEBRTC_ARCH_LITTLE_ENDIAN 29 | #elif defined(_M_IX86) || defined(__i386__) 30 | #define WEBRTC_ARCH_X86_FAMILY 31 | #define WEBRTC_ARCH_X86 32 | #define WEBRTC_ARCH_32_BITS 33 | #define WEBRTC_ARCH_LITTLE_ENDIAN 34 | #elif defined(__ARMEL__) 35 | // TODO(ajm): We'd prefer to control platform defines here, but this is 36 | // currently provided by the Android makefiles. Commented to avoid duplicate 37 | // definition warnings. 38 | //#define WEBRTC_ARCH_ARM 39 | // TODO(ajm): Chromium uses the following two defines. Should we switch? 40 | //#define WEBRTC_ARCH_ARM_FAMILY 41 | //#define WEBRTC_ARCH_ARMEL 42 | #define WEBRTC_ARCH_32_BITS 43 | #define WEBRTC_ARCH_LITTLE_ENDIAN 44 | #elif defined(__MIPSEL__) 45 | #define WEBRTC_ARCH_32_BITS 46 | #define WEBRTC_ARCH_LITTLE_ENDIAN 47 | #elif defined(__pnacl__) 48 | #define WEBRTC_ARCH_32_BITS 49 | #define WEBRTC_ARCH_LITTLE_ENDIAN 50 | #else 51 | #error Please add support for your architecture in typedefs.h 52 | #endif 53 | 54 | #if !(defined(WEBRTC_ARCH_LITTLE_ENDIAN) ^ defined(WEBRTC_ARCH_BIG_ENDIAN)) 55 | #error Define either WEBRTC_ARCH_LITTLE_ENDIAN or WEBRTC_ARCH_BIG_ENDIAN 56 | #endif 57 | 58 | #if (defined(WEBRTC_ARCH_X86_FAMILY) && !defined(__SSE2__)) || \ 59 | (defined(WEBRTC_ARCH_ARM_V7) && !defined(WEBRTC_ARCH_ARM_NEON)) 60 | #define WEBRTC_CPU_DETECTION 61 | #endif 62 | 63 | #if !defined(_MSC_VER) 64 | #include 65 | #else 66 | // Define C99 equivalent types, since pre-2010 MSVC doesn't provide stdint.h. 
// Pre-2010 MSVC lacks <stdint.h>; spell out the C99 fixed-width types,
// using MSVC's native __int64 for the 64-bit variants.
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
typedef __int64 int64_t;
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;
#endif

// Borrowed from Chromium's base/compiler_specific.h.
// Annotate a virtual method indicating it must be overriding a virtual
// method in the parent class.
// Use like:
// virtual void foo() OVERRIDE;
#if defined(_MSC_VER)
#define OVERRIDE override
#elif defined(__clang__)
// Clang defaults to C++03 and warns about using override. Squelch that.
// Intentionally no push/pop here so all users of OVERRIDE ignore the warning
// too. This is like passing -Wno-c++11-extensions, except that GCC won't die
// (because it won't see this pragma).
#pragma clang diagnostic ignored "-Wc++11-extensions"
#define OVERRIDE override
#elif defined(__GNUC__) && __cplusplus >= 201103 && \
    (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40700
// GCC 4.7 supports explicit virtual overrides when C++11 support is enabled.
#define OVERRIDE override
#else
// Older compilers: OVERRIDE expands to nothing (no override checking).
#define OVERRIDE
#endif

// Annotate a function indicating the caller must examine the return value.
// Use like:
// int foo() WARN_UNUSED_RESULT;
// TODO(ajm): Hack to avoid multiple definitions until the base/ of webrtc and
// libjingle are merged.
#if !defined(WARN_UNUSED_RESULT)
#if defined(__GNUC__)
#define WARN_UNUSED_RESULT __attribute__((warn_unused_result))
#else
// Non-GCC compilers: the annotation expands to nothing.
#define WARN_UNUSED_RESULT
#endif
#endif  // WARN_UNUSED_RESULT

// Put after a variable that might not be used, to prevent compiler warnings:
//   int result ATTRIBUTE_UNUSED = DoSomething();
//   assert(result == 17);
#ifndef ATTRIBUTE_UNUSED
#if defined(__GNUC__) || defined(__clang__)
#define ATTRIBUTE_UNUSED __attribute__((unused))
#else
#define ATTRIBUTE_UNUSED
#endif
#endif

// Macro to be used for switch-case fallthrough (required for enabling
// -Wimplicit-fallthrough warning on Clang).
#ifndef FALLTHROUGH
#if defined(__clang__)
#define FALLTHROUGH() [[clang::fallthrough]]
#else
// Other compilers: a harmless empty statement.
#define FALLTHROUGH() do { } while (0)
#endif
#endif

// Annotate a function that will not return control flow to the caller.
#if defined(_MSC_VER)
#define NO_RETURN __declspec(noreturn)
#elif defined(__GNUC__)
#define NO_RETURN __attribute__((noreturn))
#else
#define NO_RETURN
#endif

#endif  // WEBRTC_TYPEDEFS_H_
--------------------------------------------------------------------------------