├── VoiceActiveCheck.cpp ├── VoiceActiveCheck.h ├── WebrtcNS.cpp ├── WebrtcNS.h ├── readme.txt └── webrtc_vad ├── LICENSE ├── core ├── include │ ├── vad.h │ └── webrtc_vad.h ├── vad.cc ├── vad_core.c ├── vad_core.h ├── vad_filterbank.c ├── vad_filterbank.h ├── vad_gmm.c ├── vad_gmm.h ├── vad_sp.c ├── vad_sp.h └── webrtc_vad.c ├── ns ├── defines.h ├── noise_suppression.c ├── noise_suppression.h ├── noise_suppression_x.c ├── noise_suppression_x.h ├── ns_core.c ├── ns_core.h ├── nsx_core.c ├── nsx_core.h ├── nsx_core_c.c ├── nsx_defines.h └── windows_private.h ├── ns_math ├── complex_bit_reverse.c ├── complex_fft.c ├── complex_fft_tables.h ├── copy_set_operations.c ├── fft4g.c ├── fft4g.h ├── real_fft.c ├── real_fft.h └── spl_sqrt_floor.c └── spl ├── cross_correlation.c ├── division_operations.c ├── downsample_fast.c ├── energy.c ├── get_scaling_square.c ├── min_max_operations.c ├── resample.c ├── resample_48khz.c ├── resample_by_2.c ├── resample_by_2_internal.c ├── resample_by_2_internal.h ├── resample_fractional.c ├── signal_processing_library.h ├── spl_init.c ├── spl_inl.c ├── spl_inl.h └── vector_scaling_operations.c /VoiceActiveCheck.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // VoiceActiveCheck.cpp 3 | // VADTEST 4 | // 5 | // Created by zhangyu on 2017/10/11. 6 | // Copyright © 2017年 Michong. All rights reserved. 
7 | // 8 | 9 | #include "VoiceActiveCheck.h" 10 | #include "webrtc_vad.h" 11 | 12 | namespace webrtc { 13 | class VoiceActiveCheck::Vad { 14 | public: 15 | Vad() { 16 | reset(); 17 | } 18 | ~Vad() { 19 | if (state_) 20 | WebRtcVad_Free(state_); 21 | } 22 | void reset() { 23 | if (state_) 24 | WebRtcVad_Free(state_); 25 | state_ = WebRtcVad_Create(); 26 | WebRtcVad_Init(state_); 27 | } 28 | VadInst* state() { return state_; } 29 | private: 30 | VadInst* state_ = nullptr; 31 | }; 32 | 33 | VoiceActiveCheck::VoiceActiveCheck(int sample_rate_hz) { 34 | sample_rate_hz_ = sample_rate_hz; 35 | vad_ = new VoiceActiveCheck::Vad(); 36 | frame_size_samples_ = 37 | static_cast(frame_size_ms_ * sample_rate_hz_) / 1000; 38 | //preset & reset 39 | reset(); 40 | } 41 | 42 | VoiceActiveCheck::~VoiceActiveCheck() { 43 | if (vad_) { 44 | delete vad_; 45 | vad_ = nullptr; 46 | } 47 | } 48 | 49 | bool VoiceActiveCheck:: isActiveVoice(int16_t *buf) { 50 | int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_, 51 | buf, 52 | frame_size_samples_); 53 | return vad_ret != 0; 54 | } 55 | 56 | void VoiceActiveCheck::set_likelihood(VoiceActiveCheck::Likelihood likelihood) { 57 | if (likelihood_ == likelihood) { 58 | return; 59 | } 60 | likelihood_ = likelihood; 61 | 62 | int mode = 2; 63 | switch (likelihood) { 64 | case VoiceActiveCheck::kLowestLikelihood: 65 | mode = 4; 66 | break; 67 | case VoiceActiveCheck::kVeryLowLikelihood: 68 | mode = 3; 69 | break; 70 | case VoiceActiveCheck::kLowLikelihood: 71 | mode = 2; 72 | break; 73 | case VoiceActiveCheck::kModerateLikelihood: 74 | mode = 1; 75 | break; 76 | case VoiceActiveCheck::kHighLikelihood: 77 | mode = 0; 78 | break; 79 | default: 80 | break; 81 | } 82 | WebRtcVad_set_mode(vad_->state(), mode); 83 | } 84 | 85 | void VoiceActiveCheck::reset() { 86 | vad_->reset(); 87 | Likelihood hood = likelihood_; 88 | likelihood_ = (Likelihood)1001; 89 | set_likelihood(hood); 90 | } 91 | } 92 | 93 | 
-------------------------------------------------------------------------------- /VoiceActiveCheck.h: -------------------------------------------------------------------------------- 1 | // 2 | // VoiceActiveCheck.hpp 3 | // VADTEST 4 | // 5 | // Created by zhangyu on 2017/10/11. 6 | // Copyright © 2017年 Michong. All rights reserved. 7 | // 8 | 9 | #ifndef VoiceActiveCheck_hpp 10 | #define VoiceActiveCheck_hpp 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | namespace webrtc { 17 | class VoiceActiveCheck { 18 | public: 19 | VoiceActiveCheck(int sample_rate_hz); 20 | ~VoiceActiveCheck(); 21 | 22 | bool isActiveVoice(int16_t *buf); 23 | 24 | enum Likelihood { 25 | kLowestLikelihood, 26 | kVeryLowLikelihood, 27 | kLowLikelihood, 28 | kModerateLikelihood, 29 | kHighLikelihood 30 | }; 31 | void set_likelihood(Likelihood likelihood); 32 | Likelihood likelihood(); 33 | 34 | void reset(); 35 | private: 36 | class Vad; 37 | int frame_size_ms_ = 10; 38 | size_t frame_size_samples_ = 0; 39 | int sample_rate_hz_ = 0; 40 | Vad *vad_ = nullptr; 41 | 42 | Likelihood likelihood_ = kLowestLikelihood; 43 | }; 44 | } 45 | 46 | #endif /* VoiceActiveCheck_hpp */ 47 | -------------------------------------------------------------------------------- /WebrtcNS.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // WebrtcNS.cpp 3 | // VADTEST 4 | // 5 | // Created by zhangyu on 2017/10/23. 6 | // Copyright © 2017年 Michong. All rights reserved. 
7 | // 8 | 9 | #include "WebrtcNS.h" 10 | #include "noise_suppression_x.h" 11 | 12 | namespace webrtc { 13 | class WebrtcNS::Suppressor { 14 | public: 15 | explicit Suppressor(int sample_rate_hz) { 16 | state_ = WebRtcNsx_Create(); 17 | WebRtcNsx_Init(state_, sample_rate_hz); 18 | } 19 | ~Suppressor() { 20 | if (state_ != nullptr) { 21 | WebRtcNsx_Free(state_); 22 | state_ = nullptr; 23 | } 24 | } 25 | void reset(int sample_rate_hz) { 26 | if (state_) 27 | WebRtcNsx_Free(state_); 28 | state_ = WebRtcNsx_Create(); 29 | WebRtcNsx_Init(state_, sample_rate_hz); 30 | } 31 | NsxHandle* state() { return state_; } 32 | private: 33 | NsxHandle* state_ = nullptr; 34 | }; 35 | 36 | WebrtcNS::WebrtcNS(int sampleRateHz) { 37 | sample_rate_hz_ = sampleRateHz; 38 | suppressors_ = new WebrtcNS::Suppressor(sample_rate_hz_); 39 | frame_size_samples_ = 40 | static_cast(10 * sample_rate_hz_) / 1000; 41 | reset(); 42 | } 43 | WebrtcNS::~WebrtcNS() { 44 | delete suppressors_; 45 | suppressors_ = nullptr; 46 | } 47 | 48 | void WebrtcNS::processAudio(short **input, short **output, int sampleCount) { 49 | // input and output signals should always be 10ms (80 or 160 samples) 50 | if (sampleCount >= frame_size_samples_) { 51 | WebRtcNsx_Process(suppressors_->state(), input, frame_size_samples_,output); 52 | } 53 | } 54 | 55 | int *WebrtcNS::noise_estimate(int *length, int *estimateValue) 56 | { 57 | const uint32_t *r_value = WebRtcNsx_noise_estimate(suppressors_->state(), estimateValue); 58 | int tlength = (int)WebRtcNsx_num_freq(); 59 | *length = tlength; 60 | return (int *)r_value; 61 | } 62 | 63 | // mode : 0: Mild, 1: Medium , 2: Aggressive 64 | void WebrtcNS::setLevel(WebrtcNS::NoiseSuppression level) { 65 | if (level_ == level) { 66 | return; 67 | } 68 | int policy = 1; 69 | switch (level) { 70 | case NoiseSuppression::kLow: 71 | policy = 0; 72 | break; 73 | case NoiseSuppression::kModerate: 74 | policy = 1; 75 | break; 76 | case NoiseSuppression::kHigh: 77 | policy = 2; 78 | break; 79 
| case NoiseSuppression::kVeryHigh: 80 | policy = 3; 81 | break; 82 | default: 83 | break; 84 | } 85 | level_ = level; 86 | WebRtcNsx_set_policy(suppressors_->state(), level_); 87 | } 88 | 89 | void WebrtcNS::reset() { 90 | suppressors_->reset(sample_rate_hz_); 91 | NoiseSuppression hood = level_; 92 | level_ = (NoiseSuppression)1001; 93 | setLevel(hood); 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /WebrtcNS.h: -------------------------------------------------------------------------------- 1 | // 2 | // WebrtcNS.hpp 3 | // VADTEST 4 | // 5 | // Created by zhangyu on 2017/10/23. 6 | // Copyright © 2017年 Michong. All rights reserved. 7 | // 8 | 9 | #ifndef WebrtcNS_hpp 10 | #define WebrtcNS_hpp 11 | 12 | #include 13 | 14 | namespace webrtc { 15 | class WebrtcNS { 16 | public: 17 | WebrtcNS(int sampleRateHz); 18 | ~WebrtcNS(); 19 | 20 | void processAudio(short **input, short **output, int sampleCount); 21 | 22 | //return sub_estimate 23 | int *noise_estimate(int *length, int *estimateValue); 24 | 25 | enum NoiseSuppression { 26 | kLow, 27 | kModerate, 28 | kHigh, 29 | kVeryHigh 30 | }; 31 | 32 | // mode : 0: Mild, 1: Medium , 2: Aggressive 33 | void setLevel(WebrtcNS::NoiseSuppression level); 34 | 35 | void reset(); 36 | 37 | private: 38 | 39 | class Suppressor; 40 | int sample_rate_hz_ = 0; 41 | size_t frame_size_samples_ = 0; 42 | Suppressor *suppressors_ = nullptr; 43 | NoiseSuppression level_ = kModerate; 44 | }; 45 | } 46 | 47 | 48 | #endif /* WebrtcNS_hpp */ 49 | -------------------------------------------------------------------------------- /readme.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | v0.0.1 4 | 用于发声检测,新增参数Likelihood:kLowestLikelihood 5 | 6 | use: 7 | m_ivad = new webrtc::VoiceActiveCheck(48000); 8 | int valid = m_ivad->isActiveVoice(m_ibuffer); 9 | -------------------------------------------------------------------------------- 
/webrtc_vad/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011, The WebRTC project authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in 12 | the documentation and/or other materials provided with the 13 | distribution. 14 | 15 | * Neither the name of Google nor the names of its contributors may 16 | be used to endorse or promote products derived from this software 17 | without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /webrtc_vad/core/include/vad.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 
3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef COMMON_AUDIO_VAD_INCLUDE_VAD_H_ 12 | #define COMMON_AUDIO_VAD_INCLUDE_VAD_H_ 13 | 14 | #include 15 | 16 | #include "webrtc_vad.h" 17 | #include 18 | 19 | namespace webrtc { 20 | 21 | class Vad { 22 | public: 23 | enum Aggressiveness { 24 | kVadNormal = 0, 25 | kVadLowBitrate = 1, 26 | kVadAggressive = 2, 27 | kVadVeryAggressive = 3 28 | }; 29 | 30 | enum Activity { kPassive = 0, kActive = 1, kError = -1 }; 31 | 32 | virtual ~Vad() = default; 33 | 34 | // Calculates a VAD decision for the given audio frame. Valid sample rates 35 | // are 8000, 16000, and 32000 Hz; the number of samples must be such that the 36 | // frame is 10, 20, or 30 ms long. 37 | virtual Activity VoiceActivity(const int16_t* audio, 38 | size_t num_samples, 39 | int sample_rate_hz) = 0; 40 | 41 | // Resets VAD state. 42 | virtual void Reset() = 0; 43 | }; 44 | 45 | // Returns a Vad instance that's implemented on top of WebRtcVad. 46 | //std::unique_ptr CreateVad(Vad::Aggressiveness aggressiveness); 47 | 48 | } // namespace webrtc 49 | 50 | #endif // COMMON_AUDIO_VAD_INCLUDE_VAD_H_ 51 | -------------------------------------------------------------------------------- /webrtc_vad/core/include/webrtc_vad.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. 
All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This header file includes the VAD API calls. Specific function calls are given below. 14 | */ 15 | 16 | #ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT 17 | #define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ 18 | 19 | #include 20 | 21 | #include 22 | 23 | typedef struct WebRtcVadInst VadInst; 24 | 25 | #ifdef __cplusplus 26 | extern "C" { 27 | #endif 28 | 29 | // Creates an instance to the VAD structure. 30 | VadInst* WebRtcVad_Create(void); 31 | 32 | // Frees the dynamic memory of a specified VAD instance. 33 | // 34 | // - handle [i] : Pointer to VAD instance that should be freed. 35 | void WebRtcVad_Free(VadInst* handle); 36 | 37 | // Initializes a VAD instance. 38 | // 39 | // - handle [i/o] : Instance that should be initialized. 40 | // 41 | // returns : 0 - (OK), 42 | // -1 - (null pointer or Default mode could not be set). 43 | int WebRtcVad_Init(VadInst* handle); 44 | 45 | // Sets the VAD operating mode. A more aggressive (higher mode) VAD is more 46 | // restrictive in reporting speech. Put in other words the probability of being 47 | // speech when the VAD returns 1 is increased with increasing mode. As a 48 | // consequence also the missed detection rate goes up. 49 | // 50 | // - handle [i/o] : VAD instance. 51 | // - mode [i] : Aggressiveness mode (0, 1, 2, or 3). 52 | // 53 | // returns : 0 - (OK), 54 | // -1 - (null pointer, mode could not be set or the VAD instance 55 | // has not been initialized). 56 | int WebRtcVad_set_mode(VadInst* handle, int mode); 57 | 58 | // Calculates a VAD decision for the |audio_frame|. For valid sampling rates 59 | // frame lengths, see the description of WebRtcVad_ValidRatesAndFrameLengths(). 60 | // 61 | // - handle [i/o] : VAD Instance. Needs to be initialized by 62 | // WebRtcVad_Init() before call. 
63 | // - fs [i] : Sampling frequency (Hz): 8000, 16000, or 32000 64 | // - audio_frame [i] : Audio frame buffer. 65 | // - frame_length [i] : Length of audio frame buffer in number of samples. 66 | // 67 | // returns : 1 - (Active Voice), 68 | // 0 - (Non-active Voice), 69 | // -1 - (Error) 70 | int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame, 71 | size_t frame_length); 72 | 73 | // Checks for valid combinations of |rate| and |frame_length|. We support 10, 74 | // 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz. 75 | // 76 | // - rate [i] : Sampling frequency (Hz). 77 | // - frame_length [i] : Speech frame buffer length in number of samples. 78 | // 79 | // returns : 0 - (valid combination), -1 - (invalid combination) 80 | int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length); 81 | 82 | #ifdef __cplusplus 83 | } 84 | #endif 85 | 86 | #endif // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT 87 | -------------------------------------------------------------------------------- /webrtc_vad/core/vad.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #include "vad.h" 12 | 13 | #include 14 | 15 | 16 | namespace webrtc { 17 | 18 | namespace { 19 | 20 | class VadImpl final : public Vad { 21 | public: 22 | explicit VadImpl(Aggressiveness aggressiveness) 23 | : handle_(nullptr), aggressiveness_(aggressiveness) { 24 | Reset(); 25 | } 26 | 27 | ~VadImpl() override { WebRtcVad_Free(handle_); } 28 | 29 | Activity VoiceActivity(const int16_t* audio, 30 | size_t num_samples, 31 | int sample_rate_hz) override { 32 | int ret = WebRtcVad_Process(handle_, sample_rate_hz, audio, num_samples); 33 | switch (ret) { 34 | case 0: 35 | return kPassive; 36 | case 1: 37 | return kActive; 38 | default: 39 | return kError; 40 | } 41 | } 42 | 43 | void Reset() override { 44 | if (handle_) 45 | WebRtcVad_Free(handle_); 46 | handle_ = WebRtcVad_Create(); 47 | } 48 | 49 | private: 50 | VadInst* handle_; 51 | Aggressiveness aggressiveness_; 52 | }; 53 | 54 | } // namespace 55 | 56 | //std::unique_ptr CreateVad(Vad::Aggressiveness aggressiveness) { 57 | // return std::unique_ptr(new VadImpl(aggressiveness)); 58 | //} 59 | 60 | } // namespace webrtc 61 | -------------------------------------------------------------------------------- /webrtc_vad/core/vad_core.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This header file includes the descriptions of the core VAD calls. 
14 | */ 15 | 16 | #ifndef COMMON_AUDIO_VAD_VAD_CORE_H_ 17 | #define COMMON_AUDIO_VAD_VAD_CORE_H_ 18 | 19 | #include "signal_processing_library.h" 20 | #include 21 | 22 | enum { kNumChannels = 6 }; // Number of frequency bands (named channels). 23 | enum { kNumGaussians = 2 }; // Number of Gaussians per channel in the GMM. 24 | enum { kTableSize = kNumChannels * kNumGaussians }; 25 | enum { kMinEnergy = 10 }; // Minimum energy required to trigger audio signal. 26 | 27 | typedef struct VadInstT_ { 28 | int vad; 29 | int32_t downsampling_filter_states[4]; 30 | WebRtcSpl_State48khzTo8khz state_48_to_8; 31 | int16_t noise_means[kTableSize]; 32 | int16_t speech_means[kTableSize]; 33 | int16_t noise_stds[kTableSize]; 34 | int16_t speech_stds[kTableSize]; 35 | // TODO(bjornv): Change to |frame_count|. 36 | int32_t frame_counter; 37 | int16_t over_hang; // Over Hang 38 | int16_t num_of_speech; 39 | // TODO(bjornv): Change to |age_vector|. 40 | int16_t index_vector[16 * kNumChannels]; 41 | int16_t low_value_vector[16 * kNumChannels]; 42 | // TODO(bjornv): Change to |median|. 43 | int16_t mean_value[kNumChannels]; 44 | int16_t upper_state[5]; 45 | int16_t lower_state[5]; 46 | int16_t hp_filter_state[4]; 47 | int16_t over_hang_max_1[3]; 48 | int16_t over_hang_max_2[3]; 49 | int16_t individual[3]; 50 | int16_t total[3]; 51 | 52 | int init_flag; 53 | } VadInstT; 54 | 55 | // Initializes the core VAD component. The default aggressiveness mode is 56 | // controlled by |kDefaultMode| in vad_core.c. 57 | // 58 | // - self [i/o] : Instance that should be initialized 59 | // 60 | // returns : 0 (OK), -1 (null pointer in or if the default mode can't be 61 | // set) 62 | int WebRtcVad_InitCore(VadInstT* self); 63 | 64 | /**************************************************************************** 65 | * WebRtcVad_set_mode_core(...) 
66 | * 67 | * This function changes the VAD settings 68 | * 69 | * Input: 70 | * - inst : VAD instance 71 | * - mode : Aggressiveness degree 72 | * 0 (High quality) - 3 (Highly aggressive) 73 | * 74 | * Output: 75 | * - inst : Changed instance 76 | * 77 | * Return value : 0 - Ok 78 | * -1 - Error 79 | */ 80 | 81 | int WebRtcVad_set_mode_core(VadInstT* self, int mode); 82 | 83 | /**************************************************************************** 84 | * WebRtcVad_CalcVad48khz(...) 85 | * WebRtcVad_CalcVad32khz(...) 86 | * WebRtcVad_CalcVad16khz(...) 87 | * WebRtcVad_CalcVad8khz(...) 88 | * 89 | * Calculate probability for active speech and make VAD decision. 90 | * 91 | * Input: 92 | * - inst : Instance that should be initialized 93 | * - speech_frame : Input speech frame 94 | * - frame_length : Number of input samples 95 | * 96 | * Output: 97 | * - inst : Updated filter states etc. 98 | * 99 | * Return value : VAD decision 100 | * 0 - No active speech 101 | * 1-6 - Active speech 102 | */ 103 | int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame, 104 | size_t frame_length); 105 | int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame, 106 | size_t frame_length); 107 | int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame, 108 | size_t frame_length); 109 | int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame, 110 | size_t frame_length); 111 | 112 | #endif // COMMON_AUDIO_VAD_VAD_CORE_H_ 113 | -------------------------------------------------------------------------------- /webrtc_vad/core/vad_filterbank.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. 
An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "vad_filterbank.h" 12 | 13 | #include "signal_processing_library.h" 14 | 15 | // Constants used in LogOfEnergy(). 16 | static const int16_t kLogConst = 24660; // 160*log10(2) in Q9. 17 | static const int16_t kLogEnergyIntPart = 14336; // 14 in Q10 18 | 19 | // Coefficients used by HighPassFilter, Q14. 20 | static const int16_t kHpZeroCoefs[3] = { 6631, -13262, 6631 }; 21 | static const int16_t kHpPoleCoefs[3] = { 16384, -7756, 5620 }; 22 | 23 | // Allpass filter coefficients, upper and lower, in Q15. 24 | // Upper: 0.64, Lower: 0.17 25 | static const int16_t kAllPassCoefsQ15[2] = { 20972, 5571 }; 26 | 27 | // Adjustment for division with two in SplitFilter. 28 | static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 }; 29 | 30 | // High pass filtering, with a cut-off frequency at 80 Hz, if the |data_in| is 31 | // sampled at 500 Hz. 32 | // 33 | // - data_in [i] : Input audio data sampled at 500 Hz. 34 | // - data_length [i] : Length of input and output data. 35 | // - filter_state [i/o] : State of the filter. 36 | // - data_out [o] : Output audio data in the frequency interval 37 | // 80 - 250 Hz. 
38 | static void HighPassFilter(const int16_t* data_in, size_t data_length, 39 | int16_t* filter_state, int16_t* data_out) { 40 | size_t i; 41 | const int16_t* in_ptr = data_in; 42 | int16_t* out_ptr = data_out; 43 | int32_t tmp32 = 0; 44 | 45 | 46 | // The sum of the absolute values of the impulse response: 47 | // The zero/pole-filter has a max amplification of a single sample of: 1.4546 48 | // Impulse response: 0.4047 -0.6179 -0.0266 0.1993 0.1035 -0.0194 49 | // The all-zero section has a max amplification of a single sample of: 1.6189 50 | // Impulse response: 0.4047 -0.8094 0.4047 0 0 0 51 | // The all-pole section has a max amplification of a single sample of: 1.9931 52 | // Impulse response: 1.0000 0.4734 -0.1189 -0.2187 -0.0627 0.04532 53 | 54 | for (i = 0; i < data_length; i++) { 55 | // All-zero section (filter coefficients in Q14). 56 | tmp32 = kHpZeroCoefs[0] * *in_ptr; 57 | tmp32 += kHpZeroCoefs[1] * filter_state[0]; 58 | tmp32 += kHpZeroCoefs[2] * filter_state[1]; 59 | filter_state[1] = filter_state[0]; 60 | filter_state[0] = *in_ptr++; 61 | 62 | // All-pole section (filter coefficients in Q14). 63 | tmp32 -= kHpPoleCoefs[1] * filter_state[2]; 64 | tmp32 -= kHpPoleCoefs[2] * filter_state[3]; 65 | filter_state[3] = filter_state[2]; 66 | filter_state[2] = (int16_t) (tmp32 >> 14); 67 | *out_ptr++ = filter_state[2]; 68 | } 69 | } 70 | 71 | // All pass filtering of |data_in|, used before splitting the signal into two 72 | // frequency bands (low pass vs high pass). 73 | // Note that |data_in| and |data_out| can NOT correspond to the same address. 74 | // 75 | // - data_in [i] : Input audio signal given in Q0. 76 | // - data_length [i] : Length of input and output data. 77 | // - filter_coefficient [i] : Given in Q15. 78 | // - filter_state [i/o] : State of the filter given in Q(-1). 79 | // - data_out [o] : Output audio signal given in Q(-1). 
80 | static void AllPassFilter(const int16_t* data_in, size_t data_length, 81 | int16_t filter_coefficient, int16_t* filter_state, 82 | int16_t* data_out) { 83 | // The filter can only cause overflow (in the w16 output variable) 84 | // if more than 4 consecutive input numbers are of maximum value and 85 | // has the the same sign as the impulse responses first taps. 86 | // First 6 taps of the impulse response: 87 | // 0.6399 0.5905 -0.3779 0.2418 -0.1547 0.0990 88 | 89 | size_t i; 90 | int16_t tmp16 = 0; 91 | int32_t tmp32 = 0; 92 | int32_t state32 = ((int32_t) (*filter_state) << 16); // Q15 93 | 94 | for (i = 0; i < data_length; i++) { 95 | tmp32 = state32 + filter_coefficient * *data_in; 96 | tmp16 = (int16_t) (tmp32 >> 16); // Q(-1) 97 | *data_out++ = tmp16; 98 | state32 = (*data_in << 14) - filter_coefficient * tmp16; // Q14 99 | state32 <<= 1; // Q15. 100 | data_in += 2; 101 | } 102 | 103 | *filter_state = (int16_t) (state32 >> 16); // Q(-1) 104 | } 105 | 106 | // Splits |data_in| into |hp_data_out| and |lp_data_out| corresponding to 107 | // an upper (high pass) part and a lower (low pass) part respectively. 108 | // 109 | // - data_in [i] : Input audio data to be split into two frequency bands. 110 | // - data_length [i] : Length of |data_in|. 111 | // - upper_state [i/o] : State of the upper filter, given in Q(-1). 112 | // - lower_state [i/o] : State of the lower filter, given in Q(-1). 113 | // - hp_data_out [o] : Output audio data of the upper half of the spectrum. 114 | // The length is |data_length| / 2. 115 | // - lp_data_out [o] : Output audio data of the lower half of the spectrum. 116 | // The length is |data_length| / 2. 117 | static void SplitFilter(const int16_t* data_in, size_t data_length, 118 | int16_t* upper_state, int16_t* lower_state, 119 | int16_t* hp_data_out, int16_t* lp_data_out) { 120 | size_t i; 121 | size_t half_length = data_length >> 1; // Downsampling by 2. 122 | int16_t tmp_out; 123 | 124 | // All-pass filtering upper branch. 
125 | AllPassFilter(&data_in[0], half_length, kAllPassCoefsQ15[0], upper_state, 126 | hp_data_out); 127 | 128 | // All-pass filtering lower branch. 129 | AllPassFilter(&data_in[1], half_length, kAllPassCoefsQ15[1], lower_state, 130 | lp_data_out); 131 | 132 | // Make LP and HP signals. 133 | for (i = 0; i < half_length; i++) { 134 | tmp_out = *hp_data_out; 135 | *hp_data_out++ -= *lp_data_out; 136 | *lp_data_out++ += tmp_out; 137 | } 138 | } 139 | 140 | // Calculates the energy of |data_in| in dB, and also updates an overall 141 | // |total_energy| if necessary. 142 | // 143 | // - data_in [i] : Input audio data for energy calculation. 144 | // - data_length [i] : Length of input data. 145 | // - offset [i] : Offset value added to |log_energy|. 146 | // - total_energy [i/o] : An external energy updated with the energy of 147 | // |data_in|. 148 | // NOTE: |total_energy| is only updated if 149 | // |total_energy| <= |kMinEnergy|. 150 | // - log_energy [o] : 10 * log10("energy of |data_in|") given in Q4. 151 | static void LogOfEnergy(const int16_t* data_in, size_t data_length, 152 | int16_t offset, int16_t* total_energy, 153 | int16_t* log_energy) { 154 | // |tot_rshifts| accumulates the number of right shifts performed on |energy|. 155 | int tot_rshifts = 0; 156 | // The |energy| will be normalized to 15 bits. We use unsigned integer because 157 | // we eventually will mask out the fractional part. 158 | uint32_t energy = 0; 159 | 160 | energy = (uint32_t) WebRtcSpl_Energy((int16_t*) data_in, data_length, 161 | &tot_rshifts); 162 | 163 | if (energy != 0) { 164 | // By construction, normalizing to 15 bits is equivalent with 17 leading 165 | // zeros of an unsigned 32 bit value. 166 | int normalizing_rshifts = 17 - WebRtcSpl_NormU32(energy); 167 | // In a 15 bit representation the leading bit is 2^14. log2(2^14) in Q10 is 168 | // (14 << 10), which is what we initialize |log2_energy| with. For a more 169 | // detailed derivations, see below. 
170 | int16_t log2_energy = kLogEnergyIntPart; 171 | 172 | tot_rshifts += normalizing_rshifts; 173 | // Normalize |energy| to 15 bits. 174 | // |tot_rshifts| is now the total number of right shifts performed on 175 | // |energy| after normalization. This means that |energy| is in 176 | // Q(-tot_rshifts). 177 | if (normalizing_rshifts < 0) { 178 | energy <<= -normalizing_rshifts; 179 | } else { 180 | energy >>= normalizing_rshifts; 181 | } 182 | 183 | // Calculate the energy of |data_in| in dB, in Q4. 184 | // 185 | // 10 * log10("true energy") in Q4 = 2^4 * 10 * log10("true energy") = 186 | // 160 * log10(|energy| * 2^|tot_rshifts|) = 187 | // 160 * log10(2) * log2(|energy| * 2^|tot_rshifts|) = 188 | // 160 * log10(2) * (log2(|energy|) + log2(2^|tot_rshifts|)) = 189 | // (160 * log10(2)) * (log2(|energy|) + |tot_rshifts|) = 190 | // |kLogConst| * (|log2_energy| + |tot_rshifts|) 191 | // 192 | // We know by construction that |energy| is normalized to 15 bits. Hence, 193 | // |energy| = 2^14 + frac_Q15, where frac_Q15 is a fractional part in Q15. 194 | // Further, we'd like |log2_energy| in Q10 195 | // log2(|energy|) in Q10 = 2^10 * log2(2^14 + frac_Q15) = 196 | // 2^10 * log2(2^14 * (1 + frac_Q15 * 2^-14)) = 197 | // 2^10 * (14 + log2(1 + frac_Q15 * 2^-14)) ~= 198 | // (14 << 10) + 2^10 * (frac_Q15 * 2^-14) = 199 | // (14 << 10) + (frac_Q15 * 2^-4) = (14 << 10) + (frac_Q15 >> 4) 200 | // 201 | // Note that frac_Q15 = (|energy| & 0x00003FFF) 202 | 203 | // Calculate and add the fractional part to |log2_energy|. 204 | log2_energy += (int16_t) ((energy & 0x00003FFF) >> 4); 205 | 206 | // |kLogConst| is in Q9, |log2_energy| in Q10 and |tot_rshifts| in Q0. 207 | // Note that we in our derivation above have accounted for an output in Q4. 
208 | *log_energy = (int16_t)(((kLogConst * log2_energy) >> 19) + 209 | ((tot_rshifts * kLogConst) >> 9)); 210 | 211 | if (*log_energy < 0) { 212 | *log_energy = 0; 213 | } 214 | } else { 215 | *log_energy = offset; 216 | return; 217 | } 218 | 219 | *log_energy += offset; 220 | 221 | // Update the approximate |total_energy| with the energy of |data_in|, if 222 | // |total_energy| has not exceeded |kMinEnergy|. |total_energy| is used as an 223 | // energy indicator in WebRtcVad_GmmProbability() in vad_core.c. 224 | if (*total_energy <= kMinEnergy) { 225 | if (tot_rshifts >= 0) { 226 | // We know by construction that the |energy| > |kMinEnergy| in Q0, so add 227 | // an arbitrary value such that |total_energy| exceeds |kMinEnergy|. 228 | *total_energy += kMinEnergy + 1; 229 | } else { 230 | // By construction |energy| is represented by 15 bits, hence any number of 231 | // right shifted |energy| will fit in an int16_t. In addition, adding the 232 | // value to |total_energy| is wrap around safe as long as 233 | // |kMinEnergy| < 8192. 234 | *total_energy += (int16_t) (energy >> -tot_rshifts); // Q0. 235 | } 236 | } 237 | } 238 | 239 | int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in, 240 | size_t data_length, int16_t* features) { 241 | int16_t total_energy = 0; 242 | // We expect |data_length| to be 80, 160 or 240 samples, which corresponds to 243 | // 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will 244 | // have at most 120 samples after the first split and at most 60 samples after 245 | // the second split. 246 | int16_t hp_120[120], lp_120[120]; 247 | int16_t hp_60[60], lp_60[60]; 248 | const size_t half_data_length = data_length >> 1; 249 | size_t length = half_data_length; // |data_length| / 2, corresponds to 250 | // bandwidth = 2000 Hz after downsampling. 251 | 252 | // Initialize variables for the first SplitFilter(). 253 | int frequency_band = 0; 254 | const int16_t* in_ptr = data_in; // [0 - 4000] Hz. 
255 | int16_t* hp_out_ptr = hp_120; // [2000 - 4000] Hz. 256 | int16_t* lp_out_ptr = lp_120; // [0 - 2000] Hz. 257 | 258 | // Split at 2000 Hz and downsample. 259 | SplitFilter(in_ptr, data_length, &self->upper_state[frequency_band], 260 | &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); 261 | 262 | // For the upper band (2000 Hz - 4000 Hz) split at 3000 Hz and downsample. 263 | frequency_band = 1; 264 | in_ptr = hp_120; // [2000 - 4000] Hz. 265 | hp_out_ptr = hp_60; // [3000 - 4000] Hz. 266 | lp_out_ptr = lp_60; // [2000 - 3000] Hz. 267 | SplitFilter(in_ptr, length, &self->upper_state[frequency_band], 268 | &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); 269 | 270 | // Energy in 3000 Hz - 4000 Hz. 271 | length >>= 1; // |data_length| / 4 <=> bandwidth = 1000 Hz. 272 | 273 | LogOfEnergy(hp_60, length, kOffsetVector[5], &total_energy, &features[5]); 274 | 275 | // Energy in 2000 Hz - 3000 Hz. 276 | LogOfEnergy(lp_60, length, kOffsetVector[4], &total_energy, &features[4]); 277 | 278 | // For the lower band (0 Hz - 2000 Hz) split at 1000 Hz and downsample. 279 | frequency_band = 2; 280 | in_ptr = lp_120; // [0 - 2000] Hz. 281 | hp_out_ptr = hp_60; // [1000 - 2000] Hz. 282 | lp_out_ptr = lp_60; // [0 - 1000] Hz. 283 | length = half_data_length; // |data_length| / 2 <=> bandwidth = 2000 Hz. 284 | SplitFilter(in_ptr, length, &self->upper_state[frequency_band], 285 | &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); 286 | 287 | // Energy in 1000 Hz - 2000 Hz. 288 | length >>= 1; // |data_length| / 4 <=> bandwidth = 1000 Hz. 289 | LogOfEnergy(hp_60, length, kOffsetVector[3], &total_energy, &features[3]); 290 | 291 | // For the lower band (0 Hz - 1000 Hz) split at 500 Hz and downsample. 292 | frequency_band = 3; 293 | in_ptr = lp_60; // [0 - 1000] Hz. 294 | hp_out_ptr = hp_120; // [500 - 1000] Hz. 295 | lp_out_ptr = lp_120; // [0 - 500] Hz. 
296 | SplitFilter(in_ptr, length, &self->upper_state[frequency_band], 297 | &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); 298 | 299 | // Energy in 500 Hz - 1000 Hz. 300 | length >>= 1; // |data_length| / 8 <=> bandwidth = 500 Hz. 301 | LogOfEnergy(hp_120, length, kOffsetVector[2], &total_energy, &features[2]); 302 | 303 | // For the lower band (0 Hz - 500 Hz) split at 250 Hz and downsample. 304 | frequency_band = 4; 305 | in_ptr = lp_120; // [0 - 500] Hz. 306 | hp_out_ptr = hp_60; // [250 - 500] Hz. 307 | lp_out_ptr = lp_60; // [0 - 250] Hz. 308 | SplitFilter(in_ptr, length, &self->upper_state[frequency_band], 309 | &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); 310 | 311 | // Energy in 250 Hz - 500 Hz. 312 | length >>= 1; // |data_length| / 16 <=> bandwidth = 250 Hz. 313 | LogOfEnergy(hp_60, length, kOffsetVector[1], &total_energy, &features[1]); 314 | 315 | // Remove 0 Hz - 80 Hz, by high pass filtering the lower band. 316 | HighPassFilter(lp_60, length, self->hp_filter_state, hp_120); 317 | 318 | // Energy in 80 Hz - 250 Hz. 319 | LogOfEnergy(hp_120, length, kOffsetVector[0], &total_energy, &features[0]); 320 | 321 | return total_energy; 322 | } 323 | -------------------------------------------------------------------------------- /webrtc_vad/core/vad_filterbank.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | /* 12 | * This file includes feature calculating functionality used in vad_core.c. 
13 | */ 14 | 15 | #ifndef COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ 16 | #define COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ 17 | 18 | #include "vad_core.h" 19 | 20 | // Takes |data_length| samples of |data_in| and calculates the logarithm of the 21 | // energy of each of the |kNumChannels| = 6 frequency bands used by the VAD: 22 | // 80 Hz - 250 Hz 23 | // 250 Hz - 500 Hz 24 | // 500 Hz - 1000 Hz 25 | // 1000 Hz - 2000 Hz 26 | // 2000 Hz - 3000 Hz 27 | // 3000 Hz - 4000 Hz 28 | // 29 | // The values are given in Q4 and written to |features|. Further, an approximate 30 | // overall energy is returned. The return value is used in 31 | // WebRtcVad_GmmProbability() as a signal indicator, hence it is arbitrary above 32 | // the threshold |kMinEnergy|. 33 | // 34 | // - self [i/o] : State information of the VAD. 35 | // - data_in [i] : Input audio data, for feature extraction. 36 | // - data_length [i] : Audio data size, in number of samples. 37 | // - features [o] : 10 * log10(energy in each frequency band), Q4. 38 | // - returns : Total energy of the signal (NOTE! This value is not 39 | // exact. It is only used in a comparison.) 40 | int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in, 41 | size_t data_length, int16_t* features); 42 | 43 | #endif // COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ 44 | -------------------------------------------------------------------------------- /webrtc_vad/core/vad_gmm.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
// Exponents at or above this value (Q10) are treated as giving probability 0.
static const int32_t kCompVar = 22005;
static const int16_t kLog2Exp = 5909;  // log2(exp(1)) in Q12.

// Evaluates the (unnormalized) normal density
//
//   1 / s * exp(-(x - m)^2 / (2 * s^2))
//
// in fixed point and returns it in Q20.
//
// Inputs:
//   - input : x, in Q4.
//   - mean  : m, in Q7.
//   - std   : s, in Q7.
//
// Output:
//   - delta : (x - m) / s^2 in Q11, used by the caller when updating the
//             noise/speech model.
int32_t WebRtcVad_GaussianProbability(int16_t input,
                                      int16_t mean,
                                      int16_t std,
                                      int16_t* delta) {
  // 1 / s in Q10: 131072 is 1.0 in Q17, and adding (|std| >> 1) rounds the
  // quotient instead of truncating it. Q17 / Q7 = Q10.
  const int16_t inv_std = (int16_t) WebRtcSpl_DivW32W16(
      (int32_t) 131072 + (int32_t) (std >> 1), std);

  // 1 / s^2 in Q14: reduce |inv_std| to Q8 first, then (Q8 * Q8) >> 2 = Q14.
  const int16_t inv_std_q8 = (int16_t) (inv_std >> 2);
  const int16_t inv_var = (int16_t) ((inv_std_q8 * inv_std_q8) >> 2);

  // (x - m) in Q7; |input| is lifted from Q4 to Q7 before subtracting.
  const int16_t input_q7 = (int16_t) (input << 3);
  const int16_t deviation = (int16_t) (input_q7 - mean);

  // (x - m) / s^2 in Q11: (Q14 * Q7) >> 10 = Q11.
  const int16_t delta_q11 = (int16_t) ((inv_var * deviation) >> 10);

  // (x - m)^2 / (2 * s^2) in Q10. (Q11 * Q7) >> 8 would give Q10; the extra
  // shift implements the division by two.
  const int32_t exponent = (delta_q11 * deviation) >> 9;

  int16_t exp_value = 0;

  *delta = delta_q11;

  // Only evaluate the exponential when it can yield a non-zero result:
  //   exp(-e) = exp2(-log2(exp(1)) * e).
  if (exponent < kCompVar) {
    // -log2(exp(1)) * e in Q10: (Q12 * Q10) >> 12 = Q10, then negated.
    const int16_t neg_log2 =
        (int16_t) (-(int16_t) ((kLog2Exp * exponent) >> 12));
    // Number of right shifts: the bitwise inverse of |neg_log2| shifted down
    // by 10, plus one.
    int16_t rshifts = (int16_t) (neg_log2 ^ 0xFFFF);
    rshifts >>= 10;
    rshifts += 1;
    // Mantissa: 1.0 in Q10 (0x0400) combined with the low 10 bits of
    // |neg_log2|, then scaled down by |rshifts|.
    exp_value = (int16_t) ((0x0400 | (neg_log2 & 0x03FF)) >> rshifts);
  }

  // (1 / s) * exp(...) in Q20: Q10 * Q10.
  return inv_std * exp_value;
}
29 | // |delta| = (|input| - |mean|) / |std|^2. 30 | // 31 | // Return: 32 | // (probability for |input|) = 33 | // 1 / |std| * exp(-(|input| - |mean|)^2 / (2 * |std|^2)); 34 | int32_t WebRtcVad_GaussianProbability(int16_t input, 35 | int16_t mean, 36 | int16_t std, 37 | int16_t* delta); 38 | 39 | #endif // COMMON_AUDIO_VAD_VAD_GMM_H_ 40 | -------------------------------------------------------------------------------- /webrtc_vad/core/vad_sp.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "vad_sp.h" 12 | 13 | #include "signal_processing_library.h" 14 | #include "vad_core.h" 15 | 16 | // Allpass filter coefficients, upper and lower, in Q13. 17 | // Upper: 0.64, Lower: 0.17. 18 | static const int16_t kAllPassCoefsQ13[2] = { 5243, 1392 }; // Q13. 19 | static const int16_t kSmoothingDown = 6553; // 0.2 in Q15. 20 | static const int16_t kSmoothingUp = 32439; // 0.99 in Q15. 21 | 22 | // TODO(bjornv): Move this function to vad_filterbank.c. 23 | // Downsampling filter based on splitting filter and allpass functions. 24 | void WebRtcVad_Downsampling(const int16_t* signal_in, 25 | int16_t* signal_out, 26 | int32_t* filter_state, 27 | size_t in_length) { 28 | int16_t tmp16_1 = 0, tmp16_2 = 0; 29 | int32_t tmp32_1 = filter_state[0]; 30 | int32_t tmp32_2 = filter_state[1]; 31 | size_t n = 0; 32 | // Downsampling by 2 gives half length. 33 | size_t half_length = (in_length >> 1); 34 | 35 | // Filter coefficients in Q13, filter state in Q0. 
36 | for (n = 0; n < half_length; n++) { 37 | // All-pass filtering upper branch. 38 | tmp16_1 = (int16_t) ((tmp32_1 >> 1) + 39 | ((kAllPassCoefsQ13[0] * *signal_in) >> 14)); 40 | *signal_out = tmp16_1; 41 | tmp32_1 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[0] * tmp16_1) >> 12); 42 | 43 | // All-pass filtering lower branch. 44 | tmp16_2 = (int16_t) ((tmp32_2 >> 1) + 45 | ((kAllPassCoefsQ13[1] * *signal_in) >> 14)); 46 | *signal_out++ += tmp16_2; 47 | tmp32_2 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[1] * tmp16_2) >> 12); 48 | } 49 | // Store the filter states. 50 | filter_state[0] = tmp32_1; 51 | filter_state[1] = tmp32_2; 52 | } 53 | 54 | // Inserts |feature_value| into |low_value_vector|, if it is one of the 16 55 | // smallest values the last 100 frames. Then calculates and returns the median 56 | // of the five smallest values. 57 | int16_t WebRtcVad_FindMinimum(VadInstT* self, 58 | int16_t feature_value, 59 | int channel) { 60 | int i = 0, j = 0; 61 | int position = -1; 62 | // Offset to beginning of the 16 minimum values in memory. 63 | const int offset = (channel << 4); 64 | int16_t current_median = 1600; 65 | int16_t alpha = 0; 66 | int32_t tmp32 = 0; 67 | // Pointer to memory for the 16 minimum values and the age of each value of 68 | // the |channel|. 69 | int16_t* age = &self->index_vector[offset]; 70 | int16_t* smallest_values = &self->low_value_vector[offset]; 71 | 72 | // Each value in |smallest_values| is getting 1 loop older. Update |age|, and 73 | // remove old values. 74 | for (i = 0; i < 16; i++) { 75 | if (age[i] != 100) { 76 | age[i]++; 77 | } else { 78 | // Too old value. Remove from memory and shift larger values downwards. 79 | for (j = i; j < 16; j++) { 80 | smallest_values[j] = smallest_values[j + 1]; 81 | age[j] = age[j + 1]; 82 | } 83 | age[15] = 101; 84 | smallest_values[15] = 10000; 85 | } 86 | } 87 | 88 | // Check if |feature_value| is smaller than any of the values in 89 | // |smallest_values|. 
If so, find the |position| where to insert the new value 90 | // (|feature_value|). 91 | if (feature_value < smallest_values[7]) { 92 | if (feature_value < smallest_values[3]) { 93 | if (feature_value < smallest_values[1]) { 94 | if (feature_value < smallest_values[0]) { 95 | position = 0; 96 | } else { 97 | position = 1; 98 | } 99 | } else if (feature_value < smallest_values[2]) { 100 | position = 2; 101 | } else { 102 | position = 3; 103 | } 104 | } else if (feature_value < smallest_values[5]) { 105 | if (feature_value < smallest_values[4]) { 106 | position = 4; 107 | } else { 108 | position = 5; 109 | } 110 | } else if (feature_value < smallest_values[6]) { 111 | position = 6; 112 | } else { 113 | position = 7; 114 | } 115 | } else if (feature_value < smallest_values[15]) { 116 | if (feature_value < smallest_values[11]) { 117 | if (feature_value < smallest_values[9]) { 118 | if (feature_value < smallest_values[8]) { 119 | position = 8; 120 | } else { 121 | position = 9; 122 | } 123 | } else if (feature_value < smallest_values[10]) { 124 | position = 10; 125 | } else { 126 | position = 11; 127 | } 128 | } else if (feature_value < smallest_values[13]) { 129 | if (feature_value < smallest_values[12]) { 130 | position = 12; 131 | } else { 132 | position = 13; 133 | } 134 | } else if (feature_value < smallest_values[14]) { 135 | position = 14; 136 | } else { 137 | position = 15; 138 | } 139 | } 140 | 141 | // If we have detected a new small value, insert it at the correct position 142 | // and shift larger values up. 143 | if (position > -1) { 144 | for (i = 15; i > position; i--) { 145 | smallest_values[i] = smallest_values[i - 1]; 146 | age[i] = age[i - 1]; 147 | } 148 | smallest_values[position] = feature_value; 149 | age[position] = 1; 150 | } 151 | 152 | // Get |current_median|. 
153 | if (self->frame_counter > 2) { 154 | current_median = smallest_values[2]; 155 | } else if (self->frame_counter > 0) { 156 | current_median = smallest_values[0]; 157 | } 158 | 159 | // Smooth the median value. 160 | if (self->frame_counter > 0) { 161 | if (current_median < self->mean_value[channel]) { 162 | alpha = kSmoothingDown; // 0.2 in Q15. 163 | } else { 164 | alpha = kSmoothingUp; // 0.99 in Q15. 165 | } 166 | } 167 | tmp32 = (alpha + 1) * self->mean_value[channel]; 168 | tmp32 += (WEBRTC_SPL_WORD16_MAX - alpha) * current_median; 169 | tmp32 += 16384; 170 | self->mean_value[channel] = (int16_t) (tmp32 >> 15); 171 | 172 | return self->mean_value[channel]; 173 | } 174 | -------------------------------------------------------------------------------- /webrtc_vad/core/vad_sp.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | // This file includes specific signal processing tools used in vad_core.c. 13 | 14 | #ifndef COMMON_AUDIO_VAD_VAD_SP_H_ 15 | #define COMMON_AUDIO_VAD_VAD_SP_H_ 16 | 17 | #include "vad_core.h" 18 | 19 | // Downsamples the signal by a factor 2, eg. 32->16 or 16->8. 20 | // 21 | // Inputs: 22 | // - signal_in : Input signal. 23 | // - in_length : Length of input signal in samples. 24 | // 25 | // Input & Output: 26 | // - filter_state : Current filter states of the two all-pass filters. The 27 | // |filter_state| is updated after all samples have been 28 | // processed. 29 | // 30 | // Output: 31 | // - signal_out : Downsampled signal (of length |in_length| / 2). 
32 | void WebRtcVad_Downsampling(const int16_t* signal_in, 33 | int16_t* signal_out, 34 | int32_t* filter_state, 35 | size_t in_length); 36 | 37 | // Updates and returns the smoothed feature minimum. As minimum we use the 38 | // median of the five smallest feature values in a 100 frames long window. 39 | // As long as |handle->frame_counter| is zero, that is, we haven't received any 40 | // "valid" data, FindMinimum() outputs the default value of 1600. 41 | // 42 | // Inputs: 43 | // - feature_value : New feature value to update with. 44 | // - channel : Channel number. 45 | // 46 | // Input & Output: 47 | // - handle : State information of the VAD. 48 | // 49 | // Returns: 50 | // : Smoothed minimum value for a moving window. 51 | int16_t WebRtcVad_FindMinimum(VadInstT* handle, 52 | int16_t feature_value, 53 | int channel); 54 | 55 | #endif // COMMON_AUDIO_VAD_VAD_SP_H_ 56 | -------------------------------------------------------------------------------- /webrtc_vad/core/webrtc_vad.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #include "webrtc_vad.h" 12 | 13 | #include 14 | #include 15 | 16 | #include "signal_processing_library.h" 17 | #include "vad_core.h" 18 | 19 | static const int kInitCheck = 42; 20 | static const int kValidRates[] = { 8000, 16000, 32000, 48000 }; 21 | static const size_t kRatesSize = sizeof(kValidRates) / sizeof(*kValidRates); 22 | static const int kMaxFrameLengthMs = 30; 23 | 24 | VadInst* WebRtcVad_Create() { 25 | VadInstT* self = (VadInstT*)malloc(sizeof(VadInstT)); 26 | 27 | WebRtcSpl_Init(); 28 | self->init_flag = 0; 29 | 30 | return (VadInst*)self; 31 | } 32 | 33 | void WebRtcVad_Free(VadInst* handle) { 34 | free(handle); 35 | } 36 | 37 | // TODO(bjornv): Move WebRtcVad_InitCore() code here. 38 | int WebRtcVad_Init(VadInst* handle) { 39 | // Initialize the core VAD component. 40 | return WebRtcVad_InitCore((VadInstT*) handle); 41 | } 42 | 43 | // TODO(bjornv): Move WebRtcVad_set_mode_core() code here. 44 | int WebRtcVad_set_mode(VadInst* handle, int mode) { 45 | VadInstT* self = (VadInstT*) handle; 46 | 47 | if (handle == NULL) { 48 | return -1; 49 | } 50 | if (self->init_flag != kInitCheck) { 51 | return -1; 52 | } 53 | 54 | return WebRtcVad_set_mode_core(self, mode); 55 | } 56 | 57 | int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame, 58 | size_t frame_length) { 59 | int vad = -1; 60 | VadInstT* self = (VadInstT*) handle; 61 | 62 | if (handle == NULL) { 63 | return -1; 64 | } 65 | 66 | if (self->init_flag != kInitCheck) { 67 | return -1; 68 | } 69 | if (audio_frame == NULL) { 70 | return -1; 71 | } 72 | if (WebRtcVad_ValidRateAndFrameLength(fs, frame_length) != 0) { 73 | return -1; 74 | } 75 | 76 | if (fs == 48000) { 77 | vad = WebRtcVad_CalcVad48khz(self, audio_frame, frame_length); 78 | } else if (fs == 32000) { 79 | vad = WebRtcVad_CalcVad32khz(self, audio_frame, frame_length); 80 | } else if (fs == 16000) { 81 | vad = WebRtcVad_CalcVad16khz(self, audio_frame, frame_length); 82 | } else if (fs == 8000) { 83 | vad = 
WebRtcVad_CalcVad8khz(self, audio_frame, frame_length); 84 | } 85 | return vad; 86 | } 87 | 88 | int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length) { 89 | int return_value = -1; 90 | size_t i; 91 | int valid_length_ms; 92 | size_t valid_length; 93 | 94 | // We only allow 10, 20 or 30 ms frames. Loop through valid frame rates and 95 | // see if we have a matching pair. 96 | for (i = 0; i < kRatesSize; i++) { 97 | if (kValidRates[i] == rate) { 98 | for (valid_length_ms = 10; valid_length_ms <= kMaxFrameLengthMs; 99 | valid_length_ms += 10) { 100 | valid_length = (size_t)(kValidRates[i] / 1000 * valid_length_ms); 101 | if (frame_length == valid_length) { 102 | return_value = 0; 103 | break; 104 | } 105 | } 106 | break; 107 | } 108 | } 109 | 110 | return return_value; 111 | } 112 | -------------------------------------------------------------------------------- /webrtc_vad/ns/defines.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_ 12 | #define MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_ 13 | 14 | #define BLOCKL_MAX 160 // max processing block length: 160 15 | #define ANAL_BLOCKL_MAX 256 // max analysis block length: 256 16 | #define HALF_ANAL_BLOCKL 129 // half max analysis block length + 1 17 | #define NUM_HIGH_BANDS_MAX 2 // max number of high bands: 2 18 | 19 | #define QUANTILE (float)0.25 20 | 21 | #define SIMULT 3 22 | #define END_STARTUP_LONG 200 23 | #define END_STARTUP_SHORT 50 24 | #define FACTOR (float)40.0 25 | #define WIDTH (float)0.01 26 | 27 | // Length of fft work arrays. 28 | #define IP_LENGTH (ANAL_BLOCKL_MAX >> 1) // must be at least ceil(2 + sqrt(ANAL_BLOCKL_MAX/2)) 29 | #define W_LENGTH (ANAL_BLOCKL_MAX >> 1) 30 | 31 | //PARAMETERS FOR NEW METHOD 32 | #define DD_PR_SNR (float)0.98 // DD update of prior SNR 33 | #define LRT_TAVG (float)0.50 // tavg parameter for LRT (previously 0.90) 34 | #define SPECT_FL_TAVG (float)0.30 // tavg parameter for spectral flatness measure 35 | #define SPECT_DIFF_TAVG (float)0.30 // tavg parameter for spectral difference measure 36 | #define PRIOR_UPDATE (float)0.10 // update parameter of prior model 37 | #define NOISE_UPDATE (float)0.90 // update parameter for noise 38 | #define SPEECH_UPDATE (float)0.99 // update parameter when likely speech 39 | #define WIDTH_PR_MAP (float)4.0 // width parameter in sigmoid map for prior model 40 | #define LRT_FEATURE_THR (float)0.5 // default threshold for LRT feature 41 | #define SF_FEATURE_THR (float)0.5 // default threshold for Spectral Flatness feature 42 | #define SD_FEATURE_THR (float)0.5 // default threshold for Spectral Difference feature 43 | #define PROB_RANGE (float)0.20 // probability threshold for noise state in 44 | // speech/noise likelihood 45 | #define HIST_PAR_EST 1000 // histogram size for estimation of parameters 46 | #define GAMMA_PAUSE (float)0.05 // update for conservative noise estimate 47 | // 48 | 
#define B_LIM (float)0.5 // threshold in final energy gain factor calculation 49 | #endif // MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_ 50 | -------------------------------------------------------------------------------- /webrtc_vad/ns/noise_suppression.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "noise_suppression.h" 12 | 13 | #include 14 | #include 15 | 16 | #include "defines.h" 17 | #include "ns_core.h" 18 | 19 | NsHandle* WebRtcNs_Create(void) { 20 | NoiseSuppressionC* self = malloc(sizeof(NoiseSuppressionC)); 21 | self->initFlag = 0; 22 | return (NsHandle*)self; 23 | } 24 | 25 | void WebRtcNs_Free(NsHandle* NS_inst) { 26 | free(NS_inst); 27 | } 28 | 29 | int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs) { 30 | return WebRtcNs_InitCore((NoiseSuppressionC*)NS_inst, fs); 31 | } 32 | 33 | int WebRtcNs_set_policy(NsHandle* NS_inst, int mode) { 34 | return WebRtcNs_set_policy_core((NoiseSuppressionC*)NS_inst, mode); 35 | } 36 | 37 | void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe) { 38 | WebRtcNs_AnalyzeCore((NoiseSuppressionC*)NS_inst, spframe); 39 | } 40 | 41 | void WebRtcNs_Process(NsHandle* NS_inst, 42 | const float* const* spframe, 43 | size_t num_bands, 44 | float* const* outframe) { 45 | WebRtcNs_ProcessCore((NoiseSuppressionC*)NS_inst, spframe, num_bands, 46 | outframe); 47 | } 48 | 49 | float WebRtcNs_prior_speech_probability(NsHandle* handle) { 50 | NoiseSuppressionC* self = (NoiseSuppressionC*)handle; 51 | if (handle == NULL) { 52 | return -1; 53 | 
} 54 | if (self->initFlag == 0) { 55 | return -1; 56 | } 57 | return self->priorSpeechProb; 58 | } 59 | 60 | const float* WebRtcNs_noise_estimate(const NsHandle* handle) { 61 | const NoiseSuppressionC* self = (const NoiseSuppressionC*)handle; 62 | if (handle == NULL || self->initFlag == 0) { 63 | return NULL; 64 | } 65 | return self->noise; 66 | } 67 | 68 | size_t WebRtcNs_num_freq(void) { 69 | return HALF_ANAL_BLOCKL; 70 | } 71 | -------------------------------------------------------------------------------- /webrtc_vad/ns/noise_suppression.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSION_H_ 12 | #define MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSION_H_ 13 | 14 | #include 15 | 16 | #include 17 | 18 | typedef struct NsHandleT NsHandle; 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | /* 25 | * This function creates an instance of the floating point Noise Suppression. 26 | */ 27 | NsHandle* WebRtcNs_Create(void); 28 | 29 | /* 30 | * This function frees the dynamic memory of a specified noise suppression 31 | * instance. 32 | * 33 | * Input: 34 | * - NS_inst : Pointer to NS instance that should be freed 35 | */ 36 | void WebRtcNs_Free(NsHandle* NS_inst); 37 | 38 | /* 39 | * This function initializes a NS instance and has to be called before any other 40 | * processing is made. 
41 | * 42 | * Input: 43 | * - NS_inst : Instance that should be initialized 44 | * - fs : sampling frequency 45 | * 46 | * Output: 47 | * - NS_inst : Initialized instance 48 | * 49 | * Return value : 0 - Ok 50 | * -1 - Error 51 | */ 52 | int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs); 53 | 54 | /* 55 | * This changes the aggressiveness of the noise suppression method. 56 | * 57 | * Input: 58 | * - NS_inst : Noise suppression instance. 59 | * - mode : 0: Mild, 1: Medium , 2: Aggressive 60 | * 61 | * Output: 62 | * - NS_inst : Updated instance. 63 | * 64 | * Return value : 0 - Ok 65 | * -1 - Error 66 | */ 67 | int WebRtcNs_set_policy(NsHandle* NS_inst, int mode); 68 | 69 | /* 70 | * This functions estimates the background noise for the inserted speech frame. 71 | * The input and output signals should always be 10ms (80 or 160 samples). 72 | * 73 | * Input 74 | * - NS_inst : Noise suppression instance. 75 | * - spframe : Pointer to speech frame buffer for L band 76 | * 77 | * Output: 78 | * - NS_inst : Updated NS instance 79 | */ 80 | void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe); 81 | 82 | /* 83 | * This functions does Noise Suppression for the inserted speech frame. The 84 | * input and output signals should always be 10ms (80 or 160 samples). 85 | * 86 | * Input 87 | * - NS_inst : Noise suppression instance. 88 | * - spframe : Pointer to speech frame buffer for each band 89 | * - num_bands : Number of bands 90 | * 91 | * Output: 92 | * - NS_inst : Updated NS instance 93 | * - outframe : Pointer to output frame for each band 94 | */ 95 | void WebRtcNs_Process(NsHandle* NS_inst, 96 | const float* const* spframe, 97 | size_t num_bands, 98 | float* const* outframe); 99 | 100 | /* Returns the internally used prior speech probability of the current frame. 101 | * There is a frequency bin based one as well, with which this should not be 102 | * confused. 103 | * 104 | * Input 105 | * - handle : Noise suppression instance. 
106 | * 107 | * Return value : Prior speech probability in interval [0.0, 1.0]. 108 | * -1 - NULL pointer or uninitialized instance. 109 | */ 110 | float WebRtcNs_prior_speech_probability(NsHandle* handle); 111 | 112 | /* Returns a pointer to the noise estimate per frequency bin. The number of 113 | * frequency bins can be provided using WebRtcNs_num_freq(). 114 | * 115 | * Input 116 | * - handle : Noise suppression instance. 117 | * 118 | * Return value : Pointer to the noise estimate per frequency bin. 119 | * Returns NULL if the input is a NULL pointer or an 120 | * uninitialized instance. 121 | */ 122 | const float* WebRtcNs_noise_estimate(const NsHandle* handle); 123 | 124 | /* Returns the number of frequency bins, which is the length of the noise 125 | * estimate for example. 126 | * 127 | * Return value : Number of frequency bins. 128 | */ 129 | size_t WebRtcNs_num_freq(void); 130 | 131 | #ifdef __cplusplus 132 | } 133 | #endif 134 | 135 | #endif // MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSION_H_ 136 | -------------------------------------------------------------------------------- /webrtc_vad/ns/noise_suppression_x.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #include "noise_suppression_x.h" 12 | #include "signal_processing_library.h" 13 | #include 14 | 15 | #include "real_fft.h" 16 | #include "nsx_core.h" 17 | #include "nsx_defines.h" 18 | 19 | NsxHandle* WebRtcNsx_Create(void) { 20 | NoiseSuppressionFixedC* self = malloc(sizeof(NoiseSuppressionFixedC)); 21 | WebRtcSpl_Init(); 22 | self->real_fft = NULL; 23 | self->initFlag = 0; 24 | return (NsxHandle*)self; 25 | } 26 | 27 | void WebRtcNsx_Free(NsxHandle* nsxInst) { 28 | WebRtcSpl_FreeRealFFT(((NoiseSuppressionFixedC*)nsxInst)->real_fft); 29 | free(nsxInst); 30 | } 31 | 32 | int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs) { 33 | return WebRtcNsx_InitCore((NoiseSuppressionFixedC*)nsxInst, fs); 34 | } 35 | 36 | int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode) { 37 | return WebRtcNsx_set_policy_core((NoiseSuppressionFixedC*)nsxInst, mode); 38 | } 39 | 40 | void WebRtcNsx_Process(NsxHandle* nsxInst, 41 | const short* const* speechFrame, 42 | int num_bands, 43 | short* const* outFrame) { 44 | WebRtcNsx_ProcessCore((NoiseSuppressionFixedC*)nsxInst, speechFrame, 45 | num_bands, outFrame); 46 | } 47 | 48 | const uint32_t* WebRtcNsx_noise_estimate(const NsxHandle* nsxInst, 49 | int* q_noise) { 50 | *q_noise = 11; 51 | const NoiseSuppressionFixedC* self = (const NoiseSuppressionFixedC*)nsxInst; 52 | if (nsxInst == NULL || self->initFlag == 0) { 53 | return NULL; 54 | } 55 | *q_noise += self->prevQNoise; 56 | return self->prevNoiseU32; 57 | } 58 | 59 | size_t WebRtcNsx_num_freq(void) { 60 | return HALF_ANAL_BLOCKL; 61 | } 62 | -------------------------------------------------------------------------------- /webrtc_vad/ns/noise_suppression_x.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. 
An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSION_X_H_ 12 | #define MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSION_X_H_ 13 | 14 | #include 15 | 16 | #include 17 | 18 | typedef struct NsxHandleT NsxHandle; 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | /* 25 | * This function creates an instance of the fixed point Noise Suppression. 26 | */ 27 | NsxHandle* WebRtcNsx_Create(void); 28 | 29 | /* 30 | * This function frees the dynamic memory of a specified Noise Suppression 31 | * instance. 32 | * 33 | * Input: 34 | * - nsxInst : Pointer to NS instance that should be freed 35 | */ 36 | void WebRtcNsx_Free(NsxHandle* nsxInst); 37 | 38 | /* 39 | * This function initializes a NS instance 40 | * 41 | * Input: 42 | * - nsxInst : Instance that should be initialized 43 | * - fs : sampling frequency 44 | * 45 | * Output: 46 | * - nsxInst : Initialized instance 47 | * 48 | * Return value : 0 - Ok 49 | * -1 - Error 50 | */ 51 | int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs); 52 | 53 | /* 54 | * This changes the aggressiveness of the noise suppression method. 55 | * 56 | * Input: 57 | * - nsxInst : Instance that should be initialized 58 | * - mode : 0: Mild, 1: Medium , 2: Aggressive 59 | * 60 | * Output: 61 | * - nsxInst : Initialized instance 62 | * 63 | * Return value : 0 - Ok 64 | * -1 - Error 65 | */ 66 | int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode); 67 | 68 | /* 69 | * This functions does noise suppression for the inserted speech frame. The 70 | * input and output signals should always be 10ms (80 or 160 samples). 71 | * 72 | * Input 73 | * - nsxInst : NSx instance. Needs to be initiated before call. 
74 | * - speechFrame : Pointer to speech frame buffer for each band 75 | * - num_bands : Number of bands 76 | * 77 | * Output: 78 | * - nsxInst : Updated NSx instance 79 | * - outFrame : Pointer to output frame for each band 80 | */ 81 | void WebRtcNsx_Process(NsxHandle* nsxInst, 82 | const short* const* speechFrame, 83 | int num_bands, 84 | short* const* outFrame); 85 | 86 | /* Returns a pointer to the noise estimate per frequency bin. The number of 87 | * frequency bins can be provided using WebRtcNsx_num_freq(). 88 | * 89 | * Input 90 | * - nsxInst : NSx instance. Needs to be initiated before call. 91 | * - q_noise : Q value of the noise estimate, which is the number of 92 | * bits that it needs to be right-shifted to be 93 | * normalized. 94 | * 95 | * Return value : Pointer to the noise estimate per frequency bin. 96 | * Returns NULL if the input is a NULL pointer or an 97 | * uninitialized instance. 98 | */ 99 | const uint32_t* WebRtcNsx_noise_estimate(const NsxHandle* nsxInst, 100 | int* q_noise); 101 | 102 | /* Returns the number of frequency bins, which is the length of the noise 103 | * estimate for example. 104 | * 105 | * Return value : Number of frequency bins. 106 | */ 107 | size_t WebRtcNsx_num_freq(void); 108 | 109 | #ifdef __cplusplus 110 | } 111 | #endif 112 | 113 | #endif // MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSION_X_H_ 114 | -------------------------------------------------------------------------------- /webrtc_vad/ns/ns_core.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */

#ifndef MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
#define MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_

#include "defines.h"

// Tunable parameters used when extracting the LRT, spectral-flatness and
// spectral-difference features from their histograms.
typedef struct NSParaExtract_ {
  // Bin size of histogram.
  float binSizeLrt;
  float binSizeSpecFlat;
  float binSizeSpecDiff;
  // Range of histogram over which LRT threshold is computed.
  float rangeAvgHistLrt;
  // Scale parameters: multiply dominant peaks of the histograms by scale factor
  // to obtain thresholds for prior model.
  float factor1ModelPars;  // For LRT and spectral difference.
  float factor2ModelPars;  // For spectral_flatness: used when noise is flatter
                           // than speech.
  // Peak limit for spectral flatness (varies between 0 and 1).
  float thresPosSpecFlat;
  // Limit on spacing of two highest peaks in histogram: spacing determined by
  // bin size.
  float limitPeakSpacingSpecFlat;
  float limitPeakSpacingSpecDiff;
  // Limit on relevance of second peak.
  float limitPeakWeightsSpecFlat;
  float limitPeakWeightsSpecDiff;
  // Limit on fluctuation of LRT feature.
  float thresFluctLrt;
  // Limit on the max and min values for the feature thresholds.
  float maxLrt;
  float minLrt;
  float maxSpecFlat;
  float minSpecFlat;
  float maxSpecDiff;
  float minSpecDiff;
  // Criteria of weight of histogram peak to accept/reject feature.
  int thresWeightSpecFlat;
  int thresWeightSpecDiff;

} NSParaExtract;

// Complete state of one floating-point noise suppression instance.
typedef struct NoiseSuppressionC_ {
  uint32_t fs;
  size_t blockLen;
  size_t windShift;
  size_t anaLen;
  size_t magnLen;
  int aggrMode;
  const float* window;
  float analyzeBuf[ANAL_BLOCKL_MAX];
  float dataBuf[ANAL_BLOCKL_MAX];
  float syntBuf[ANAL_BLOCKL_MAX];

  int initFlag;
  // Parameters for quantile noise estimation.
  float density[SIMULT * HALF_ANAL_BLOCKL];
  float lquantile[SIMULT * HALF_ANAL_BLOCKL];
  float quantile[HALF_ANAL_BLOCKL];
  int counter[SIMULT];
  int updates;
  // Parameters for Wiener filter.
  float smooth[HALF_ANAL_BLOCKL];
  float overdrive;
  float denoiseBound;
  int gainmap;
  // FFT work arrays.
  size_t ip[IP_LENGTH];
  float wfft[W_LENGTH];

  // Parameters for new method: some not needed, will reduce/cleanup later.
  int32_t blockInd;  // Frame index counter.
  int modelUpdatePars[4];  // Parameters for updating or estimating.
  // Thresholds/weights for prior model.
  float priorModelPars[7];  // Parameters for prior model.
  float noise[HALF_ANAL_BLOCKL];  // Noise spectrum from current frame.
  float noisePrev[HALF_ANAL_BLOCKL];  // Noise spectrum from previous frame.
  // Magnitude spectrum of previous analyze frame.
  float magnPrevAnalyze[HALF_ANAL_BLOCKL];
  // Magnitude spectrum of previous process frame.
  float magnPrevProcess[HALF_ANAL_BLOCKL];
  float logLrtTimeAvg[HALF_ANAL_BLOCKL];  // Log LRT factor with time-smoothing.
  float priorSpeechProb;  // Prior speech/noise probability.
  float featureData[7];
  // Conservative noise spectrum estimate.
  float magnAvgPause[HALF_ANAL_BLOCKL];
  float signalEnergy;  // Energy of |magn|.
  float sumMagn;
  float whiteNoiseLevel;  // Initial noise estimate.
  float initMagnEst[HALF_ANAL_BLOCKL];  // Initial magnitude spectrum estimate.
  float pinkNoiseNumerator;  // Pink noise parameter: numerator.
  float pinkNoiseExp;  // Pink noise parameter: power of frequencies.
  float parametricNoise[HALF_ANAL_BLOCKL];
  // Parameters for feature extraction.
  NSParaExtract featureExtractionParams;
  // Histograms for parameter estimation.
  int histLrt[HIST_PAR_EST];
  int histSpecFlat[HIST_PAR_EST];
  int histSpecDiff[HIST_PAR_EST];
  // Quantities for high band estimate.
  float speechProb[HALF_ANAL_BLOCKL];  // Final speech/noise prob: prior + LRT.
  // Buffering data for HB.
  float dataBufHB[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];

} NoiseSuppressionC;

#ifdef __cplusplus
extern "C" {
#endif

/****************************************************************************
 * WebRtcNs_InitCore(...)
 *
 * This function initializes a noise suppression instance
 *
 * Input:
 *      - self          : Instance that should be initialized
 *      - fs            : Sampling frequency
 *
 * Output:
 *      - self          : Initialized instance
 *
 * Return value         :  0 - Ok
 *                        -1 - Error
 */
int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs);

/****************************************************************************
 * WebRtcNs_set_policy_core(...)
 *
 * This changes the aggressiveness of the noise suppression method.
 *
 * Input:
 *      - self          : Instance whose policy should be changed
 *      - mode          : 0: Mild (6dB), 1: Medium (10dB), 2: Aggressive (15dB)
 *
 * Output:
 *      - self          : Instance with the new policy applied
 *
 * Return value         :  0 - Ok
 *                        -1 - Error
 */
int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode);

/****************************************************************************
 * WebRtcNs_AnalyzeCore
 *
 * Estimate the background noise.
 *
 * Input:
 *      - self          : Instance to analyze with
 *      - speechFrame   : Input speech frame for lower band
 *
 * Output:
 *      - self          : Updated instance
 */
void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame);

/****************************************************************************
 * WebRtcNs_ProcessCore
 *
 * Do noise suppression.
 *
 * Input:
 *      - self          : Instance to process with
 *      - inFrame       : Input speech frame for each band
 *      - num_bands     : Number of bands
 *
 * Output:
 *      - self          : Updated instance
 *      - outFrame      : Output speech frame for each band
 */
void WebRtcNs_ProcessCore(NoiseSuppressionC* self,
                          const float* const* inFrame,
                          size_t num_bands,
                          float* const* outFrame);

#ifdef __cplusplus
}
#endif
#endif  // MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_

-------------------------------------------------------------------------------- /webrtc_vad/ns/nsx_core.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree.
9 | */

#ifndef MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_
#define MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_

// NOTE(review): the header name of the #include below was lost in this copy
// of the file; presumably <stdio.h> for debug file output -- confirm.
#ifdef NS_FILEDEBUG
#include
#endif

#include "nsx_defines.h"

// Complete state of one fixed-point noise suppression instance.
typedef struct NoiseSuppressionFixedC_ {
  uint32_t fs;

  const int16_t* window;
  int16_t analysisBuffer[ANAL_BLOCKL_MAX];
  int16_t synthesisBuffer[ANAL_BLOCKL_MAX];
  uint16_t noiseSupFilter[HALF_ANAL_BLOCKL];
  uint16_t overdrive; /* Q8 */
  uint16_t denoiseBound; /* Q14 */
  const int16_t* factor2Table;
  int16_t noiseEstLogQuantile[SIMULT* HALF_ANAL_BLOCKL];
  int16_t noiseEstDensity[SIMULT* HALF_ANAL_BLOCKL];
  int16_t noiseEstCounter[SIMULT];
  int16_t noiseEstQuantile[HALF_ANAL_BLOCKL];

  size_t anaLen;
  size_t anaLen2;
  size_t magnLen;
  int aggrMode;
  int stages;
  int initFlag;
  int gainMap;

  int32_t maxLrt;
  int32_t minLrt;
  // Log LRT factor with time-smoothing in Q8.
  int32_t logLrtTimeAvgW32[HALF_ANAL_BLOCKL];
  int32_t featureLogLrt;
  int32_t thresholdLogLrt;
  int16_t weightLogLrt;

  uint32_t featureSpecDiff;
  uint32_t thresholdSpecDiff;
  int16_t weightSpecDiff;

  uint32_t featureSpecFlat;
  uint32_t thresholdSpecFlat;
  int16_t weightSpecFlat;

  // Conservative estimate of noise spectrum.
  int32_t avgMagnPause[HALF_ANAL_BLOCKL];
  uint32_t magnEnergy;
  uint32_t sumMagn;
  uint32_t curAvgMagnEnergy;
  uint32_t timeAvgMagnEnergy;
  uint32_t timeAvgMagnEnergyTmp;

  uint32_t whiteNoiseLevel;  // Initial noise estimate.
  // Initial magnitude spectrum estimate.
  uint32_t initMagnEst[HALF_ANAL_BLOCKL];
  // Pink noise parameters:
  int32_t pinkNoiseNumerator;  // Numerator.
  int32_t pinkNoiseExp;  // Power of freq.
  int minNorm;  // Smallest normalization factor.
  int zeroInputSignal;  // Zero input signal flag.

  // Noise spectrum from previous frame.
  uint32_t prevNoiseU32[HALF_ANAL_BLOCKL];
  // Magnitude spectrum from previous frame.
  uint16_t prevMagnU16[HALF_ANAL_BLOCKL];
  // Prior speech/noise probability in Q14.
  int16_t priorNonSpeechProb;

  int blockIndex;  // Frame index counter.
  // Parameter for updating or estimating thresholds/weights for prior model.
  int modelUpdate;
  int cntThresUpdate;

  // Histograms for parameter estimation.
  int16_t histLrt[HIST_PAR_EST];
  int16_t histSpecFlat[HIST_PAR_EST];
  int16_t histSpecDiff[HIST_PAR_EST];

  // Quantities for high band estimate.
  int16_t dataBufHBFX[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];

  int qNoise;
  int prevQNoise;
  int prevQMagn;
  size_t blockLen10ms;

  int16_t real[ANAL_BLOCKL_MAX];
  int16_t imag[ANAL_BLOCKL_MAX];
  int32_t energyIn;
  int scaleEnergyIn;
  int normData;

  struct RealFFT* real_fft;
} NoiseSuppressionFixedC;

#ifdef __cplusplus
extern "C"
{
#endif

/****************************************************************************
 * WebRtcNsx_InitCore(...)
 *
 * This function initializes a noise suppression instance
 *
 * Input:
 *      - inst          : Instance that should be initialized
 *      - fs            : Sampling frequency
 *
 * Output:
 *      - inst          : Initialized instance
 *
 * Return value         :  0 - Ok
 *                        -1 - Error
 */
int32_t WebRtcNsx_InitCore(NoiseSuppressionFixedC* inst, uint32_t fs);

/****************************************************************************
 * WebRtcNsx_set_policy_core(...)
 *
 * This changes the aggressiveness of the noise suppression method.
 *
 * Input:
 *      - inst          : Instance whose policy should be changed
 *      - mode          : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB)
 *
 * Output:
 *      - inst          : Instance with the new policy applied
 *
 * Return value         :  0 - Ok
 *                        -1 - Error
 */
int WebRtcNsx_set_policy_core(NoiseSuppressionFixedC* inst, int mode);

/****************************************************************************
 * WebRtcNsx_ProcessCore
 *
 * Do noise suppression.
 *
 * Input:
 *      - inst          : Instance to process with
 *      - inFrame       : Input speech frame for each band
 *      - num_bands     : Number of bands
 *
 * Output:
 *      - inst          : Updated instance
 *      - outFrame      : Output speech frame for each band
 */
void WebRtcNsx_ProcessCore(NoiseSuppressionFixedC* inst,
                           const short* const* inFrame,
                           int num_bands,
                           short* const* outFrame);

/****************************************************************************
 * Some function pointers, for internal functions shared by ARM NEON and
 * generic C code.
 */
// Noise Estimation.
typedef void (*NoiseEstimation)(NoiseSuppressionFixedC* inst,
                                uint16_t* magn,
                                uint32_t* noise,
                                int16_t* q_noise);
extern NoiseEstimation WebRtcNsx_NoiseEstimation;

// Filter the data in the frequency domain, and create spectrum.
typedef void (*PrepareSpectrum)(NoiseSuppressionFixedC* inst,
                                int16_t* freq_buff);
extern PrepareSpectrum WebRtcNsx_PrepareSpectrum;

// For the noise suppression process, synthesis, read out fully processed
// segment, and update synthesis buffer.
typedef void (*SynthesisUpdate)(NoiseSuppressionFixedC* inst,
                                int16_t* out_frame,
                                int16_t gain_factor);
extern SynthesisUpdate WebRtcNsx_SynthesisUpdate;

// Update analysis buffer for lower band, and window data before FFT.
typedef void (*AnalysisUpdate)(NoiseSuppressionFixedC* inst,
                               int16_t* out,
                               int16_t* new_speech);
extern AnalysisUpdate WebRtcNsx_AnalysisUpdate;

// Denormalize the real-valued signal |in|, the output from inverse FFT.
typedef void (*Denormalize)(NoiseSuppressionFixedC* inst,
                            int16_t* in,
                            int factor);
extern Denormalize WebRtcNsx_Denormalize;

// Normalize the real-valued signal |in|, the input to forward FFT.
typedef void (*NormalizeRealBuffer)(NoiseSuppressionFixedC* inst,
                                    const int16_t* in,
                                    int16_t* out);
extern NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer;

// Compute speech/noise probability.
// Intended to be private.
void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
                               uint16_t* nonSpeechProbFinal,
                               uint32_t* priorLocSnr,
                               uint32_t* postLocSnr);

#if defined(WEBRTC_HAS_NEON)
// For the above function pointers, functions for generic platforms are declared
// and defined as static in file nsx_core.c, while those for ARM Neon platforms
// are declared below and defined in file nsx_core_neon.c.
void WebRtcNsx_NoiseEstimationNeon(NoiseSuppressionFixedC* inst,
                                   uint16_t* magn,
                                   uint32_t* noise,
                                   int16_t* q_noise);
void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst,
                                   int16_t* out_frame,
                                   int16_t gain_factor);
void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst,
                                  int16_t* out,
                                  int16_t* new_speech);
void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst,
                                   int16_t* freq_buff);
#endif

#if defined(MIPS32_LE)
// For the above function pointers, functions for generic platforms are declared
// and defined as static in file nsx_core.c, while those for MIPS platforms
// are declared below and defined in file nsx_core_mips.c.
void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst,
                                    int16_t* out_frame,
                                    int16_t gain_factor);
void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst,
                                   int16_t* out,
                                   int16_t* new_speech);
void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst,
                                    int16_t* freq_buff);
void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst,
                                        const int16_t* in,
                                        int16_t* out);
#if defined(MIPS_DSP_R1_LE)
void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst,
                                int16_t* in,
                                int factor);
#endif

#endif

#ifdef __cplusplus
}
#endif

#endif  // MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_

-------------------------------------------------------------------------------- /webrtc_vad/ns/nsx_core_c.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree.
9 | */
#include "signal_processing_library.h"
#include "noise_suppression_x.h"
#include "nsx_core.h"
#include "nsx_defines.h"

// Lookup table for the sigmoid (tanh) maps below. The lookups index it with
// (value >> 14) and interpolate towards entry [index + 1], so the 17 entries
// cover indices 0..15 plus one extra entry for the interpolation.
static const int16_t kIndicatorTable[17] = {
  0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
  7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
};

// Compute speech/noise probability
// The non-speech probability per frequency bin is returned in:
// nonSpeechProbFinal (inst->magnLen entries; zeroed first, then filled in Q8
// only when inst->priorNonSpeechProb > 0).
// priorLocSnr is the prior SNR for each frequency (in Q11)
// postLocSnr is the post SNR for each frequency (in Q11)
// Also updates inst->logLrtTimeAvgW32[], inst->featureLogLrt and
// inst->priorNonSpeechProb.
void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
                               uint16_t* nonSpeechProbFinal,
                               uint32_t* priorLocSnr,
                               uint32_t* postLocSnr) {
  uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3;
  int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32;
  int32_t frac32, logTmp;
  int32_t logLrtTimeAvgKsumFX;
  int16_t indPriorFX16;
  int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart;
  size_t i;
  int normTmp, normTmp2, nShifts;

  // compute feature based on average LR factor
  // this is the average over all frequencies of the smooth log LRT
  logLrtTimeAvgKsumFX = 0;
  for (i = 0; i < inst->magnLen; i++) {
    besselTmpFX32 = (int32_t)postLocSnr[i]; // Q11
    normTmp = WebRtcSpl_NormU32(postLocSnr[i]);
    num = postLocSnr[i] << normTmp; // Q(11+normTmp)
    if (normTmp > 10) {
      den = priorLocSnr[i] << (normTmp - 11); // Q(normTmp)
    } else {
      den = priorLocSnr[i] >> (11 - normTmp); // Q(normTmp)
    }
    // Guard the division: a zero denominator resets the term to 0.
    if (den > 0) {
      besselTmpFX32 -= num / den; // Q11
    } else {
      besselTmpFX32 = 0;
    }

    // inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior)
    //                                       - inst->logLrtTimeAvg[i]);
    // Here, LRT_TAVG = 0.5
    zeros = WebRtcSpl_NormU32(priorLocSnr[i]);
    frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19);
    tmp32 = (frac32 * frac32 * -43) >> 19;
    tmp32 += ((int16_t)frac32 * 5412) >> 12;
    frac32 = tmp32 + 37;
    // tmp32 = log2(priorLocSnr[i])
    tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12
    logTmp = (tmp32 * 178) >> 8; // log2(priorLocSnr[i])*log(2)
    // tmp32no1 = LRT_TAVG * (log(snrLocPrior) + inst->logLrtTimeAvg[i]) in Q12.
    tmp32no1 = (logTmp + inst->logLrtTimeAvgW32[i]) / 2;
    inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12

    logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12
  }
  inst->featureLogLrt = (logLrtTimeAvgKsumFX * BIN_SIZE_LRT) >>
      (inst->stages + 11);

  // done with computation of LR factor

  //
  //compute the indicator functions
  //

  // average LRT feature
  // FLOAT code
  // indicator0 = 0.5 * (tanh(widthPrior *
  //                      (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
  tmpIndFX = 16384; // Q14(1.0)
  tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
  nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
  //use larger width in tanh map for pause regions
  if (tmp32no1 < 0) {
    // tmpIndFX == 0 marks the "below threshold" side for the lookup below.
    tmpIndFX = 0;
    tmp32no1 = -tmp32no1;
    //widthPrior = widthPrior * 2.0;
    nShifts++;
  }
  tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
  // compute indicator function: sigmoid map
  // Outside the table range tmpIndFX keeps its saturated value (0 or 16384).
  if (tmp32no1 < (16 << 14) && tmp32no1 >= 0) {
    tableIndex = (int16_t)(tmp32no1 >> 14);
    tmp16no2 = kIndicatorTable[tableIndex];
    tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
    frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
    tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
    if (tmpIndFX == 0) {
      tmpIndFX = 8192 - tmp16no2; // Q14
    } else {
      tmpIndFX = 8192 + tmp16no2; // Q14
    }
  }
  indPriorFX = inst->weightLogLrt * tmpIndFX; // 6*Q14

  //spectral flatness feature
  if (inst->weightSpecFlat) {
    tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
    tmpIndFX = 16384; // Q14(1.0)
    //use larger width in tanh map for pause regions
    tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
    nShifts = 4;
    if (inst->thresholdSpecFlat < tmpU32no1) {
      tmpIndFX = 0;
      tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
      //widthPrior = widthPrior * 2.0;
      nShifts++;
    }
    tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25); // Q14
    // compute indicator function: sigmoid map
    // FLOAT code
    // indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
    //                          (threshPrior1 - tmpFloat1)) + 1.0);
    if (tmpU32no1 < (16 << 14)) {
      tableIndex = (int16_t)(tmpU32no1 >> 14);
      tmp16no2 = kIndicatorTable[tableIndex];
      tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
      frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
      tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
      if (tmpIndFX) {
        tmpIndFX = 8192 + tmp16no2; // Q14
      } else {
        tmpIndFX = 8192 - tmp16no2; // Q14
      }
    }
    indPriorFX += inst->weightSpecFlat * tmpIndFX; // 6*Q14
  }

  //for template spectral-difference
  if (inst->weightSpecDiff) {
    tmpU32no1 = 0;
    if (inst->featureSpecDiff) {
      normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
                               WebRtcSpl_NormU32(inst->featureSpecDiff));
      tmpU32no1 = inst->featureSpecDiff << normTmp; // Q(normTmp-2*stages)
      tmpU32no2 = inst->timeAvgMagnEnergy >> (20 - inst->stages - normTmp);
      if (tmpU32no2 > 0) {
        // Q(20 - inst->stages)
        tmpU32no1 /= tmpU32no2;
      } else {
        tmpU32no1 = (uint32_t)(0x7fffffff);
      }
    }
    tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25;
    tmpU32no2 = tmpU32no1 - tmpU32no3;
    nShifts = 1;
    tmpIndFX = 16384; // Q14(1.0)
    //use larger width in tanh map for pause regions
    // The top bit set means the unsigned subtraction above wrapped, i.e. the
    // feature is below the threshold.
    if (tmpU32no2 & 0x80000000) {
      tmpIndFX = 0;
      tmpU32no2 = tmpU32no3 - tmpU32no1;
      //widthPrior = widthPrior * 2.0;
      nShifts--;
    }
    tmpU32no1 = tmpU32no2 >> nShifts;
    // compute indicator function: sigmoid map
    /* FLOAT code
     indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
     */
    if (tmpU32no1 < (16 << 14)) {
      tableIndex = (int16_t)(tmpU32no1 >> 14);
      tmp16no2 = kIndicatorTable[tableIndex];
      tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
      frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
      tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
          tmp16no1, frac, 14);
      if (tmpIndFX) {
        tmpIndFX = 8192 + tmp16no2;
      } else {
        tmpIndFX = 8192 - tmp16no2;
      }
    }
    indPriorFX += inst->weightSpecDiff * tmpIndFX; // 6*Q14
  }

  //combine the indicator function with the feature weights
  // FLOAT code
  // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
  //                 indicator1 + weightIndPrior2 * indicator2);
  indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
  // done with computing indicator function

  //compute the prior probability
  // FLOAT code
  // inst->priorNonSpeechProb += PRIOR_UPDATE *
  //                             (indPriorNonSpeech - inst->priorNonSpeechProb);
  tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
  inst->priorNonSpeechProb += (int16_t)((PRIOR_UPDATE_Q14 * tmp16) >> 14);

  //final speech probability: combine prior model with LR factor:

  // Bins skipped by the guards below keep this zero.
  memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);

  if (inst->priorNonSpeechProb > 0) {
    for (i = 0; i < inst->magnLen; i++) {
      // FLOAT code
      // invLrt = exp(inst->logLrtTimeAvg[i]);
      // invLrt = inst->priorSpeechProb * invLrt;
      // nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) /
      //                         (1.0 - inst->priorSpeechProb + invLrt);
      // invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt;
      // nonSpeechProbFinal[i] = inst->priorNonSpeechProb /
      //                         (inst->priorNonSpeechProb + invLrt);
      if (inst->logLrtTimeAvgW32[i] < 65300) {
        tmp32no1 = (inst->logLrtTimeAvgW32[i] * 23637) >> 14; // Q12
        intPart = (int16_t)(tmp32no1 >> 12);
        if (intPart < -8) {
          intPart = -8;
        }
        frac = (int16_t)(tmp32no1 & 0x00000fff); // Q12

        // Quadratic approximation of 2^frac
        tmp32no2 = (frac * frac * 44) >> 19; // Q12.
        tmp32no2 += (frac * 84) >> 7; // Q12
        invLrtFX = (1 << (8 + intPart)) +
            WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8

        normTmp = WebRtcSpl_NormW32(invLrtFX);
        normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb));
        if (normTmp + normTmp2 >= 7) {
          if (normTmp + normTmp2 < 15) {
            invLrtFX >>= 15 - normTmp2 - normTmp;
            // Q(normTmp+normTmp2-7)
            tmp32no1 = invLrtFX * (16384 - inst->priorNonSpeechProb);
            // Q(normTmp+normTmp2+7)
            invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2);
            // Q14
          } else {
            tmp32no1 = invLrtFX * (16384 - inst->priorNonSpeechProb);
            // Q22
            invLrtFX = tmp32no1 >> 8; // Q14.
          }

          tmp32no1 = (int32_t)inst->priorNonSpeechProb << 8; // Q22

          nonSpeechProbFinal[i] = tmp32no1 /
              (inst->priorNonSpeechProb + invLrtFX); // Q8
        }
      }
    }
  }
}

-------------------------------------------------------------------------------- /webrtc_vad/ns/nsx_defines.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS.
All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */

/* NOTE(review): the header name of the #include below was lost in this copy
 * of the file; presumably a standard header such as <stdint.h> -- confirm. */
#include
#ifndef MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_
#define MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_

/* Compile-time constants for the fixed-point noise suppression code. */

#define ANAL_BLOCKL_MAX 256 /* Max analysis block length */
#define HALF_ANAL_BLOCKL 129 /* Half max analysis block length + 1 */
#define NUM_HIGH_BANDS_MAX 2 /* Max number of high bands */
#define SIMULT 3
#define END_STARTUP_LONG 200
#define END_STARTUP_SHORT 50
#define FACTOR_Q16 2621440 /* 40 in Q16 */
#define FACTOR_Q7 5120 /* 40 in Q7 */
#define FACTOR_Q7_STARTUP 1024 /* 8 in Q7 */
#define WIDTH_Q8 3 /* 0.01 in Q8 (or 25 ) */

/* PARAMETERS FOR NEW METHOD */
#define DD_PR_SNR_Q11 2007 /* ~= Q11(0.98) DD update of prior SNR */
#define ONE_MINUS_DD_PR_SNR_Q11 41 /* DD update of prior SNR */
#define SPECT_FLAT_TAVG_Q14 4915 /* (0.30) tavg parameter for spectral flatness measure */
/* NOTE(review): the comment on the next line says "spectral flatness", but the
 * macro name suggests the spectral difference measure -- confirm. */
#define SPECT_DIFF_TAVG_Q8 77 /* (0.30) tavg parameter for spectral flatness measure */
#define PRIOR_UPDATE_Q14 1638 /* Q14(0.1) Update parameter of prior model */
#define NOISE_UPDATE_Q8 26 /* 26 ~= Q8(0.1) Update parameter for noise */

/* Probability threshold for noise state in speech/noise likelihood. */
#define ONE_MINUS_PROB_RANGE_Q8 205 /* 205 ~= Q8(0.8) */
#define HIST_PAR_EST 1000 /* Histogram size for estimation of parameters */

/* FEATURE EXTRACTION CONFIG */
/* Bin size of histogram */
#define BIN_SIZE_LRT 10
/* Scale parameters: multiply dominant peaks of the histograms by scale factor to obtain. */
/* Thresholds for prior model */
#define FACTOR_1_LRT_DIFF 6 /* For LRT and spectral difference (5 times bigger) */
/* For spectral_flatness: used when noise is flatter than speech (10 times bigger). */
#define FACTOR_2_FLAT_Q10 922
/* Peak limit for spectral flatness (varies between 0 and 1) */
#define THRES_PEAK_FLAT 24 /* * 2 * BIN_SIZE_FLAT_FX */
/* Limit on spacing of two highest peaks in histogram: spacing determined by bin size. */
#define LIM_PEAK_SPACE_FLAT_DIFF 4 /* * 2 * BIN_SIZE_DIFF_FX */
/* Limit on relevance of second peak */
#define LIM_PEAK_WEIGHT_FLAT_DIFF 2
#define THRES_FLUCT_LRT 10240 /* = 20 * inst->modelUpdate; fluctuation limit of LRT feat. */
/* Limit on the max and min values for the feature thresholds */
#define MAX_FLAT_Q10 38912 /* * 2 * BIN_SIZE_FLAT_FX */
#define MIN_FLAT_Q10 4096 /* * 2 * BIN_SIZE_FLAT_FX */
#define MAX_DIFF 100 /* * 2 * BIN_SIZE_DIFF_FX */
#define MIN_DIFF 16 /* * 2 * BIN_SIZE_DIFF_FX */
/* Criteria of weight of histogram peak to accept/reject feature */
#define THRES_WEIGHT_FLAT_DIFF 154 /*(int)(0.3*(inst->modelUpdate)) for flatness and difference */

#define STAT_UPDATES 9 /* Update every 512 = 1 << 9 block */
#define ONE_MINUS_GAMMA_PAUSE_Q8 13 /* ~= Q8(0.05) Update for conservative noise estimate */
#define GAMMA_NOISE_TRANS_AND_SPEECH_Q8 3 /* ~= Q8(0.01) Update for transition and noise region */

#endif /* MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_ */

-------------------------------------------------------------------------------- /webrtc_vad/ns_math/complex_bit_reverse.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree.
/* Tables of complex-sample indexes that are bit-reversed images of each other
 * and therefore have to be exchanged. Entries at even positions
 * ({0, 2, 4, ...}) are the left-hand side of a swap, the entry that follows
 * each of them ({1, 3, 5, ...}) is the right-hand side. Same layout for both
 * tables.
 */

/* Swap pairs for the case of stages == 7. */
static const int16_t index_7[112] = {
  1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104,
  12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52,
  23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98,
  37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70,
  51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69,
  81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125,
  103, 115, 111, 123
};

/* Swap pairs for the case of stages == 8. */
static const int16_t index_8[240] = {
  1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80,
  11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20,
  40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184,
  30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41,
  148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76,
  51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62,
  124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82,
  75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87,
  234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101,
  166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142,
  115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131,
  193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201,
  149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171,
  213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227,
  203, 211, 207, 243, 215, 235, 223, 251, 239, 247
};

/* Exchanges complex samples |first| and |second| of an interleaved
 * (real, imaginary) 16-bit buffer.
 *
 * The two halves are moved as int16_t values on purpose: accessing the
 * int16_t buffer through an int32_t pointer breaks the C effective-type
 * (strict aliasing) rule and assumes the buffer is 4-byte aligned.
 */
static void SwapComplexSamples(int16_t* complex_data, int first, int second) {
  int16_t* const a = &complex_data[2 * first];
  int16_t* const b = &complex_data[2 * second];
  const int16_t real_part = a[0];
  const int16_t imag_part = a[1];

  a[0] = b[0];
  a[1] = b[1];
  b[0] = real_part;
  b[1] = imag_part;
}

/* Reorders, in place, the 1 << stages complex samples stored in
 * |complex_data| (interleaved real/imaginary int16_t values) so that every
 * sample ends up at the bit-reversed image of its original index.
 *
 * complex_data - buffer holding 2 * (1 << stages) int16_t values.
 * stages       - log2 of the number of complex samples. 7 and 8 use the
 *                precomputed swap tables above; every other value walks all
 *                indexes and computes the reversed index on the fly.
 */
void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) {
  if (stages == 7 || stages == 8) {
    /* Table driven: the swap pairs are known in advance for these sizes. */
    const int16_t* const index = (stages == 8) ? index_8 : index_7;
    const int length = (stages == 8) ? 240 : 112;
    int m = 0;

    /* Decimation in time. Swap the elements with bit-reversed indexes. */
    for (m = 0; m < length; m += 2) {
      SwapComplexSamples(complex_data, index[m], index[m + 1]);
    }
  } else {
    int m = 0, mr = 0, l = 0;
    const int n = 1 << stages;
    const int nn = n - 1;

    /* Decimation in time - re-order data. */
    for (m = 1; m <= nn; ++m) {
      /* Advance |mr| to the bit-reversed image of |m|. */
      l = n;
      do {
        l >>= 1;
      } while (l > nn - mr);
      mr = (mr & (l - 1)) + l;

      /* Each pair is swapped once, when the smaller index is visited;
       * self-mapped indexes (mr == m) need no swap. */
      if (mr <= m) {
        continue;
      }

      SwapComplexSamples(complex_data, m, mr);
    }
  }
}
14 | * The description header can be found in signal_processing_library.h 15 | * 16 | */ 17 | 18 | #include "complex_fft_tables.h" 19 | #include "signal_processing_library.h" 20 | 21 | #define CFFTSFT 14 22 | #define CFFTRND 1 23 | #define CFFTRND2 16384 24 | 25 | #define CIFFTSFT 14 26 | #define CIFFTRND 1 27 | 28 | 29 | int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) 30 | { 31 | int i, j, l, k, istep, n, m; 32 | int16_t wr, wi; 33 | int32_t tr32, ti32, qr32, qi32; 34 | 35 | /* The 1024-value is a constant given from the size of kSinTable1024[], 36 | * and should not be changed depending on the input parameter 'stages' 37 | */ 38 | n = 1 << stages; 39 | if (n > 1024) 40 | return -1; 41 | 42 | l = 1; 43 | k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change 44 | depending on the input parameter 'stages' */ 45 | 46 | if (mode == 0) 47 | { 48 | // mode==0: Low-complexity and Low-accuracy mode 49 | while (l < n) 50 | { 51 | istep = l << 1; 52 | 53 | for (m = 0; m < l; ++m) 54 | { 55 | j = m << k; 56 | 57 | /* The 256-value is a constant given as 1/4 of the size of 58 | * kSinTable1024[], and should not be changed depending on the input 59 | * parameter 'stages'. 
It will result in 0 <= j < N_SINE_WAVE/2 60 | */ 61 | wr = kSinTable1024[j + 256]; 62 | wi = -kSinTable1024[j]; 63 | 64 | for (i = m; i < n; i += istep) 65 | { 66 | j = i + l; 67 | 68 | tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15; 69 | 70 | ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15; 71 | 72 | qr32 = (int32_t)frfi[2 * i]; 73 | qi32 = (int32_t)frfi[2 * i + 1]; 74 | frfi[2 * j] = (int16_t)((qr32 - tr32) >> 1); 75 | frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> 1); 76 | frfi[2 * i] = (int16_t)((qr32 + tr32) >> 1); 77 | frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> 1); 78 | } 79 | } 80 | 81 | --k; 82 | l = istep; 83 | 84 | } 85 | 86 | } else 87 | { 88 | // mode==1: High-complexity and High-accuracy mode 89 | while (l < n) 90 | { 91 | istep = l << 1; 92 | 93 | for (m = 0; m < l; ++m) 94 | { 95 | j = m << k; 96 | 97 | /* The 256-value is a constant given as 1/4 of the size of 98 | * kSinTable1024[], and should not be changed depending on the input 99 | * parameter 'stages'. 
It will result in 0 <= j < N_SINE_WAVE/2 100 | */ 101 | wr = kSinTable1024[j + 256]; 102 | wi = -kSinTable1024[j]; 103 | 104 | #ifdef WEBRTC_ARCH_ARM_V7 105 | int32_t wri = 0; 106 | __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) : 107 | "r"((int32_t)wr), "r"((int32_t)wi)); 108 | #endif 109 | 110 | for (i = m; i < n; i += istep) 111 | { 112 | j = i + l; 113 | 114 | #ifdef WEBRTC_ARCH_ARM_V7 115 | register int32_t frfi_r; 116 | __asm __volatile( 117 | "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd]," 118 | " lsl #16\n\t" 119 | "smlsd %[tr32], %[wri], %[frfi_r], %[cfftrnd]\n\t" 120 | "smladx %[ti32], %[wri], %[frfi_r], %[cfftrnd]\n\t" 121 | :[frfi_r]"=&r"(frfi_r), 122 | [tr32]"=&r"(tr32), 123 | [ti32]"=r"(ti32) 124 | :[frfi_even]"r"((int32_t)frfi[2*j]), 125 | [frfi_odd]"r"((int32_t)frfi[2*j +1]), 126 | [wri]"r"(wri), 127 | [cfftrnd]"r"(CFFTRND)); 128 | #else 129 | tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CFFTRND; 130 | 131 | ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CFFTRND; 132 | #endif 133 | 134 | tr32 >>= 15 - CFFTSFT; 135 | ti32 >>= 15 - CFFTSFT; 136 | 137 | qr32 = ((int32_t)frfi[2 * i]) << CFFTSFT; 138 | qi32 = ((int32_t)frfi[2 * i + 1]) << CFFTSFT; 139 | 140 | frfi[2 * j] = (int16_t)( 141 | (qr32 - tr32 + CFFTRND2) >> (1 + CFFTSFT)); 142 | frfi[2 * j + 1] = (int16_t)( 143 | (qi32 - ti32 + CFFTRND2) >> (1 + CFFTSFT)); 144 | frfi[2 * i] = (int16_t)( 145 | (qr32 + tr32 + CFFTRND2) >> (1 + CFFTSFT)); 146 | frfi[2 * i + 1] = (int16_t)( 147 | (qi32 + ti32 + CFFTRND2) >> (1 + CFFTSFT)); 148 | } 149 | } 150 | 151 | --k; 152 | l = istep; 153 | } 154 | } 155 | return 0; 156 | } 157 | 158 | int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode) 159 | { 160 | size_t i, j, l, istep, n, m; 161 | int k, scale, shift; 162 | int16_t wr, wi; 163 | int32_t tr32, ti32, qr32, qi32; 164 | int32_t tmp32, round2; 165 | 166 | /* The 1024-value is a constant given from the size of kSinTable1024[], 167 | * and should not be changed depending on the input 
parameter 'stages' 168 | */ 169 | n = 1 << stages; 170 | if (n > 1024) 171 | return -1; 172 | 173 | scale = 0; 174 | 175 | l = 1; 176 | k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change 177 | depending on the input parameter 'stages' */ 178 | 179 | while (l < n) 180 | { 181 | // variable scaling, depending upon data 182 | shift = 0; 183 | round2 = 8192; 184 | 185 | tmp32 = WebRtcSpl_MaxAbsValueW16(frfi, 2 * n); 186 | if (tmp32 > 13573) 187 | { 188 | shift++; 189 | scale++; 190 | round2 <<= 1; 191 | } 192 | if (tmp32 > 27146) 193 | { 194 | shift++; 195 | scale++; 196 | round2 <<= 1; 197 | } 198 | 199 | istep = l << 1; 200 | 201 | if (mode == 0) 202 | { 203 | // mode==0: Low-complexity and Low-accuracy mode 204 | for (m = 0; m < l; ++m) 205 | { 206 | j = m << k; 207 | 208 | /* The 256-value is a constant given as 1/4 of the size of 209 | * kSinTable1024[], and should not be changed depending on the input 210 | * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 211 | */ 212 | wr = kSinTable1024[j + 256]; 213 | wi = kSinTable1024[j]; 214 | 215 | for (i = m; i < n; i += istep) 216 | { 217 | j = i + l; 218 | 219 | tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15; 220 | 221 | ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15; 222 | 223 | qr32 = (int32_t)frfi[2 * i]; 224 | qi32 = (int32_t)frfi[2 * i + 1]; 225 | frfi[2 * j] = (int16_t)((qr32 - tr32) >> shift); 226 | frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> shift); 227 | frfi[2 * i] = (int16_t)((qr32 + tr32) >> shift); 228 | frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> shift); 229 | } 230 | } 231 | } else 232 | { 233 | // mode==1: High-complexity and High-accuracy mode 234 | 235 | for (m = 0; m < l; ++m) 236 | { 237 | j = m << k; 238 | 239 | /* The 256-value is a constant given as 1/4 of the size of 240 | * kSinTable1024[], and should not be changed depending on the input 241 | * parameter 'stages'. 
It will result in 0 <= j < N_SINE_WAVE/2 242 | */ 243 | wr = kSinTable1024[j + 256]; 244 | wi = kSinTable1024[j]; 245 | 246 | #ifdef WEBRTC_ARCH_ARM_V7 247 | int32_t wri = 0; 248 | __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) : 249 | "r"((int32_t)wr), "r"((int32_t)wi)); 250 | #endif 251 | 252 | for (i = m; i < n; i += istep) 253 | { 254 | j = i + l; 255 | 256 | #ifdef WEBRTC_ARCH_ARM_V7 257 | register int32_t frfi_r; 258 | __asm __volatile( 259 | "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t" 260 | "smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t" 261 | "smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t" 262 | :[frfi_r]"=&r"(frfi_r), 263 | [tr32]"=&r"(tr32), 264 | [ti32]"=r"(ti32) 265 | :[frfi_even]"r"((int32_t)frfi[2*j]), 266 | [frfi_odd]"r"((int32_t)frfi[2*j +1]), 267 | [wri]"r"(wri), 268 | [cifftrnd]"r"(CIFFTRND) 269 | ); 270 | #else 271 | 272 | tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CIFFTRND; 273 | 274 | ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CIFFTRND; 275 | #endif 276 | tr32 >>= 15 - CIFFTSFT; 277 | ti32 >>= 15 - CIFFTSFT; 278 | 279 | qr32 = ((int32_t)frfi[2 * i]) << CIFFTSFT; 280 | qi32 = ((int32_t)frfi[2 * i + 1]) << CIFFTSFT; 281 | 282 | frfi[2 * j] = (int16_t)( 283 | (qr32 - tr32 + round2) >> (shift + CIFFTSFT)); 284 | frfi[2 * j + 1] = (int16_t)( 285 | (qi32 - ti32 + round2) >> (shift + CIFFTSFT)); 286 | frfi[2 * i] = (int16_t)( 287 | (qr32 + tr32 + round2) >> (shift + CIFFTSFT)); 288 | frfi[2 * i + 1] = (int16_t)( 289 | (qi32 + ti32 + round2) >> (shift + CIFFTSFT)); 290 | } 291 | } 292 | 293 | } 294 | --k; 295 | l = istep; 296 | } 297 | return scale; 298 | } 299 | -------------------------------------------------------------------------------- /webrtc_vad/ns_math/complex_fft_tables.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 
3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_ 13 | #define COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_ 14 | 15 | #include // NOLINT(build/include) 16 | 17 | static const int16_t kSinTable1024[] = { 18 | 0, 201, 402, 603, 804, 1005, 1206, 1406, 19 | 1607, 1808, 2009, 2209, 2410, 2610, 2811, 3011, 20 | 3211, 3411, 3611, 3811, 4011, 4210, 4409, 4608, 21 | 4807, 5006, 5205, 5403, 5601, 5799, 5997, 6195, 22 | 6392, 6589, 6786, 6982, 7179, 7375, 7571, 7766, 23 | 7961, 8156, 8351, 8545, 8739, 8932, 9126, 9319, 24 | 9511, 9703, 9895, 10087, 10278, 10469, 10659, 10849, 25 | 11038, 11227, 11416, 11604, 11792, 11980, 12166, 12353, 26 | 12539, 12724, 12909, 13094, 13278, 13462, 13645, 13827, 27 | 14009, 14191, 14372, 14552, 14732, 14911, 15090, 15268, 28 | 15446, 15623, 15799, 15975, 16150, 16325, 16499, 16672, 29 | 16845, 17017, 17189, 17360, 17530, 17699, 17868, 18036, 30 | 18204, 18371, 18537, 18702, 18867, 19031, 19194, 19357, 31 | 19519, 19680, 19840, 20000, 20159, 20317, 20474, 20631, 32 | 20787, 20942, 21096, 21249, 21402, 21554, 21705, 21855, 33 | 22004, 22153, 22301, 22448, 22594, 22739, 22883, 23027, 34 | 23169, 23311, 23452, 23592, 23731, 23869, 24006, 24143, 35 | 24278, 24413, 24546, 24679, 24811, 24942, 25072, 25201, 36 | 25329, 25456, 25582, 25707, 25831, 25954, 26077, 26198, 37 | 26318, 26437, 26556, 26673, 26789, 26905, 27019, 27132, 38 | 27244, 27355, 27466, 27575, 27683, 27790, 27896, 28001, 39 | 28105, 28208, 28309, 28410, 28510, 28608, 28706, 28802, 40 | 28897, 28992, 29085, 29177, 29268, 29358, 29446, 29534, 41 | 29621, 29706, 29790, 29873, 29955, 30036, 30116, 30195, 42 
| 30272, 30349, 30424, 30498, 30571, 30643, 30713, 30783, 43 | 30851, 30918, 30984, 31049, 31113, 31175, 31236, 31297, 44 | 31356, 31413, 31470, 31525, 31580, 31633, 31684, 31735, 45 | 31785, 31833, 31880, 31926, 31970, 32014, 32056, 32097, 46 | 32137, 32176, 32213, 32249, 32284, 32318, 32350, 32382, 47 | 32412, 32441, 32468, 32495, 32520, 32544, 32567, 32588, 48 | 32609, 32628, 32646, 32662, 32678, 32692, 32705, 32717, 49 | 32727, 32736, 32744, 32751, 32757, 32761, 32764, 32766, 50 | 32767, 32766, 32764, 32761, 32757, 32751, 32744, 32736, 51 | 32727, 32717, 32705, 32692, 32678, 32662, 32646, 32628, 52 | 32609, 32588, 32567, 32544, 32520, 32495, 32468, 32441, 53 | 32412, 32382, 32350, 32318, 32284, 32249, 32213, 32176, 54 | 32137, 32097, 32056, 32014, 31970, 31926, 31880, 31833, 55 | 31785, 31735, 31684, 31633, 31580, 31525, 31470, 31413, 56 | 31356, 31297, 31236, 31175, 31113, 31049, 30984, 30918, 57 | 30851, 30783, 30713, 30643, 30571, 30498, 30424, 30349, 58 | 30272, 30195, 30116, 30036, 29955, 29873, 29790, 29706, 59 | 29621, 29534, 29446, 29358, 29268, 29177, 29085, 28992, 60 | 28897, 28802, 28706, 28608, 28510, 28410, 28309, 28208, 61 | 28105, 28001, 27896, 27790, 27683, 27575, 27466, 27355, 62 | 27244, 27132, 27019, 26905, 26789, 26673, 26556, 26437, 63 | 26318, 26198, 26077, 25954, 25831, 25707, 25582, 25456, 64 | 25329, 25201, 25072, 24942, 24811, 24679, 24546, 24413, 65 | 24278, 24143, 24006, 23869, 23731, 23592, 23452, 23311, 66 | 23169, 23027, 22883, 22739, 22594, 22448, 22301, 22153, 67 | 22004, 21855, 21705, 21554, 21402, 21249, 21096, 20942, 68 | 20787, 20631, 20474, 20317, 20159, 20000, 19840, 19680, 69 | 19519, 19357, 19194, 19031, 18867, 18702, 18537, 18371, 70 | 18204, 18036, 17868, 17699, 17530, 17360, 17189, 17017, 71 | 16845, 16672, 16499, 16325, 16150, 15975, 15799, 15623, 72 | 15446, 15268, 15090, 14911, 14732, 14552, 14372, 14191, 73 | 14009, 13827, 13645, 13462, 13278, 13094, 12909, 12724, 74 | 12539, 12353, 12166, 11980, 11792, 11604, 
11416, 11227, 75 | 11038, 10849, 10659, 10469, 10278, 10087, 9895, 9703, 76 | 9511, 9319, 9126, 8932, 8739, 8545, 8351, 8156, 77 | 7961, 7766, 7571, 7375, 7179, 6982, 6786, 6589, 78 | 6392, 6195, 5997, 5799, 5601, 5403, 5205, 5006, 79 | 4807, 4608, 4409, 4210, 4011, 3811, 3611, 3411, 80 | 3211, 3011, 2811, 2610, 2410, 2209, 2009, 1808, 81 | 1607, 1406, 1206, 1005, 804, 603, 402, 201, 82 | 0, -201, -402, -603, -804, -1005, -1206, -1406, 83 | -1607, -1808, -2009, -2209, -2410, -2610, -2811, -3011, 84 | -3211, -3411, -3611, -3811, -4011, -4210, -4409, -4608, 85 | -4807, -5006, -5205, -5403, -5601, -5799, -5997, -6195, 86 | -6392, -6589, -6786, -6982, -7179, -7375, -7571, -7766, 87 | -7961, -8156, -8351, -8545, -8739, -8932, -9126, -9319, 88 | -9511, -9703, -9895, -10087, -10278, -10469, -10659, -10849, 89 | -11038, -11227, -11416, -11604, -11792, -11980, -12166, -12353, 90 | -12539, -12724, -12909, -13094, -13278, -13462, -13645, -13827, 91 | -14009, -14191, -14372, -14552, -14732, -14911, -15090, -15268, 92 | -15446, -15623, -15799, -15975, -16150, -16325, -16499, -16672, 93 | -16845, -17017, -17189, -17360, -17530, -17699, -17868, -18036, 94 | -18204, -18371, -18537, -18702, -18867, -19031, -19194, -19357, 95 | -19519, -19680, -19840, -20000, -20159, -20317, -20474, -20631, 96 | -20787, -20942, -21096, -21249, -21402, -21554, -21705, -21855, 97 | -22004, -22153, -22301, -22448, -22594, -22739, -22883, -23027, 98 | -23169, -23311, -23452, -23592, -23731, -23869, -24006, -24143, 99 | -24278, -24413, -24546, -24679, -24811, -24942, -25072, -25201, 100 | -25329, -25456, -25582, -25707, -25831, -25954, -26077, -26198, 101 | -26318, -26437, -26556, -26673, -26789, -26905, -27019, -27132, 102 | -27244, -27355, -27466, -27575, -27683, -27790, -27896, -28001, 103 | -28105, -28208, -28309, -28410, -28510, -28608, -28706, -28802, 104 | -28897, -28992, -29085, -29177, -29268, -29358, -29446, -29534, 105 | -29621, -29706, -29790, -29873, -29955, -30036, -30116, -30195, 106 | 
-30272, -30349, -30424, -30498, -30571, -30643, -30713, -30783, 107 | -30851, -30918, -30984, -31049, -31113, -31175, -31236, -31297, 108 | -31356, -31413, -31470, -31525, -31580, -31633, -31684, -31735, 109 | -31785, -31833, -31880, -31926, -31970, -32014, -32056, -32097, 110 | -32137, -32176, -32213, -32249, -32284, -32318, -32350, -32382, 111 | -32412, -32441, -32468, -32495, -32520, -32544, -32567, -32588, 112 | -32609, -32628, -32646, -32662, -32678, -32692, -32705, -32717, 113 | -32727, -32736, -32744, -32751, -32757, -32761, -32764, -32766, 114 | -32767, -32766, -32764, -32761, -32757, -32751, -32744, -32736, 115 | -32727, -32717, -32705, -32692, -32678, -32662, -32646, -32628, 116 | -32609, -32588, -32567, -32544, -32520, -32495, -32468, -32441, 117 | -32412, -32382, -32350, -32318, -32284, -32249, -32213, -32176, 118 | -32137, -32097, -32056, -32014, -31970, -31926, -31880, -31833, 119 | -31785, -31735, -31684, -31633, -31580, -31525, -31470, -31413, 120 | -31356, -31297, -31236, -31175, -31113, -31049, -30984, -30918, 121 | -30851, -30783, -30713, -30643, -30571, -30498, -30424, -30349, 122 | -30272, -30195, -30116, -30036, -29955, -29873, -29790, -29706, 123 | -29621, -29534, -29446, -29358, -29268, -29177, -29085, -28992, 124 | -28897, -28802, -28706, -28608, -28510, -28410, -28309, -28208, 125 | -28105, -28001, -27896, -27790, -27683, -27575, -27466, -27355, 126 | -27244, -27132, -27019, -26905, -26789, -26673, -26556, -26437, 127 | -26318, -26198, -26077, -25954, -25831, -25707, -25582, -25456, 128 | -25329, -25201, -25072, -24942, -24811, -24679, -24546, -24413, 129 | -24278, -24143, -24006, -23869, -23731, -23592, -23452, -23311, 130 | -23169, -23027, -22883, -22739, -22594, -22448, -22301, -22153, 131 | -22004, -21855, -21705, -21554, -21402, -21249, -21096, -20942, 132 | -20787, -20631, -20474, -20317, -20159, -20000, -19840, -19680, 133 | -19519, -19357, -19194, -19031, -18867, -18702, -18537, -18371, 134 | -18204, -18036, -17868, -17699, -17530, 
-17360, -17189, -17017, 135 | -16845, -16672, -16499, -16325, -16150, -15975, -15799, -15623, 136 | -15446, -15268, -15090, -14911, -14732, -14552, -14372, -14191, 137 | -14009, -13827, -13645, -13462, -13278, -13094, -12909, -12724, 138 | -12539, -12353, -12166, -11980, -11792, -11604, -11416, -11227, 139 | -11038, -10849, -10659, -10469, -10278, -10087, -9895, -9703, 140 | -9511, -9319, -9126, -8932, -8739, -8545, -8351, -8156, 141 | -7961, -7766, -7571, -7375, -7179, -6982, -6786, -6589, 142 | -6392, -6195, -5997, -5799, -5601, -5403, -5205, -5006, 143 | -4807, -4608, -4409, -4210, -4011, -3811, -3611, -3411, 144 | -3211, -3011, -2811, -2610, -2410, -2209, -2009, -1808, 145 | -1607, -1406, -1206, -1005, -804, -603, -402, -201 146 | }; 147 | 148 | #endif // COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_ 149 | -------------------------------------------------------------------------------- /webrtc_vad/ns_math/copy_set_operations.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
/*
 * Small fill and copy helpers for 16-bit and 32-bit sample vectors:
 * WebRtcSpl_MemSetW16()
 * WebRtcSpl_MemSetW32()
 * WebRtcSpl_MemCpyReversedOrder()
 * WebRtcSpl_CopyFromEndW16()
 * WebRtcSpl_ZerosArrayW16()
 * WebRtcSpl_ZerosArrayW32()
 *
 * The description header can be found in signal_processing_library.h
 *
 */

/* Stores |set_value| into the first |length| elements of |ptr|. */
void WebRtcSpl_MemSetW16(int16_t *ptr, int16_t set_value, size_t length)
{
    size_t idx;

    for (idx = 0; idx < length; ++idx)
    {
        ptr[idx] = set_value;
    }
}

/* Stores |set_value| into the first |length| elements of |ptr|. */
void WebRtcSpl_MemSetW32(int32_t *ptr, int32_t set_value, size_t length)
{
    size_t idx;

    for (idx = 0; idx < length; ++idx)
    {
        ptr[idx] = set_value;
    }
}

/* Copies |length| elements of |source| in reversed order. |dest| points at
 * the position that receives source[0]; every following source element is
 * written one position further down in memory (dest[-1], dest[-2], ...).
 */
void WebRtcSpl_MemCpyReversedOrder(int16_t* dest,
                                   int16_t* source,
                                   size_t length)
{
    size_t idx;

    for (idx = 0; idx < length; ++idx)
    {
        *(dest - idx) = source[idx];
    }
}

/* Copies the last |samples| elements of the |length|-element vector
 * |vector_in| to the start of |vector_out|.
 */
void WebRtcSpl_CopyFromEndW16(const int16_t *vector_in,
                              size_t length,
                              size_t samples,
                              int16_t *vector_out)
{
    const int16_t *tail = vector_in + (length - samples);

    memcpy(vector_out, tail, samples * sizeof(int16_t));
}

/* Sets the first |length| elements of |vector| to zero. */
void WebRtcSpl_ZerosArrayW16(int16_t *vector, size_t length)
{
    WebRtcSpl_MemSetW16(vector, 0, length);
}

/* Sets the first |length| elements of |vector| to zero. */
void WebRtcSpl_ZerosArrayW32(int32_t *vector, size_t length)
{
    WebRtcSpl_MemSetW32(vector, 0, length);
}
3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef COMMON_AUDIO_FFT4G_H_ 12 | #define COMMON_AUDIO_FFT4G_H_ 13 | 14 | #if defined(__cplusplus) 15 | extern "C" { 16 | #endif 17 | 18 | // Refer to fft4g.c for documentation. 19 | void WebRtc_rdft(size_t n, int isgn, float *a, size_t *ip, float *w); 20 | 21 | #if defined(__cplusplus) 22 | } 23 | #endif 24 | 25 | #endif // COMMON_AUDIO_FFT4G_H_ 26 | -------------------------------------------------------------------------------- /webrtc_vad/ns_math/real_fft.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "real_fft.h" 12 | #include "signal_processing_library.h" 13 | #include 14 | 15 | 16 | struct RealFFT { 17 | int order; 18 | }; 19 | 20 | struct RealFFT* WebRtcSpl_CreateRealFFT(int order) { 21 | struct RealFFT* self = NULL; 22 | 23 | if (order > kMaxFFTOrder || order < 0) { 24 | return NULL; 25 | } 26 | 27 | self = malloc(sizeof(struct RealFFT)); 28 | if (self == NULL) { 29 | return NULL; 30 | } 31 | self->order = order; 32 | 33 | return self; 34 | } 35 | 36 | void WebRtcSpl_FreeRealFFT(struct RealFFT* self) { 37 | if (self != NULL) { 38 | free(self); 39 | } 40 | } 41 | 42 | // The C version FFT functions (i.e. 
WebRtcSpl_RealForwardFFT and 43 | // WebRtcSpl_RealInverseFFT) are real-valued FFT wrappers for complex-valued 44 | // FFT implementation in SPL. 45 | 46 | int WebRtcSpl_RealForwardFFT(struct RealFFT* self, 47 | const int16_t* real_data_in, 48 | int16_t* complex_data_out) { 49 | int i = 0; 50 | int j = 0; 51 | int result = 0; 52 | int n = 1 << self->order; 53 | // The complex-value FFT implementation needs a buffer to hold 2^order 54 | // 16-bit COMPLEX numbers, for both time and frequency data. 55 | int16_t complex_buffer[2 << kMaxFFTOrder]; 56 | 57 | // Insert zeros to the imaginary parts for complex forward FFT input. 58 | for (i = 0, j = 0; i < n; i += 1, j += 2) { 59 | complex_buffer[j] = real_data_in[i]; 60 | complex_buffer[j + 1] = 0; 61 | }; 62 | 63 | WebRtcSpl_ComplexBitReverse(complex_buffer, self->order); 64 | result = WebRtcSpl_ComplexFFT(complex_buffer, self->order, 1); 65 | 66 | // For real FFT output, use only the first N + 2 elements from 67 | // complex forward FFT. 68 | memcpy(complex_data_out, complex_buffer, sizeof(int16_t) * (n + 2)); 69 | 70 | return result; 71 | } 72 | 73 | int WebRtcSpl_RealInverseFFT(struct RealFFT* self, 74 | const int16_t* complex_data_in, 75 | int16_t* real_data_out) { 76 | int i = 0; 77 | int j = 0; 78 | int result = 0; 79 | int n = 1 << self->order; 80 | // Create the buffer specific to complex-valued FFT implementation. 81 | int16_t complex_buffer[2 << kMaxFFTOrder]; 82 | 83 | // For n-point FFT, first copy the first n + 2 elements into complex 84 | // FFT, then construct the remaining n - 2 elements by real FFT's 85 | // conjugate-symmetric properties. 
86 | memcpy(complex_buffer, complex_data_in, sizeof(int16_t) * (n + 2)); 87 | for (i = n + 2; i < 2 * n; i += 2) { 88 | complex_buffer[i] = complex_data_in[2 * n - i]; 89 | complex_buffer[i + 1] = -complex_data_in[2 * n - i + 1]; 90 | } 91 | 92 | WebRtcSpl_ComplexBitReverse(complex_buffer, self->order); 93 | result = WebRtcSpl_ComplexIFFT(complex_buffer, self->order, 1); 94 | 95 | // Strip out the imaginary parts of the complex inverse FFT output. 96 | for (i = 0, j = 0; i < n; i += 1, j += 2) { 97 | real_data_out[i] = complex_buffer[j]; 98 | } 99 | 100 | return result; 101 | } 102 | -------------------------------------------------------------------------------- /webrtc_vad/ns_math/real_fft.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_ 12 | #define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_ 13 | 14 | #include 15 | // For ComplexFFT(), the maximum fft order is 10; 16 | // for OpenMax FFT in ARM, it is 12; 17 | // WebRTC APM uses orders of only 7 and 8. 18 | enum {kMaxFFTOrder = 10}; 19 | 20 | struct RealFFT; 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | struct RealFFT* WebRtcSpl_CreateRealFFT(int order); 27 | void WebRtcSpl_FreeRealFFT(struct RealFFT* self); 28 | 29 | // Compute an FFT for a real-valued signal of length of 2^order, 30 | // where 1 < order <= MAX_FFT_ORDER. 
Transform length is determined by the 31 | // specification structure, which must be initialized prior to calling the FFT 32 | // function with WebRtcSpl_CreateRealFFT(). 33 | // The relationship between the input and output sequences can 34 | // be expressed in terms of the DFT, i.e.: 35 | // x[n] = (2^(-scalefactor)/N) . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N) 36 | // n=0,1,2,...N-1 37 | // N=2^order. 38 | // The conjugate-symmetric output sequence is represented using a CCS vector, 39 | // which is of length N+2, and is organized as follows: 40 | // Index: 0 1 2 3 4 5 . . . N-2 N-1 N N+1 41 | // Component: R0 0 R1 I1 R2 I2 . . . R[N/2-1] I[N/2-1] R[N/2] 0 42 | // where R[n] and I[n], respectively, denote the real and imaginary components 43 | // for FFT bin 'n'. Bins are numbered from 0 to N/2, where N is the FFT length. 44 | // Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to 45 | // the foldover frequency. 46 | // 47 | // Input Arguments: 48 | // self - pointer to preallocated and initialized FFT specification structure. 49 | // real_data_in - the input signal. For an ARM Neon platform, it must be 50 | // aligned on a 32-byte boundary. 51 | // 52 | // Output Arguments: 53 | // complex_data_out - the output complex signal with (2^order + 2) 16-bit 54 | // elements. For an ARM Neon platform, it must be different 55 | // from real_data_in, and aligned on a 32-byte boundary. 56 | // 57 | // Return Value: 58 | // 0 - FFT calculation is successful. 59 | // -1 - Error with bad arguments (null pointers). 60 | int WebRtcSpl_RealForwardFFT(struct RealFFT* self, 61 | const int16_t* real_data_in, 62 | int16_t* complex_data_out); 63 | 64 | // Compute the inverse FFT for a conjugate-symmetric input sequence of length of 65 | // 2^order, where 1 < order <= MAX_FFT_ORDER. Transform length is determined by 66 | // the specification structure, which must be initialized prior to calling the 67 | // FFT function with WebRtcSpl_CreateRealFFT(). 
68 | // For a transform of length M, the input sequence is represented using a packed 69 | // CCS vector of length M+2, which is explained in the comments for 70 | // WebRtcSpl_RealForwardFFTC above. 71 | // 72 | // Input Arguments: 73 | // self - pointer to preallocated and initialized FFT specification structure. 74 | // complex_data_in - the input complex signal with (2^order + 2) 16-bit 75 | // elements. For an ARM Neon platform, it must be aligned on 76 | // a 32-byte boundary. 77 | // 78 | // Output Arguments: 79 | // real_data_out - the output real signal. For an ARM Neon platform, it must 80 | // be different to complex_data_in, and aligned on a 32-byte 81 | // boundary. 82 | // 83 | // Return Value: 84 | // 0 or a positive number - a value that the elements in the |real_data_out| 85 | // should be shifted left with in order to get 86 | // correct physical values. 87 | // -1 - Error with bad arguments (null pointers). 88 | int WebRtcSpl_RealInverseFFT(struct RealFFT* self, 89 | const int16_t* complex_data_in, 90 | int16_t* real_data_out); 91 | 92 | #ifdef __cplusplus 93 | } 94 | #endif 95 | 96 | #endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_ 97 | -------------------------------------------------------------------------------- /webrtc_vad/ns_math/spl_sqrt_floor.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Written by Wilco Dijkstra, 1996. The following email exchange establishes the 3 | * license. 4 | * 5 | * From: Wilco Dijkstra 6 | * Date: Fri, Jun 24, 2011 at 3:20 AM 7 | * Subject: Re: sqrt routine 8 | * To: Kevin Ma 9 | * Hi Kevin, 10 | * Thanks for asking. Those routines are public domain (originally posted to 11 | * comp.sys.arm a long time ago), so you can use them freely for any purpose. 
12 | * Cheers, 13 | * Wilco 14 | * 15 | * ----- Original Message ----- 16 | * From: "Kevin Ma" 17 | * To: 18 | * Sent: Thursday, June 23, 2011 11:44 PM 19 | * Subject: Fwd: sqrt routine 20 | * Hi Wilco, 21 | * I saw your sqrt routine from several web sites, including 22 | * http://www.finesse.demon.co.uk/steven/sqrt.html. 23 | * Just wonder if there's any copyright information with your Successive 24 | * approximation routines, or if I can freely use it for any purpose. 25 | * Thanks. 26 | * Kevin 27 | */ 28 | 29 | // Minor modifications in code style for WebRTC, 2012. 30 | 31 | #include "signal_processing_library.h" 32 | 33 | /* 34 | * Algorithm: 35 | * Successive approximation of the equation (root + delta) ^ 2 = N 36 | * until delta < 1. If delta < 1 we have the integer part of SQRT (N). 37 | * Use delta = 2^i for i = 15 .. 0. 38 | * 39 | * Output precision is 16 bits. Note for large input values (close to 40 | * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word) 41 | * contains the MSB information (a non-sign value). Do with caution 42 | * if you need to cast the output to int16_t type. 43 | * 44 | * If the input value is negative, it returns 0. 
45 | */ 46 | 47 | #define WEBRTC_SPL_SQRT_ITER(N) \ 48 | try1 = root + (1 << (N)); \ 49 | if (value >= try1 << (N)) \ 50 | { \ 51 | value -= try1 << (N); \ 52 | root |= 2 << (N); \ 53 | } 54 | 55 | int32_t WebRtcSpl_SqrtFloor(int32_t value) 56 | { 57 | int32_t root = 0, try1; 58 | 59 | WEBRTC_SPL_SQRT_ITER (15); 60 | WEBRTC_SPL_SQRT_ITER (14); 61 | WEBRTC_SPL_SQRT_ITER (13); 62 | WEBRTC_SPL_SQRT_ITER (12); 63 | WEBRTC_SPL_SQRT_ITER (11); 64 | WEBRTC_SPL_SQRT_ITER (10); 65 | WEBRTC_SPL_SQRT_ITER ( 9); 66 | WEBRTC_SPL_SQRT_ITER ( 8); 67 | WEBRTC_SPL_SQRT_ITER ( 7); 68 | WEBRTC_SPL_SQRT_ITER ( 6); 69 | WEBRTC_SPL_SQRT_ITER ( 5); 70 | WEBRTC_SPL_SQRT_ITER ( 4); 71 | WEBRTC_SPL_SQRT_ITER ( 3); 72 | WEBRTC_SPL_SQRT_ITER ( 2); 73 | WEBRTC_SPL_SQRT_ITER ( 1); 74 | WEBRTC_SPL_SQRT_ITER ( 0); 75 | 76 | return root >> 1; 77 | } 78 | -------------------------------------------------------------------------------- /webrtc_vad/spl/cross_correlation.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "signal_processing_library.h" 12 | 13 | /* C version of WebRtcSpl_CrossCorrelation() for generic platforms. 
*/ 14 | void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation, 15 | const int16_t* seq1, 16 | const int16_t* seq2, 17 | size_t dim_seq, 18 | size_t dim_cross_correlation, 19 | int right_shifts, 20 | int step_seq2) { 21 | size_t i = 0, j = 0; 22 | 23 | for (i = 0; i < dim_cross_correlation; i++) { 24 | int32_t corr = 0; 25 | for (j = 0; j < dim_seq; j++) 26 | corr += (seq1[j] * seq2[j]) >> right_shifts; 27 | seq2 += step_seq2; 28 | *cross_correlation++ = corr; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /webrtc_vad/spl/division_operations.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | 12 | /* 13 | * This file contains implementations of the divisions 14 | * WebRtcSpl_DivU32U16() 15 | * WebRtcSpl_DivW32W16() 16 | * WebRtcSpl_DivW32W16ResW16() 17 | * WebRtcSpl_DivResultInQ31() 18 | * WebRtcSpl_DivW32HiLow() 19 | * 20 | * The description header can be found in signal_processing_library.h 21 | * 22 | */ 23 | 24 | #include "signal_processing_library.h" 25 | 26 | 27 | uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den) 28 | { 29 | // Guard against division with 0 30 | if (den != 0) 31 | { 32 | return (uint32_t)(num / den); 33 | } else 34 | { 35 | return (uint32_t)0xFFFFFFFF; 36 | } 37 | } 38 | 39 | int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den) 40 | { 41 | // Guard against division with 0 42 | if (den != 0) 43 | { 44 | return (int32_t)(num / den); 45 | } else 46 | { 47 | return (int32_t)0x7FFFFFFF; 48 | } 49 | } 50 | 51 | int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den) 52 | { 53 | // Guard against division with 0 54 | if (den != 0) 55 | { 56 | return (int16_t)(num / den); 57 | } else 58 | { 59 | return (int16_t)0x7FFF; 60 | } 61 | } 62 | 63 | int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den) 64 | { 65 | int32_t L_num = num; 66 | int32_t L_den = den; 67 | int32_t div = 0; 68 | int k = 31; 69 | int change_sign = 0; 70 | 71 | if (num == 0) 72 | return 0; 73 | 74 | if (num < 0) 75 | { 76 | change_sign++; 77 | L_num = -num; 78 | } 79 | if (den < 0) 80 | { 81 | change_sign++; 82 | L_den = -den; 83 | } 84 | while (k--) 85 | { 86 | div <<= 1; 87 | L_num <<= 1; 88 | if (L_num >= L_den) 89 | { 90 | L_num -= L_den; 91 | div++; 92 | } 93 | } 94 | if (change_sign == 1) 95 | { 96 | div = -div; 97 | } 98 | return div; 99 | } 100 | 101 | int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low) 102 | { 103 | int16_t approx, tmp_hi, tmp_low, num_hi, num_low; 104 | int32_t tmpW32; 105 | 106 | approx = (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi); 107 | // result in Q14 (Note: 3FFFFFFF = 
0.5 in Q30) 108 | 109 | // tmpW32 = 1/den = approx * (2.0 - den * approx) (in Q30) 110 | tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1); 111 | // tmpW32 = den * approx 112 | 113 | tmpW32 = (int32_t)0x7fffffffL - tmpW32; // result in Q30 (tmpW32 = 2.0-(den*approx)) 114 | // UBSan: 2147483647 - -2 cannot be represented in type 'int' 115 | 116 | // Store tmpW32 in hi and low format 117 | tmp_hi = (int16_t)(tmpW32 >> 16); 118 | tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1); 119 | 120 | // tmpW32 = 1/den in Q29 121 | tmpW32 = (tmp_hi * approx + (tmp_low * approx >> 15)) << 1; 122 | 123 | // 1/den in hi and low format 124 | tmp_hi = (int16_t)(tmpW32 >> 16); 125 | tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1); 126 | 127 | // Store num in hi and low format 128 | num_hi = (int16_t)(num >> 16); 129 | num_low = (int16_t)((num - ((int32_t)num_hi << 16)) >> 1); 130 | 131 | // num * (1/den) by 32 bit multiplication (result in Q28) 132 | 133 | tmpW32 = num_hi * tmp_hi + (num_hi * tmp_low >> 15) + 134 | (num_low * tmp_hi >> 15); 135 | 136 | // Put result in Q31 (convert from Q28) 137 | tmpW32 = WEBRTC_SPL_LSHIFT_W32(tmpW32, 3); 138 | 139 | return tmpW32; 140 | } 141 | -------------------------------------------------------------------------------- /webrtc_vad/spl/downsample_fast.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "signal_processing_library.h" 12 | 13 | 14 | // TODO(Bjornv): Change the function parameter order to WebRTC code style. 
15 | // C version of WebRtcSpl_DownsampleFast() for generic platforms. 16 | int WebRtcSpl_DownsampleFastC(const int16_t* data_in, 17 | size_t data_in_length, 18 | int16_t* data_out, 19 | size_t data_out_length, 20 | const int16_t* __restrict coefficients, 21 | size_t coefficients_length, 22 | int factor, 23 | size_t delay) { 24 | // int16_t* const original_data_out = data_out; 25 | size_t i = 0; 26 | size_t j = 0; 27 | int32_t out_s32 = 0; 28 | size_t endpos = delay + factor * (data_out_length - 1) + 1; 29 | 30 | // Return error if any of the running conditions doesn't meet. 31 | if (data_out_length == 0 || coefficients_length == 0 32 | || data_in_length < endpos) { 33 | return -1; 34 | } 35 | 36 | for (i = delay; i < endpos; i += factor) { 37 | out_s32 = 2048; // Round value, 0.5 in Q12. 38 | 39 | for (j = 0; j < coefficients_length; j++) { 40 | out_s32 += coefficients[j] * data_in[i - j]; // Q12. 41 | } 42 | 43 | out_s32 >>= 12; // Q0. 44 | 45 | // Saturate and store the output. 46 | *data_out++ = WebRtcSpl_SatW32ToW16(out_s32); 47 | } 48 | 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /webrtc_vad/spl/energy.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains the function WebRtcSpl_Energy(). 
14 | * The description header can be found in signal_processing_library.h 15 | * 16 | */ 17 | 18 | #include "signal_processing_library.h" 19 | 20 | int32_t WebRtcSpl_Energy(int16_t* vector, 21 | size_t vector_length, 22 | int* scale_factor) 23 | { 24 | int32_t en = 0; 25 | size_t i; 26 | int scaling = 27 | WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length); 28 | size_t looptimes = vector_length; 29 | int16_t *vectorptr = vector; 30 | 31 | for (i = 0; i < looptimes; i++) 32 | { 33 | en += (*vectorptr * *vectorptr) >> scaling; 34 | vectorptr++; 35 | } 36 | *scale_factor = scaling; 37 | 38 | return en; 39 | } 40 | -------------------------------------------------------------------------------- /webrtc_vad/spl/get_scaling_square.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains the function WebRtcSpl_GetScalingSquare(). 14 | * The description header can be found in signal_processing_library.h 15 | * 16 | */ 17 | 18 | #include "signal_processing_library.h" 19 | 20 | int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector, 21 | size_t in_vector_length, 22 | size_t times) 23 | { 24 | int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t)times); 25 | size_t i; 26 | int16_t smax = -1; 27 | int16_t sabs; 28 | int16_t *sptr = in_vector; 29 | int16_t t; 30 | size_t looptimes = in_vector_length; 31 | 32 | for (i = looptimes; i > 0; i--) 33 | { 34 | sabs = (*sptr > 0 ? *sptr++ : -*sptr++); 35 | smax = (sabs > smax ? 
sabs : smax); 36 | } 37 | t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax)); 38 | 39 | if (smax == 0) 40 | { 41 | return 0; // Since norm(0) returns 0 42 | } else 43 | { 44 | return (t > nbits) ? 0 : nbits - t; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /webrtc_vad/spl/min_max_operations.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | /* 12 | * This file contains the implementation of functions 13 | * WebRtcSpl_MaxAbsValueW16C() 14 | * WebRtcSpl_MaxAbsValueW32C() 15 | * WebRtcSpl_MaxValueW16C() 16 | * WebRtcSpl_MaxValueW32C() 17 | * WebRtcSpl_MinValueW16C() 18 | * WebRtcSpl_MinValueW32C() 19 | * WebRtcSpl_MaxAbsIndexW16() 20 | * WebRtcSpl_MaxIndexW16() 21 | * WebRtcSpl_MaxIndexW32() 22 | * WebRtcSpl_MinIndexW16() 23 | * WebRtcSpl_MinIndexW32() 24 | * 25 | */ 26 | 27 | #include 28 | 29 | #include "signal_processing_library.h" 30 | 31 | // TODO(bjorn/kma): Consolidate function pairs (e.g. combine 32 | // WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.) 33 | // TODO(kma): Move the next six functions into min_max_operations_c.c. 34 | 35 | // Maximum absolute value of word16 vector. C version for generic platforms. 
36 | int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length) { 37 | size_t i = 0; 38 | int absolute = 0, maximum = 0; 39 | 40 | 41 | for (i = 0; i < length; i++) { 42 | absolute = abs((int)vector[i]); 43 | 44 | if (absolute > maximum) { 45 | maximum = absolute; 46 | } 47 | } 48 | 49 | // Guard the case for abs(-32768). 50 | if (maximum > WEBRTC_SPL_WORD16_MAX) { 51 | maximum = WEBRTC_SPL_WORD16_MAX; 52 | } 53 | 54 | return (int16_t)maximum; 55 | } 56 | 57 | // Maximum absolute value of word32 vector. C version for generic platforms. 58 | int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length) { 59 | // Use uint32_t for the local variables, to accommodate the return value 60 | // of abs(0x80000000), which is 0x80000000. 61 | 62 | uint32_t absolute = 0, maximum = 0; 63 | size_t i = 0; 64 | 65 | for (i = 0; i < length; i++) { 66 | absolute = abs((int)vector[i]); 67 | if (absolute > maximum) { 68 | maximum = absolute; 69 | } 70 | } 71 | 72 | maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX); 73 | 74 | return (int32_t)maximum; 75 | } 76 | 77 | // Maximum value of word16 vector. C version for generic platforms. 78 | int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length) { 79 | int16_t maximum = WEBRTC_SPL_WORD16_MIN; 80 | size_t i = 0; 81 | 82 | for (i = 0; i < length; i++) { 83 | if (vector[i] > maximum) 84 | maximum = vector[i]; 85 | } 86 | return maximum; 87 | } 88 | 89 | // Maximum value of word32 vector. C version for generic platforms. 90 | int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length) { 91 | int32_t maximum = WEBRTC_SPL_WORD32_MIN; 92 | size_t i = 0; 93 | 94 | for (i = 0; i < length; i++) { 95 | if (vector[i] > maximum) 96 | maximum = vector[i]; 97 | } 98 | return maximum; 99 | } 100 | 101 | // Minimum value of word16 vector. C version for generic platforms. 
102 | int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length) { 103 | int16_t minimum = WEBRTC_SPL_WORD16_MAX; 104 | size_t i = 0; 105 | 106 | for (i = 0; i < length; i++) { 107 | if (vector[i] < minimum) 108 | minimum = vector[i]; 109 | } 110 | return minimum; 111 | } 112 | 113 | // Minimum value of word32 vector. C version for generic platforms. 114 | int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length) { 115 | int32_t minimum = WEBRTC_SPL_WORD32_MAX; 116 | size_t i = 0; 117 | 118 | for (i = 0; i < length; i++) { 119 | if (vector[i] < minimum) 120 | minimum = vector[i]; 121 | } 122 | return minimum; 123 | } 124 | 125 | // Index of maximum absolute value in a word16 vector. 126 | size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length) { 127 | // Use type int for local variables, to accomodate the value of abs(-32768). 128 | 129 | size_t i = 0, index = 0; 130 | int absolute = 0, maximum = 0; 131 | 132 | for (i = 0; i < length; i++) { 133 | absolute = abs((int)vector[i]); 134 | 135 | if (absolute > maximum) { 136 | maximum = absolute; 137 | index = i; 138 | } 139 | } 140 | 141 | return index; 142 | } 143 | 144 | // Index of maximum value in a word16 vector. 145 | size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length) { 146 | size_t i = 0, index = 0; 147 | int16_t maximum = WEBRTC_SPL_WORD16_MIN; 148 | 149 | for (i = 0; i < length; i++) { 150 | if (vector[i] > maximum) { 151 | maximum = vector[i]; 152 | index = i; 153 | } 154 | } 155 | 156 | return index; 157 | } 158 | 159 | // Index of maximum value in a word32 vector. 160 | size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length) { 161 | size_t i = 0, index = 0; 162 | int32_t maximum = WEBRTC_SPL_WORD32_MIN; 163 | 164 | for (i = 0; i < length; i++) { 165 | if (vector[i] > maximum) { 166 | maximum = vector[i]; 167 | index = i; 168 | } 169 | } 170 | 171 | return index; 172 | } 173 | 174 | // Index of minimum value in a word16 vector. 
175 | size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length) { 176 | size_t i = 0, index = 0; 177 | int16_t minimum = WEBRTC_SPL_WORD16_MAX; 178 | 179 | for (i = 0; i < length; i++) { 180 | if (vector[i] < minimum) { 181 | minimum = vector[i]; 182 | index = i; 183 | } 184 | } 185 | 186 | return index; 187 | } 188 | 189 | // Index of minimum value in a word32 vector. 190 | size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) { 191 | size_t i = 0, index = 0; 192 | int32_t minimum = WEBRTC_SPL_WORD32_MAX; 193 | 194 | for (i = 0; i < length; i++) { 195 | if (vector[i] < minimum) { 196 | minimum = vector[i]; 197 | index = i; 198 | } 199 | } 200 | 201 | return index; 202 | } 203 | -------------------------------------------------------------------------------- /webrtc_vad/spl/resample.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains the resampling functions for 22 kHz. 
14 | * The description header can be found in signal_processing_library.h 15 | * 16 | */ 17 | 18 | #include "signal_processing_library.h" 19 | #include "resample_by_2_internal.h" 20 | 21 | // Declaration of internally used functions 22 | static void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In, int16_t *Out, 23 | int32_t K); 24 | 25 | void WebRtcSpl_32khzTo22khzIntToInt(const int32_t *In, int32_t *Out, 26 | int32_t K); 27 | 28 | // interpolation coefficients 29 | static const int16_t kCoefficients32To22[5][9] = { 30 | {127, -712, 2359, -6333, 23456, 16775, -3695, 945, -154}, 31 | {-39, 230, -830, 2785, 32366, -2324, 760, -218, 38}, 32 | {117, -663, 2222, -6133, 26634, 13070, -3174, 831, -137}, 33 | {-77, 457, -1677, 5958, 31175, -4136, 1405, -408, 71}, 34 | { 98, -560, 1900, -5406, 29240, 9423, -2480, 663, -110} 35 | }; 36 | 37 | ////////////////////// 38 | // 22 kHz -> 16 kHz // 39 | ////////////////////// 40 | 41 | // number of subblocks; options: 1, 2, 4, 5, 10 42 | #define SUB_BLOCKS_22_16 5 43 | 44 | // 22 -> 16 resampler 45 | void WebRtcSpl_Resample22khzTo16khz(const int16_t* in, int16_t* out, 46 | WebRtcSpl_State22khzTo16khz* state, int32_t* tmpmem) 47 | { 48 | int k; 49 | 50 | // process two blocks of 10/SUB_BLOCKS_22_16 ms (to reduce temp buffer size) 51 | for (k = 0; k < SUB_BLOCKS_22_16; k++) 52 | { 53 | ///// 22 --> 44 ///// 54 | // int16_t in[220/SUB_BLOCKS_22_16] 55 | // int32_t out[440/SUB_BLOCKS_22_16] 56 | ///// 57 | WebRtcSpl_UpBy2ShortToInt(in, 220 / SUB_BLOCKS_22_16, tmpmem + 16, state->S_22_44); 58 | 59 | ///// 44 --> 32 ///// 60 | // int32_t in[440/SUB_BLOCKS_22_16] 61 | // int32_t out[320/SUB_BLOCKS_22_16] 62 | ///// 63 | // copy state to and from input array 64 | tmpmem[8] = state->S_44_32[0]; 65 | tmpmem[9] = state->S_44_32[1]; 66 | tmpmem[10] = state->S_44_32[2]; 67 | tmpmem[11] = state->S_44_32[3]; 68 | tmpmem[12] = state->S_44_32[4]; 69 | tmpmem[13] = state->S_44_32[5]; 70 | tmpmem[14] = state->S_44_32[6]; 71 | tmpmem[15] = 
state->S_44_32[7]; 72 | state->S_44_32[0] = tmpmem[440 / SUB_BLOCKS_22_16 + 8]; 73 | state->S_44_32[1] = tmpmem[440 / SUB_BLOCKS_22_16 + 9]; 74 | state->S_44_32[2] = tmpmem[440 / SUB_BLOCKS_22_16 + 10]; 75 | state->S_44_32[3] = tmpmem[440 / SUB_BLOCKS_22_16 + 11]; 76 | state->S_44_32[4] = tmpmem[440 / SUB_BLOCKS_22_16 + 12]; 77 | state->S_44_32[5] = tmpmem[440 / SUB_BLOCKS_22_16 + 13]; 78 | state->S_44_32[6] = tmpmem[440 / SUB_BLOCKS_22_16 + 14]; 79 | state->S_44_32[7] = tmpmem[440 / SUB_BLOCKS_22_16 + 15]; 80 | 81 | WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 40 / SUB_BLOCKS_22_16); 82 | 83 | ///// 32 --> 16 ///// 84 | // int32_t in[320/SUB_BLOCKS_22_16] 85 | // int32_t out[160/SUB_BLOCKS_22_16] 86 | ///// 87 | WebRtcSpl_DownBy2IntToShort(tmpmem, 320 / SUB_BLOCKS_22_16, out, state->S_32_16); 88 | 89 | // move input/output pointers 10/SUB_BLOCKS_22_16 ms seconds ahead 90 | in += 220 / SUB_BLOCKS_22_16; 91 | out += 160 / SUB_BLOCKS_22_16; 92 | } 93 | } 94 | 95 | // initialize state of 22 -> 16 resampler 96 | void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state) 97 | { 98 | int k; 99 | for (k = 0; k < 8; k++) 100 | { 101 | state->S_22_44[k] = 0; 102 | state->S_44_32[k] = 0; 103 | state->S_32_16[k] = 0; 104 | } 105 | } 106 | 107 | ////////////////////// 108 | // 16 kHz -> 22 kHz // 109 | ////////////////////// 110 | 111 | // number of subblocks; options: 1, 2, 4, 5, 10 112 | #define SUB_BLOCKS_16_22 4 113 | 114 | // 16 -> 22 resampler 115 | void WebRtcSpl_Resample16khzTo22khz(const int16_t* in, int16_t* out, 116 | WebRtcSpl_State16khzTo22khz* state, int32_t* tmpmem) 117 | { 118 | int k; 119 | 120 | // process two blocks of 10/SUB_BLOCKS_16_22 ms (to reduce temp buffer size) 121 | for (k = 0; k < SUB_BLOCKS_16_22; k++) 122 | { 123 | ///// 16 --> 32 ///// 124 | // int16_t in[160/SUB_BLOCKS_16_22] 125 | // int32_t out[320/SUB_BLOCKS_16_22] 126 | ///// 127 | WebRtcSpl_UpBy2ShortToInt(in, 160 / SUB_BLOCKS_16_22, tmpmem + 8, state->S_16_32); 
128 | 129 | ///// 32 --> 22 ///// 130 | // int32_t in[320/SUB_BLOCKS_16_22] 131 | // int32_t out[220/SUB_BLOCKS_16_22] 132 | ///// 133 | // copy state to and from input array 134 | tmpmem[0] = state->S_32_22[0]; 135 | tmpmem[1] = state->S_32_22[1]; 136 | tmpmem[2] = state->S_32_22[2]; 137 | tmpmem[3] = state->S_32_22[3]; 138 | tmpmem[4] = state->S_32_22[4]; 139 | tmpmem[5] = state->S_32_22[5]; 140 | tmpmem[6] = state->S_32_22[6]; 141 | tmpmem[7] = state->S_32_22[7]; 142 | state->S_32_22[0] = tmpmem[320 / SUB_BLOCKS_16_22]; 143 | state->S_32_22[1] = tmpmem[320 / SUB_BLOCKS_16_22 + 1]; 144 | state->S_32_22[2] = tmpmem[320 / SUB_BLOCKS_16_22 + 2]; 145 | state->S_32_22[3] = tmpmem[320 / SUB_BLOCKS_16_22 + 3]; 146 | state->S_32_22[4] = tmpmem[320 / SUB_BLOCKS_16_22 + 4]; 147 | state->S_32_22[5] = tmpmem[320 / SUB_BLOCKS_16_22 + 5]; 148 | state->S_32_22[6] = tmpmem[320 / SUB_BLOCKS_16_22 + 6]; 149 | state->S_32_22[7] = tmpmem[320 / SUB_BLOCKS_16_22 + 7]; 150 | 151 | WebRtcSpl_32khzTo22khzIntToShort(tmpmem, out, 20 / SUB_BLOCKS_16_22); 152 | 153 | // move input/output pointers 10/SUB_BLOCKS_16_22 ms seconds ahead 154 | in += 160 / SUB_BLOCKS_16_22; 155 | out += 220 / SUB_BLOCKS_16_22; 156 | } 157 | } 158 | 159 | // initialize state of 16 -> 22 resampler 160 | void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state) 161 | { 162 | int k; 163 | for (k = 0; k < 8; k++) 164 | { 165 | state->S_16_32[k] = 0; 166 | state->S_32_22[k] = 0; 167 | } 168 | } 169 | 170 | ////////////////////// 171 | // 22 kHz -> 8 kHz // 172 | ////////////////////// 173 | 174 | // number of subblocks; options: 1, 2, 5, 10 175 | #define SUB_BLOCKS_22_8 2 176 | 177 | // 22 -> 8 resampler 178 | void WebRtcSpl_Resample22khzTo8khz(const int16_t* in, int16_t* out, 179 | WebRtcSpl_State22khzTo8khz* state, int32_t* tmpmem) 180 | { 181 | int k; 182 | 183 | // process two blocks of 10/SUB_BLOCKS_22_8 ms (to reduce temp buffer size) 184 | for (k = 0; k < SUB_BLOCKS_22_8; k++) 185 | { 186 | 
///// 22 --> 22 lowpass ///// 187 | // int16_t in[220/SUB_BLOCKS_22_8] 188 | // int32_t out[220/SUB_BLOCKS_22_8] 189 | ///// 190 | WebRtcSpl_LPBy2ShortToInt(in, 220 / SUB_BLOCKS_22_8, tmpmem + 16, state->S_22_22); 191 | 192 | ///// 22 --> 16 ///// 193 | // int32_t in[220/SUB_BLOCKS_22_8] 194 | // int32_t out[160/SUB_BLOCKS_22_8] 195 | ///// 196 | // copy state to and from input array 197 | tmpmem[8] = state->S_22_16[0]; 198 | tmpmem[9] = state->S_22_16[1]; 199 | tmpmem[10] = state->S_22_16[2]; 200 | tmpmem[11] = state->S_22_16[3]; 201 | tmpmem[12] = state->S_22_16[4]; 202 | tmpmem[13] = state->S_22_16[5]; 203 | tmpmem[14] = state->S_22_16[6]; 204 | tmpmem[15] = state->S_22_16[7]; 205 | state->S_22_16[0] = tmpmem[220 / SUB_BLOCKS_22_8 + 8]; 206 | state->S_22_16[1] = tmpmem[220 / SUB_BLOCKS_22_8 + 9]; 207 | state->S_22_16[2] = tmpmem[220 / SUB_BLOCKS_22_8 + 10]; 208 | state->S_22_16[3] = tmpmem[220 / SUB_BLOCKS_22_8 + 11]; 209 | state->S_22_16[4] = tmpmem[220 / SUB_BLOCKS_22_8 + 12]; 210 | state->S_22_16[5] = tmpmem[220 / SUB_BLOCKS_22_8 + 13]; 211 | state->S_22_16[6] = tmpmem[220 / SUB_BLOCKS_22_8 + 14]; 212 | state->S_22_16[7] = tmpmem[220 / SUB_BLOCKS_22_8 + 15]; 213 | 214 | WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 20 / SUB_BLOCKS_22_8); 215 | 216 | ///// 16 --> 8 ///// 217 | // int32_t in[160/SUB_BLOCKS_22_8] 218 | // int32_t out[80/SUB_BLOCKS_22_8] 219 | ///// 220 | WebRtcSpl_DownBy2IntToShort(tmpmem, 160 / SUB_BLOCKS_22_8, out, state->S_16_8); 221 | 222 | // move input/output pointers 10/SUB_BLOCKS_22_8 ms seconds ahead 223 | in += 220 / SUB_BLOCKS_22_8; 224 | out += 80 / SUB_BLOCKS_22_8; 225 | } 226 | } 227 | 228 | // initialize state of 22 -> 8 resampler 229 | void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state) 230 | { 231 | int k; 232 | for (k = 0; k < 8; k++) 233 | { 234 | state->S_22_22[k] = 0; 235 | state->S_22_22[k + 8] = 0; 236 | state->S_22_16[k] = 0; 237 | state->S_16_8[k] = 0; 238 | } 239 | } 240 | 241 | 
////////////////////// 242 | // 8 kHz -> 22 kHz // 243 | ////////////////////// 244 | 245 | // number of subblocks; options: 1, 2, 5, 10 246 | #define SUB_BLOCKS_8_22 2 247 | 248 | // 8 -> 22 resampler 249 | void WebRtcSpl_Resample8khzTo22khz(const int16_t* in, int16_t* out, 250 | WebRtcSpl_State8khzTo22khz* state, int32_t* tmpmem) 251 | { 252 | int k; 253 | 254 | // process two blocks of 10/SUB_BLOCKS_8_22 ms (to reduce temp buffer size) 255 | for (k = 0; k < SUB_BLOCKS_8_22; k++) 256 | { 257 | ///// 8 --> 16 ///// 258 | // int16_t in[80/SUB_BLOCKS_8_22] 259 | // int32_t out[160/SUB_BLOCKS_8_22] 260 | ///// 261 | WebRtcSpl_UpBy2ShortToInt(in, 80 / SUB_BLOCKS_8_22, tmpmem + 18, state->S_8_16); 262 | 263 | ///// 16 --> 11 ///// 264 | // int32_t in[160/SUB_BLOCKS_8_22] 265 | // int32_t out[110/SUB_BLOCKS_8_22] 266 | ///// 267 | // copy state to and from input array 268 | tmpmem[10] = state->S_16_11[0]; 269 | tmpmem[11] = state->S_16_11[1]; 270 | tmpmem[12] = state->S_16_11[2]; 271 | tmpmem[13] = state->S_16_11[3]; 272 | tmpmem[14] = state->S_16_11[4]; 273 | tmpmem[15] = state->S_16_11[5]; 274 | tmpmem[16] = state->S_16_11[6]; 275 | tmpmem[17] = state->S_16_11[7]; 276 | state->S_16_11[0] = tmpmem[160 / SUB_BLOCKS_8_22 + 10]; 277 | state->S_16_11[1] = tmpmem[160 / SUB_BLOCKS_8_22 + 11]; 278 | state->S_16_11[2] = tmpmem[160 / SUB_BLOCKS_8_22 + 12]; 279 | state->S_16_11[3] = tmpmem[160 / SUB_BLOCKS_8_22 + 13]; 280 | state->S_16_11[4] = tmpmem[160 / SUB_BLOCKS_8_22 + 14]; 281 | state->S_16_11[5] = tmpmem[160 / SUB_BLOCKS_8_22 + 15]; 282 | state->S_16_11[6] = tmpmem[160 / SUB_BLOCKS_8_22 + 16]; 283 | state->S_16_11[7] = tmpmem[160 / SUB_BLOCKS_8_22 + 17]; 284 | 285 | WebRtcSpl_32khzTo22khzIntToInt(tmpmem + 10, tmpmem, 10 / SUB_BLOCKS_8_22); 286 | 287 | ///// 11 --> 22 ///// 288 | // int32_t in[110/SUB_BLOCKS_8_22] 289 | // int16_t out[220/SUB_BLOCKS_8_22] 290 | ///// 291 | WebRtcSpl_UpBy2IntToShort(tmpmem, 110 / SUB_BLOCKS_8_22, out, state->S_11_22); 292 | 293 | // move 
input/output pointers 10/SUB_BLOCKS_8_22 ms seconds ahead 294 | in += 80 / SUB_BLOCKS_8_22; 295 | out += 220 / SUB_BLOCKS_8_22; 296 | } 297 | } 298 | 299 | // initialize state of 8 -> 22 resampler 300 | void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state) 301 | { 302 | int k; 303 | for (k = 0; k < 8; k++) 304 | { 305 | state->S_8_16[k] = 0; 306 | state->S_16_11[k] = 0; 307 | state->S_11_22[k] = 0; 308 | } 309 | } 310 | 311 | // compute two inner-products and store them to output array 312 | static void WebRtcSpl_DotProdIntToInt(const int32_t* in1, const int32_t* in2, 313 | const int16_t* coef_ptr, int32_t* out1, 314 | int32_t* out2) 315 | { 316 | int32_t tmp1 = 16384; 317 | int32_t tmp2 = 16384; 318 | int16_t coef; 319 | 320 | coef = coef_ptr[0]; 321 | tmp1 += coef * in1[0]; 322 | tmp2 += coef * in2[-0]; 323 | 324 | coef = coef_ptr[1]; 325 | tmp1 += coef * in1[1]; 326 | tmp2 += coef * in2[-1]; 327 | 328 | coef = coef_ptr[2]; 329 | tmp1 += coef * in1[2]; 330 | tmp2 += coef * in2[-2]; 331 | 332 | coef = coef_ptr[3]; 333 | tmp1 += coef * in1[3]; 334 | tmp2 += coef * in2[-3]; 335 | 336 | coef = coef_ptr[4]; 337 | tmp1 += coef * in1[4]; 338 | tmp2 += coef * in2[-4]; 339 | 340 | coef = coef_ptr[5]; 341 | tmp1 += coef * in1[5]; 342 | tmp2 += coef * in2[-5]; 343 | 344 | coef = coef_ptr[6]; 345 | tmp1 += coef * in1[6]; 346 | tmp2 += coef * in2[-6]; 347 | 348 | coef = coef_ptr[7]; 349 | tmp1 += coef * in1[7]; 350 | tmp2 += coef * in2[-7]; 351 | 352 | coef = coef_ptr[8]; 353 | *out1 = tmp1 + coef * in1[8]; 354 | *out2 = tmp2 + coef * in2[-8]; 355 | } 356 | 357 | // compute two inner-products and store them to output array 358 | static void WebRtcSpl_DotProdIntToShort(const int32_t* in1, const int32_t* in2, 359 | const int16_t* coef_ptr, int16_t* out1, 360 | int16_t* out2) 361 | { 362 | int32_t tmp1 = 16384; 363 | int32_t tmp2 = 16384; 364 | int16_t coef; 365 | 366 | coef = coef_ptr[0]; 367 | tmp1 += coef * in1[0]; 368 | tmp2 += coef * in2[-0]; 369 | 370 
| coef = coef_ptr[1]; 371 | tmp1 += coef * in1[1]; 372 | tmp2 += coef * in2[-1]; 373 | 374 | coef = coef_ptr[2]; 375 | tmp1 += coef * in1[2]; 376 | tmp2 += coef * in2[-2]; 377 | 378 | coef = coef_ptr[3]; 379 | tmp1 += coef * in1[3]; 380 | tmp2 += coef * in2[-3]; 381 | 382 | coef = coef_ptr[4]; 383 | tmp1 += coef * in1[4]; 384 | tmp2 += coef * in2[-4]; 385 | 386 | coef = coef_ptr[5]; 387 | tmp1 += coef * in1[5]; 388 | tmp2 += coef * in2[-5]; 389 | 390 | coef = coef_ptr[6]; 391 | tmp1 += coef * in1[6]; 392 | tmp2 += coef * in2[-6]; 393 | 394 | coef = coef_ptr[7]; 395 | tmp1 += coef * in1[7]; 396 | tmp2 += coef * in2[-7]; 397 | 398 | coef = coef_ptr[8]; 399 | tmp1 += coef * in1[8]; 400 | tmp2 += coef * in2[-8]; 401 | 402 | // scale down, round and saturate 403 | tmp1 >>= 15; 404 | if (tmp1 > (int32_t)0x00007FFF) 405 | tmp1 = 0x00007FFF; 406 | if (tmp1 < (int32_t)0xFFFF8000) 407 | tmp1 = 0xFFFF8000; 408 | tmp2 >>= 15; 409 | if (tmp2 > (int32_t)0x00007FFF) 410 | tmp2 = 0x00007FFF; 411 | if (tmp2 < (int32_t)0xFFFF8000) 412 | tmp2 = 0xFFFF8000; 413 | *out1 = (int16_t)tmp1; 414 | *out2 = (int16_t)tmp2; 415 | } 416 | 417 | // Resampling ratio: 11/16 418 | // input: int32_t (normalized, not saturated) :: size 16 * K 419 | // output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 11 * K 420 | // K: Number of blocks 421 | 422 | void WebRtcSpl_32khzTo22khzIntToInt(const int32_t* In, 423 | int32_t* Out, 424 | int32_t K) 425 | { 426 | ///////////////////////////////////////////////////////////// 427 | // Filter operation: 428 | // 429 | // Perform resampling (16 input samples -> 11 output samples); 430 | // process in sub blocks of size 16 samples. 
431 | int32_t m; 432 | 433 | for (m = 0; m < K; m++) 434 | { 435 | // first output sample 436 | Out[0] = ((int32_t)In[3] << 15) + (1 << 14); 437 | 438 | // sum and accumulate filter coefficients and input samples 439 | WebRtcSpl_DotProdIntToInt(&In[0], &In[22], kCoefficients32To22[0], &Out[1], &Out[10]); 440 | 441 | // sum and accumulate filter coefficients and input samples 442 | WebRtcSpl_DotProdIntToInt(&In[2], &In[20], kCoefficients32To22[1], &Out[2], &Out[9]); 443 | 444 | // sum and accumulate filter coefficients and input samples 445 | WebRtcSpl_DotProdIntToInt(&In[3], &In[19], kCoefficients32To22[2], &Out[3], &Out[8]); 446 | 447 | // sum and accumulate filter coefficients and input samples 448 | WebRtcSpl_DotProdIntToInt(&In[5], &In[17], kCoefficients32To22[3], &Out[4], &Out[7]); 449 | 450 | // sum and accumulate filter coefficients and input samples 451 | WebRtcSpl_DotProdIntToInt(&In[6], &In[16], kCoefficients32To22[4], &Out[5], &Out[6]); 452 | 453 | // update pointers 454 | In += 16; 455 | Out += 11; 456 | } 457 | } 458 | 459 | // Resampling ratio: 11/16 460 | // input: int32_t (normalized, not saturated) :: size 16 * K 461 | // output: int16_t (saturated) :: size 11 * K 462 | // K: Number of blocks 463 | 464 | void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In, 465 | int16_t *Out, 466 | int32_t K) 467 | { 468 | ///////////////////////////////////////////////////////////// 469 | // Filter operation: 470 | // 471 | // Perform resampling (16 input samples -> 11 output samples); 472 | // process in sub blocks of size 16 samples. 
473 | int32_t tmp; 474 | int32_t m; 475 | 476 | for (m = 0; m < K; m++) 477 | { 478 | // first output sample 479 | tmp = In[3]; 480 | if (tmp > (int32_t)0x00007FFF) 481 | tmp = 0x00007FFF; 482 | if (tmp < (int32_t)0xFFFF8000) 483 | tmp = 0xFFFF8000; 484 | Out[0] = (int16_t)tmp; 485 | 486 | // sum and accumulate filter coefficients and input samples 487 | WebRtcSpl_DotProdIntToShort(&In[0], &In[22], kCoefficients32To22[0], &Out[1], &Out[10]); 488 | 489 | // sum and accumulate filter coefficients and input samples 490 | WebRtcSpl_DotProdIntToShort(&In[2], &In[20], kCoefficients32To22[1], &Out[2], &Out[9]); 491 | 492 | // sum and accumulate filter coefficients and input samples 493 | WebRtcSpl_DotProdIntToShort(&In[3], &In[19], kCoefficients32To22[2], &Out[3], &Out[8]); 494 | 495 | // sum and accumulate filter coefficients and input samples 496 | WebRtcSpl_DotProdIntToShort(&In[5], &In[17], kCoefficients32To22[3], &Out[4], &Out[7]); 497 | 498 | // sum and accumulate filter coefficients and input samples 499 | WebRtcSpl_DotProdIntToShort(&In[6], &In[16], kCoefficients32To22[4], &Out[5], &Out[6]); 500 | 501 | // update pointers 502 | In += 16; 503 | Out += 11; 504 | } 505 | } 506 | -------------------------------------------------------------------------------- /webrtc_vad/spl/resample_48khz.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains resampling functions between 48 kHz and nb/wb. 
14 | * The description header can be found in signal_processing_library.h 15 | * 16 | */ 17 | 18 | #include 19 | #include "signal_processing_library.h" 20 | #include "resample_by_2_internal.h" 21 | 22 | //////////////////////////// 23 | ///// 48 kHz -> 16 kHz ///// 24 | //////////////////////////// 25 | 26 | // 48 -> 16 resampler 27 | void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out, 28 | WebRtcSpl_State48khzTo16khz* state, int32_t* tmpmem) 29 | { 30 | ///// 48 --> 48(LP) ///// 31 | // int16_t in[480] 32 | // int32_t out[480] 33 | ///// 34 | WebRtcSpl_LPBy2ShortToInt(in, 480, tmpmem + 16, state->S_48_48); 35 | 36 | ///// 48 --> 32 ///// 37 | // int32_t in[480] 38 | // int32_t out[320] 39 | ///// 40 | // copy state to and from input array 41 | memcpy(tmpmem + 8, state->S_48_32, 8 * sizeof(int32_t)); 42 | memcpy(state->S_48_32, tmpmem + 488, 8 * sizeof(int32_t)); 43 | WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 160); 44 | 45 | ///// 32 --> 16 ///// 46 | // int32_t in[320] 47 | // int16_t out[160] 48 | ///// 49 | WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16); 50 | } 51 | 52 | // initialize state of 48 -> 16 resampler 53 | void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state) 54 | { 55 | memset(state->S_48_48, 0, 16 * sizeof(int32_t)); 56 | memset(state->S_48_32, 0, 8 * sizeof(int32_t)); 57 | memset(state->S_32_16, 0, 8 * sizeof(int32_t)); 58 | } 59 | 60 | //////////////////////////// 61 | ///// 16 kHz -> 48 kHz ///// 62 | //////////////////////////// 63 | 64 | // 16 -> 48 resampler 65 | void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out, 66 | WebRtcSpl_State16khzTo48khz* state, int32_t* tmpmem) 67 | { 68 | ///// 16 --> 32 ///// 69 | // int16_t in[160] 70 | // int32_t out[320] 71 | ///// 72 | WebRtcSpl_UpBy2ShortToInt(in, 160, tmpmem + 16, state->S_16_32); 73 | 74 | ///// 32 --> 24 ///// 75 | // int32_t in[320] 76 | // int32_t out[240] 77 | // copy state to and from input array 78 | ///// 
79 | memcpy(tmpmem + 8, state->S_32_24, 8 * sizeof(int32_t)); 80 | memcpy(state->S_32_24, tmpmem + 328, 8 * sizeof(int32_t)); 81 | WebRtcSpl_Resample32khzTo24khz(tmpmem + 8, tmpmem, 80); 82 | 83 | ///// 24 --> 48 ///// 84 | // int32_t in[240] 85 | // int16_t out[480] 86 | ///// 87 | WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48); 88 | } 89 | 90 | // initialize state of 16 -> 48 resampler 91 | void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state) 92 | { 93 | memset(state->S_16_32, 0, 8 * sizeof(int32_t)); 94 | memset(state->S_32_24, 0, 8 * sizeof(int32_t)); 95 | memset(state->S_24_48, 0, 8 * sizeof(int32_t)); 96 | } 97 | 98 | //////////////////////////// 99 | ///// 48 kHz -> 8 kHz ///// 100 | //////////////////////////// 101 | 102 | // 48 -> 8 resampler 103 | void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out, 104 | WebRtcSpl_State48khzTo8khz* state, int32_t* tmpmem) 105 | { 106 | ///// 48 --> 24 ///// 107 | // int16_t in[480] 108 | // int32_t out[240] 109 | ///// 110 | WebRtcSpl_DownBy2ShortToInt(in, 480, tmpmem + 256, state->S_48_24); 111 | 112 | ///// 24 --> 24(LP) ///// 113 | // int32_t in[240] 114 | // int32_t out[240] 115 | ///// 116 | WebRtcSpl_LPBy2IntToInt(tmpmem + 256, 240, tmpmem + 16, state->S_24_24); 117 | 118 | ///// 24 --> 16 ///// 119 | // int32_t in[240] 120 | // int32_t out[160] 121 | ///// 122 | // copy state to and from input array 123 | memcpy(tmpmem + 8, state->S_24_16, 8 * sizeof(int32_t)); 124 | memcpy(state->S_24_16, tmpmem + 248, 8 * sizeof(int32_t)); 125 | WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 80); 126 | 127 | ///// 16 --> 8 ///// 128 | // int32_t in[160] 129 | // int16_t out[80] 130 | ///// 131 | WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8); 132 | } 133 | 134 | // initialize state of 48 -> 8 resampler 135 | void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state) 136 | { 137 | memset(state->S_48_24, 0, 8 * sizeof(int32_t)); 138 | 
memset(state->S_24_24, 0, 16 * sizeof(int32_t)); 139 | memset(state->S_24_16, 0, 8 * sizeof(int32_t)); 140 | memset(state->S_16_8, 0, 8 * sizeof(int32_t)); 141 | } 142 | 143 | //////////////////////////// 144 | ///// 8 kHz -> 48 kHz ///// 145 | //////////////////////////// 146 | 147 | // 8 -> 48 resampler 148 | void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out, 149 | WebRtcSpl_State8khzTo48khz* state, int32_t* tmpmem) 150 | { 151 | ///// 8 --> 16 ///// 152 | // int16_t in[80] 153 | // int32_t out[160] 154 | ///// 155 | WebRtcSpl_UpBy2ShortToInt(in, 80, tmpmem + 264, state->S_8_16); 156 | 157 | ///// 16 --> 12 ///// 158 | // int32_t in[160] 159 | // int32_t out[120] 160 | ///// 161 | // copy state to and from input array 162 | memcpy(tmpmem + 256, state->S_16_12, 8 * sizeof(int32_t)); 163 | memcpy(state->S_16_12, tmpmem + 416, 8 * sizeof(int32_t)); 164 | WebRtcSpl_Resample32khzTo24khz(tmpmem + 256, tmpmem + 240, 40); 165 | 166 | ///// 12 --> 24 ///// 167 | // int32_t in[120] 168 | // int16_t out[240] 169 | ///// 170 | WebRtcSpl_UpBy2IntToInt(tmpmem + 240, 120, tmpmem, state->S_12_24); 171 | 172 | ///// 24 --> 48 ///// 173 | // int32_t in[240] 174 | // int16_t out[480] 175 | ///// 176 | WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48); 177 | } 178 | 179 | // initialize state of 8 -> 48 resampler 180 | void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state) 181 | { 182 | memset(state->S_8_16, 0, 8 * sizeof(int32_t)); 183 | memset(state->S_16_12, 0, 8 * sizeof(int32_t)); 184 | memset(state->S_12_24, 0, 8 * sizeof(int32_t)); 185 | memset(state->S_24_48, 0, 8 * sizeof(int32_t)); 186 | } 187 | -------------------------------------------------------------------------------- /webrtc_vad/spl/resample_by_2.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


/*
 * This file contains the resampling by two functions.
 * The description header can be found in signal_processing_library.h
 *
 */

#include "signal_processing_library.h"

#ifdef WEBRTC_ARCH_ARM_V7

// allpass filter coefficients.
// ARMv7 variant: the entries written with "<< 15" are the ones the
// resamplers in this file pass to MUL_ACCUM_2; the unshifted entries are
// passed to MUL_ACCUM_1.
static const uint32_t kResampleAllpass1[3] = {3284, 24441, 49528 << 15};
static const uint32_t kResampleAllpass2[3] =
  {12199, 37471 << 15, 60255 << 15};

// Multiply two 32-bit values and accumulate to another input value.
// Return: state + ((diff * tbl_value) >> 16)
//
// Implemented with a single smlawb instruction; operand order in the asm
// statement is (diff, tbl_value, state).

static __inline int32_t MUL_ACCUM_1(int32_t tbl_value,
                                    int32_t diff,
                                    int32_t state) {
  int32_t result;
  __asm __volatile ("smlawb %0, %1, %2, %3": "=r"(result): "r"(diff),
                                   "r"(tbl_value), "r"(state));
  return result;
}

// Multiply two 32-bit values and accumulate to another input value.
// Return: state + (((diff << 1) * tbl_value) >> 32)
//
// The reason to introduce this function is that, in case we can't use smlawb
// instruction (in MUL_ACCUM_1) due to input value range, we can still use
// smmla to save some cycles.
//
// With a coefficient that was pre-shifted left by 15 (see the tables above),
// the doubling of diff and the >> 32 combine to the same overall >> 16
// scaling that MUL_ACCUM_1 applies.

static __inline int32_t MUL_ACCUM_2(int32_t tbl_value,
                                    int32_t diff,
                                    int32_t state) {
  int32_t result;
  __asm __volatile ("smmla %0, %1, %2, %3": "=r"(result): "r"(diff << 1),
                                  "r"(tbl_value), "r"(state));
  return result;
}

#else

// allpass filter coefficients.
58 | static const uint16_t kResampleAllpass1[3] = {3284, 24441, 49528}; 59 | static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255}; 60 | 61 | // Multiply a 32-bit value with a 16-bit value and accumulate to another input: 62 | #define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c) 63 | #define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c) 64 | 65 | #endif // WEBRTC_ARCH_ARM_V7 66 | 67 | 68 | // decimator 69 | #if !defined(MIPS32_LE) 70 | void WebRtcSpl_DownsampleBy2(const int16_t* in, size_t len, 71 | int16_t* out, int32_t* filtState) { 72 | int32_t tmp1, tmp2, diff, in32, out32; 73 | size_t i; 74 | 75 | register int32_t state0 = filtState[0]; 76 | register int32_t state1 = filtState[1]; 77 | register int32_t state2 = filtState[2]; 78 | register int32_t state3 = filtState[3]; 79 | register int32_t state4 = filtState[4]; 80 | register int32_t state5 = filtState[5]; 81 | register int32_t state6 = filtState[6]; 82 | register int32_t state7 = filtState[7]; 83 | 84 | for (i = (len >> 1); i > 0; i--) { 85 | // lower allpass filter 86 | in32 = (int32_t)(*in++) << 10; 87 | diff = in32 - state1; 88 | tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); 89 | state0 = in32; 90 | diff = tmp1 - state2; 91 | tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); 92 | state1 = tmp1; 93 | diff = tmp2 - state3; 94 | state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); 95 | state2 = tmp2; 96 | 97 | // upper allpass filter 98 | in32 = (int32_t)(*in++) << 10; 99 | diff = in32 - state5; 100 | tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); 101 | state4 = in32; 102 | diff = tmp1 - state6; 103 | tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); 104 | state5 = tmp1; 105 | diff = tmp2 - state7; 106 | state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); 107 | state6 = tmp2; 108 | 109 | // add two allpass outputs, divide by two and round 110 | out32 = (state3 + state7 + 1024) >> 11; 111 | 112 | // limit amplitude to prevent wrap-around, 
and write to output array 113 | *out++ = WebRtcSpl_SatW32ToW16(out32); 114 | } 115 | 116 | filtState[0] = state0; 117 | filtState[1] = state1; 118 | filtState[2] = state2; 119 | filtState[3] = state3; 120 | filtState[4] = state4; 121 | filtState[5] = state5; 122 | filtState[6] = state6; 123 | filtState[7] = state7; 124 | } 125 | #endif // #if defined(MIPS32_LE) 126 | 127 | 128 | void WebRtcSpl_UpsampleBy2(const int16_t* in, size_t len, 129 | int16_t* out, int32_t* filtState) { 130 | int32_t tmp1, tmp2, diff, in32, out32; 131 | size_t i; 132 | 133 | register int32_t state0 = filtState[0]; 134 | register int32_t state1 = filtState[1]; 135 | register int32_t state2 = filtState[2]; 136 | register int32_t state3 = filtState[3]; 137 | register int32_t state4 = filtState[4]; 138 | register int32_t state5 = filtState[5]; 139 | register int32_t state6 = filtState[6]; 140 | register int32_t state7 = filtState[7]; 141 | 142 | for (i = len; i > 0; i--) { 143 | // lower allpass filter 144 | in32 = (int32_t)(*in++) << 10; 145 | diff = in32 - state1; 146 | tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state0); 147 | state0 = in32; 148 | diff = tmp1 - state2; 149 | tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state1); 150 | state1 = tmp1; 151 | diff = tmp2 - state3; 152 | state3 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state2); 153 | state2 = tmp2; 154 | 155 | // round; limit amplitude to prevent wrap-around; write to output array 156 | out32 = (state3 + 512) >> 10; 157 | *out++ = WebRtcSpl_SatW32ToW16(out32); 158 | 159 | // upper allpass filter 160 | diff = in32 - state5; 161 | tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state4); 162 | state4 = in32; 163 | diff = tmp1 - state6; 164 | tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state5); 165 | state5 = tmp1; 166 | diff = tmp2 - state7; 167 | state7 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state6); 168 | state6 = tmp2; 169 | 170 | // round; limit amplitude to prevent wrap-around; write to output array 171 | out32 = 
(state7 + 512) >> 10; 172 | *out++ = WebRtcSpl_SatW32ToW16(out32); 173 | } 174 | 175 | filtState[0] = state0; 176 | filtState[1] = state1; 177 | filtState[2] = state2; 178 | filtState[3] = state3; 179 | filtState[4] = state4; 180 | filtState[5] = state5; 181 | filtState[6] = state6; 182 | filtState[7] = state7; 183 | } 184 | -------------------------------------------------------------------------------- /webrtc_vad/spl/resample_by_2_internal.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This header file contains some internal resampling functions. 
14 | * 15 | */ 16 | 17 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_ 18 | #define COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_ 19 | 20 | #include 21 | /******************************************************************* 22 | * resample_by_2_fast.c 23 | * Functions for internal use in the other resample functions 24 | ******************************************************************/ 25 | void WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out, 26 | int32_t *state); 27 | 28 | void WebRtcSpl_DownBy2ShortToInt(const int16_t *in, int32_t len, 29 | int32_t *out, int32_t *state); 30 | 31 | void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len, 32 | int32_t *out, int32_t *state); 33 | 34 | void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out, 35 | int32_t *state); 36 | 37 | void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len, 38 | int16_t *out, int32_t *state); 39 | 40 | void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, 41 | int32_t* out, int32_t* state); 42 | 43 | void WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out, 44 | int32_t* state); 45 | 46 | #endif // COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_ 47 | -------------------------------------------------------------------------------- /webrtc_vad/spl/resample_fractional.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains the resampling functions between 48, 44, 32 and 24 kHz. 
14 | * The description headers can be found in signal_processing_library.h 15 | * 16 | */ 17 | 18 | #include "signal_processing_library.h" 19 | 20 | // interpolation coefficients 21 | static const int16_t kCoefficients48To32[2][8] = { 22 | {778, -2050, 1087, 23285, 12903, -3783, 441, 222}, 23 | {222, 441, -3783, 12903, 23285, 1087, -2050, 778} 24 | }; 25 | 26 | static const int16_t kCoefficients32To24[3][8] = { 27 | {767, -2362, 2434, 24406, 10620, -3838, 721, 90}, 28 | {386, -381, -2646, 19062, 19062, -2646, -381, 386}, 29 | {90, 721, -3838, 10620, 24406, 2434, -2362, 767} 30 | }; 31 | 32 | static const int16_t kCoefficients44To32[4][9] = { 33 | {117, -669, 2245, -6183, 26267, 13529, -3245, 845, -138}, 34 | {-101, 612, -2283, 8532, 29790, -5138, 1789, -524, 91}, 35 | {50, -292, 1016, -3064, 32010, 3933, -1147, 315, -53}, 36 | {-156, 974, -3863, 18603, 21691, -6246, 2353, -712, 126} 37 | }; 38 | 39 | // Resampling ratio: 2/3 40 | // input: int32_t (normalized, not saturated) :: size 3 * K 41 | // output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 2 * K 42 | // K: number of blocks 43 | 44 | void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K) 45 | { 46 | ///////////////////////////////////////////////////////////// 47 | // Filter operation: 48 | // 49 | // Perform resampling (3 input samples -> 2 output samples); 50 | // process in sub blocks of size 3 samples. 
51 | int32_t tmp; 52 | size_t m; 53 | 54 | for (m = 0; m < K; m++) 55 | { 56 | tmp = 1 << 14; 57 | tmp += kCoefficients48To32[0][0] * In[0]; 58 | tmp += kCoefficients48To32[0][1] * In[1]; 59 | tmp += kCoefficients48To32[0][2] * In[2]; 60 | tmp += kCoefficients48To32[0][3] * In[3]; 61 | tmp += kCoefficients48To32[0][4] * In[4]; 62 | tmp += kCoefficients48To32[0][5] * In[5]; 63 | tmp += kCoefficients48To32[0][6] * In[6]; 64 | tmp += kCoefficients48To32[0][7] * In[7]; 65 | Out[0] = tmp; 66 | 67 | tmp = 1 << 14; 68 | tmp += kCoefficients48To32[1][0] * In[1]; 69 | tmp += kCoefficients48To32[1][1] * In[2]; 70 | tmp += kCoefficients48To32[1][2] * In[3]; 71 | tmp += kCoefficients48To32[1][3] * In[4]; 72 | tmp += kCoefficients48To32[1][4] * In[5]; 73 | tmp += kCoefficients48To32[1][5] * In[6]; 74 | tmp += kCoefficients48To32[1][6] * In[7]; 75 | tmp += kCoefficients48To32[1][7] * In[8]; 76 | Out[1] = tmp; 77 | 78 | // update pointers 79 | In += 3; 80 | Out += 2; 81 | } 82 | } 83 | 84 | // Resampling ratio: 3/4 85 | // input: int32_t (normalized, not saturated) :: size 4 * K 86 | // output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 3 * K 87 | // K: number of blocks 88 | 89 | void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K) 90 | { 91 | ///////////////////////////////////////////////////////////// 92 | // Filter operation: 93 | // 94 | // Perform resampling (4 input samples -> 3 output samples); 95 | // process in sub blocks of size 4 samples. 
96 | size_t m; 97 | int32_t tmp; 98 | 99 | for (m = 0; m < K; m++) 100 | { 101 | tmp = 1 << 14; 102 | tmp += kCoefficients32To24[0][0] * In[0]; 103 | tmp += kCoefficients32To24[0][1] * In[1]; 104 | tmp += kCoefficients32To24[0][2] * In[2]; 105 | tmp += kCoefficients32To24[0][3] * In[3]; 106 | tmp += kCoefficients32To24[0][4] * In[4]; 107 | tmp += kCoefficients32To24[0][5] * In[5]; 108 | tmp += kCoefficients32To24[0][6] * In[6]; 109 | tmp += kCoefficients32To24[0][7] * In[7]; 110 | Out[0] = tmp; 111 | 112 | tmp = 1 << 14; 113 | tmp += kCoefficients32To24[1][0] * In[1]; 114 | tmp += kCoefficients32To24[1][1] * In[2]; 115 | tmp += kCoefficients32To24[1][2] * In[3]; 116 | tmp += kCoefficients32To24[1][3] * In[4]; 117 | tmp += kCoefficients32To24[1][4] * In[5]; 118 | tmp += kCoefficients32To24[1][5] * In[6]; 119 | tmp += kCoefficients32To24[1][6] * In[7]; 120 | tmp += kCoefficients32To24[1][7] * In[8]; 121 | Out[1] = tmp; 122 | 123 | tmp = 1 << 14; 124 | tmp += kCoefficients32To24[2][0] * In[2]; 125 | tmp += kCoefficients32To24[2][1] * In[3]; 126 | tmp += kCoefficients32To24[2][2] * In[4]; 127 | tmp += kCoefficients32To24[2][3] * In[5]; 128 | tmp += kCoefficients32To24[2][4] * In[6]; 129 | tmp += kCoefficients32To24[2][5] * In[7]; 130 | tmp += kCoefficients32To24[2][6] * In[8]; 131 | tmp += kCoefficients32To24[2][7] * In[9]; 132 | Out[2] = tmp; 133 | 134 | // update pointers 135 | In += 4; 136 | Out += 3; 137 | } 138 | } 139 | 140 | // 141 | // fractional resampling filters 142 | // Fout = 11/16 * Fin 143 | // Fout = 8/11 * Fin 144 | // 145 | 146 | // compute two inner-products and store them to output array 147 | static void WebRtcSpl_ResampDotProduct(const int32_t *in1, const int32_t *in2, 148 | const int16_t *coef_ptr, int32_t *out1, 149 | int32_t *out2) 150 | { 151 | int32_t tmp1 = 16384; 152 | int32_t tmp2 = 16384; 153 | int16_t coef; 154 | 155 | coef = coef_ptr[0]; 156 | tmp1 += coef * in1[0]; 157 | tmp2 += coef * in2[-0]; 158 | 159 | coef = coef_ptr[1]; 160 | 
tmp1 += coef * in1[1]; 161 | tmp2 += coef * in2[-1]; 162 | 163 | coef = coef_ptr[2]; 164 | tmp1 += coef * in1[2]; 165 | tmp2 += coef * in2[-2]; 166 | 167 | coef = coef_ptr[3]; 168 | tmp1 += coef * in1[3]; 169 | tmp2 += coef * in2[-3]; 170 | 171 | coef = coef_ptr[4]; 172 | tmp1 += coef * in1[4]; 173 | tmp2 += coef * in2[-4]; 174 | 175 | coef = coef_ptr[5]; 176 | tmp1 += coef * in1[5]; 177 | tmp2 += coef * in2[-5]; 178 | 179 | coef = coef_ptr[6]; 180 | tmp1 += coef * in1[6]; 181 | tmp2 += coef * in2[-6]; 182 | 183 | coef = coef_ptr[7]; 184 | tmp1 += coef * in1[7]; 185 | tmp2 += coef * in2[-7]; 186 | 187 | coef = coef_ptr[8]; 188 | *out1 = tmp1 + coef * in1[8]; 189 | *out2 = tmp2 + coef * in2[-8]; 190 | } 191 | 192 | // Resampling ratio: 8/11 193 | // input: int32_t (normalized, not saturated) :: size 11 * K 194 | // output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 8 * K 195 | // K: number of blocks 196 | 197 | void WebRtcSpl_Resample44khzTo32khz(const int32_t *In, int32_t *Out, size_t K) 198 | { 199 | ///////////////////////////////////////////////////////////// 200 | // Filter operation: 201 | // 202 | // Perform resampling (11 input samples -> 8 output samples); 203 | // process in sub blocks of size 11 samples. 
204 | int32_t tmp; 205 | size_t m; 206 | 207 | for (m = 0; m < K; m++) 208 | { 209 | tmp = 1 << 14; 210 | 211 | // first output sample 212 | Out[0] = ((int32_t)In[3] << 15) + tmp; 213 | 214 | // sum and accumulate filter coefficients and input samples 215 | tmp += kCoefficients44To32[3][0] * In[5]; 216 | tmp += kCoefficients44To32[3][1] * In[6]; 217 | tmp += kCoefficients44To32[3][2] * In[7]; 218 | tmp += kCoefficients44To32[3][3] * In[8]; 219 | tmp += kCoefficients44To32[3][4] * In[9]; 220 | tmp += kCoefficients44To32[3][5] * In[10]; 221 | tmp += kCoefficients44To32[3][6] * In[11]; 222 | tmp += kCoefficients44To32[3][7] * In[12]; 223 | tmp += kCoefficients44To32[3][8] * In[13]; 224 | Out[4] = tmp; 225 | 226 | // sum and accumulate filter coefficients and input samples 227 | WebRtcSpl_ResampDotProduct(&In[0], &In[17], kCoefficients44To32[0], &Out[1], &Out[7]); 228 | 229 | // sum and accumulate filter coefficients and input samples 230 | WebRtcSpl_ResampDotProduct(&In[2], &In[15], kCoefficients44To32[1], &Out[2], &Out[6]); 231 | 232 | // sum and accumulate filter coefficients and input samples 233 | WebRtcSpl_ResampDotProduct(&In[3], &In[14], kCoefficients44To32[2], &Out[3], &Out[5]); 234 | 235 | // update pointers 236 | In += 11; 237 | Out += 8; 238 | } 239 | } 240 | -------------------------------------------------------------------------------- /webrtc_vad/spl/spl_init.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | /* The global function contained in this file initializes SPL function 12 | * pointers, currently only for ARM platforms. 13 | * 14 | * Some code came from common/rtcd.c in the WebM project. 15 | */ 16 | 17 | #include "signal_processing_library.h" 18 | 19 | /* Declare function pointers. */ 20 | MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16; 21 | MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32; 22 | MaxValueW16 WebRtcSpl_MaxValueW16; 23 | MaxValueW32 WebRtcSpl_MaxValueW32; 24 | MinValueW16 WebRtcSpl_MinValueW16; 25 | MinValueW32 WebRtcSpl_MinValueW32; 26 | CrossCorrelation WebRtcSpl_CrossCorrelation; 27 | DownsampleFast WebRtcSpl_DownsampleFast; 28 | ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound; 29 | 30 | #if (!defined(WEBRTC_HAS_NEON)) && !defined(MIPS32_LE) 31 | /* Initialize function pointers to the generic C version. */ 32 | static void InitPointersToC() { 33 | WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C; 34 | WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C; 35 | WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C; 36 | WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C; 37 | WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C; 38 | WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C; 39 | WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC; 40 | WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC; 41 | WebRtcSpl_ScaleAndAddVectorsWithRound = 42 | WebRtcSpl_ScaleAndAddVectorsWithRoundC; 43 | } 44 | #endif 45 | 46 | #if defined(WEBRTC_HAS_NEON) 47 | /* Initialize function pointers to the Neon version. 
*/ 48 | static void InitPointersToNeon() { 49 | WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon; 50 | WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon; 51 | WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon; 52 | WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon; 53 | WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon; 54 | WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon; 55 | WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationNeon; 56 | WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon; 57 | WebRtcSpl_ScaleAndAddVectorsWithRound = 58 | WebRtcSpl_ScaleAndAddVectorsWithRoundC; 59 | } 60 | #endif 61 | 62 | #if defined(MIPS32_LE) 63 | /* Initialize function pointers to the MIPS version. */ 64 | static void InitPointersToMIPS() { 65 | WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips; 66 | WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips; 67 | WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips; 68 | WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips; 69 | WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips; 70 | WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelation_mips; 71 | WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips; 72 | #if defined(MIPS_DSP_R1_LE) 73 | WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips; 74 | WebRtcSpl_ScaleAndAddVectorsWithRound = 75 | WebRtcSpl_ScaleAndAddVectorsWithRound_mips; 76 | #else 77 | WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C; 78 | WebRtcSpl_ScaleAndAddVectorsWithRound = 79 | WebRtcSpl_ScaleAndAddVectorsWithRoundC; 80 | #endif 81 | } 82 | #endif 83 | 84 | static void InitFunctionPointers(void) { 85 | #if defined(WEBRTC_HAS_NEON) 86 | InitPointersToNeon(); 87 | #elif defined(MIPS32_LE) 88 | InitPointersToMIPS(); 89 | #else 90 | InitPointersToC(); 91 | #endif /* WEBRTC_HAS_NEON */ 92 | } 93 | 94 | //#if defined(WEBRTC_POSIX) 95 | #include 96 | 97 | static void once(void (*func)(void)) { 98 | static pthread_once_t lock = PTHREAD_ONCE_INIT; 99 | pthread_once(&lock, func); 100 
| } 101 | /* There's no fallback version as an #else block here to ensure thread safety. 102 | * In case of neither pthread for WEBRTC_POSIX nor _WIN32 is present, build 103 | * system should pick it up. 104 | */ 105 | //#endif /* WEBRTC_POSIX */ 106 | 107 | void WebRtcSpl_Init(void) { 108 | once(InitFunctionPointers); 109 | } 110 | -------------------------------------------------------------------------------- /webrtc_vad/spl/spl_inl.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include 12 | 13 | #include "spl_inl.h" 14 | 15 | // Table used by WebRtcSpl_CountLeadingZeros32_NotBuiltin. For each uint32_t n 16 | // that's a sequence of 0 bits followed by a sequence of 1 bits, the entry at 17 | // index (n * 0x8c0b2891) >> 26 in this table gives the number of zero bits in 18 | // n. 19 | const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64] = { 20 | 32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18, 21 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24, 22 | 4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9, 23 | -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12, 24 | }; 25 | -------------------------------------------------------------------------------- /webrtc_vad/spl/spl_inl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | // This header file includes the inline functions in 13 | // the fix point signal processing library. 14 | 15 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_ 16 | #define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_ 17 | 18 | //#include "rtc_base/compile_assert_c.h" 19 | 20 | extern const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64]; 21 | 22 | // Don't call this directly except in tests! 23 | static __inline int WebRtcSpl_CountLeadingZeros32_NotBuiltin(uint32_t n) { 24 | // Normalize n by rounding up to the nearest number that is a sequence of 0 25 | // bits followed by a sequence of 1 bits. This number has the same number of 26 | // leading zeros as the original n. There are exactly 33 such values. 27 | n |= n >> 1; 28 | n |= n >> 2; 29 | n |= n >> 4; 30 | n |= n >> 8; 31 | n |= n >> 16; 32 | 33 | // Multiply the modified n with a constant selected (by exhaustive search) 34 | // such that each of the 33 possible values of n give a product whose 6 most 35 | // significant bits are unique. Then look up the answer in the table. 36 | return kWebRtcSpl_CountLeadingZeros32_Table[(n * 0x8c0b2891) >> 26]; 37 | } 38 | 39 | // Don't call this directly except in tests! 40 | static __inline int WebRtcSpl_CountLeadingZeros64_NotBuiltin(uint64_t n) { 41 | const int leading_zeros = n >> 32 == 0 ? 32 : 0; 42 | return leading_zeros + WebRtcSpl_CountLeadingZeros32_NotBuiltin( 43 | (uint32_t)(n >> (32 - leading_zeros))); 44 | } 45 | 46 | // Returns the number of leading zero bits in the argument. 
47 | static __inline int WebRtcSpl_CountLeadingZeros32(uint32_t n) { 48 | #ifdef __GNUC__ 49 | // RTC_COMPILE_ASSERT(sizeof(unsigned int) == sizeof(uint32_t)); 50 | return n == 0 ? 32 : __builtin_clz(n); 51 | #else 52 | return WebRtcSpl_CountLeadingZeros32_NotBuiltin(n); 53 | #endif 54 | } 55 | 56 | // Returns the number of leading zero bits in the argument. 57 | static __inline int WebRtcSpl_CountLeadingZeros64(uint64_t n) { 58 | #ifdef __GNUC__ 59 | // RTC_COMPILE_ASSERT(sizeof(unsigned long long) == sizeof(uint64_t)); // NOLINT 60 | return n == 0 ? 64 : __builtin_clzll(n); 61 | #else 62 | return WebRtcSpl_CountLeadingZeros64_NotBuiltin(n); 63 | #endif 64 | } 65 | 66 | #ifdef WEBRTC_ARCH_ARM_V7 67 | #include "common_audio/signal_processing/include/spl_inl_armv7.h" 68 | #else 69 | 70 | #if defined(MIPS32_LE) 71 | #include "common_audio/signal_processing/include/spl_inl_mips.h" 72 | #endif 73 | 74 | #if !defined(MIPS_DSP_R1_LE) 75 | static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) { 76 | int16_t out16 = (int16_t) value32; 77 | 78 | if (value32 > 32767) 79 | out16 = 32767; 80 | else if (value32 < -32768) 81 | out16 = -32768; 82 | 83 | return out16; 84 | } 85 | 86 | static __inline int32_t WebRtcSpl_AddSatW32(int32_t a, int32_t b) { 87 | // Do the addition in unsigned numbers, since signed overflow is undefined 88 | // behavior. 89 | const int32_t sum = (int32_t)((uint32_t)a + (uint32_t)b); 90 | 91 | // a + b can't overflow if a and b have different signs. If they have the 92 | // same sign, a + b also has the same sign iff it didn't overflow. 93 | if ((a < 0) == (b < 0) && (a < 0) != (sum < 0)) { 94 | // The direction of the overflow is obvious from the sign of a + b. 95 | return sum < 0 ? INT32_MAX : INT32_MIN; 96 | } 97 | return sum; 98 | } 99 | 100 | static __inline int32_t WebRtcSpl_SubSatW32(int32_t a, int32_t b) { 101 | // Do the subtraction in unsigned numbers, since signed overflow is undefined 102 | // behavior. 
103 | const int32_t diff = (int32_t)((uint32_t)a - (uint32_t)b); 104 | 105 | // a - b can't overflow if a and b have the same sign. If they have different 106 | // signs, a - b has the same sign as a iff it didn't overflow. 107 | if ((a < 0) != (b < 0) && (a < 0) != (diff < 0)) { 108 | // The direction of the overflow is obvious from the sign of a - b. 109 | return diff < 0 ? INT32_MAX : INT32_MIN; 110 | } 111 | return diff; 112 | } 113 | 114 | static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) { 115 | return WebRtcSpl_SatW32ToW16((int32_t) a + (int32_t) b); 116 | } 117 | 118 | static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) { 119 | return WebRtcSpl_SatW32ToW16((int32_t) var1 - (int32_t) var2); 120 | } 121 | #endif // #if !defined(MIPS_DSP_R1_LE) 122 | 123 | #if !defined(MIPS32_LE) 124 | static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) { 125 | return 32 - WebRtcSpl_CountLeadingZeros32(n); 126 | } 127 | 128 | // Return the number of steps a can be left-shifted without overflow, 129 | // or 0 if a == 0. 130 | static __inline int16_t WebRtcSpl_NormW32(int32_t a) { 131 | return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? ~a : a) - 1; 132 | } 133 | 134 | // Return the number of steps a can be left-shifted without overflow, 135 | // or 0 if a == 0. 136 | static __inline int16_t WebRtcSpl_NormU32(uint32_t a) { 137 | return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a); 138 | } 139 | 140 | // Return the number of steps a can be left-shifted without overflow, 141 | // or 0 if a == 0. 142 | static __inline int16_t WebRtcSpl_NormW16(int16_t a) { 143 | const int32_t a32 = a; 144 | return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? 
~a32 : a32) - 17; 145 | } 146 | 147 | static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) { 148 | return (a * b + c); 149 | } 150 | #endif // #if !defined(MIPS32_LE) 151 | 152 | #endif // WEBRTC_ARCH_ARM_V7 153 | 154 | #endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_ 155 | -------------------------------------------------------------------------------- /webrtc_vad/spl/vector_scaling_operations.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains implementations of the functions 14 | * WebRtcSpl_VectorBitShiftW16() 15 | * WebRtcSpl_VectorBitShiftW32() 16 | * WebRtcSpl_VectorBitShiftW32ToW16() 17 | * WebRtcSpl_ScaleVector() 18 | * WebRtcSpl_ScaleVectorWithSat() 19 | * WebRtcSpl_ScaleAndAddVectors() 20 | * WebRtcSpl_ScaleAndAddVectorsWithRoundC() 21 | */ 22 | 23 | #include "signal_processing_library.h" 24 | 25 | void WebRtcSpl_VectorBitShiftW16(int16_t *res, size_t length, 26 | const int16_t *in, int16_t right_shifts) 27 | { 28 | size_t i; 29 | 30 | if (right_shifts > 0) 31 | { 32 | for (i = length; i > 0; i--) 33 | { 34 | (*res++) = ((*in++) >> right_shifts); 35 | } 36 | } else 37 | { 38 | for (i = length; i > 0; i--) 39 | { 40 | (*res++) = ((*in++) * (1 << (-right_shifts))); 41 | } 42 | } 43 | } 44 | 45 | void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector, 46 | size_t vector_length, 47 | const int32_t *in_vector, 48 | int16_t right_shifts) 49 | { 50 | size_t i; 51 | 52 | if (right_shifts > 0) 53 | { 54 | for (i = vector_length; i 
> 0; i--) 55 | { 56 | (*out_vector++) = ((*in_vector++) >> right_shifts); 57 | } 58 | } else 59 | { 60 | for (i = vector_length; i > 0; i--) 61 | { 62 | (*out_vector++) = ((*in_vector++) << (-right_shifts)); 63 | } 64 | } 65 | } 66 | 67 | void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length, 68 | const int32_t* in, int right_shifts) { 69 | size_t i; 70 | int32_t tmp_w32; 71 | 72 | if (right_shifts >= 0) { 73 | for (i = length; i > 0; i--) { 74 | tmp_w32 = (*in++) >> right_shifts; 75 | (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32); 76 | } 77 | } else { 78 | int left_shifts = -right_shifts; 79 | for (i = length; i > 0; i--) { 80 | tmp_w32 = (*in++) << left_shifts; 81 | (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32); 82 | } 83 | } 84 | } 85 | 86 | void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector, 87 | int16_t gain, size_t in_vector_length, 88 | int16_t right_shifts) 89 | { 90 | // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts 91 | size_t i; 92 | const int16_t *inptr; 93 | int16_t *outptr; 94 | 95 | inptr = in_vector; 96 | outptr = out_vector; 97 | 98 | for (i = 0; i < in_vector_length; i++) 99 | { 100 | *outptr++ = (int16_t)((*inptr++ * gain) >> right_shifts); 101 | } 102 | } 103 | 104 | void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector, 105 | int16_t gain, size_t in_vector_length, 106 | int16_t right_shifts) 107 | { 108 | // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts 109 | size_t i; 110 | const int16_t *inptr; 111 | int16_t *outptr; 112 | 113 | inptr = in_vector; 114 | outptr = out_vector; 115 | 116 | for (i = 0; i < in_vector_length; i++) { 117 | *outptr++ = WebRtcSpl_SatW32ToW16((*inptr++ * gain) >> right_shifts); 118 | } 119 | } 120 | 121 | void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1, 122 | const int16_t *in2, int16_t gain2, int shift2, 123 | int16_t *out, size_t vector_length) 124 | { 125 | // Performs vector 
operation: out = (gain1*in1)>>shift1 + (gain2*in2)>>shift2 126 | size_t i; 127 | const int16_t *in1ptr; 128 | const int16_t *in2ptr; 129 | int16_t *outptr; 130 | 131 | in1ptr = in1; 132 | in2ptr = in2; 133 | outptr = out; 134 | 135 | for (i = 0; i < vector_length; i++) 136 | { 137 | *outptr++ = (int16_t)((gain1 * *in1ptr++) >> shift1) + 138 | (int16_t)((gain2 * *in2ptr++) >> shift2); 139 | } 140 | } 141 | 142 | // C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms. 143 | int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1, 144 | int16_t in_vector1_scale, 145 | const int16_t* in_vector2, 146 | int16_t in_vector2_scale, 147 | int right_shifts, 148 | int16_t* out_vector, 149 | size_t length) { 150 | size_t i = 0; 151 | int round_value = (1 << right_shifts) >> 1; 152 | 153 | if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL || 154 | length == 0 || right_shifts < 0) { 155 | return -1; 156 | } 157 | 158 | for (i = 0; i < length; i++) { 159 | out_vector[i] = (int16_t)(( 160 | in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale + 161 | round_value) >> right_shifts); 162 | } 163 | 164 | return 0; 165 | } 166 | --------------------------------------------------------------------------------