├── .github └── FUNDING.yml ├── CMakeLists.txt ├── LICENSE ├── README.md ├── dr_wav.h ├── main.cc ├── ns ├── aligned_malloc.cc ├── aligned_malloc.h ├── array_view.h ├── audio_buffer.cc ├── audio_buffer.h ├── audio_util.cc ├── audio_util.h ├── channel_buffer.h ├── checks.h ├── constructor_magic.h ├── fast_math.cc ├── fast_math.h ├── fft4g.cc ├── fft4g.h ├── gtest_prod_util.h ├── histograms.cc ├── histograms.h ├── noise_estimator.cc ├── noise_estimator.h ├── noise_suppressor.cc ├── noise_suppressor.h ├── ns_common.h ├── ns_config.h ├── ns_fft.cc ├── ns_fft.h ├── prior_signal_model.cc ├── prior_signal_model.h ├── prior_signal_model_estimator.cc ├── prior_signal_model_estimator.h ├── push_sinc_resampler.cc ├── push_sinc_resampler.h ├── quantile_noise_estimator.cc ├── quantile_noise_estimator.h ├── signal_model.cc ├── signal_model.h ├── signal_model_estimator.cc ├── signal_model_estimator.h ├── sinc_resampler.cc ├── sinc_resampler.h ├── speech_probability_estimator.cc ├── speech_probability_estimator.h ├── splitting_filter.c ├── splitting_filter.cc ├── splitting_filter.h ├── suppression_params.cc ├── suppression_params.h ├── three_band_filter_bank.cc ├── three_band_filter_bank.h ├── type_traits.h ├── wiener_filter.cc └── wiener_filter.h └── timing.h /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | 
otechie: # Replace with a single Otechie username 12 | custom: https://www.paypal.com/paypalme/cpuimage/ 13 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | project(webrtc_ns_cpp) 3 | file(GLOB NS_SRC ns/*.cc ns/*.h ns/*.c) 4 | add_executable(webrtc_ns_cpp main.cc ${NS_SRC}) 5 | 6 | SET(CMAKE_C_FLAGS_DEBUG "-O3") 7 | SET(CMAKE_C_FLAGS_RELEASE "-O3") 8 | SET(CMAKE_CXX_FLAGS_DEBUG "-O3") 9 | SET(CMAKE_CXX_FLAGS_RELEASE "-O3") 10 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ftree-vectorize") 11 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftree-vectorize") 12 | SET(CMAKE_BUILD_TYPE "Release") 13 | SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden") 14 | 15 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS_DEBUG} -Wall -g -O0 -Wextra") 16 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS_RELDEBINFO} -g -O3") 17 | 18 | target_link_libraries(webrtc_ns_cpp -lm) 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011, The WebRTC project authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in 12 | the documentation and/or other materials provided with the 13 | distribution. 14 | 15 | * Neither the name of Google nor the names of its contributors may 16 | be used to endorse or promote products derived from this software 17 | without specific prior written permission. 
18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WebRTC_NS_CPP 2 | Noise Suppression Module Port From WebRTC 3 | 4 | # Donating 5 | 6 | If you found this project useful, consider buying me a coffee 7 | 8 | Buy Me A Coffee 9 | -------------------------------------------------------------------------------- /main.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #include "ns/noise_suppressor.h" 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | #include "timing.h" 20 | 21 | //采用https://github.com/mackron/dr_libs/blob/master/dr_wav.h 解码 22 | #define DR_WAV_IMPLEMENTATION 23 | 24 | #include "dr_wav.h" 25 | 26 | #ifndef nullptr 27 | #define nullptr 0 28 | #endif 29 | 30 | #ifndef MIN 31 | #define MIN(A, B) ((A) < (B) ? (A) : (B)) 32 | #endif 33 | 34 | #ifndef MAX 35 | #define MAX(A, B) ((A) > (B) ? (A) : (B)) 36 | #endif 37 | 38 | //写wav文件 39 | void wavWrite_s16(char *filename, int16_t *buffer, size_t sampleRate, size_t totalSampleCount, unsigned int channels) { 40 | drwav_data_format format; 41 | format.container = drwav_container_riff; // <-- drwav_container_riff = normal WAV files, drwav_container_w64 = Sony Wave64. 42 | format.channels = channels; 43 | format.sampleRate = (drwav_uint32) sampleRate; 44 | format.bitsPerSample = sizeof(*buffer) * 8; 45 | format.format = DR_WAVE_FORMAT_PCM; 46 | drwav wav; 47 | drwav_init_file_write(&wav, filename, &format, NULL); 48 | drwav_uint64 samplesWritten = drwav_write_pcm_frames(&wav, totalSampleCount, buffer); 49 | drwav_uninit(&wav); 50 | if (samplesWritten != totalSampleCount) { 51 | fprintf(stderr, "ERROR\n"); 52 | exit(1); 53 | 54 | } 55 | } 56 | 57 | //读取wav文件 58 | short *wavRead_s16(char *filename, uint32_t *sampleRate, uint64_t *totalSampleCount, unsigned int *channels) { 59 | short *buffer = drwav_open_file_and_read_pcm_frames_s16(filename, channels, sampleRate, totalSampleCount, NULL); 60 | if (buffer == NULL) { 61 | printf("ERROR."); 62 | } 63 | return buffer; 64 | } 65 | 66 | //分割路径函数 67 | void splitpath(const char *path, char *drv, char *dir, char *name, char *ext) { 68 | const char *end; 69 | const char *p; 70 | const char *s; 71 | if (path[0] && path[1] == ':') { 72 | if (drv) { 73 | *drv++ = *path++; 74 | *drv++ = *path++; 75 | *drv = '\0'; 76 | } 77 | } else if (drv) 78 | *drv = '\0'; 79 | for (end = path; *end && 
*end != ':';) 80 | end++; 81 | for (p = end; p > path && *--p != '\\' && *p != '/';) 82 | if (*p == '.') { 83 | end = p; 84 | break; 85 | } 86 | if (ext) 87 | for (s = end; (*ext = *s++);) 88 | ext++; 89 | for (p = end; p > path;) 90 | if (*--p == '\\' || *p == '/') { 91 | p++; 92 | break; 93 | } 94 | if (name) { 95 | for (s = p; s < end;) 96 | *name++ = *s++; 97 | *name = '\0'; 98 | } 99 | if (dir) { 100 | for (s = path; s < p;) 101 | *dir++ = *s++; 102 | *dir = '\0'; 103 | } 104 | } 105 | 106 | using namespace webrtc; 107 | 108 | int nsProc(short *input, size_t SampleCount, size_t sampleRate, int num_channels) { 109 | AudioBuffer audio(sampleRate, num_channels, sampleRate, num_channels, sampleRate, 110 | num_channels); 111 | StreamConfig stream_config(sampleRate, num_channels); 112 | NsConfig cfg; 113 | /* 114 | * NsConfig::SuppressionLevel::k6dB 115 | * NsConfig::SuppressionLevel::k12dB 116 | * NsConfig::SuppressionLevel::k18dB 117 | * NsConfig::SuppressionLevel::k21dB 118 | */ 119 | // cfg.target_level = NsConfig::SuppressionLevel::k21dB; 120 | NoiseSuppressor ns(cfg, sampleRate, num_channels); 121 | short *buffer = input; 122 | bool split_bands = sampleRate > 16000; 123 | uint64_t frames = (SampleCount / stream_config.num_samples()); 124 | for (size_t frame_index = 0; frame_index < frames; ++frame_index) { 125 | audio.CopyFrom(buffer, stream_config); 126 | if (split_bands) { 127 | audio.SplitIntoFrequencyBands(); 128 | } 129 | ns.Analyze(audio); 130 | ns.Process(&audio); 131 | if (split_bands) { 132 | audio.MergeFrequencyBands(); 133 | } 134 | audio.CopyTo(stream_config, buffer); 135 | buffer += stream_config.num_samples(); 136 | } 137 | return 0; 138 | } 139 | 140 | void WebRtc_DeNoise(char *in_file, char *out_file) { 141 | uint32_t sampleRate = 0; 142 | uint64_t nSampleCount = 0; 143 | uint32_t channels = 1; 144 | short *data_in = wavRead_s16(in_file, &sampleRate, &nSampleCount, &channels); 145 | if (data_in != NULL) { 146 | double startTime = now(); 147 | 
short *data_out = (short *) calloc(nSampleCount, sizeof(short)); 148 | if (data_out != NULL) { 149 | nsProc(data_in, nSampleCount, sampleRate, channels); 150 | double time_interval = calcElapsed(startTime, now()); 151 | printf("time interval: %d ms\n ", (int) (time_interval * 1000)); 152 | wavWrite_s16(out_file, data_in, sampleRate, (uint32_t) nSampleCount, channels); 153 | free(data_out); 154 | } 155 | free(data_in); 156 | } 157 | } 158 | 159 | 160 | int main(int argc, char *argv[]) { 161 | printf("webrtc noise suppressor\n"); 162 | printf("blog:http://cpuimage.cnblogs.com/\n"); 163 | printf("email:gaozhihan@vip.qq.com\n"); 164 | if (argc < 2) { 165 | printf("usage:\n"); 166 | printf("./webrtc_ns input.wav\n"); 167 | printf("or\n"); 168 | printf("./webrtc_ns input.wav output.wav\n"); 169 | return -1; 170 | } 171 | char *in_file = argv[1]; 172 | 173 | if (argc > 2) { 174 | char *out_file = argv[2]; 175 | WebRtc_DeNoise(in_file, out_file); 176 | } else { 177 | char drive[3]; 178 | char dir[256]; 179 | char fname[256]; 180 | char ext[256]; 181 | char out_file[1024]; 182 | splitpath(in_file, drive, dir, fname, ext); 183 | sprintf(out_file, "%s%s%s_out%s", drive, dir, fname, ext); 184 | WebRtc_DeNoise(in_file, out_file); 185 | } 186 | printf("press any key to exit.\n"); 187 | getchar(); 188 | return 0; 189 | } 190 | 191 | -------------------------------------------------------------------------------- /ns/aligned_malloc.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #include "aligned_malloc.h" 12 | 13 | #include // for free, malloc 14 | #include // for memcpy 15 | 16 | #include "checks.h" 17 | 18 | #ifdef _WIN32 19 | 20 | #include 21 | 22 | #else 23 | #include 24 | #endif 25 | 26 | // Reference on memory alignment: 27 | // http://stackoverflow.com/questions/227897/solve-the-memory-alignment-in-c-interview-question-that-stumped-me 28 | namespace webrtc { 29 | 30 | uintptr_t GetRightAlign(uintptr_t start_pos, size_t alignment) { 31 | // The pointer should be aligned with |alignment| bytes. The - 1 guarantees 32 | // that it is aligned towards the closest higher (right) address. 33 | return (start_pos + alignment - 1) & ~(alignment - 1); 34 | } 35 | 36 | // Alignment must be an integer power of two. 37 | bool ValidAlignment(size_t alignment) { 38 | if (!alignment) { 39 | return false; 40 | } 41 | return (alignment & (alignment - 1)) == 0; 42 | } 43 | 44 | void *GetRightAlign(const void *pointer, size_t alignment) { 45 | if (!pointer) { 46 | return NULL; 47 | } 48 | if (!ValidAlignment(alignment)) { 49 | return NULL; 50 | } 51 | uintptr_t start_pos = reinterpret_cast(pointer); 52 | return reinterpret_cast(GetRightAlign(start_pos, alignment)); 53 | } 54 | 55 | void *AlignedMalloc(size_t size, size_t alignment) { 56 | if (size == 0) { 57 | return NULL; 58 | } 59 | if (!ValidAlignment(alignment)) { 60 | return NULL; 61 | } 62 | 63 | // The memory is aligned towards the lowest address that so only 64 | // alignment - 1 bytes needs to be allocated. 65 | // A pointer to the start of the memory must be stored so that it can be 66 | // retreived for deletion, ergo the sizeof(uintptr_t). 67 | void *memory_pointer = malloc(size + sizeof(uintptr_t) + alignment - 1); 68 | RTC_CHECK(memory_pointer); 69 | if (memory_pointer == NULL) 70 | return NULL; 71 | // Aligning after the sizeof(uintptr_t) bytes will leave room for the header 72 | // in the same memory block. 
73 | uintptr_t align_start_pos = reinterpret_cast(memory_pointer); 74 | align_start_pos += sizeof(uintptr_t); 75 | uintptr_t aligned_pos = GetRightAlign(align_start_pos, alignment); 76 | void *aligned_pointer = reinterpret_cast(aligned_pos); 77 | 78 | // Store the address to the beginning of the memory just before the aligned 79 | // memory. 80 | uintptr_t header_pos = aligned_pos - sizeof(uintptr_t); 81 | void *header_pointer = reinterpret_cast(header_pos); 82 | uintptr_t memory_start = reinterpret_cast(memory_pointer); 83 | memcpy(header_pointer, &memory_start, sizeof(uintptr_t)); 84 | 85 | return aligned_pointer; 86 | } 87 | 88 | void AlignedFree(void *mem_block) { 89 | if (mem_block == NULL) { 90 | return; 91 | } 92 | uintptr_t aligned_pos = reinterpret_cast(mem_block); 93 | uintptr_t header_pos = aligned_pos - sizeof(uintptr_t); 94 | 95 | // Read out the address of the AlignedMemory struct from the header. 96 | uintptr_t memory_start_pos = *reinterpret_cast(header_pos); 97 | void *memory_start = reinterpret_cast(memory_start_pos); 98 | free(memory_start); 99 | } 100 | 101 | } // namespace webrtc 102 | -------------------------------------------------------------------------------- /ns/aligned_malloc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef RTC_BASE_MEMORY_ALIGNED_MALLOC_H_ 12 | #define RTC_BASE_MEMORY_ALIGNED_MALLOC_H_ 13 | 14 | // The functions declared here 15 | // 1) Allocates block of aligned memory. 
16 | // 2) Re-calculates a pointer such that it is aligned to a higher or equal 17 | // address. 18 | // Note: alignment must be a power of two. The alignment is in bytes. 19 | 20 | #include 21 | 22 | namespace webrtc { 23 | 24 | // Returns a pointer to the first boundry of |alignment| bytes following the 25 | // address of |ptr|. 26 | // Note that there is no guarantee that the memory in question is available. 27 | // |ptr| has no requirements other than it can't be NULL. 28 | void *GetRightAlign(const void *ptr, size_t alignment); 29 | 30 | // Allocates memory of |size| bytes aligned on an |alignment| boundry. 31 | // The return value is a pointer to the memory. Note that the memory must 32 | // be de-allocated using AlignedFree. 33 | void *AlignedMalloc(size_t size, size_t alignment); 34 | 35 | // De-allocates memory created using the AlignedMalloc() API. 36 | void AlignedFree(void *mem_block); 37 | 38 | // Templated versions to facilitate usage of aligned malloc without casting 39 | // to and from void*. 40 | template 41 | T *GetRightAlign(const T *ptr, size_t alignment) { 42 | return reinterpret_cast( 43 | GetRightAlign(reinterpret_cast(ptr), alignment)); 44 | } 45 | 46 | template 47 | T *AlignedMalloc(size_t size, size_t alignment) { 48 | return reinterpret_cast(AlignedMalloc(size, alignment)); 49 | } 50 | 51 | // Deleter for use with unique_ptr. E.g., use as 52 | // std::unique_ptr foo; 53 | struct AlignedFreeDeleter { 54 | inline void operator()(void *ptr) const { AlignedFree(ptr); } 55 | }; 56 | 57 | } // namespace webrtc 58 | 59 | #endif // RTC_BASE_MEMORY_ALIGNED_MALLOC_H_ 60 | -------------------------------------------------------------------------------- /ns/array_view.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 The WebRTC Project Authors. All rights reserved. 
3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef API_ARRAY_VIEW_H_ 12 | #define API_ARRAY_VIEW_H_ 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | #include "checks.h" 19 | #include "type_traits.h" 20 | 21 | namespace rtc { 22 | 23 | // tl;dr: rtc::ArrayView is the same thing as gsl::span from the Guideline 24 | // Support Library. 25 | // 26 | // Many functions read from or write to arrays. The obvious way to do this is 27 | // to use two arguments, a pointer to the first element and an element count: 28 | // 29 | // bool Contains17(const int* arr, size_t size) { 30 | // for (size_t i = 0; i < size; ++i) { 31 | // if (arr[i] == 17) 32 | // return true; 33 | // } 34 | // return false; 35 | // } 36 | // 37 | // This is flexible, since it doesn't matter how the array is stored (C array, 38 | // std::vector, rtc::Buffer, ...), but it's error-prone because the caller has 39 | // to correctly specify the array length: 40 | // 41 | // Contains17(arr, arraysize(arr)); // C array 42 | // Contains17(arr.data(), arr.size()); // std::vector 43 | // Contains17(arr, size); // pointer + size 44 | // ... 45 | // 46 | // It's also kind of messy to have two separate arguments for what is 47 | // conceptually a single thing. 48 | // 49 | // Enter rtc::ArrayView. It contains a T pointer (to an array it doesn't 50 | // own) and a count, and supports the basic things you'd expect, such as 51 | // indexing and iteration. 
It allows us to write our function like this: 52 | // 53 | // bool Contains17(rtc::ArrayView arr) { 54 | // for (auto e : arr) { 55 | // if (e == 17) 56 | // return true; 57 | // } 58 | // return false; 59 | // } 60 | // 61 | // And even better, because a bunch of things will implicitly convert to 62 | // ArrayView, we can call it like this: 63 | // 64 | // Contains17(arr); // C array 65 | // Contains17(arr); // std::vector 66 | // Contains17(rtc::ArrayView(arr, size)); // pointer + size 67 | // Contains17(nullptr); // nullptr -> empty ArrayView 68 | // ... 69 | // 70 | // ArrayView stores both a pointer and a size, but you may also use 71 | // ArrayView, which has a size that's fixed at compile time (which means 72 | // it only has to store the pointer). 73 | // 74 | // One important point is that ArrayView and ArrayView are 75 | // different types, which allow and don't allow mutation of the array elements, 76 | // respectively. The implicit conversions work just like you'd hope, so that 77 | // e.g. vector will convert to either ArrayView or ArrayView, but const vector will convert only to ArrayView. 79 | // (ArrayView itself can be the source type in such conversions, so 80 | // ArrayView will convert to ArrayView.) 81 | // 82 | // Note: ArrayView is tiny (just a pointer and a count if variable-sized, just 83 | // a pointer if fix-sized) and trivially copyable, so it's probably cheaper to 84 | // pass it by value than by const reference. 85 | 86 | namespace impl { 87 | 88 | // Magic constant for indicating that the size of an ArrayView is variable 89 | // instead of fixed. 90 | enum : std::ptrdiff_t { 91 | kArrayViewVarSize = -4711 92 | }; 93 | 94 | // Base class for ArrayViews of fixed nonzero size. 
95 | template 96 | class ArrayViewBase { 97 | static_assert(Size > 0, "ArrayView size must be variable or non-negative"); 98 | 99 | public: 100 | ArrayViewBase(T *data, size_t size) : data_(data) {} 101 | 102 | static constexpr size_t size() { return Size; } 103 | 104 | static constexpr bool empty() { return false; } 105 | 106 | T *data() const { return data_; } 107 | 108 | protected: 109 | static constexpr bool fixed_size() { return true; } 110 | 111 | private: 112 | T *data_; 113 | }; 114 | 115 | // Specialized base class for ArrayViews of fixed zero size. 116 | template 117 | class ArrayViewBase { 118 | public: 119 | explicit ArrayViewBase(T *data, size_t size) {} 120 | 121 | static constexpr size_t size() { return 0; } 122 | 123 | static constexpr bool empty() { return true; } 124 | 125 | T *data() const { return nullptr; } 126 | 127 | protected: 128 | static constexpr bool fixed_size() { return true; } 129 | }; 130 | 131 | // Specialized base class for ArrayViews of variable size. 132 | template 133 | class ArrayViewBase { 134 | public: 135 | ArrayViewBase(T *data, size_t size) 136 | : data_(size == 0 ? nullptr : data), size_(size) {} 137 | 138 | size_t size() const { return size_; } 139 | 140 | bool empty() const { return size_ == 0; } 141 | 142 | T *data() const { return data_; } 143 | 144 | protected: 145 | static constexpr bool fixed_size() { return false; } 146 | 147 | private: 148 | T *data_; 149 | size_t size_; 150 | }; 151 | 152 | } // namespace impl 153 | 154 | template 155 | class ArrayView final : public impl::ArrayViewBase { 156 | public: 157 | using value_type = T; 158 | using const_iterator = const T *; 159 | 160 | // Construct an ArrayView from a pointer and a length. 161 | template 162 | ArrayView(U *data, size_t size) 163 | : impl::ArrayViewBase::ArrayViewBase(data, size) { 164 | RTC_DCHECK_EQ(size == 0 ? 
nullptr : data, this->data()); 165 | RTC_DCHECK_EQ(size, this->size()); 166 | RTC_DCHECK_EQ(!this->data(), 167 | this->size() == 0); // data is null iff size == 0. 168 | } 169 | 170 | // Construct an empty ArrayView. Note that fixed-size ArrayViews of size > 0 171 | // cannot be empty. 172 | ArrayView() : ArrayView(nullptr, 0) {} 173 | 174 | ArrayView(std::nullptr_t) // NOLINT 175 | : ArrayView() {} 176 | 177 | ArrayView(std::nullptr_t, size_t size) 178 | : ArrayView(static_cast(nullptr), size) { 179 | static_assert(Size == 0 || Size == impl::kArrayViewVarSize, ""); 180 | RTC_DCHECK_EQ(0, size); 181 | } 182 | 183 | // Construct an ArrayView from a C-style array. 184 | template 185 | ArrayView(U (&array)[N]) // NOLINT 186 | : ArrayView(array, N) { 187 | static_assert(Size == N || Size == impl::kArrayViewVarSize, 188 | "Array size must match ArrayView size"); 189 | } 190 | 191 | // (Only if size is fixed.) Construct a fixed size ArrayView from a 192 | // non-const std::array instance. For an ArrayView with variable size, the 193 | // used ctor is ArrayView(U& u) instead. 194 | template(N)>::type * = nullptr> 198 | ArrayView(std::array &u) // NOLINT 199 | : ArrayView(u.data(), u.size()) {} 200 | 201 | // (Only if size is fixed.) Construct a fixed size ArrayView where T is 202 | // const from a const(expr) std::array instance. For an ArrayView with 203 | // variable size, the used ctor is ArrayView(U& u) instead. 204 | template(N)>::type * = nullptr> 208 | ArrayView(const std::array &u) // NOLINT 209 | : ArrayView(u.data(), u.size()) {} 210 | 211 | // (Only if size is fixed.) Construct an ArrayView from any type U that has a 212 | // static constexpr size() method whose return value is equal to Size, and a 213 | // data() method whose return value converts implicitly to T*. In particular, 214 | // this means we allow conversion from ArrayView to ArrayView, but not the other way around. 
We also don't allow conversion from 216 | // ArrayView to ArrayView, or from ArrayView to ArrayView when M != N. 218 | template< 219 | typename U, 220 | typename std::enable_if::value>::type * = nullptr> 222 | ArrayView(U &u) // NOLINT 223 | : ArrayView(u.data(), u.size()) { 224 | static_assert(U::size() == Size, "Sizes must match exactly"); 225 | } 226 | 227 | template< 228 | typename U, 229 | typename std::enable_if::value>::type * = nullptr> 231 | ArrayView(const U &u) // NOLINT(runtime/explicit) 232 | : ArrayView(u.data(), u.size()) { 233 | static_assert(U::size() == Size, "Sizes must match exactly"); 234 | } 235 | 236 | // (Only if size is variable.) Construct an ArrayView from any type U that 237 | // has a size() method whose return value converts implicitly to size_t, and 238 | // a data() method whose return value converts implicitly to T*. In 239 | // particular, this means we allow conversion from ArrayView to 240 | // ArrayView, but not the other way around. Other allowed 241 | // conversions include 242 | // ArrayView to ArrayView or ArrayView, 243 | // std::vector to ArrayView or ArrayView, 244 | // const std::vector to ArrayView, 245 | // rtc::Buffer to ArrayView or ArrayView, and 246 | // const rtc::Buffer to ArrayView. 247 | template< 248 | typename U, 249 | typename std::enable_if::value>::type * = nullptr> 251 | ArrayView(U &u) // NOLINT 252 | : ArrayView(u.data(), u.size()) {} 253 | 254 | template< 255 | typename U, 256 | typename std::enable_if::value>::type * = nullptr> 258 | ArrayView(const U &u) // NOLINT(runtime/explicit) 259 | : ArrayView(u.data(), u.size()) {} 260 | 261 | // Indexing and iteration. These allow mutation even if the ArrayView is 262 | // const, because the ArrayView doesn't own the array. (To prevent mutation, 263 | // use a const element type.) 
264 | T &operator[](size_t idx) const { 265 | RTC_DCHECK_LT(idx, this->size()); 266 | RTC_DCHECK(this->data()); 267 | return this->data()[idx]; 268 | } 269 | 270 | T *begin() const { return this->data(); } 271 | 272 | T *end() const { return this->data() + this->size(); } 273 | 274 | const T *cbegin() const { return this->data(); } 275 | 276 | const T *cend() const { return this->data() + this->size(); } 277 | 278 | ArrayView subview(size_t offset, size_t size) const { 279 | return offset < this->size() 280 | ? ArrayView(this->data() + offset, 281 | std::min(size, this->size() - offset)) 282 | : ArrayView(); 283 | } 284 | 285 | ArrayView subview(size_t offset) const { 286 | return subview(offset, this->size()); 287 | } 288 | }; 289 | 290 | // Comparing two ArrayViews compares their (pointer,size) pairs; it does *not* 291 | // dereference the pointers. 292 | template 293 | bool operator==(const ArrayView &a, const ArrayView &b) { 294 | return a.data() == b.data() && a.size() == b.size(); 295 | } 296 | 297 | template 298 | bool operator!=(const ArrayView &a, const ArrayView &b) { 299 | return !(a == b); 300 | } 301 | 302 | // Variable-size ArrayViews are the size of two pointers; fixed-size ArrayViews 303 | // are the size of one pointer. (And as a special case, fixed-size ArrayViews 304 | // of size 0 require no storage.) 305 | static_assert(sizeof(ArrayView) == 2 * sizeof(int *), ""); 306 | static_assert(sizeof(ArrayView) == sizeof(int *), ""); 307 | static_assert(std::is_empty>::value, ""); 308 | 309 | template 310 | inline ArrayView MakeArrayView(T *data, size_t size) { 311 | return ArrayView(data, size); 312 | } 313 | 314 | // Only for primitive types that have the same size and aligment. 315 | // Allow reinterpret cast of the array view to another primitive type of the 316 | // same size. 317 | // Template arguments order is (U, T, Size) to allow deduction of the template 318 | // arguments in client calls: reinterpret_array_view(array_view). 
319 | template 320 | inline ArrayView reinterpret_array_view(ArrayView view) { 321 | static_assert(sizeof(U) == sizeof(T) && alignof(U) == alignof(T), 322 | "ArrayView reinterpret_cast is only supported for casting " 323 | "between views that represent the same chunk of memory."); 324 | static_assert( 325 | std::is_fundamental::value && std::is_fundamental::value, 326 | "ArrayView reinterpret_cast is only supported for casting between " 327 | "fundamental types."); 328 | return ArrayView(reinterpret_cast(view.data()), view.size()); 329 | } 330 | 331 | } // namespace rtc 332 | 333 | #endif // API_ARRAY_VIEW_H_ 334 | -------------------------------------------------------------------------------- /ns/audio_buffer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_ 12 | #define MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_ 13 | 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | 20 | #include "channel_buffer.h" 21 | //#include "audio_processing.h" 22 | 23 | namespace webrtc { 24 | 25 | class StreamConfig { 26 | public: 27 | // sample_rate_hz: The sampling rate of the stream. 28 | // 29 | // num_channels: The number of audio channels in the stream, excluding the 30 | // keyboard channel if it is present. When passing a 31 | // StreamConfig with an array of arrays T*[N], 32 | // 33 | // N == {num_channels + 1 if has_keyboard 34 | // {num_channels if !has_keyboard 35 | // 36 | // has_keyboard: True if the stream has a keyboard channel. 
When has_keyboard 37 | // is true, the last channel in any corresponding list of 38 | // channels is the keyboard channel. 39 | StreamConfig(int sample_rate_hz = 0, 40 | size_t num_channels = 0, 41 | bool has_keyboard = false) 42 | : sample_rate_hz_(sample_rate_hz), 43 | num_channels_(num_channels), 44 | has_keyboard_(has_keyboard), 45 | num_frames_(calculate_frames(sample_rate_hz)) {} 46 | 47 | void set_sample_rate_hz(int value) { 48 | sample_rate_hz_ = value; 49 | num_frames_ = calculate_frames(value); 50 | } 51 | 52 | void set_num_channels(size_t value) { num_channels_ = value; } 53 | 54 | void set_has_keyboard(bool value) { has_keyboard_ = value; } 55 | 56 | int sample_rate_hz() const { return sample_rate_hz_; } 57 | 58 | // The number of channels in the stream, not including the keyboard channel if 59 | // present. 60 | size_t num_channels() const { return num_channels_; } 61 | 62 | bool has_keyboard() const { return has_keyboard_; } 63 | 64 | size_t num_frames() const { return num_frames_; } 65 | 66 | size_t num_samples() const { return num_channels_ * num_frames_; } 67 | 68 | bool operator==(const StreamConfig &other) const { 69 | return sample_rate_hz_ == other.sample_rate_hz_ && 70 | num_channels_ == other.num_channels_ && 71 | has_keyboard_ == other.has_keyboard_; 72 | } 73 | 74 | bool operator!=(const StreamConfig &other) const { return !(*this == other); } 75 | 76 | private: 77 | 78 | static const int kChunkSizeMs = 10; 79 | 80 | static size_t calculate_frames(int sample_rate_hz) { 81 | return static_cast( kChunkSizeMs * sample_rate_hz / 82 | 1000); 83 | } 84 | 85 | int sample_rate_hz_; 86 | size_t num_channels_; 87 | bool has_keyboard_; 88 | size_t num_frames_; 89 | }; 90 | 91 | class PushSincResampler; 92 | 93 | class SplittingFilter; 94 | 95 | enum Band { 96 | kBand0To8kHz = 0, kBand8To16kHz = 1, kBand16To24kHz = 2 97 | }; 98 | 99 | // Stores any audio data in a way that allows the audio processing module to 100 | // operate on it in a controlled 
manner. 101 | class AudioBuffer { 102 | public: 103 | static const int kSplitBandSize = 160; 104 | static const size_t kMaxSampleRate = 384000; 105 | 106 | AudioBuffer(size_t input_rate, 107 | size_t input_num_channels, 108 | size_t buffer_rate, 109 | size_t buffer_num_channels, 110 | size_t output_rate, 111 | size_t output_num_channels); 112 | 113 | // The constructor below will be deprecated. 114 | AudioBuffer(size_t input_num_frames, 115 | size_t input_num_channels, 116 | size_t buffer_num_frames, 117 | size_t buffer_num_channels, 118 | size_t output_num_frames); 119 | 120 | virtual ~AudioBuffer(); 121 | 122 | AudioBuffer(const AudioBuffer &) = delete; 123 | 124 | AudioBuffer &operator=(const AudioBuffer &) = delete; 125 | 126 | // Specify that downmixing should be done by selecting a single channel. 127 | void set_downmixing_to_specific_channel(size_t channel); 128 | 129 | // Specify that downmixing should be done by averaging all channels,. 130 | void set_downmixing_by_averaging(); 131 | 132 | // Set the number of channels in the buffer. The specified number of channels 133 | // cannot be larger than the specified buffer_num_channels. The number is also 134 | // reset at each call to CopyFrom or InterleaveFrom. 135 | void set_num_channels(size_t num_channels); 136 | 137 | size_t num_channels() const { return num_channels_; } 138 | 139 | size_t num_frames() const { return buffer_num_frames_; } 140 | 141 | size_t num_frames_per_band() const { return num_split_frames_; } 142 | 143 | size_t num_bands() const { return num_bands_; } 144 | 145 | // Returns pointer arrays to the full-band channels. 146 | // Usage: 147 | // channels()[channel][sample]. 
148 | // Where: 149 | // 0 <= channel < |buffer_num_channels_| 150 | // 0 <= sample < |buffer_num_frames_| 151 | float *const *channels() { return data_->channels(); } 152 | 153 | const float *const *channels_const() const { return data_->channels(); } 154 | 155 | // Returns pointer arrays to the bands for a specific channel. 156 | // Usage: 157 | // split_bands(channel)[band][sample]. 158 | // Where: 159 | // 0 <= channel < |buffer_num_channels_| 160 | // 0 <= band < |num_bands_| 161 | // 0 <= sample < |num_split_frames_| 162 | const float *const *split_bands_const(size_t channel) const { 163 | return split_data_.get() ? split_data_->bands(channel) 164 | : data_->bands(channel); 165 | } 166 | 167 | float *const *split_bands(size_t channel) { 168 | return split_data_.get() ? split_data_->bands(channel) 169 | : data_->bands(channel); 170 | } 171 | 172 | // Returns a pointer array to the channels for a specific band. 173 | // Usage: 174 | // split_channels(band)[channel][sample]. 175 | // Where: 176 | // 0 <= band < |num_bands_| 177 | // 0 <= channel < |buffer_num_channels_| 178 | // 0 <= sample < |num_split_frames_| 179 | const float *const *split_channels_const(Band band) const { 180 | if (split_data_.get()) { 181 | return split_data_->channels(band); 182 | } else { 183 | return band == kBand0To8kHz ? data_->channels() : nullptr; 184 | } 185 | } 186 | 187 | // Copies data into the buffer. 188 | void CopyFrom(const int16_t *const interleaved_data, 189 | const StreamConfig &stream_config); 190 | 191 | void CopyFrom(const float *const *stacked_data, 192 | const StreamConfig &stream_config); 193 | 194 | // Copies data from the buffer. 195 | void CopyTo(const StreamConfig &stream_config, 196 | int16_t *const interleaved_data); 197 | 198 | void CopyTo(const StreamConfig &stream_config, float *const *stacked_data); 199 | 200 | void CopyTo(AudioBuffer *buffer) const; 201 | 202 | // Splits the buffer data into frequency bands. 
203 | void SplitIntoFrequencyBands(); 204 | 205 | // Recombines the frequency bands into a full-band signal. 206 | void MergeFrequencyBands(); 207 | 208 | // Copies the split bands data into the integer two-dimensional array. 209 | void ExportSplitChannelData(size_t channel, 210 | int16_t *const *split_band_data) const; 211 | 212 | // Copies the data in the integer two-dimensional array into the split_bands 213 | // data. 214 | void ImportSplitChannelData(size_t channel, 215 | const int16_t *const *split_band_data); 216 | 217 | static const size_t kMaxSplitFrameLength = 160; 218 | static const size_t kMaxNumBands = 3; 219 | 220 | // Deprecated methods, will be removed soon. 221 | float *const *channels_f() { return channels(); } 222 | 223 | const float *const *channels_const_f() const { return channels_const(); } 224 | 225 | const float *const *split_bands_const_f(size_t channel) const { 226 | return split_bands_const(channel); 227 | } 228 | 229 | float *const *split_bands_f(size_t channel) { return split_bands(channel); } 230 | 231 | const float *const *split_channels_const_f(Band band) const { 232 | return split_channels_const(band); 233 | } 234 | 235 | private: 236 | FRIEND_TEST_ALL_PREFIXES(AudioBufferTest, 237 | SetNumChannelsSetsChannelBuffersNumChannels); 238 | 239 | void RestoreNumChannels(); 240 | 241 | const size_t input_num_frames_; 242 | const size_t input_num_channels_; 243 | const size_t buffer_num_frames_; 244 | const size_t buffer_num_channels_; 245 | const size_t output_num_frames_; 246 | const size_t output_num_channels_; 247 | 248 | size_t num_channels_; 249 | size_t num_bands_; 250 | size_t num_split_frames_; 251 | 252 | std::unique_ptr> data_; 253 | std::unique_ptr> split_data_; 254 | std::unique_ptr splitting_filter_; 255 | std::vector> input_resamplers_; 256 | std::vector> output_resamplers_; 257 | bool downmix_by_averaging_ = true; 258 | size_t channel_for_downmixing_ = 0; 259 | }; 260 | 261 | } // namespace webrtc 262 | 263 | #endif // 
MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_ 264 | -------------------------------------------------------------------------------- /ns/audio_util.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "audio_util.h" 12 | 13 | namespace webrtc { 14 | 15 | void FloatToS16(const float *src, size_t size, int16_t *dest) { 16 | for (size_t i = 0; i < size; ++i) 17 | dest[i] = FloatToS16(src[i]); 18 | } 19 | 20 | void S16ToFloat(const int16_t *src, size_t size, float *dest) { 21 | for (size_t i = 0; i < size; ++i) 22 | dest[i] = S16ToFloat(src[i]); 23 | } 24 | 25 | void S16ToFloatS16(const int16_t *src, size_t size, float *dest) { 26 | for (size_t i = 0; i < size; ++i) 27 | dest[i] = src[i]; 28 | } 29 | 30 | void FloatS16ToS16(const float *src, size_t size, int16_t *dest) { 31 | for (size_t i = 0; i < size; ++i) 32 | dest[i] = FloatS16ToS16(src[i]); 33 | } 34 | 35 | void FloatToFloatS16(const float *src, size_t size, float *dest) { 36 | for (size_t i = 0; i < size; ++i) 37 | dest[i] = FloatToFloatS16(src[i]); 38 | } 39 | 40 | void FloatS16ToFloat(const float *src, size_t size, float *dest) { 41 | for (size_t i = 0; i < size; ++i) 42 | dest[i] = FloatS16ToFloat(src[i]); 43 | } 44 | 45 | template<> 46 | void DownmixInterleavedToMono(const int16_t *interleaved, 47 | size_t num_frames, 48 | int num_channels, 49 | int16_t *deinterleaved) { 50 | DownmixInterleavedToMonoImpl(interleaved, num_frames, 51 | num_channels, deinterleaved); 52 | } 53 | 54 | } // namespace webrtc 55 | 
-------------------------------------------------------------------------------- /ns/audio_util.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_ 12 | #define COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_ 13 | 14 | #include 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include "checks.h" 22 | 23 | namespace webrtc { 24 | 25 | typedef std::numeric_limits limits_int16; 26 | 27 | // The conversion functions use the following naming convention: 28 | // S16: int16_t [-32768, 32767] 29 | // Float: float [-1.0, 1.0] 30 | // FloatS16: float [-32768.0, 32768.0] 31 | // Dbfs: float [-20.0*log(10, 32768), 0] = [-90.3, 0] 32 | // The ratio conversion functions use this naming convention: 33 | // Ratio: float (0, +inf) 34 | // Db: float (-inf, +inf) 35 | static inline float S16ToFloat(int16_t v) { 36 | constexpr float kScaling = 1.f / 32768.f; 37 | return v * kScaling; 38 | } 39 | 40 | static inline int16_t FloatS16ToS16(float v) { 41 | v = std::min(v, 32767.f); 42 | v = std::max(v, -32768.f); 43 | return static_cast(v + std::copysign(0.5f, v)); 44 | } 45 | 46 | static inline int16_t FloatToS16(float v) { 47 | v *= 32768.f; 48 | v = std::min(v, 32767.f); 49 | v = std::max(v, -32768.f); 50 | return static_cast(v + std::copysign(0.5f, v)); 51 | } 52 | 53 | static inline float FloatToFloatS16(float v) { 54 | v = std::min(v, 1.f); 55 | v = std::max(v, -1.f); 56 | return v * 32768.f; 57 | } 58 | 59 | static inline float FloatS16ToFloat(float v) { 60 | v = 
std::min(v, 32768.f); 61 | v = std::max(v, -32768.f); 62 | constexpr float kScaling = 1.f / 32768.f; 63 | return v * kScaling; 64 | } 65 | 66 | void FloatToS16(const float *src, size_t size, int16_t *dest); 67 | 68 | void S16ToFloat(const int16_t *src, size_t size, float *dest); 69 | 70 | void S16ToFloatS16(const int16_t *src, size_t size, float *dest); 71 | 72 | void FloatS16ToS16(const float *src, size_t size, int16_t *dest); 73 | 74 | void FloatToFloatS16(const float *src, size_t size, float *dest); 75 | 76 | void FloatS16ToFloat(const float *src, size_t size, float *dest); 77 | 78 | inline float DbToRatio(float v) { 79 | return std::pow(10.0f, v / 20.0f); 80 | } 81 | 82 | inline float DbfsToFloatS16(float v) { 83 | static constexpr float kMaximumAbsFloatS16 = -limits_int16::min(); 84 | return DbToRatio(v) * kMaximumAbsFloatS16; 85 | } 86 | 87 | inline float FloatS16ToDbfs(float v) { 88 | RTC_DCHECK_GE(v, 0); 89 | 90 | // kMinDbfs is equal to -20.0 * log10(-limits_int16::min()) 91 | static constexpr float kMinDbfs = -90.30899869919436f; 92 | if (v <= 1.0f) { 93 | return kMinDbfs; 94 | } 95 | // Equal to 20 * log10(v / (-limits_int16::min())) 96 | return 20.0f * std::log10(v) + kMinDbfs; 97 | } 98 | 99 | // Copy audio from |src| channels to |dest| channels unless |src| and |dest| 100 | // point to the same address. |src| and |dest| must have the same number of 101 | // channels, and there must be sufficient space allocated in |dest|. 102 | template 103 | void CopyAudioIfNeeded(const T *const *src, 104 | int num_frames, 105 | int num_channels, 106 | T *const *dest) { 107 | for (int i = 0; i < num_channels; ++i) { 108 | if (src[i] != dest[i]) { 109 | std::copy(src[i], src[i] + num_frames, dest[i]); 110 | } 111 | } 112 | } 113 | 114 | // Deinterleave audio from |interleaved| to the channel buffers pointed to 115 | // by |deinterleaved|. 
There must be sufficient space allocated in the 116 | // |deinterleaved| buffers (|num_channel| buffers with |samples_per_channel| 117 | // per buffer). 118 | template 119 | void Deinterleave(const T *interleaved, 120 | size_t samples_per_channel, 121 | size_t num_channels, 122 | T *const *deinterleaved) { 123 | for (size_t i = 0; i < num_channels; ++i) { 124 | T *channel = deinterleaved[i]; 125 | size_t interleaved_idx = i; 126 | for (size_t j = 0; j < samples_per_channel; ++j) { 127 | channel[j] = interleaved[interleaved_idx]; 128 | interleaved_idx += num_channels; 129 | } 130 | } 131 | } 132 | 133 | // Interleave audio from the channel buffers pointed to by |deinterleaved| to 134 | // |interleaved|. There must be sufficient space allocated in |interleaved| 135 | // (|samples_per_channel| * |num_channels|). 136 | template 137 | void Interleave(const T *const *deinterleaved, 138 | size_t samples_per_channel, 139 | size_t num_channels, 140 | T *interleaved) { 141 | for (size_t i = 0; i < num_channels; ++i) { 142 | const T *channel = deinterleaved[i]; 143 | size_t interleaved_idx = i; 144 | for (size_t j = 0; j < samples_per_channel; ++j) { 145 | interleaved[interleaved_idx] = channel[j]; 146 | interleaved_idx += num_channels; 147 | } 148 | } 149 | } 150 | 151 | // Copies audio from a single channel buffer pointed to by |mono| to each 152 | // channel of |interleaved|. There must be sufficient space allocated in 153 | // |interleaved| (|samples_per_channel| * |num_channels|). 
154 | template 155 | void UpmixMonoToInterleaved(const T *mono, 156 | int num_frames, 157 | int num_channels, 158 | T *interleaved) { 159 | int interleaved_idx = 0; 160 | for (int i = 0; i < num_frames; ++i) { 161 | for (int j = 0; j < num_channels; ++j) { 162 | interleaved[interleaved_idx++] = mono[i]; 163 | } 164 | } 165 | } 166 | 167 | template 168 | void DownmixToMono(const T *const *input_channels, 169 | size_t num_frames, 170 | int num_channels, 171 | T *out) { 172 | for (size_t i = 0; i < num_frames; ++i) { 173 | Intermediate value = input_channels[0][i]; 174 | for (int j = 1; j < num_channels; ++j) { 175 | value += input_channels[j][i]; 176 | } 177 | out[i] = value / num_channels; 178 | } 179 | } 180 | 181 | // Downmixes an interleaved multichannel signal to a single channel by averaging 182 | // all channels. 183 | template 184 | void DownmixInterleavedToMonoImpl(const T *interleaved, 185 | size_t num_frames, 186 | int num_channels, 187 | T *deinterleaved) { 188 | RTC_DCHECK_GT(num_channels, 0); 189 | RTC_DCHECK_GT(num_frames, 0); 190 | 191 | const T *const end = interleaved + num_frames * num_channels; 192 | 193 | while (interleaved < end) { 194 | const T *const frame_end = interleaved + num_channels; 195 | 196 | Intermediate value = *interleaved++; 197 | while (interleaved < frame_end) { 198 | value += *interleaved++; 199 | } 200 | 201 | *deinterleaved++ = value / num_channels; 202 | } 203 | } 204 | 205 | template 206 | void DownmixInterleavedToMono(const T *interleaved, 207 | size_t num_frames, 208 | int num_channels, 209 | T *deinterleaved); 210 | 211 | template<> 212 | void DownmixInterleavedToMono(const int16_t *interleaved, 213 | size_t num_frames, 214 | int num_channels, 215 | int16_t *deinterleaved); 216 | 217 | } // namespace webrtc 218 | 219 | #endif // COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_ 220 | -------------------------------------------------------------------------------- /ns/channel_buffer.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef COMMON_AUDIO_CHANNEL_BUFFER_H_ 12 | #define COMMON_AUDIO_CHANNEL_BUFFER_H_ 13 | 14 | #include 15 | 16 | #include 17 | #include 18 | 19 | #include "array_view.h" 20 | #include "audio_util.h" 21 | #include "checks.h" 22 | #include "gtest_prod_util.h" 23 | 24 | namespace webrtc { 25 | 26 | // Helper to encapsulate a contiguous data buffer, full or split into frequency 27 | // bands, with access to a pointer arrays of the deinterleaved channels and 28 | // bands. The buffer is zero initialized at creation. 
29 | // 30 | // The buffer structure is showed below for a 2 channel and 2 bands case: 31 | // 32 | // |data_|: 33 | // { [ --- b1ch1 --- ] [ --- b2ch1 --- ] [ --- b1ch2 --- ] [ --- b2ch2 --- ] } 34 | // 35 | // The pointer arrays for the same example are as follows: 36 | // 37 | // |channels_|: 38 | // { [ b1ch1* ] [ b1ch2* ] [ b2ch1* ] [ b2ch2* ] } 39 | // 40 | // |bands_|: 41 | // { [ b1ch1* ] [ b2ch1* ] [ b1ch2* ] [ b2ch2* ] } 42 | template 43 | class ChannelBuffer { 44 | public: 45 | ChannelBuffer(size_t num_frames, size_t num_channels, size_t num_bands = 1) 46 | : data_(new T[num_frames * num_channels]()), 47 | channels_(new T *[num_channels * num_bands]), 48 | bands_(new T *[num_channels * num_bands]), 49 | num_frames_(num_frames), 50 | num_frames_per_band_(num_frames / num_bands), 51 | num_allocated_channels_(num_channels), 52 | num_channels_(num_channels), 53 | num_bands_(num_bands), 54 | bands_view_(num_allocated_channels_, 55 | std::vector>(num_bands_)), 56 | channels_view_( 57 | num_bands_, 58 | std::vector>(num_allocated_channels_)) { 59 | // Temporarily cast away const_ness to allow populating the array views. 60 | auto *bands_view = 61 | const_cast>> *>(&bands_view_); 62 | auto *channels_view = 63 | const_cast>> *>( 64 | &channels_view_); 65 | 66 | for (size_t ch = 0; ch < num_allocated_channels_; ++ch) { 67 | for (size_t band = 0; band < num_bands_; ++band) { 68 | (*channels_view)[band][ch] = rtc::ArrayView( 69 | &data_[ch * num_frames_ + band * num_frames_per_band_], 70 | num_frames_per_band_); 71 | (*bands_view)[ch][band] = channels_view_[band][ch]; 72 | channels_[band * num_allocated_channels_ + ch] = 73 | channels_view_[band][ch].data(); 74 | bands_[ch * num_bands_ + band] = 75 | channels_[band * num_allocated_channels_ + ch]; 76 | } 77 | } 78 | } 79 | 80 | // Returns a pointer array to the channels. 
81 | // If band is explicitly specificed, the channels for a specific band are 82 | // returned and the usage becomes: channels(band)[channel][sample]. 83 | // Where: 84 | // 0 <= band < |num_bands_| 85 | // 0 <= channel < |num_allocated_channels_| 86 | // 0 <= sample < |num_frames_per_band_| 87 | 88 | // If band is not explicitly specified, the full-band channels (or lower band 89 | // channels) are returned and the usage becomes: channels()[channel][sample]. 90 | // Where: 91 | // 0 <= channel < |num_allocated_channels_| 92 | // 0 <= sample < |num_frames_| 93 | const T *const *channels(size_t band = 0) const { 94 | RTC_DCHECK_LT(band, num_bands_); 95 | return &channels_[band * num_allocated_channels_]; 96 | } 97 | 98 | T *const *channels(size_t band = 0) { 99 | const ChannelBuffer *t = this; 100 | return const_cast(t->channels(band)); 101 | } 102 | 103 | rtc::ArrayView> channels_view(size_t band = 0) { 104 | return channels_view_[band]; 105 | } 106 | 107 | rtc::ArrayView> channels_view(size_t band = 0) const { 108 | return channels_view_[band]; 109 | } 110 | 111 | // Returns a pointer array to the bands for a specific channel. 112 | // Usage: 113 | // bands(channel)[band][sample]. 114 | // Where: 115 | // 0 <= channel < |num_channels_| 116 | // 0 <= band < |num_bands_| 117 | // 0 <= sample < |num_frames_per_band_| 118 | const T *const *bands(size_t channel) const { 119 | RTC_DCHECK_LT(channel, num_channels_); 120 | RTC_DCHECK_GE(channel, 0); 121 | return &bands_[channel * num_bands_]; 122 | } 123 | 124 | T *const *bands(size_t channel) { 125 | const ChannelBuffer *t = this; 126 | return const_cast(t->bands(channel)); 127 | } 128 | 129 | rtc::ArrayView> bands_view(size_t channel) { 130 | return bands_view_[channel]; 131 | } 132 | 133 | rtc::ArrayView> bands_view(size_t channel) const { 134 | return bands_view_[channel]; 135 | } 136 | 137 | // Sets the |slice| pointers to the |start_frame| position for each channel. 138 | // Returns |slice| for convenience. 
139 | const T *const *Slice(T **slice, size_t start_frame) const { 140 | RTC_DCHECK_LT(start_frame, num_frames_); 141 | for (size_t i = 0; i < num_channels_; ++i) 142 | slice[i] = &channels_[i][start_frame]; 143 | return slice; 144 | } 145 | 146 | T **Slice(T **slice, size_t start_frame) { 147 | const ChannelBuffer *t = this; 148 | return const_cast(t->Slice(slice, start_frame)); 149 | } 150 | 151 | size_t num_frames() const { return num_frames_; } 152 | 153 | size_t num_frames_per_band() const { return num_frames_per_band_; } 154 | 155 | size_t num_channels() const { return num_channels_; } 156 | 157 | size_t num_bands() const { return num_bands_; } 158 | 159 | size_t size() const { return num_frames_ * num_allocated_channels_; } 160 | 161 | void set_num_channels(size_t num_channels) { 162 | RTC_DCHECK_LE(num_channels, num_allocated_channels_); 163 | num_channels_ = num_channels; 164 | } 165 | 166 | void SetDataForTesting(const T *data, size_t size) { 167 | RTC_CHECK_EQ(size, this->size()); 168 | memcpy(data_.get(), data, size * sizeof(*data)); 169 | } 170 | 171 | private: 172 | std::unique_ptr data_; 173 | std::unique_ptr channels_; 174 | std::unique_ptr bands_; 175 | const size_t num_frames_; 176 | const size_t num_frames_per_band_; 177 | // Number of channels the internal buffer holds. 178 | const size_t num_allocated_channels_; 179 | // Number of channels the user sees. 180 | size_t num_channels_; 181 | const size_t num_bands_; 182 | const std::vector>> bands_view_; 183 | const std::vector>> channels_view_; 184 | }; 185 | 186 | } // namespace webrtc 187 | 188 | #endif // COMMON_AUDIO_CHANNEL_BUFFER_H_ 189 | -------------------------------------------------------------------------------- /ns/checks.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2006 The WebRTC Project Authors. All rights reserved. 
3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef RTC_BASE_CHECKS_H_ 12 | #define RTC_BASE_CHECKS_H_ 13 | 14 | # include 15 | // If you for some reson need to know if DCHECKs are on, test the value of 16 | // RTC_DCHECK_IS_ON. (Test its value, not if it's defined; it'll always be 17 | // defined, to either a true or a false value.) 18 | #if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON) 19 | #define RTC_DCHECK_IS_ON 1 20 | #else 21 | #define RTC_DCHECK_IS_ON 0 22 | #endif 23 | 24 | // Annotate a function that will not return control flow to the caller. 25 | #if defined(_MSC_VER) 26 | #define RTC_NORETURN __declspec(noreturn) 27 | #elif defined(__GNUC__) 28 | #define RTC_NORETURN __attribute__((__noreturn__)) 29 | #else 30 | #define RTC_NORETURN 31 | #endif 32 | 33 | #ifdef RTC_DISABLE_CHECK_MSG 34 | #define RTC_CHECK_MSG_ENABLED 0 35 | #else 36 | #define RTC_CHECK_MSG_ENABLED 1 37 | #endif 38 | 39 | #if RTC_CHECK_MSG_ENABLED 40 | #define RTC_CHECK_EVAL_MESSAGE(message) message 41 | #else 42 | #define RTC_CHECK_EVAL_MESSAGE(message) "" 43 | #endif 44 | 45 | // C version. Lacks many features compared to the C++ version, but usage 46 | // guidelines are the same. 
47 | #define rtc_FatalMessage(file, line, msg) printf("[%s:%d]%s \n", file, line, msg) 48 | #define RTC_CHECK(condition) \ 49 | do { \ 50 | if (!(condition)) { \ 51 | rtc_FatalMessage(__FILE__, __LINE__, \ 52 | RTC_CHECK_EVAL_MESSAGE("CHECK failed: " #condition)); \ 53 | } \ 54 | } while (0) 55 | 56 | #define RTC_CHECK_EQ(a, b) RTC_CHECK((a) == (b)) 57 | #define RTC_CHECK_NE(a, b) RTC_CHECK((a) != (b)) 58 | #define RTC_CHECK_LE(a, b) RTC_CHECK((a) <= (b)) 59 | #define RTC_CHECK_LT(a, b) RTC_CHECK((a) < (b)) 60 | #define RTC_CHECK_GE(a, b) RTC_CHECK((a) >= (b)) 61 | #define RTC_CHECK_GT(a, b) RTC_CHECK((a) > (b)) 62 | 63 | #define RTC_DCHECK(condition) \ 64 | do { \ 65 | if (RTC_DCHECK_IS_ON && !(condition)) { \ 66 | rtc_FatalMessage(__FILE__, __LINE__, \ 67 | RTC_CHECK_EVAL_MESSAGE("DCHECK failed: " #condition)); \ 68 | } \ 69 | } while (0) 70 | 71 | #define RTC_DCHECK_EQ(a, b) RTC_DCHECK((a) == (b)) 72 | #define RTC_DCHECK_NE(a, b) RTC_DCHECK((a) != (b)) 73 | #define RTC_DCHECK_LE(a, b) RTC_DCHECK((a) <= (b)) 74 | #define RTC_DCHECK_LT(a, b) RTC_DCHECK((a) < (b)) 75 | #define RTC_DCHECK_GE(a, b) RTC_DCHECK((a) >= (b)) 76 | #define RTC_DCHECK_GT(a, b) RTC_DCHECK((a) > (b)) 77 | 78 | #define RTC_UNREACHABLE_CODE_HIT false 79 | #define RTC_NOTREACHED() RTC_DCHECK(RTC_UNREACHABLE_CODE_HIT) 80 | #endif // RTC_BASE_CHECKS_H_ 81 | -------------------------------------------------------------------------------- /ns/constructor_magic.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2004 The WebRTC Project Authors. All rights reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #ifndef RTC_BASE_CONSTRUCTOR_MAGIC_H_ 12 | #define RTC_BASE_CONSTRUCTOR_MAGIC_H_ 13 | 14 | // Put this in the declarations for a class to be unassignable. 15 | #define RTC_DISALLOW_ASSIGN(TypeName) \ 16 | TypeName& operator=(const TypeName&) = delete 17 | 18 | // A macro to disallow the copy constructor and operator= functions. This should 19 | // be used in the declarations for a class. 20 | #define RTC_DISALLOW_COPY_AND_ASSIGN(TypeName) \ 21 | TypeName(const TypeName&) = delete; \ 22 | RTC_DISALLOW_ASSIGN(TypeName) 23 | 24 | // A macro to disallow all the implicit constructors, namely the default 25 | // constructor, copy constructor and operator= functions. 26 | // 27 | // This should be used in the declarations for a class that wants to prevent 28 | // anyone from instantiating it. This is especially useful for classes 29 | // containing only static methods. 30 | #define RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ 31 | TypeName() = delete; \ 32 | RTC_DISALLOW_COPY_AND_ASSIGN(TypeName) 33 | 34 | #endif // RTC_BASE_CONSTRUCTOR_MAGIC_H_ 35 | -------------------------------------------------------------------------------- /ns/fast_math.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "fast_math.h" 12 | 13 | #include 14 | #include 15 | 16 | #include "checks.h" 17 | 18 | namespace webrtc { 19 | 20 | namespace { 21 | 22 | float FastLog2f(float in) { 23 | RTC_DCHECK_GT(in, .0f); 24 | // Read and interpret float as uint32_t and then cast to float. 
25 | // This is done to extract the exponent (bits 30 - 23). 26 | // "Right shift" of the exponent is then performed by multiplying 27 | // with the constant (1/2^23). Finally, we subtract a constant to 28 | // remove the bias (https://en.wikipedia.org/wiki/Exponent_bias). 29 | union { 30 | float dummy; 31 | uint32_t a; 32 | } x = {in}; 33 | float out = x.a; 34 | out *= 1.1920929e-7f; // 1/2^23 35 | out -= 126.942695f; // Remove bias. 36 | return out; 37 | } 38 | 39 | } // namespace 40 | 41 | float SqrtFastApproximation(float f) { 42 | // TODO(peah): Add fast approximate implementation. 43 | return sqrtf(f); 44 | } 45 | 46 | float Pow2Approximation(float p) { 47 | // TODO(peah): Add fast approximate implementation. 48 | return powf(2.f, p); 49 | } 50 | 51 | float PowApproximation(float x, float p) { 52 | return Pow2Approximation(p * FastLog2f(x)); 53 | } 54 | 55 | float LogApproximation(float x) { 56 | constexpr float kLogOf2 = 0.69314718056f; 57 | return FastLog2f(x) * kLogOf2; 58 | } 59 | 60 | void LogApproximation(rtc::ArrayView x, rtc::ArrayView y) { 61 | for (size_t k = 0; k < x.size(); ++k) { 62 | y[k] = LogApproximation(x[k]); 63 | } 64 | } 65 | 66 | float ExpApproximation(float x) { 67 | constexpr float kLog10Ofe = 0.4342944819f; 68 | return PowApproximation(10.f, x * kLog10Ofe); 69 | } 70 | 71 | void ExpApproximation(rtc::ArrayView x, rtc::ArrayView y) { 72 | for (size_t k = 0; k < x.size(); ++k) { 73 | y[k] = ExpApproximation(x[k]); 74 | } 75 | } 76 | 77 | void ExpApproximationSignFlip(rtc::ArrayView x, 78 | rtc::ArrayView y) { 79 | for (size_t k = 0; k < x.size(); ++k) { 80 | y[k] = ExpApproximation(-x[k]); 81 | } 82 | } 83 | 84 | } // namespace webrtc 85 | -------------------------------------------------------------------------------- /ns/fast_math.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 
3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_NS_FAST_MATH_H_ 12 | #define MODULES_AUDIO_PROCESSING_NS_FAST_MATH_H_ 13 | 14 | #include "array_view.h" 15 | 16 | namespace webrtc { 17 | 18 | // Sqrt approximation. 19 | float SqrtFastApproximation(float f); 20 | 21 | // Log base conversion log(x) = log2(x)/log2(e). 22 | float LogApproximation(float x); 23 | 24 | void LogApproximation(rtc::ArrayView x, rtc::ArrayView y); 25 | 26 | // 2^x approximation. 27 | float Pow2Approximation(float p); 28 | 29 | // x^p approximation. 30 | float PowApproximation(float x, float p); 31 | 32 | // e^x approximation. 33 | float ExpApproximation(float x); 34 | 35 | void ExpApproximation(rtc::ArrayView x, rtc::ArrayView y); 36 | 37 | void ExpApproximationSignFlip(rtc::ArrayView x, 38 | rtc::ArrayView y); 39 | } // namespace webrtc 40 | 41 | #endif // MODULES_AUDIO_PROCESSING_NS_FAST_MATH_H_ 42 | -------------------------------------------------------------------------------- /ns/fft4g.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the ../../../LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #ifndef COMMON_AUDIO_THIRD_PARTY_OOURA_FFT_SIZE_256_FFT4G_H_ 12 | #define COMMON_AUDIO_THIRD_PARTY_OOURA_FFT_SIZE_256_FFT4G_H_ 13 | 14 | namespace webrtc { 15 | 16 | // Refer to fft4g.c for documentation. 17 | void WebRtc_rdft(size_t n, int isgn, float *a, size_t *ip, float *w); 18 | 19 | } // namespace webrtc 20 | 21 | #endif // COMMON_AUDIO_THIRD_PARTY_OOURA_FFT_SIZE_256_FFT4G_H_ 22 | -------------------------------------------------------------------------------- /ns/gtest_prod_util.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef RTC_BASE_GTEST_PROD_UTIL_H_ 12 | #define RTC_BASE_GTEST_PROD_UTIL_H_ 13 | 14 | // Define our own version of FRIEND_TEST here rather than including 15 | // gtest_prod.h to avoid depending on any part of GTest in production code. 16 | #define FRIEND_TEST_WEBRTC(test_case_name, test_name) \ 17 | friend class test_case_name##_##test_name##_Test 18 | 19 | // This file is a plain copy of Chromium's base/gtest_prod_util.h. 20 | // 21 | // This is a wrapper for gtest's FRIEND_TEST macro that friends 22 | // test with all possible prefixes. This is very helpful when changing the test 23 | // prefix, because the friend declarations don't need to be updated. 
24 | // 25 | // Example usage: 26 | // 27 | // class MyClass { 28 | // private: 29 | // void MyMethod(); 30 | // FRIEND_TEST_ALL_PREFIXES(MyClassTest, MyMethod); 31 | // }; 32 | #define FRIEND_TEST_ALL_PREFIXES(test_case_name, test_name) \ 33 | FRIEND_TEST_WEBRTC(test_case_name, test_name); \ 34 | FRIEND_TEST_WEBRTC(test_case_name, DISABLED_##test_name); \ 35 | FRIEND_TEST_WEBRTC(test_case_name, FLAKY_##test_name); \ 36 | FRIEND_TEST_WEBRTC(test_case_name, FAILS_##test_name) 37 | 38 | #endif // RTC_BASE_GTEST_PROD_UTIL_H_ 39 | -------------------------------------------------------------------------------- /ns/histograms.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "histograms.h" 12 | 13 | namespace webrtc { 14 | 15 | Histograms::Histograms() { 16 | Clear(); 17 | } 18 | 19 | void Histograms::Clear() { 20 | lrt_.fill(0); 21 | spectral_flatness_.fill(0); 22 | spectral_diff_.fill(0); 23 | } 24 | 25 | void Histograms::Update(const SignalModel &features_) { 26 | // Update the histogram for the LRT. 27 | constexpr float kOneByBinSizeLrt = 1.f / kBinSizeLrt; 28 | if (features_.lrt < kHistogramSize * kBinSizeLrt && features_.lrt >= 0.f) { 29 | ++lrt_[kOneByBinSizeLrt * features_.lrt]; 30 | } 31 | 32 | // Update histogram for the spectral flatness. 
33 | constexpr float kOneByBinSizeSpecFlat = 1.f / kBinSizeSpecFlat; 34 | if (features_.spectral_flatness < kHistogramSize * kBinSizeSpecFlat && 35 | features_.spectral_flatness >= 0.f) { 36 | ++spectral_flatness_[features_.spectral_flatness * kOneByBinSizeSpecFlat]; 37 | } 38 | 39 | // Update histogram for the spectral difference. 40 | constexpr float kOneByBinSizeSpecDiff = 1.f / kBinSizeSpecDiff; 41 | if (features_.spectral_diff < kHistogramSize * kBinSizeSpecDiff && 42 | features_.spectral_diff >= 0.f) { 43 | ++spectral_diff_[features_.spectral_diff * kOneByBinSizeSpecDiff]; 44 | } 45 | } 46 | 47 | } // namespace webrtc 48 | -------------------------------------------------------------------------------- /ns/histograms.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_NS_HISTOGRAMS_H_ 12 | #define MODULES_AUDIO_PROCESSING_NS_HISTOGRAMS_H_ 13 | 14 | #include 15 | 16 | #include "array_view.h" 17 | #include "ns_common.h" 18 | #include "signal_model.h" 19 | 20 | namespace webrtc { 21 | 22 | constexpr int kHistogramSize = 1000; 23 | 24 | // Class for handling the updating of histograms. 25 | class Histograms { 26 | public: 27 | Histograms(); 28 | 29 | Histograms(const Histograms &) = delete; 30 | 31 | Histograms &operator=(const Histograms &) = delete; 32 | 33 | // Clears the histograms. 34 | void Clear(); 35 | 36 | // Extracts thresholds for feature parameters and updates the corresponding 37 | // histogram. 
38 | void Update(const SignalModel &features_); 39 | 40 | // Methods for accessing the histograms. 41 | rtc::ArrayView get_lrt() const { return lrt_; } 42 | 43 | rtc::ArrayView get_spectral_flatness() const { 44 | return spectral_flatness_; 45 | } 46 | 47 | rtc::ArrayView get_spectral_diff() const { 48 | return spectral_diff_; 49 | } 50 | 51 | private: 52 | std::array lrt_{}; 53 | std::array spectral_flatness_{}; 54 | std::array spectral_diff_{}; 55 | }; 56 | 57 | } // namespace webrtc 58 | 59 | #endif // MODULES_AUDIO_PROCESSING_NS_HISTOGRAMS_H_ 60 | -------------------------------------------------------------------------------- /ns/noise_estimator.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "noise_estimator.h" 12 | 13 | #include 14 | 15 | #include "fast_math.h" 16 | #include "checks.h" 17 | 18 | namespace webrtc { 19 | 20 | namespace { 21 | 22 | // Log(i). 
23 | constexpr std::array log_table = { 24 | 0.f, 0.f, 0.f, 0.f, 0.f, 1.609438f, 1.791759f, 25 | 1.945910f, 2.079442f, 2.197225f, 2.302585f, 2.397895f, 2.484907f, 2.564949f, 26 | 2.639057f, 2.708050f, 2.772589f, 2.833213f, 2.890372f, 2.944439f, 2.995732f, 27 | 3.044522f, 3.091043f, 3.135494f, 3.178054f, 3.218876f, 3.258097f, 3.295837f, 28 | 3.332205f, 3.367296f, 3.401197f, 3.433987f, 3.465736f, 3.496507f, 3.526361f, 29 | 3.555348f, 3.583519f, 3.610918f, 3.637586f, 3.663562f, 3.688879f, 3.713572f, 30 | 3.737669f, 3.761200f, 3.784190f, 3.806663f, 3.828641f, 3.850147f, 3.871201f, 31 | 3.891820f, 3.912023f, 3.931826f, 3.951244f, 3.970292f, 3.988984f, 4.007333f, 32 | 4.025352f, 4.043051f, 4.060443f, 4.077538f, 4.094345f, 4.110874f, 4.127134f, 33 | 4.143135f, 4.158883f, 4.174387f, 4.189655f, 4.204693f, 4.219508f, 4.234107f, 34 | 4.248495f, 4.262680f, 4.276666f, 4.290460f, 4.304065f, 4.317488f, 4.330733f, 35 | 4.343805f, 4.356709f, 4.369448f, 4.382027f, 4.394449f, 4.406719f, 4.418841f, 36 | 4.430817f, 4.442651f, 4.454347f, 4.465908f, 4.477337f, 4.488636f, 4.499810f, 37 | 4.510859f, 4.521789f, 4.532599f, 4.543295f, 4.553877f, 4.564348f, 4.574711f, 38 | 4.584968f, 4.595119f, 4.605170f, 4.615121f, 4.624973f, 4.634729f, 4.644391f, 39 | 4.653960f, 4.663439f, 4.672829f, 4.682131f, 4.691348f, 4.700480f, 4.709530f, 40 | 4.718499f, 4.727388f, 4.736198f, 4.744932f, 4.753591f, 4.762174f, 4.770685f, 41 | 4.779124f, 4.787492f, 4.795791f, 4.804021f, 4.812184f, 4.820282f, 4.828314f, 42 | 4.836282f, 4.844187f, 4.852030f}; 43 | 44 | } // namespace 45 | 46 | NoiseEstimator::NoiseEstimator(const SuppressionParams &suppression_params) 47 | : suppression_params_(suppression_params) { 48 | noise_spectrum_.fill(0.f); 49 | prev_noise_spectrum_.fill(0.f); 50 | conservative_noise_spectrum_.fill(0.f); 51 | parametric_noise_spectrum_.fill(0.f); 52 | } 53 | 54 | void NoiseEstimator::PrepareAnalysis() { 55 | std::copy(noise_spectrum_.begin(), noise_spectrum_.end(), 56 | prev_noise_spectrum_.begin()); 
57 | } 58 | 59 | void NoiseEstimator::PreUpdate( 60 | int32_t num_analyzed_frames, 61 | rtc::ArrayView signal_spectrum, 62 | float signal_spectral_sum) { 63 | quantile_noise_estimator_.Estimate(signal_spectrum, noise_spectrum_); 64 | 65 | if (num_analyzed_frames < kShortStartupPhaseBlocks) { 66 | // Compute simplified noise model during startup. 67 | const size_t kStartBand = 5; 68 | float sum_log_i_log_magn = 0.f; 69 | float sum_log_i = 0.f; 70 | float sum_log_i_square = 0.f; 71 | float sum_log_magn = 0.f; 72 | for (size_t i = kStartBand; i < kFftSizeBy2Plus1; ++i) { 73 | float log_i = log_table[i]; 74 | sum_log_i += log_i; 75 | sum_log_i_square += log_i * log_i; 76 | float log_signal = LogApproximation(signal_spectrum[i]); 77 | sum_log_magn += log_signal; 78 | sum_log_i_log_magn += log_i * log_signal; 79 | } 80 | 81 | // Estimate the parameter for the level of the white noise. 82 | constexpr float kOneByFftSizeBy2Plus1 = 1.f / kFftSizeBy2Plus1; 83 | white_noise_level_ += signal_spectral_sum * kOneByFftSizeBy2Plus1 * 84 | suppression_params_.over_subtraction_factor; 85 | 86 | // Estimate pink noise parameters. 87 | float denom = sum_log_i_square * (kFftSizeBy2Plus1 - kStartBand) - 88 | sum_log_i * sum_log_i; 89 | float num = 90 | sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn; 91 | RTC_DCHECK_NE(denom, 0.f); 92 | float pink_noise_adjustment = num / denom; 93 | 94 | // Constrain the estimated spectrum to be positive. 95 | pink_noise_adjustment = std::max(pink_noise_adjustment, 0.f); 96 | pink_noise_numerator_ += pink_noise_adjustment; 97 | num = sum_log_i * sum_log_magn - 98 | (kFftSizeBy2Plus1 - kStartBand) * sum_log_i_log_magn; 99 | RTC_DCHECK_NE(denom, 0.f); 100 | pink_noise_adjustment = num / denom; 101 | 102 | // Constrain the pink noise power to be in the interval [0, 1]. 
103 | pink_noise_adjustment = std::max(std::min(pink_noise_adjustment, 1.f), 0.f); 104 | 105 | pink_noise_exp_ += pink_noise_adjustment; 106 | 107 | const float one_by_num_analyzed_frames_plus_1 = 108 | 1.f / (num_analyzed_frames + 1.f); 109 | 110 | // Calculate the frequency-independent parts of parametric noise estimate. 111 | float parametric_exp = 0.f; 112 | float parametric_num = 0.f; 113 | if (pink_noise_exp_ > 0.f) { 114 | // Use pink noise estimate. 115 | parametric_num = ExpApproximation(pink_noise_numerator_ * 116 | one_by_num_analyzed_frames_plus_1); 117 | parametric_num *= num_analyzed_frames + 1.f; 118 | parametric_exp = pink_noise_exp_ * one_by_num_analyzed_frames_plus_1; 119 | } 120 | 121 | constexpr float kOneByShortStartupPhaseBlocks = 122 | 1.f / kShortStartupPhaseBlocks; 123 | for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { 124 | // Estimate the background noise using the white and pink noise 125 | // parameters. 126 | if (pink_noise_exp_ == 0.f) { 127 | // Use white noise estimate. 128 | parametric_noise_spectrum_[i] = white_noise_level_; 129 | } else { 130 | // Use pink noise estimate. 131 | float use_band = i < kStartBand ? kStartBand : i; 132 | float denom = PowApproximation(use_band, parametric_exp); 133 | RTC_DCHECK_NE(denom, 0.f); 134 | parametric_noise_spectrum_[i] = parametric_num / denom; 135 | } 136 | } 137 | 138 | // Weight quantile noise with modeled noise. 139 | float w = (kShortStartupPhaseBlocks - num_analyzed_frames); 140 | for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { 141 | noise_spectrum_[i] *= num_analyzed_frames; 142 | float tmp = parametric_noise_spectrum_[i] * w; 143 | noise_spectrum_[i] += tmp * one_by_num_analyzed_frames_plus_1; 144 | noise_spectrum_[i] *= kOneByShortStartupPhaseBlocks; 145 | } 146 | } 147 | } 148 | 149 | void NoiseEstimator::PostUpdate( 150 | rtc::ArrayView speech_probability, 151 | rtc::ArrayView signal_spectrum) { 152 | // Time-avg parameter for noise_spectrum update. 
153 | constexpr float kNoiseUpdate = 0.9f; 154 | 155 | float gamma = kNoiseUpdate; 156 | for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { 157 | const float prob_speech = speech_probability[i]; 158 | const float prob_non_speech = 1.f - prob_speech; 159 | 160 | // Temporary noise update used for speech frames if update value is less 161 | // than previous. 162 | float noise_update_tmp = 163 | gamma * prev_noise_spectrum_[i] + 164 | (1.f - gamma) * (prob_non_speech * signal_spectrum[i] + 165 | prob_speech * prev_noise_spectrum_[i]); 166 | 167 | // Time-constant based on speech/noise_spectrum state. 168 | float gamma_old = gamma; 169 | 170 | // Increase gamma for frame likely to be seech. 171 | constexpr float kProbRange = .2f; 172 | gamma = prob_speech > kProbRange ? .99f : kNoiseUpdate; 173 | 174 | // Conservative noise_spectrum update. 175 | if (prob_speech < kProbRange) { 176 | conservative_noise_spectrum_[i] += 177 | 0.05f * (signal_spectrum[i] - conservative_noise_spectrum_[i]); 178 | } 179 | 180 | // Noise_spectrum update. 181 | if (gamma == gamma_old) { 182 | noise_spectrum_[i] = noise_update_tmp; 183 | } else { 184 | noise_spectrum_[i] = 185 | gamma * prev_noise_spectrum_[i] + 186 | (1.f - gamma) * (prob_non_speech * signal_spectrum[i] + 187 | prob_speech * prev_noise_spectrum_[i]); 188 | // Allow for noise_spectrum update downwards: If noise_spectrum update 189 | // decreases the noise_spectrum, it is safe, so allow it to happen. 190 | noise_spectrum_[i] = std::min(noise_spectrum_[i], noise_update_tmp); 191 | } 192 | } 193 | } 194 | 195 | } // namespace webrtc 196 | -------------------------------------------------------------------------------- /ns/noise_estimator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 
3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_NS_NOISE_ESTIMATOR_H_ 12 | #define MODULES_AUDIO_PROCESSING_NS_NOISE_ESTIMATOR_H_ 13 | 14 | #include 15 | 16 | #include "array_view.h" 17 | #include "ns_common.h" 18 | #include "quantile_noise_estimator.h" 19 | #include "suppression_params.h" 20 | 21 | namespace webrtc { 22 | 23 | // Class for estimating the spectral characteristics of the noise in an incoming 24 | // signal. 25 | class NoiseEstimator { 26 | public: 27 | explicit NoiseEstimator(const SuppressionParams &suppression_params); 28 | 29 | // Prepare the estimator for analysis of a new frame. 30 | void PrepareAnalysis(); 31 | 32 | // Performs the first step of the estimator update. 33 | void PreUpdate(int32_t num_analyzed_frames, 34 | rtc::ArrayView signal_spectrum, 35 | float signal_spectral_sum); 36 | 37 | // Performs the second step of the estimator update. 38 | void PostUpdate( 39 | rtc::ArrayView speech_probability, 40 | rtc::ArrayView signal_spectrum); 41 | 42 | // Returns the noise spectral estimate. 43 | rtc::ArrayView get_noise_spectrum() const { 44 | return noise_spectrum_; 45 | } 46 | 47 | // Returns the noise from the previous frame. 48 | rtc::ArrayView get_prev_noise_spectrum() 49 | const { 50 | return prev_noise_spectrum_; 51 | } 52 | 53 | // Returns a noise spectral estimate based on white and pink noise parameters. 
54 | rtc::ArrayView get_parametric_noise_spectrum() 55 | const { 56 | return parametric_noise_spectrum_; 57 | } 58 | 59 | rtc::ArrayView 60 | get_conservative_noise_spectrum() const { 61 | return conservative_noise_spectrum_; 62 | } 63 | 64 | private: 65 | const SuppressionParams &suppression_params_; 66 | float white_noise_level_ = 0.f; 67 | float pink_noise_numerator_ = 0.f; 68 | float pink_noise_exp_ = 0.f; 69 | std::array prev_noise_spectrum_{}; 70 | std::array conservative_noise_spectrum_{}; 71 | std::array parametric_noise_spectrum_{}; 72 | std::array noise_spectrum_{}; 73 | QuantileNoiseEstimator quantile_noise_estimator_; 74 | }; 75 | 76 | } // namespace webrtc 77 | 78 | #endif // MODULES_AUDIO_PROCESSING_NS_NOISE_ESTIMATOR_H_ 79 | -------------------------------------------------------------------------------- /ns/noise_suppressor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSOR_H_ 12 | #define MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSOR_H_ 13 | 14 | #include 15 | #include 16 | 17 | #include "array_view.h" 18 | #include "audio_buffer.h" 19 | #include "noise_estimator.h" 20 | #include "ns_common.h" 21 | #include "ns_config.h" 22 | #include "ns_fft.h" 23 | #include "speech_probability_estimator.h" 24 | #include "wiener_filter.h" 25 | 26 | namespace webrtc { 27 | 28 | // Class for suppressing noise in a signal. 
29 | class NoiseSuppressor { 30 | public: 31 | NoiseSuppressor(const NsConfig &config, 32 | size_t sample_rate_hz, 33 | size_t num_channels); 34 | 35 | NoiseSuppressor(const NoiseSuppressor &) = delete; 36 | 37 | NoiseSuppressor &operator=(const NoiseSuppressor &) = delete; 38 | 39 | // Analyses the signal (typically applied before the AEC to avoid analyzing 40 | // any comfort noise signal). 41 | void Analyze(const AudioBuffer &audio); 42 | 43 | // Applies noise suppression. 44 | void Process(AudioBuffer *audio); 45 | 46 | private: 47 | const size_t num_bands_; 48 | const size_t num_channels_; 49 | const SuppressionParams suppression_params_; 50 | int32_t num_analyzed_frames_ = -1; 51 | NrFft fft_; 52 | 53 | struct ChannelState { 54 | ChannelState(const SuppressionParams &suppression_params, size_t num_bands); 55 | 56 | SpeechProbabilityEstimator speech_probability_estimator; 57 | WienerFilter wiener_filter; 58 | NoiseEstimator noise_estimator; 59 | std::array prev_analysis_signal_spectrum{}; 60 | std::array analyze_analysis_memory{}; 61 | std::array process_analysis_memory{}; 62 | std::array process_synthesis_memory{}; 63 | std::vector> process_delay_memory; 64 | }; 65 | 66 | struct FilterBankState { 67 | std::array real; 68 | std::array imag; 69 | std::array extended_frame; 70 | }; 71 | 72 | std::vector filter_bank_states_heap_; 73 | std::vector upper_band_gains_heap_; 74 | std::vector energies_before_filtering_heap_; 75 | std::vector gain_adjustments_heap_; 76 | std::vector> channels_; 77 | 78 | // Aggregates the Wiener filters into a single filter to use. 
79 | void AggregateWienerFilters( 80 | rtc::ArrayView filter) const; 81 | }; 82 | 83 | } // namespace webrtc 84 | 85 | #endif // MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSOR_H_ 86 | -------------------------------------------------------------------------------- /ns/ns_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_NS_NS_COMMON_H_ 12 | #define MODULES_AUDIO_PROCESSING_NS_NS_COMMON_H_ 13 | 14 | #include 15 | 16 | namespace webrtc { 17 | 18 | constexpr size_t kFftSize = 256; 19 | constexpr size_t kFftSizeBy2Plus1 = kFftSize / 2 + 1; 20 | constexpr size_t kNsFrameSize = 160; 21 | constexpr size_t kOverlapSize = kFftSize - kNsFrameSize; 22 | 23 | constexpr int kShortStartupPhaseBlocks = 50; 24 | constexpr int kLongStartupPhaseBlocks = 200; 25 | constexpr int kFeatureUpdateWindowSize = 500; 26 | 27 | constexpr float kLtrFeatureThr = 0.5f; 28 | constexpr float kBinSizeLrt = 0.1f; 29 | constexpr float kBinSizeSpecFlat = 0.05f; 30 | constexpr float kBinSizeSpecDiff = 0.1f; 31 | 32 | } // namespace webrtc 33 | 34 | #endif // MODULES_AUDIO_PROCESSING_NS_NS_COMMON_H_ 35 | -------------------------------------------------------------------------------- /ns/ns_config.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 
namespace webrtc {

// Configuration for the noise suppressor.
struct NsConfig {
  // Available suppression strengths (the enumerator names suggest the target
  // attenuation in dB).
  enum class SuppressionLevel {
    k6dB,
    k12dB,
    k18dB,
    k21dB
  };

  // Strength to apply; k12dB unless the caller overrides it.
  SuppressionLevel target_level = SuppressionLevel::k12dB;
};

}  // namespace webrtc
9 | */ 10 | 11 | #include "ns_fft.h" 12 | 13 | #include "fft4g.h" 14 | 15 | namespace webrtc { 16 | 17 | NrFft::NrFft() : bit_reversal_state_(kFftSize / 2), tables_(kFftSize / 2) { 18 | // Initialize WebRtc_rdt (setting (bit_reversal_state_[0] to 0 triggers 19 | // initialization) 20 | bit_reversal_state_[0] = 0.f; 21 | std::array tmp_buffer{}; 22 | tmp_buffer.fill(0.f); 23 | WebRtc_rdft(kFftSize, 1, tmp_buffer.data(), bit_reversal_state_.data(), 24 | tables_.data()); 25 | } 26 | 27 | void NrFft::Fft(rtc::ArrayView time_data, 28 | rtc::ArrayView real, 29 | rtc::ArrayView imag) { 30 | WebRtc_rdft(kFftSize, 1, time_data.data(), bit_reversal_state_.data(), 31 | tables_.data()); 32 | 33 | imag[0] = 0; 34 | real[0] = time_data[0]; 35 | 36 | imag[kFftSizeBy2Plus1 - 1] = 0; 37 | real[kFftSizeBy2Plus1 - 1] = time_data[1]; 38 | 39 | for (size_t i = 1; i < kFftSizeBy2Plus1 - 1; ++i) { 40 | real[i] = time_data[2 * i]; 41 | imag[i] = time_data[2 * i + 1]; 42 | } 43 | } 44 | 45 | void NrFft::Ifft(rtc::ArrayView real, 46 | rtc::ArrayView imag, 47 | rtc::ArrayView time_data) { 48 | time_data[0] = real[0]; 49 | time_data[1] = real[kFftSizeBy2Plus1 - 1]; 50 | for (size_t i = 1; i < kFftSizeBy2Plus1 - 1; ++i) { 51 | time_data[2 * i] = real[i]; 52 | time_data[2 * i + 1] = imag[i]; 53 | } 54 | WebRtc_rdft(kFftSize, -1, time_data.data(), bit_reversal_state_.data(), 55 | tables_.data()); 56 | 57 | // Scale the output 58 | constexpr float kScaling = 2.f / kFftSize; 59 | for (float &d : time_data) { 60 | d *= kScaling; 61 | } 62 | } 63 | 64 | } // namespace webrtc 65 | -------------------------------------------------------------------------------- /ns/ns_fft.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. 
An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_NS_NS_FFT_H_ 12 | #define MODULES_AUDIO_PROCESSING_NS_NS_FFT_H_ 13 | 14 | #include 15 | 16 | #include "array_view.h" 17 | #include "ns_common.h" 18 | 19 | namespace webrtc { 20 | 21 | // Wrapper class providing 256 point FFT functionality. 22 | class NrFft { 23 | public: 24 | NrFft(); 25 | 26 | NrFft(const NrFft &) = delete; 27 | 28 | NrFft &operator=(const NrFft &) = delete; 29 | 30 | // Transforms the signal from time to frequency domain. 31 | void Fft(rtc::ArrayView time_data, 32 | rtc::ArrayView real, 33 | rtc::ArrayView imag); 34 | 35 | // Transforms the signal from frequency to time domain. 36 | void Ifft(rtc::ArrayView real, 37 | rtc::ArrayView imag, 38 | rtc::ArrayView time_data); 39 | 40 | private: 41 | std::vector bit_reversal_state_; 42 | std::vector tables_; 43 | }; 44 | 45 | } // namespace webrtc 46 | 47 | #endif // MODULES_AUDIO_PROCESSING_NS_NS_FFT_H_ 48 | -------------------------------------------------------------------------------- /ns/prior_signal_model.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
namespace webrtc {

// ---- prior_signal_model.h --------------------------------------------------

// Struct for storing the prior signal model parameters. Non-copyable.
struct PriorSignalModel {
  // Only the LRT value is caller-supplied; every other field starts from the
  // default given next to it.
  explicit PriorSignalModel(float lrt_initial_value);

  PriorSignalModel(const PriorSignalModel &) = delete;

  PriorSignalModel &operator=(const PriorSignalModel &) = delete;

  float lrt;
  // Feature thresholds.
  float flatness_threshold = .5f;
  float template_diff_threshold = .5f;
  // Feature weights: by default the LRT feature carries all of the weight.
  float lrt_weighting = 1.f;
  float flatness_weighting = 0.f;
  float difference_weighting = 0.f;
};

// ---- prior_signal_model.cc -------------------------------------------------

PriorSignalModel::PriorSignalModel(float lrt_initial_value)
    : lrt(lrt_initial_value) {
}

}  // namespace webrtc
3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "prior_signal_model_estimator.h" 12 | 13 | #include 14 | #include 15 | 16 | #include "fast_math.h" 17 | #include "checks.h" 18 | 19 | namespace webrtc { 20 | 21 | namespace { 22 | 23 | // Identifies the first of the two largest peaks in the histogram. 24 | void FindFirstOfTwoLargestPeaks( 25 | float bin_size, 26 | rtc::ArrayView spectral_flatness, 27 | float *peak_position, 28 | int *peak_weight) { 29 | RTC_DCHECK(peak_position); 30 | RTC_DCHECK(peak_weight); 31 | 32 | int peak_value = 0; 33 | int secondary_peak_value = 0; 34 | *peak_position = 0.f; 35 | float secondary_peak_position = 0.f; 36 | *peak_weight = 0; 37 | int secondary_peak_weight = 0; 38 | 39 | // Identify the two largest peaks. 40 | for (int i = 0; i < kHistogramSize; ++i) { 41 | const float bin_mid = (i + 0.5f) * bin_size; 42 | if (spectral_flatness[i] > peak_value) { 43 | // Found new "first" peak candidate. 44 | secondary_peak_value = peak_value; 45 | secondary_peak_weight = *peak_weight; 46 | secondary_peak_position = *peak_position; 47 | 48 | peak_value = spectral_flatness[i]; 49 | *peak_weight = spectral_flatness[i]; 50 | *peak_position = bin_mid; 51 | } else if (spectral_flatness[i] > secondary_peak_value) { 52 | // Found new "second" peak candidate. 53 | secondary_peak_value = spectral_flatness[i]; 54 | secondary_peak_weight = spectral_flatness[i]; 55 | secondary_peak_position = bin_mid; 56 | } 57 | } 58 | 59 | // Merge the peaks if they are close. 
60 | if ((fabs(secondary_peak_position - *peak_position) < 2 * bin_size) && 61 | (secondary_peak_weight > 0.5f * (*peak_weight))) { 62 | *peak_weight += secondary_peak_weight; 63 | *peak_position = 0.5f * (*peak_position + secondary_peak_position); 64 | } 65 | } 66 | 67 | void UpdateLrt(rtc::ArrayView lrt_histogram, 68 | float *prior_model_lrt, 69 | bool *low_lrt_fluctuations) { 70 | RTC_DCHECK(prior_model_lrt); 71 | RTC_DCHECK(low_lrt_fluctuations); 72 | 73 | float average = 0.f; 74 | float average_compl = 0.f; 75 | float average_squared = 0.f; 76 | int count = 0; 77 | 78 | for (int i = 0; i < 10; ++i) { 79 | float bin_mid = (i + 0.5f) * kBinSizeLrt; 80 | average += lrt_histogram[i] * bin_mid; 81 | count += lrt_histogram[i]; 82 | } 83 | if (count > 0) { 84 | average = average / count; 85 | } 86 | 87 | for (int i = 0; i < kHistogramSize; ++i) { 88 | float bin_mid = (i + 0.5f) * kBinSizeLrt; 89 | average_squared += lrt_histogram[i] * bin_mid * bin_mid; 90 | average_compl += lrt_histogram[i] * bin_mid; 91 | } 92 | constexpr float kOneFeatureUpdateWindowSize = 1.f / kFeatureUpdateWindowSize; 93 | average_squared = average_squared * kOneFeatureUpdateWindowSize; 94 | average_compl = average_compl * kOneFeatureUpdateWindowSize; 95 | 96 | // Fluctuation limit of LRT feature. 97 | *low_lrt_fluctuations = average_squared - average * average_compl < 0.05f; 98 | 99 | // Get threshold for LRT feature. 100 | constexpr float kMaxLrt = 1.f; 101 | constexpr float kMinLrt = .2f; 102 | if (*low_lrt_fluctuations) { 103 | // Very low fluctuation, so likely noise. 104 | *prior_model_lrt = kMaxLrt; 105 | } else { 106 | *prior_model_lrt = std::min(kMaxLrt, std::max(kMinLrt, 1.2f * average)); 107 | } 108 | } 109 | 110 | } // namespace 111 | 112 | PriorSignalModelEstimator::PriorSignalModelEstimator(float lrt_initial_value) 113 | : prior_model_(lrt_initial_value) {} 114 | 115 | // Extract thresholds for feature parameters and computes the threshold/weights. 
116 | void PriorSignalModelEstimator::Update(const Histograms &histograms) { 117 | bool low_lrt_fluctuations; 118 | UpdateLrt(histograms.get_lrt(), &prior_model_.lrt, &low_lrt_fluctuations); 119 | 120 | // For spectral flatness and spectral difference: compute the main peaks of 121 | // the histograms. 122 | float spectral_flatness_peak_position; 123 | int spectral_flatness_peak_weight; 124 | FindFirstOfTwoLargestPeaks( 125 | kBinSizeSpecFlat, histograms.get_spectral_flatness(), 126 | &spectral_flatness_peak_position, &spectral_flatness_peak_weight); 127 | 128 | float spectral_diff_peak_position = 0.f; 129 | int spectral_diff_peak_weight = 0; 130 | FindFirstOfTwoLargestPeaks(kBinSizeSpecDiff, histograms.get_spectral_diff(), 131 | &spectral_diff_peak_position, 132 | &spectral_diff_peak_weight); 133 | 134 | // Reject if weight of peaks is not large enough, or peak value too small. 135 | // Peak limit for spectral flatness (varies between 0 and 1). 136 | const int use_spec_flat = spectral_flatness_peak_weight < 0.3f * 500 || 137 | spectral_flatness_peak_position < 0.6f 138 | ? 0 139 | : 1; 140 | 141 | // Reject if weight of peaks is not large enough or if fluctuation of the LRT 142 | // feature are very low, indicating a noise state. 143 | const int use_spec_diff = 144 | spectral_diff_peak_weight < 0.3f * 500 || low_lrt_fluctuations ? 0 : 1; 145 | 146 | // Update the model. 
147 | prior_model_.template_diff_threshold = 1.2f * spectral_diff_peak_position; 148 | prior_model_.template_diff_threshold = 149 | std::min(1.f, std::max(0.16f, prior_model_.template_diff_threshold)); 150 | 151 | float one_by_feature_sum = 1.f / (1.f + use_spec_flat + use_spec_diff); 152 | prior_model_.lrt_weighting = one_by_feature_sum; 153 | 154 | if (use_spec_flat == 1) { 155 | prior_model_.flatness_threshold = 0.9f * spectral_flatness_peak_position; 156 | prior_model_.flatness_threshold = 157 | std::min(.95f, std::max(0.1f, prior_model_.flatness_threshold)); 158 | prior_model_.flatness_weighting = one_by_feature_sum; 159 | } else { 160 | prior_model_.flatness_weighting = 0.f; 161 | } 162 | 163 | if (use_spec_diff == 1) { 164 | prior_model_.difference_weighting = one_by_feature_sum; 165 | } else { 166 | prior_model_.difference_weighting = 0.f; 167 | } 168 | } 169 | 170 | } // namespace webrtc 171 | -------------------------------------------------------------------------------- /ns/prior_signal_model_estimator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_ESTIMATOR_H_ 12 | #define MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_ESTIMATOR_H_ 13 | 14 | #include "histograms.h" 15 | #include "prior_signal_model.h" 16 | 17 | namespace webrtc { 18 | 19 | // Estimator of the prior signal model parameters. 
20 | class PriorSignalModelEstimator { 21 | public: 22 | explicit PriorSignalModelEstimator(float lrt_initial_value); 23 | 24 | PriorSignalModelEstimator(const PriorSignalModelEstimator &) = delete; 25 | 26 | PriorSignalModelEstimator &operator=(const PriorSignalModelEstimator &) = 27 | delete; 28 | 29 | // Updates the model estimate. 30 | void Update(const Histograms &h); 31 | 32 | // Returns the estimated model. 33 | const PriorSignalModel &get_prior_model() const { return prior_model_; } 34 | 35 | private: 36 | PriorSignalModel prior_model_; 37 | }; 38 | 39 | } // namespace webrtc 40 | 41 | #endif // MODULES_AUDIO_PROCESSING_NS_PRIOR_SIGNAL_MODEL_ESTIMATOR_H_ 42 | -------------------------------------------------------------------------------- /ns/push_sinc_resampler.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #include "push_sinc_resampler.h" 12 | 13 | #include 14 | 15 | #include "audio_util.h" 16 | 17 | namespace webrtc { 18 | 19 | PushSincResampler::PushSincResampler(size_t source_frames, 20 | size_t destination_frames) 21 | : resampler_(new SincResampler(source_frames * 1.0 / destination_frames, 22 | source_frames, 23 | this)), 24 | source_ptr_(nullptr), 25 | source_ptr_int_(nullptr), 26 | destination_frames_(destination_frames), 27 | first_pass_(true), 28 | source_available_(0) {} 29 | 30 | PushSincResampler::~PushSincResampler() = default; 31 | 32 | size_t PushSincResampler::Resample(const int16_t *source, 33 | size_t source_length, 34 | int16_t *destination, 35 | size_t destination_capacity) { 36 | if (!float_buffer_) 37 | float_buffer_.reset(new float[destination_frames_]); 38 | 39 | source_ptr_int_ = source; 40 | // Pass nullptr as the float source to have Run() read from the int16 source. 41 | Resample(nullptr, source_length, float_buffer_.get(), destination_frames_); 42 | FloatS16ToS16(float_buffer_.get(), destination_frames_, destination); 43 | source_ptr_int_ = nullptr; 44 | return destination_frames_; 45 | } 46 | 47 | size_t PushSincResampler::Resample(const float *source, 48 | size_t source_length, 49 | float *destination, 50 | size_t destination_capacity) { 51 | RTC_CHECK_EQ(source_length, resampler_->request_frames()); 52 | RTC_CHECK_GE(destination_capacity, destination_frames_); 53 | // Cache the source pointer. Calling Resample() will immediately trigger 54 | // the Run() callback whereupon we provide the cached value. 55 | source_ptr_ = source; 56 | source_available_ = source_length; 57 | 58 | // On the first pass, we call Resample() twice. During the first call, we 59 | // provide dummy input and discard the output. This is done to prime the 60 | // SincResampler buffer with the correct delay (half the kernel size), thereby 61 | // ensuring that all later Resample() calls will only result in one input 62 | // request through Run(). 
63 | // 64 | // If this wasn't done, SincResampler would call Run() twice on the first 65 | // pass, and we'd have to introduce an entire |source_frames| of delay, rather 66 | // than the minimum half kernel. 67 | // 68 | // It works out that ChunkSize() is exactly the amount of output we need to 69 | // request in order to prime the buffer with a single Run() request for 70 | // |source_frames|. 71 | if (first_pass_) 72 | resampler_->Resample(resampler_->ChunkSize(), destination); 73 | 74 | resampler_->Resample(destination_frames_, destination); 75 | source_ptr_ = nullptr; 76 | return destination_frames_; 77 | } 78 | 79 | void PushSincResampler::Run(size_t frames, float *destination) { 80 | // Ensure we are only asked for the available samples. This would fail if 81 | // Run() was triggered more than once per Resample() call. 82 | RTC_CHECK_EQ(source_available_, frames); 83 | 84 | if (first_pass_) { 85 | // Provide dummy input on the first pass, the output of which will be 86 | // discarded, as described in Resample(). 87 | std::memset(destination, 0, frames * sizeof(*destination)); 88 | first_pass_ = false; 89 | return; 90 | } 91 | 92 | if (source_ptr_) { 93 | std::memcpy(destination, source_ptr_, frames * sizeof(*destination)); 94 | } else { 95 | for (size_t i = 0; i < frames; ++i) 96 | destination[i] = static_cast(source_ptr_int_[i]); 97 | } 98 | source_available_ -= frames; 99 | } 100 | 101 | } // namespace webrtc 102 | -------------------------------------------------------------------------------- /ns/push_sinc_resampler.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. 
All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef COMMON_AUDIO_RESAMPLER_PUSH_SINC_RESAMPLER_H_ 12 | #define COMMON_AUDIO_RESAMPLER_PUSH_SINC_RESAMPLER_H_ 13 | 14 | #include 15 | #include 16 | 17 | #include 18 | 19 | #include "sinc_resampler.h" 20 | #include "constructor_magic.h" 21 | 22 | namespace webrtc { 23 | 24 | // A thin wrapper over SincResampler to provide a push-based interface as 25 | // required by WebRTC. SincResampler uses a pull-based interface, and will 26 | // use SincResamplerCallback::Run() to request data upon a call to Resample(). 27 | // These Run() calls will happen on the same thread Resample() is called on. 28 | class PushSincResampler : public SincResamplerCallback { 29 | public: 30 | // Provide the size of the source and destination blocks in samples. These 31 | // must correspond to the same time duration (typically 10 ms) as the sample 32 | // ratio is inferred from them. 33 | PushSincResampler(size_t source_frames, size_t destination_frames); 34 | 35 | ~PushSincResampler() override; 36 | 37 | // Perform the resampling. |source_frames| must always equal the 38 | // |source_frames| provided at construction. |destination_capacity| must be 39 | // at least as large as |destination_frames|. Returns the number of samples 40 | // provided in destination (for convenience, since this will always be equal 41 | // to |destination_frames|). 42 | size_t Resample(const int16_t *source, 43 | size_t source_frames, 44 | int16_t *destination, 45 | size_t destination_capacity); 46 | 47 | size_t Resample(const float *source, 48 | size_t source_frames, 49 | float *destination, 50 | size_t destination_capacity); 51 | 52 | // Delay due to the filter kernel. Essentially, the time after which an input 53 | // sample will appear in the resampled output. 
54 | static float AlgorithmicDelaySeconds(int source_rate_hz) { 55 | return 1.f / source_rate_hz * SincResampler::kKernelSize / 2; 56 | } 57 | 58 | protected: 59 | // Implements SincResamplerCallback. 60 | void Run(size_t frames, float *destination) override; 61 | 62 | private: 63 | friend class PushSincResamplerTest; 64 | 65 | SincResampler *get_resampler_for_testing() { return resampler_.get(); } 66 | 67 | std::unique_ptr resampler_; 68 | std::unique_ptr float_buffer_; 69 | const float *source_ptr_; 70 | const int16_t *source_ptr_int_; 71 | const size_t destination_frames_; 72 | 73 | // True on the first call to Resample(), to prime the SincResampler buffer. 74 | bool first_pass_; 75 | 76 | // Used to assert we are only requested for as much data as is available. 77 | size_t source_available_; 78 | 79 | RTC_DISALLOW_COPY_AND_ASSIGN(PushSincResampler); 80 | }; 81 | 82 | } // namespace webrtc 83 | 84 | #endif // COMMON_AUDIO_RESAMPLER_PUSH_SINC_RESAMPLER_H_ 85 | -------------------------------------------------------------------------------- /ns/quantile_noise_estimator.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #include "quantile_noise_estimator.h" 12 | 13 | #include 14 | 15 | #include "fast_math.h" 16 | 17 | namespace webrtc { 18 | 19 | QuantileNoiseEstimator::QuantileNoiseEstimator() { 20 | quantile_.fill(0.f); 21 | density_.fill(0.3f); 22 | log_quantile_.fill(8.f); 23 | 24 | constexpr float kOneBySimult = 1.f / kSimult; 25 | for (size_t i = 0; i < kSimult; ++i) { 26 | counter_[i] = floor(kLongStartupPhaseBlocks * (i + 1.f) * kOneBySimult); 27 | } 28 | } 29 | 30 | void QuantileNoiseEstimator::Estimate( 31 | rtc::ArrayView signal_spectrum, 32 | rtc::ArrayView noise_spectrum) { 33 | std::array log_spectrum{}; 34 | LogApproximation(signal_spectrum, log_spectrum); 35 | 36 | int quantile_index_to_return = -1; 37 | // Loop over simultaneous estimates. 38 | for (int s = 0, k = 0; s < kSimult; 39 | ++s, k += static_cast(kFftSizeBy2Plus1)) { 40 | const float one_by_counter_plus_1 = 1.f / (counter_[s] + 1.f); 41 | for (int i = 0, j = k; i < static_cast(kFftSizeBy2Plus1); ++i, ++j) { 42 | // Update log quantile estimate. 43 | const float delta = density_[j] > 1.f ? 40.f / density_[j] : 40.f; 44 | 45 | const float multiplier = delta * one_by_counter_plus_1; 46 | if (log_spectrum[i] > log_quantile_[j]) { 47 | log_quantile_[j] += 0.25f * multiplier; 48 | } else { 49 | log_quantile_[j] -= 0.75f * multiplier; 50 | } 51 | 52 | // Update density estimate. 53 | constexpr float kWidth = 0.01f; 54 | constexpr float kOneByWidthPlus2 = 1.f / (2.f * kWidth); 55 | if (fabs(log_spectrum[i] - log_quantile_[j]) < kWidth) { 56 | density_[j] = (counter_[s] * density_[j] + kOneByWidthPlus2) * 57 | one_by_counter_plus_1; 58 | } 59 | } 60 | 61 | if (counter_[s] >= kLongStartupPhaseBlocks) { 62 | counter_[s] = 0; 63 | if (num_updates_ >= kLongStartupPhaseBlocks) { 64 | quantile_index_to_return = k; 65 | } 66 | } 67 | 68 | ++counter_[s]; 69 | } 70 | 71 | // Sequentially update the noise during startup. 
72 | if (num_updates_ < kLongStartupPhaseBlocks) { 73 | // Use the last "s" to get noise during startup that differ from zero. 74 | quantile_index_to_return = kFftSizeBy2Plus1 * (kSimult - 1); 75 | ++num_updates_; 76 | } 77 | 78 | if (quantile_index_to_return >= 0) { 79 | ExpApproximation( 80 | rtc::ArrayView(&log_quantile_[quantile_index_to_return], 81 | kFftSizeBy2Plus1), 82 | quantile_); 83 | } 84 | 85 | std::copy(quantile_.begin(), quantile_.end(), noise_spectrum.begin()); 86 | } 87 | 88 | } // namespace webrtc 89 | -------------------------------------------------------------------------------- /ns/quantile_noise_estimator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_NS_QUANTILE_NOISE_ESTIMATOR_H_ 12 | #define MODULES_AUDIO_PROCESSING_NS_QUANTILE_NOISE_ESTIMATOR_H_ 13 | 14 | #include 15 | #include 16 | 17 | #include "array_view.h" 18 | #include "ns_common.h" 19 | 20 | namespace webrtc { 21 | 22 | constexpr int kSimult = 3; 23 | 24 | // For quantile noise estimation. 25 | class QuantileNoiseEstimator { 26 | public: 27 | QuantileNoiseEstimator(); 28 | 29 | QuantileNoiseEstimator(const QuantileNoiseEstimator &) = delete; 30 | 31 | QuantileNoiseEstimator &operator=(const QuantileNoiseEstimator &) = delete; 32 | 33 | // Estimate noise. 
34 | void Estimate(rtc::ArrayView signal_spectrum, 35 | rtc::ArrayView noise_spectrum); 36 | 37 | private: 38 | std::array density_{}; 39 | std::array log_quantile_{}; 40 | std::array quantile_{}; 41 | std::array counter_{}; 42 | int num_updates_ = 1; 43 | }; 44 | 45 | } // namespace webrtc 46 | 47 | #endif // MODULES_AUDIO_PROCESSING_NS_QUANTILE_NOISE_ESTIMATOR_H_ 48 | -------------------------------------------------------------------------------- /ns/signal_model.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "signal_model.h" 12 | 13 | namespace webrtc { 14 | 15 | SignalModel::SignalModel() { 16 | constexpr float kSfFeatureThr = 0.5f; 17 | 18 | lrt = kLtrFeatureThr; 19 | spectral_flatness = kSfFeatureThr; 20 | spectral_diff = kSfFeatureThr; 21 | avg_log_lrt.fill(kLtrFeatureThr); 22 | } 23 | 24 | } // namespace webrtc 25 | -------------------------------------------------------------------------------- /ns/signal_model.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_H_ 12 | #define MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_H_ 13 | 14 | #include 15 | 16 | #include "ns_common.h" 17 | 18 | namespace webrtc { 19 | 20 | struct SignalModel { 21 | SignalModel(); 22 | 23 | SignalModel(const SignalModel &) = delete; 24 | 25 | SignalModel &operator=(const SignalModel &) = delete; 26 | 27 | float lrt; 28 | float spectral_diff; 29 | float spectral_flatness; 30 | // Log LRT factor with time-smoothing. 31 | std::array avg_log_lrt{}; 32 | }; 33 | 34 | } // namespace webrtc 35 | 36 | #endif // MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_H_ 37 | -------------------------------------------------------------------------------- /ns/signal_model_estimator.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "signal_model_estimator.h" 12 | 13 | #include "fast_math.h" 14 | 15 | namespace webrtc { 16 | 17 | namespace { 18 | 19 | constexpr float kOneByFftSizeBy2Plus1 = 1.f / kFftSizeBy2Plus1; 20 | 21 | // Computes the difference measure between input spectrum and a template/learned 22 | // noise spectrum. 23 | float ComputeSpectralDiff( 24 | rtc::ArrayView conservative_noise_spectrum, 25 | rtc::ArrayView signal_spectrum, 26 | float signal_spectral_sum, 27 | float diff_normalization) { 28 | // spectral_diff = var(signal_spectrum) - cov(signal_spectrum, magnAvgPause)^2 29 | // / var(magnAvgPause) 30 | 31 | // Compute average quantities. 
32 | float noise_average = 0.f; 33 | for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { 34 | // Conservative smooth noise spectrum from pause frames. 35 | noise_average += conservative_noise_spectrum[i]; 36 | } 37 | noise_average = noise_average * kOneByFftSizeBy2Plus1; 38 | float signal_average = signal_spectral_sum * kOneByFftSizeBy2Plus1; 39 | 40 | // Compute variance and covariance quantities. 41 | float covariance = 0.f; 42 | float noise_variance = 0.f; 43 | float signal_variance = 0.f; 44 | for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { 45 | float signal_diff = signal_spectrum[i] - signal_average; 46 | float noise_diff = conservative_noise_spectrum[i] - noise_average; 47 | covariance += signal_diff * noise_diff; 48 | noise_variance += noise_diff * noise_diff; 49 | signal_variance += signal_diff * signal_diff; 50 | } 51 | covariance *= kOneByFftSizeBy2Plus1; 52 | noise_variance *= kOneByFftSizeBy2Plus1; 53 | signal_variance *= kOneByFftSizeBy2Plus1; 54 | 55 | // Update of average magnitude spectrum. 56 | float spectral_diff = 57 | signal_variance - (covariance * covariance) / (noise_variance + 0.0001f); 58 | // Normalize. 59 | return spectral_diff / (diff_normalization + 0.0001f); 60 | } 61 | 62 | // Updates the spectral flatness based on the input spectrum. 63 | void UpdateSpectralFlatness( 64 | rtc::ArrayView signal_spectrum, 65 | float signal_spectral_sum, 66 | float *spectral_flatness) { 67 | RTC_DCHECK(spectral_flatness); 68 | 69 | // Compute log of ratio of the geometric to arithmetic mean (handle the log(0) 70 | // separately). 
71 | constexpr float kAveraging = 0.3f; 72 | float avg_spect_flatness_num = 0.f; 73 | for (size_t i = 1; i < kFftSizeBy2Plus1; ++i) { 74 | if (signal_spectrum[i] == 0.f) { 75 | *spectral_flatness -= kAveraging * (*spectral_flatness); 76 | return; 77 | } 78 | } 79 | 80 | for (size_t i = 1; i < kFftSizeBy2Plus1; ++i) { 81 | avg_spect_flatness_num += LogApproximation(signal_spectrum[i]); 82 | } 83 | 84 | float avg_spect_flatness_denom = signal_spectral_sum - signal_spectrum[0]; 85 | 86 | avg_spect_flatness_denom = avg_spect_flatness_denom * kOneByFftSizeBy2Plus1; 87 | avg_spect_flatness_num = avg_spect_flatness_num * kOneByFftSizeBy2Plus1; 88 | 89 | float spectral_tmp = 90 | ExpApproximation(avg_spect_flatness_num) / avg_spect_flatness_denom; 91 | 92 | // Time-avg update of spectral flatness feature. 93 | *spectral_flatness += kAveraging * (spectral_tmp - *spectral_flatness); 94 | } 95 | 96 | // Updates the log LRT measures. 97 | void UpdateSpectralLrt(rtc::ArrayView prior_snr, 98 | rtc::ArrayView post_snr, 99 | rtc::ArrayView avg_log_lrt, 100 | float *lrt) { 101 | RTC_DCHECK(lrt); 102 | 103 | for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { 104 | float tmp1 = 1.f + 2.f * prior_snr[i]; 105 | float tmp2 = 2.f * prior_snr[i] / (tmp1 + 0.0001f); 106 | float bessel_tmp = (post_snr[i] + 1.f) * tmp2; 107 | avg_log_lrt[i] += 108 | .5f * (bessel_tmp - LogApproximation(tmp1) - avg_log_lrt[i]); 109 | } 110 | 111 | float log_lrt_time_avg_k_sum = 0.f; 112 | for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { 113 | log_lrt_time_avg_k_sum += avg_log_lrt[i]; 114 | } 115 | *lrt = log_lrt_time_avg_k_sum * kOneByFftSizeBy2Plus1; 116 | } 117 | 118 | } // namespace 119 | 120 | SignalModelEstimator::SignalModelEstimator() 121 | : prior_model_estimator_(kLtrFeatureThr) {} 122 | 123 | void SignalModelEstimator::AdjustNormalization(int32_t num_analyzed_frames, 124 | float signal_energy) { 125 | diff_normalization_ *= num_analyzed_frames; 126 | diff_normalization_ += signal_energy; 127 | 
diff_normalization_ /= (num_analyzed_frames + 1); 128 | } 129 | 130 | // Update the noise features. 131 | void SignalModelEstimator::Update( 132 | rtc::ArrayView prior_snr, 133 | rtc::ArrayView post_snr, 134 | rtc::ArrayView conservative_noise_spectrum, 135 | rtc::ArrayView signal_spectrum, 136 | float signal_spectral_sum, 137 | float signal_energy) { 138 | // Compute spectral flatness on input spectrum. 139 | UpdateSpectralFlatness(signal_spectrum, signal_spectral_sum, 140 | &features_.spectral_flatness); 141 | 142 | // Compute difference of input spectrum with learned/estimated noise spectrum. 143 | float spectral_diff = 144 | ComputeSpectralDiff(conservative_noise_spectrum, signal_spectrum, 145 | signal_spectral_sum, diff_normalization_); 146 | // Compute time-avg update of difference feature. 147 | features_.spectral_diff += 0.3f * (spectral_diff - features_.spectral_diff); 148 | 149 | signal_energy_sum_ += signal_energy; 150 | 151 | // Compute histograms for parameter decisions (thresholds and weights for 152 | // features). Parameters are extracted periodically. 153 | if (--histogram_analysis_counter_ > 0) { 154 | histograms_.Update(features_); 155 | } else { 156 | // Compute model parameters. 157 | prior_model_estimator_.Update(histograms_); 158 | 159 | // Clear histograms for next update. 160 | histograms_.Clear(); 161 | 162 | histogram_analysis_counter_ = kFeatureUpdateWindowSize; 163 | 164 | // Update every window: 165 | // Compute normalization for the spectral difference for next estimation. 166 | signal_energy_sum_ = signal_energy_sum_ / kFeatureUpdateWindowSize; 167 | diff_normalization_ = 0.5f * (signal_energy_sum_ + diff_normalization_); 168 | signal_energy_sum_ = 0.f; 169 | } 170 | 171 | // Compute the LRT. 
172 | UpdateSpectralLrt(prior_snr, post_snr, features_.avg_log_lrt, &features_.lrt); 173 | } 174 | 175 | } // namespace webrtc 176 | -------------------------------------------------------------------------------- /ns/signal_model_estimator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_ESTIMATOR_H_ 12 | #define MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_ESTIMATOR_H_ 13 | 14 | #include 15 | 16 | #include "array_view.h" 17 | #include "histograms.h" 18 | #include "ns_common.h" 19 | #include "prior_signal_model.h" 20 | #include "prior_signal_model_estimator.h" 21 | #include "signal_model.h" 22 | 23 | namespace webrtc { 24 | 25 | class SignalModelEstimator { 26 | public: 27 | SignalModelEstimator(); 28 | 29 | SignalModelEstimator(const SignalModelEstimator &) = delete; 30 | 31 | SignalModelEstimator &operator=(const SignalModelEstimator &) = delete; 32 | 33 | // Compute signal normalization during the initial startup phase. 
34 | void AdjustNormalization(int32_t num_analyzed_frames, float signal_energy); 35 | 36 | void Update( 37 | rtc::ArrayView prior_snr, 38 | rtc::ArrayView post_snr, 39 | rtc::ArrayView conservative_noise_spectrum, 40 | rtc::ArrayView signal_spectrum, 41 | float signal_spectral_sum, 42 | float signal_energy); 43 | 44 | const PriorSignalModel &get_prior_model() const { 45 | return prior_model_estimator_.get_prior_model(); 46 | } 47 | 48 | const SignalModel &get_model() { return features_; } 49 | 50 | private: 51 | float diff_normalization_ = 0.f; 52 | float signal_energy_sum_ = 0.f; 53 | Histograms histograms_; 54 | int histogram_analysis_counter_ = 500; 55 | PriorSignalModelEstimator prior_model_estimator_; 56 | SignalModel features_; 57 | }; 58 | 59 | } // namespace webrtc 60 | 61 | #endif // MODULES_AUDIO_PROCESSING_NS_SIGNAL_MODEL_ESTIMATOR_H_ 62 | -------------------------------------------------------------------------------- /ns/sinc_resampler.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | // Modified from the Chromium original: 12 | // src/media/base/sinc_resampler.cc 13 | 14 | // Initial input buffer layout, dividing into regions r0_ to r4_ (note: r0_, r3_ 15 | // and r4_ will move after the first load): 16 | // 17 | // |----------------|-----------------------------------------|----------------| 18 | // 19 | // request_frames_ 20 | // <---------------------------------------------------------> 21 | // r0_ (during first load) 22 | // 23 | // kKernelSize / 2 kKernelSize / 2 kKernelSize / 2 kKernelSize / 2 24 | // <---------------> <---------------> <---------------> <---------------> 25 | // r1_ r2_ r3_ r4_ 26 | // 27 | // block_size_ == r4_ - r2_ 28 | // <---------------------------------------> 29 | // 30 | // request_frames_ 31 | // <------------------ ... -----------------> 32 | // r0_ (during second load) 33 | // 34 | // On the second request r0_ slides to the right by kKernelSize / 2 and r3_, r4_ 35 | // and block_size_ are reinitialized via step (3) in the algorithm below. 36 | // 37 | // These new regions remain constant until a Flush() occurs. While complicated, 38 | // this allows us to reduce jitter by always requesting the same amount from the 39 | // provided callback. 40 | // 41 | // The algorithm: 42 | // 43 | // 1) Allocate input_buffer of size: request_frames_ + kKernelSize; this ensures 44 | // there's enough room to read request_frames_ from the callback into region 45 | // r0_ (which will move between the first and subsequent passes). 46 | // 47 | // 2) Let r1_, r2_ each represent half the kernel centered around r0_: 48 | // 49 | // r0_ = input_buffer_ + kKernelSize / 2 50 | // r1_ = input_buffer_ 51 | // r2_ = r0_ 52 | // 53 | // r0_ is always request_frames_ in size. r1_, r2_ are kKernelSize / 2 in 54 | // size. r1_ must be zero initialized to avoid convolution with garbage (see 55 | // step (5) for why). 
56 | // 57 | // 3) Let r3_, r4_ each represent half the kernel right aligned with the end of 58 | // r0_ and choose block_size_ as the distance in frames between r4_ and r2_: 59 | // 60 | // r3_ = r0_ + request_frames_ - kKernelSize 61 | // r4_ = r0_ + request_frames_ - kKernelSize / 2 62 | // block_size_ = r4_ - r2_ = request_frames_ - kKernelSize / 2 63 | // 64 | // 4) Consume request_frames_ frames into r0_. 65 | // 66 | // 5) Position kernel centered at start of r2_ and generate output frames until 67 | // the kernel is centered at the start of r4_ or we've finished generating 68 | // all the output frames. 69 | // 70 | // 6) Wrap left over data from the r3_ to r1_ and r4_ to r2_. 71 | // 72 | // 7) If we're on the second load, in order to avoid overwriting the frames we 73 | // just wrapped from r4_ we need to slide r0_ to the right by the size of 74 | // r4_, which is kKernelSize / 2: 75 | // 76 | // r0_ = r0_ + kKernelSize / 2 = input_buffer_ + kKernelSize 77 | // 78 | // r3_, r4_, and block_size_ then need to be reinitialized, so goto (3). 79 | // 80 | // 8) Else, if we're not on the second load, goto (4). 81 | // 82 | // Note: we're glossing over how the sub-sample handling works with 83 | // |virtual_source_idx_|, etc. 84 | 85 | // MSVC++ requires this to be set before any other includes to get M_PI. 86 | #define _USE_MATH_DEFINES 87 | 88 | #include "sinc_resampler.h" 89 | 90 | #include 91 | #include 92 | #include 93 | 94 | #include 95 | 96 | #include "checks.h" 97 | 98 | namespace webrtc { 99 | 100 | namespace { 101 | 102 | double SincScaleFactor(double io_ratio) { 103 | // |sinc_scale_factor| is basically the normalized cutoff frequency of the 104 | // low-pass filter. 105 | double sinc_scale_factor = io_ratio > 1.0 ? 1.0 / io_ratio : 1.0; 106 | 107 | // The sinc function is an idealized brick-wall filter, but since we're 108 | // windowing it the transition from pass to stop does not happen right away. 
109 | // So we should adjust the low pass filter cutoff slightly downward to avoid 110 | // some aliasing at the very high-end. 111 | // TODO(crogers): this value is empirical and to be more exact should vary 112 | // depending on kKernelSize. 113 | sinc_scale_factor *= 0.9; 114 | 115 | return sinc_scale_factor; 116 | } 117 | 118 | } // namespace 119 | 120 | const size_t SincResampler::kKernelSize; 121 | 122 | // If we know the minimum architecture at compile time, avoid CPU detection. 123 | #if defined(WEBRTC_ARCH_X86_FAMILY) 124 | #if defined(__SSE2__) 125 | #define CONVOLVE_FUNC Convolve_SSE 126 | void SincResampler::InitializeCPUSpecificFeatures() {} 127 | #else 128 | // x86 CPU detection required. Function will be set by 129 | // InitializeCPUSpecificFeatures(). 130 | // TODO(dalecurtis): Once Chrome moves to an SSE baseline this can be removed. 131 | #define CONVOLVE_FUNC convolve_proc_ 132 | 133 | void SincResampler::InitializeCPUSpecificFeatures() { 134 | convolve_proc_ = WebRtc_GetCPUInfo(kSSE2) ? Convolve_SSE : Convolve_C; 135 | } 136 | #endif 137 | #elif defined(WEBRTC_HAS_NEON) 138 | #define CONVOLVE_FUNC Convolve_NEON 139 | void SincResampler::InitializeCPUSpecificFeatures() {} 140 | #else 141 | // Unknown architecture. 142 | #define CONVOLVE_FUNC Convolve_C 143 | 144 | void SincResampler::InitializeCPUSpecificFeatures() {} 145 | 146 | #endif 147 | 148 | SincResampler::SincResampler(double io_sample_rate_ratio, 149 | size_t request_frames, 150 | SincResamplerCallback *read_cb) 151 | : io_sample_rate_ratio_(io_sample_rate_ratio), 152 | read_cb_(read_cb), 153 | request_frames_(request_frames), 154 | input_buffer_size_(request_frames_ + kKernelSize), 155 | // Create input buffers with a 16-byte alignment for SSE optimizations. 
156 | kernel_storage_(static_cast( 157 | AlignedMalloc(sizeof(float) * kKernelStorageSize, 16))), 158 | kernel_pre_sinc_storage_(static_cast( 159 | AlignedMalloc(sizeof(float) * kKernelStorageSize, 16))), 160 | kernel_window_storage_(static_cast( 161 | AlignedMalloc(sizeof(float) * kKernelStorageSize, 16))), 162 | input_buffer_(static_cast( 163 | AlignedMalloc(sizeof(float) * input_buffer_size_, 16))), 164 | #if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(__SSE2__) 165 | convolve_proc_(nullptr), 166 | #endif 167 | r1_(input_buffer_.get()), 168 | r2_(input_buffer_.get() + kKernelSize / 2) { 169 | #if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(__SSE2__) 170 | InitializeCPUSpecificFeatures(); 171 | RTC_DCHECK(convolve_proc_); 172 | #endif 173 | RTC_DCHECK_GT(request_frames_, 0); 174 | Flush(); 175 | RTC_DCHECK_GT(block_size_, kKernelSize); 176 | 177 | memset(kernel_storage_.get(), 0, 178 | sizeof(*kernel_storage_.get()) * kKernelStorageSize); 179 | memset(kernel_pre_sinc_storage_.get(), 0, 180 | sizeof(*kernel_pre_sinc_storage_.get()) * kKernelStorageSize); 181 | memset(kernel_window_storage_.get(), 0, 182 | sizeof(*kernel_window_storage_.get()) * kKernelStorageSize); 183 | 184 | InitializeKernel(); 185 | } 186 | 187 | SincResampler::~SincResampler() {} 188 | 189 | void SincResampler::UpdateRegions(bool second_load) { 190 | // Setup various region pointers in the buffer (see diagram above). If we're 191 | // on the second load we need to slide r0_ to the right by kKernelSize / 2. 192 | r0_ = input_buffer_.get() + (second_load ? kKernelSize : kKernelSize / 2); 193 | r3_ = r0_ + request_frames_ - kKernelSize; 194 | r4_ = r0_ + request_frames_ - kKernelSize / 2; 195 | block_size_ = r4_ - r2_; 196 | 197 | // r1_ at the beginning of the buffer. 198 | RTC_DCHECK_EQ(r1_, input_buffer_.get()); 199 | // r1_ left of r2_, r4_ left of r3_ and size correct. 200 | RTC_DCHECK_EQ(r2_ - r1_, r4_ - r3_); 201 | // r2_ left of r3. 
202 | RTC_DCHECK_LT(r2_, r3_); 203 | } 204 | 205 | void SincResampler::InitializeKernel() { 206 | // Blackman window parameters. 207 | static const double kAlpha = 0.16; 208 | static const double kA0 = 0.5 * (1.0 - kAlpha); 209 | static const double kA1 = 0.5; 210 | static const double kA2 = 0.5 * kAlpha; 211 | 212 | // Generates a set of windowed sinc() kernels. 213 | // We generate a range of sub-sample offsets from 0.0 to 1.0. 214 | const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_); 215 | for (size_t offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) { 216 | const float subsample_offset = 217 | static_cast(offset_idx) / kKernelOffsetCount; 218 | 219 | for (size_t i = 0; i < kKernelSize; ++i) { 220 | const size_t idx = i + offset_idx * kKernelSize; 221 | const float pre_sinc = static_cast( 222 | M_PI * (static_cast(i) - static_cast(kKernelSize / 2) - 223 | subsample_offset)); 224 | kernel_pre_sinc_storage_[idx] = pre_sinc; 225 | 226 | // Compute Blackman window, matching the offset of the sinc(). 227 | const float x = (i - subsample_offset) / kKernelSize; 228 | const float window = static_cast(kA0 - kA1 * cos(2.0 * M_PI * x) + 229 | kA2 * cos(4.0 * M_PI * x)); 230 | kernel_window_storage_[idx] = window; 231 | 232 | // Compute the sinc with offset, then window the sinc() function and store 233 | // at the correct offset. 234 | kernel_storage_[idx] = static_cast( 235 | window * ((pre_sinc == 0) 236 | ? sinc_scale_factor 237 | : (sin(sinc_scale_factor * pre_sinc) / pre_sinc))); 238 | } 239 | } 240 | } 241 | 242 | void SincResampler::SetRatio(double io_sample_rate_ratio) { 243 | if (fabs(io_sample_rate_ratio_ - io_sample_rate_ratio) < 244 | std::numeric_limits::epsilon()) { 245 | return; 246 | } 247 | 248 | io_sample_rate_ratio_ = io_sample_rate_ratio; 249 | 250 | // Optimize reinitialization by reusing values which are independent of 251 | // |sinc_scale_factor|. Provides a 3x speedup. 
252 | const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_); 253 | for (size_t offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) { 254 | for (size_t i = 0; i < kKernelSize; ++i) { 255 | const size_t idx = i + offset_idx * kKernelSize; 256 | const float window = kernel_window_storage_[idx]; 257 | const float pre_sinc = kernel_pre_sinc_storage_[idx]; 258 | 259 | kernel_storage_[idx] = static_cast( 260 | window * ((pre_sinc == 0) 261 | ? sinc_scale_factor 262 | : (sin(sinc_scale_factor * pre_sinc) / pre_sinc))); 263 | } 264 | } 265 | } 266 | 267 | void SincResampler::Resample(size_t frames, float *destination) { 268 | size_t remaining_frames = frames; 269 | 270 | // Step (1) -- Prime the input buffer at the start of the input stream. 271 | if (!buffer_primed_ && remaining_frames) { 272 | read_cb_->Run(request_frames_, r0_); 273 | buffer_primed_ = true; 274 | } 275 | 276 | // Step (2) -- Resample! const what we can outside of the loop for speed. It 277 | // actually has an impact on ARM performance. See inner loop comment below. 278 | const double current_io_ratio = io_sample_rate_ratio_; 279 | const float *const kernel_ptr = kernel_storage_.get(); 280 | while (remaining_frames) { 281 | // |i| may be negative if the last Resample() call ended on an iteration 282 | // that put |virtual_source_idx_| over the limit. 283 | // 284 | // Note: The loop construct here can severely impact performance on ARM 285 | // or when built with clang. See https://codereview.chromium.org/18566009/ 286 | for (int i = static_cast( 287 | ceil((block_size_ - virtual_source_idx_) / current_io_ratio)); 288 | i > 0; --i) { 289 | RTC_DCHECK_LT(virtual_source_idx_, block_size_); 290 | 291 | // |virtual_source_idx_| lies in between two kernel offsets so figure out 292 | // what they are. 
293 | const int source_idx = static_cast(virtual_source_idx_); 294 | const double subsample_remainder = virtual_source_idx_ - source_idx; 295 | 296 | const double virtual_offset_idx = 297 | subsample_remainder * kKernelOffsetCount; 298 | const int offset_idx = static_cast(virtual_offset_idx); 299 | 300 | // We'll compute "convolutions" for the two kernels which straddle 301 | // |virtual_source_idx_|. 302 | const float *const k1 = kernel_ptr + offset_idx * kKernelSize; 303 | const float *const k2 = k1 + kKernelSize; 304 | 305 | // Ensure |k1|, |k2| are 16-byte aligned for SIMD usage. Should always be 306 | // true so long as kKernelSize is a multiple of 16. 307 | RTC_DCHECK_EQ(0, reinterpret_cast(k1) % 16); 308 | RTC_DCHECK_EQ(0, reinterpret_cast(k2) % 16); 309 | 310 | // Initialize input pointer based on quantized |virtual_source_idx_|. 311 | const float *const input_ptr = r1_ + source_idx; 312 | 313 | // Figure out how much to weight each kernel's "convolution". 314 | const double kernel_interpolation_factor = 315 | virtual_offset_idx - offset_idx; 316 | *destination++ = 317 | CONVOLVE_FUNC(input_ptr, k1, k2, kernel_interpolation_factor); 318 | 319 | // Advance the virtual index. 320 | virtual_source_idx_ += current_io_ratio; 321 | 322 | if (!--remaining_frames) 323 | return; 324 | } 325 | 326 | // Wrap back around to the start. 327 | virtual_source_idx_ -= block_size_; 328 | 329 | // Step (3) -- Copy r3_, r4_ to r1_, r2_. 330 | // This wraps the last input frames back to the start of the buffer. 331 | memcpy(r1_, r3_, sizeof(*input_buffer_.get()) * kKernelSize); 332 | 333 | // Step (4) -- Reinitialize regions if necessary. 334 | if (r0_ == r2_) 335 | UpdateRegions(true); 336 | 337 | // Step (5) -- Refresh the buffer with more input. 
338 | read_cb_->Run(request_frames_, r0_); 339 | } 340 | } 341 | 342 | #undef CONVOLVE_FUNC 343 | 344 | size_t SincResampler::ChunkSize() const { 345 | return static_cast(block_size_ / io_sample_rate_ratio_); 346 | } 347 | 348 | void SincResampler::Flush() { 349 | virtual_source_idx_ = 0; 350 | buffer_primed_ = false; 351 | memset(input_buffer_.get(), 0, 352 | sizeof(*input_buffer_.get()) * input_buffer_size_); 353 | UpdateRegions(false); 354 | } 355 | 356 | float SincResampler::Convolve_C(const float *input_ptr, 357 | const float *k1, 358 | const float *k2, 359 | double kernel_interpolation_factor) { 360 | float sum1 = 0; 361 | float sum2 = 0; 362 | 363 | // Generate a single output sample. Unrolling this loop hurt performance in 364 | // local testing. 365 | size_t n = kKernelSize; 366 | while (n--) { 367 | sum1 += *input_ptr * *k1++; 368 | sum2 += *input_ptr++ * *k2++; 369 | } 370 | 371 | // Linearly interpolate the two "convolutions". 372 | return static_cast((1.0 - kernel_interpolation_factor) * sum1 + 373 | kernel_interpolation_factor * sum2); 374 | } 375 | 376 | } // namespace webrtc 377 | -------------------------------------------------------------------------------- /ns/sinc_resampler.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | // Modified from the Chromium original here: 12 | // src/media/base/sinc_resampler.h 13 | 14 | #ifndef COMMON_AUDIO_RESAMPLER_SINC_RESAMPLER_H_ 15 | #define COMMON_AUDIO_RESAMPLER_SINC_RESAMPLER_H_ 16 | 17 | #include 18 | 19 | #include 20 | 21 | #include "constructor_magic.h" 22 | #include "gtest_prod_util.h" 23 | #include "aligned_malloc.h" 24 | 25 | namespace webrtc { 26 | 27 | // Callback class for providing more data into the resampler. Expects |frames| 28 | // of data to be rendered into |destination|; zero padded if not enough frames 29 | // are available to satisfy the request. 30 | class SincResamplerCallback { 31 | public: 32 | virtual ~SincResamplerCallback() = default; 33 | 34 | virtual void Run(size_t frames, float *destination) = 0; 35 | }; 36 | 37 | // SincResampler is a high-quality single-channel sample-rate converter. 38 | class SincResampler { 39 | public: 40 | // The kernel size can be adjusted for quality (higher is better) at the 41 | // expense of performance. Must be a multiple of 32. 42 | // TODO(dalecurtis): Test performance to see if we can jack this up to 64+. 43 | static const size_t kKernelSize = 32; 44 | 45 | // Default request size. Affects how often and for how much SincResampler 46 | // calls back for input. Must be greater than kKernelSize. 47 | static const size_t kDefaultRequestSize = 512; 48 | 49 | // The kernel offset count is used for interpolation and is the number of 50 | // sub-sample kernel shifts. Can be adjusted for quality (higher is better) 51 | // at the expense of allocating more memory. 52 | static const size_t kKernelOffsetCount = 32; 53 | static const size_t kKernelStorageSize = 54 | kKernelSize * (kKernelOffsetCount + 1); 55 | 56 | // Constructs a SincResampler with the specified |read_cb|, which is used to 57 | // acquire audio data for resampling. |io_sample_rate_ratio| is the ratio 58 | // of input / output sample rates. 
|request_frames| controls the size in 59 | // frames of the buffer requested by each |read_cb| call. The value must be 60 | // greater than kKernelSize. Specify kDefaultRequestSize if there are no 61 | // request size constraints. 62 | SincResampler(double io_sample_rate_ratio, 63 | size_t request_frames, 64 | SincResamplerCallback *read_cb); 65 | 66 | virtual ~SincResampler(); 67 | 68 | // Resample |frames| of data from |read_cb_| into |destination|. 69 | void Resample(size_t frames, float *destination); 70 | 71 | // The maximum size in frames that guarantees Resample() will only make a 72 | // single call to |read_cb_| for more data. 73 | size_t ChunkSize() const; 74 | 75 | size_t request_frames() const { return request_frames_; } 76 | 77 | // Flush all buffered data and reset internal indices. Not thread safe, do 78 | // not call while Resample() is in progress. 79 | void Flush(); 80 | 81 | // Update |io_sample_rate_ratio_|. SetRatio() will cause a reconstruction of 82 | // the kernels used for resampling. Not thread safe, do not call while 83 | // Resample() is in progress. 84 | // 85 | // TODO(ajm): Use this in PushSincResampler rather than reconstructing 86 | // SincResampler. We would also need a way to update |request_frames_|. 87 | void SetRatio(double io_sample_rate_ratio); 88 | 89 | float *get_kernel_for_testing() { return kernel_storage_.get(); } 90 | 91 | private: 92 | FRIEND_TEST_ALL_PREFIXES(SincResamplerTest, Convolve); 93 | 94 | FRIEND_TEST_ALL_PREFIXES(SincResamplerTest, ConvolveBenchmark); 95 | 96 | void InitializeKernel(); 97 | 98 | void UpdateRegions(bool second_load); 99 | 100 | // Selects runtime specific CPU features like SSE. Must be called before 101 | // using SincResampler. 102 | // TODO(ajm): Currently managed by the class internally. See the note with 103 | // |convolve_proc_| below. 
104 | void InitializeCPUSpecificFeatures(); 105 | 106 | // Compute convolution of |k1| and |k2| over |input_ptr|, resultant sums are 107 | // linearly interpolated using |kernel_interpolation_factor|. On x86 and ARM 108 | // the underlying implementation is chosen at run time. 109 | static float Convolve_C(const float *input_ptr, 110 | const float *k1, 111 | const float *k2, 112 | double kernel_interpolation_factor); 113 | 114 | #if defined(WEBRTC_ARCH_X86_FAMILY) 115 | static float Convolve_SSE(const float* input_ptr, 116 | const float* k1, 117 | const float* k2, 118 | double kernel_interpolation_factor); 119 | #elif defined(WEBRTC_HAS_NEON) 120 | static float Convolve_NEON(const float* input_ptr, 121 | const float* k1, 122 | const float* k2, 123 | double kernel_interpolation_factor); 124 | #endif 125 | 126 | // The ratio of input / output sample rates. 127 | double io_sample_rate_ratio_; 128 | 129 | // An index on the source input buffer with sub-sample precision. It must be 130 | // double precision to avoid drift. 131 | double virtual_source_idx_{}; 132 | 133 | // The buffer is primed once at the very beginning of processing. 134 | bool buffer_primed_{}; 135 | 136 | // Source of data for resampling. 137 | SincResamplerCallback *read_cb_; 138 | 139 | // The size (in samples) to request from each |read_cb_| execution. 140 | const size_t request_frames_; 141 | 142 | // The number of source frames processed per pass. 143 | size_t block_size_{}; 144 | 145 | // The size (in samples) of the internal buffer used by the resampler. 146 | const size_t input_buffer_size_; 147 | 148 | // Contains kKernelOffsetCount kernels back-to-back, each of size kKernelSize. 149 | // The kernel offsets are sub-sample shifts of a windowed sinc shifted from 150 | // 0.0 to 1.0 sample. 
151 | std::unique_ptr kernel_storage_; 152 | std::unique_ptr kernel_pre_sinc_storage_; 153 | std::unique_ptr kernel_window_storage_; 154 | 155 | // Data from the source is copied into this buffer for each processing pass. 156 | std::unique_ptr input_buffer_; 157 | 158 | // Stores the runtime selection of which Convolve function to use. 159 | // TODO(ajm): Move to using a global static which must only be initialized 160 | // once by the user. We're not doing this initially, because we don't have 161 | // e.g. a LazyInstance helper in webrtc. 162 | #if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(__SSE2__) 163 | typedef float (*ConvolveProc)(const float*, 164 | const float*, 165 | const float*, 166 | double); 167 | ConvolveProc convolve_proc_; 168 | #endif 169 | 170 | // Pointers to the various regions inside |input_buffer_|. See the diagram at 171 | // the top of the .cc file for more information. 172 | float *r0_{}; 173 | float *const r1_; 174 | float *const r2_; 175 | float *r3_{}; 176 | float *r4_{}; 177 | 178 | RTC_DISALLOW_COPY_AND_ASSIGN(SincResampler); 179 | }; 180 | 181 | } // namespace webrtc 182 | 183 | #endif // COMMON_AUDIO_RESAMPLER_SINC_RESAMPLER_H_ 184 | -------------------------------------------------------------------------------- /ns/speech_probability_estimator.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #include "speech_probability_estimator.h" 12 | 13 | #include 14 | #include 15 | 16 | #include "fast_math.h" 17 | #include "checks.h" 18 | 19 | namespace webrtc { 20 | 21 | SpeechProbabilityEstimator::SpeechProbabilityEstimator() { 22 | speech_probability_.fill(0.f); 23 | } 24 | 25 | void SpeechProbabilityEstimator::Update( 26 | int32_t num_analyzed_frames, 27 | rtc::ArrayView prior_snr, 28 | rtc::ArrayView post_snr, 29 | rtc::ArrayView conservative_noise_spectrum, 30 | rtc::ArrayView signal_spectrum, 31 | float signal_spectral_sum, 32 | float signal_energy) { 33 | // Update models. 34 | if (num_analyzed_frames < kLongStartupPhaseBlocks) { 35 | signal_model_estimator_.AdjustNormalization(num_analyzed_frames, 36 | signal_energy); 37 | } 38 | signal_model_estimator_.Update(prior_snr, post_snr, 39 | conservative_noise_spectrum, signal_spectrum, 40 | signal_spectral_sum, signal_energy); 41 | 42 | const SignalModel &model = signal_model_estimator_.get_model(); 43 | const PriorSignalModel &prior_model = 44 | signal_model_estimator_.get_prior_model(); 45 | 46 | // Width parameter in sigmoid map for prior model. 47 | constexpr float kWidthPrior0 = 4.f; 48 | // Width for pause region: lower range, so increase width in tanh map. 49 | constexpr float kWidthPrior1 = 2.f * kWidthPrior0; 50 | 51 | // Average LRT feature: use larger width in tanh map for pause regions. 52 | float width_prior = model.lrt < prior_model.lrt ? kWidthPrior1 : kWidthPrior0; 53 | 54 | // Compute indicator function: sigmoid map. 55 | float indicator0 = 56 | 0.5f * (tanh(width_prior * (model.lrt - prior_model.lrt)) + 1.f); 57 | 58 | // Spectral flatness feature: use larger width in tanh map for pause regions. 59 | width_prior = model.spectral_flatness > prior_model.flatness_threshold 60 | ? kWidthPrior1 61 | : kWidthPrior0; 62 | 63 | // Compute indicator function: sigmoid map. 
64 | float indicator1 = 65 | 0.5f * (tanh(1.f * width_prior * 66 | (prior_model.flatness_threshold - model.spectral_flatness)) + 67 | 1.f); 68 | 69 | // For template spectrum-difference : use larger width in tanh map for pause 70 | // regions. 71 | width_prior = model.spectral_diff < prior_model.template_diff_threshold 72 | ? kWidthPrior1 73 | : kWidthPrior0; 74 | 75 | // Compute indicator function: sigmoid map. 76 | float indicator2 = 77 | 0.5f * (tanh(width_prior * (model.spectral_diff - 78 | prior_model.template_diff_threshold)) + 79 | 1.f); 80 | 81 | // Combine the indicator function with the feature weights. 82 | float ind_prior = prior_model.lrt_weighting * indicator0 + 83 | prior_model.flatness_weighting * indicator1 + 84 | prior_model.difference_weighting * indicator2; 85 | 86 | // Compute the prior probability. 87 | prior_speech_prob_ += 0.1f * (ind_prior - prior_speech_prob_); 88 | 89 | // Make sure probabilities are within range: keep floor to 0.01. 90 | prior_speech_prob_ = std::max(std::min(prior_speech_prob_, 1.f), 0.01f); 91 | 92 | // Final speech probability: combine prior model with LR factor:. 93 | float gain_prior = 94 | (1.f - prior_speech_prob_) / (prior_speech_prob_ + 0.0001f); 95 | 96 | std::array inv_lrt{}; 97 | ExpApproximationSignFlip(model.avg_log_lrt, inv_lrt); 98 | for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { 99 | speech_probability_[i] = 1.f / (1.f + gain_prior * inv_lrt[i]); 100 | } 101 | } 102 | 103 | } // namespace webrtc 104 | -------------------------------------------------------------------------------- /ns/speech_probability_estimator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. 
An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_NS_SPEECH_PROBABILITY_ESTIMATOR_H_ 12 | #define MODULES_AUDIO_PROCESSING_NS_SPEECH_PROBABILITY_ESTIMATOR_H_ 13 | 14 | #include 15 | 16 | #include "array_view.h" 17 | #include "ns_common.h" 18 | #include "signal_model_estimator.h" 19 | 20 | namespace webrtc { 21 | 22 | // Class for estimating the probability of speech. 23 | class SpeechProbabilityEstimator { 24 | public: 25 | SpeechProbabilityEstimator(); 26 | 27 | SpeechProbabilityEstimator(const SpeechProbabilityEstimator &) = delete; 28 | 29 | SpeechProbabilityEstimator &operator=(const SpeechProbabilityEstimator &) = 30 | delete; 31 | 32 | // Compute speech probability. 33 | void Update( 34 | int32_t num_analyzed_frames, 35 | rtc::ArrayView prior_snr, 36 | rtc::ArrayView post_snr, 37 | rtc::ArrayView conservative_noise_spectrum, 38 | rtc::ArrayView signal_spectrum, 39 | float signal_spectral_sum, 40 | float signal_energy); 41 | 42 | float get_prior_probability() const { return prior_speech_prob_; } 43 | 44 | rtc::ArrayView get_probability() { return speech_probability_; } 45 | 46 | private: 47 | SignalModelEstimator signal_model_estimator_; 48 | float prior_speech_prob_ = .5f; 49 | std::array speech_probability_{}; 50 | }; 51 | 52 | } // namespace webrtc 53 | 54 | #endif // MODULES_AUDIO_PROCESSING_NS_SPEECH_PROBABILITY_ESTIMATOR_H_ 55 | -------------------------------------------------------------------------------- /ns/splitting_filter.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. 
An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | /* 12 | * This file contains the splitting filter functions. 13 | * 14 | */ 15 | 16 | #include "checks.h" 17 | #include 18 | // C + the 32 most significant bits of A * B 19 | #define WEBRTC_SPL_SCALEDIFF32(A, B, C) \ 20 | (C + (B >> 16) * A + (((uint32_t)(B & 0x0000FFFF) * A) >> 16)) 21 | 22 | static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) { 23 | int16_t out16 = (int16_t) value32; 24 | 25 | if (value32 > 32767) 26 | out16 = 32767; 27 | else if (value32 < -32768) 28 | out16 = -32768; 29 | 30 | return out16; 31 | } 32 | 33 | 34 | static __inline int32_t WebRtcSpl_SubSatW32(int32_t a, int32_t b) { 35 | // Do the subtraction in unsigned numbers, since signed overflow is undefined 36 | // behavior. 37 | const int32_t diff = (int32_t) ((uint32_t) a - (uint32_t) b); 38 | 39 | // a - b can't overflow if a and b have the same sign. If they have different 40 | // signs, a - b has the same sign as a iff it didn't overflow. 41 | if ((a < 0) != (b < 0) && (a < 0) != (diff < 0)) { 42 | // The direction of the overflow is obvious from the sign of a - b. 43 | return diff < 0 ? INT32_MAX : INT32_MIN; 44 | } 45 | return diff; 46 | } 47 | 48 | 49 | // Maximum number of samples in a low/high-band frame. 50 | enum { 51 | kMaxBandFrameLength = 320 // 10 ms at 64 kHz. 52 | }; 53 | 54 | // QMF filter coefficients in Q16. 55 | static const uint16_t WebRtcSpl_kAllPassFilter1[3] = {6418, 36982, 57261}; 56 | static const uint16_t WebRtcSpl_kAllPassFilter2[3] = {21333, 49062, 63010}; 57 | 58 | /////////////////////////////////////////////////////////////////////////////////////////////// 59 | // WebRtcSpl_AllPassQMF(...) 60 | // 61 | // Allpass filter used by the analysis and synthesis parts of the QMF filter. 
//
// Input:
//    - in_data             : Input data sequence (Q10)
//    - data_length         : Length of data sequence (>2)
//    - filter_coefficients : Filter coefficients (length 3, Q16)
//
// Input & Output:
//    - filter_state        : Filter state (length 6, Q10).
//
// Output:
//    - out_data            : Output data sequence (Q10), length equal to
//                            |data_length|
//

void WebRtcSpl_AllPassQMF(int32_t *in_data, size_t data_length,
                          int32_t *out_data, const uint16_t *filter_coefficients,
                          int32_t *filter_state) {
    // The procedure is to filter the input with three first order all pass filters
    // (cascade operations).
    //
    //         a_3 + q^-1    a_2 + q^-1    a_1 + q^-1
    // y[n] =  -----------   -----------   -----------   x[n]
    //         1 + a_3q^-1   1 + a_2q^-1   1 + a_1q^-1
    //
    // The input vector |filter_coefficients| includes these three filter coefficients.
    // The filter state contains the in_data state, in_data[-1], followed by
    // the out_data state, out_data[-1]. This is repeated for each cascade.
    // The first cascade filter will filter the |in_data| and store the output in
    // |out_data|. The second will then take the |out_data| as input and make an
    // intermediate storage in |in_data|, to save memory. The third, and final, cascade
    // filter operation takes the |in_data| (which is the output from the previous cascade
    // filter) and store the output in |out_data|.
    // Note that the input vector values are changed during the process.
    size_t k;
    int32_t diff;

    // Nothing to filter. Without this guard the code below would read
    // in_data[0] and index [data_length - 1] with an empty sequence.
    if (data_length == 0) {
        return;
    }

    // First all-pass cascade; filter from in_data to out_data.

    // Let y_i[n] indicate the output of cascade filter i (with filter coefficient a_i) at
    // vector position n. Then the final output will be y[n] = y_3[n]

    // First loop, use the states stored in memory.
    // "diff" should be safe from wrap around since max values are 2^25
    // diff = (x[0] - y_1[-1])
    diff = WebRtcSpl_SubSatW32(in_data[0], filter_state[1]);
    // y_1[0] =  x[-1] + a_1 * (x[0] - y_1[-1])
    out_data[0] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[0], diff, filter_state[0]);

    // For the remaining loops, use previous values.
    for (k = 1; k < data_length; k++) {
        // diff = (x[n] - y_1[n-1])
        diff = WebRtcSpl_SubSatW32(in_data[k], out_data[k - 1]);
        // y_1[n] =  x[n-1] + a_1 * (x[n] - y_1[n-1])
        out_data[k] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[0], diff, in_data[k - 1]);
    }

    // Update states.
    filter_state[0] = in_data[data_length - 1];   // x[N-1], becomes x[-1] next time
    filter_state[1] = out_data[data_length - 1];  // y_1[N-1], becomes y_1[-1] next time

    // Second all-pass cascade; filter from out_data to in_data.
    // diff = (y_1[0] - y_2[-1])
    diff = WebRtcSpl_SubSatW32(out_data[0], filter_state[3]);
    // y_2[0] =  y_1[-1] + a_2 * (y_1[0] - y_2[-1])
    in_data[0] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[1], diff, filter_state[2]);
    for (k = 1; k < data_length; k++) {
        // diff = (y_1[n] - y_2[n-1])
        diff = WebRtcSpl_SubSatW32(out_data[k], in_data[k - 1]);
        // y_2[n] =  y_1[n-1] + a_2 * (y_1[n] - y_2[n-1])
        in_data[k] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[1], diff, out_data[k - 1]);
    }

    filter_state[2] = out_data[data_length - 1];  // y_1[N-1], becomes y_1[-1] next time
    filter_state[3] = in_data[data_length - 1];   // y_2[N-1], becomes y_2[-1] next time

    // Third all-pass cascade; filter from in_data to out_data.
    // diff = (y_2[0] - y[-1])
    diff = WebRtcSpl_SubSatW32(in_data[0], filter_state[5]);
    // y[0] =  y_2[-1] + a_3 * (y_2[0] - y[-1])
    out_data[0] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[2], diff, filter_state[4]);
    for (k = 1; k < data_length; k++) {
        // diff = (y_2[n] - y[n-1])
        diff = WebRtcSpl_SubSatW32(in_data[k], out_data[k - 1]);
        // y[n] =  y_2[n-1] + a_3 * (y_2[n] - y[n-1])
        out_data[k] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[2], diff, in_data[k - 1]);
    }
    filter_state[4] = in_data[data_length - 1];   // y_2[N-1], becomes y_2[-1] next time
    filter_state[5] = out_data[data_length - 1];  // y[N-1], becomes y[-1] next time
}

// Splits |in_data| (|in_data_length| samples, must be even) into a low band
// and a high band of in_data_length / 2 samples each. |filter_state1| and
// |filter_state2| carry the all-pass filter states between calls.
void WebRtcSpl_AnalysisQMF(const int16_t *in_data, size_t in_data_length,
                           int16_t *low_band, int16_t *high_band,
                           int32_t *filter_state1, int32_t *filter_state2) {
    size_t i;
    int16_t k;
    int32_t tmp;
    int32_t half_in1[kMaxBandFrameLength];
    int32_t half_in2[kMaxBandFrameLength];
    int32_t filter1[kMaxBandFrameLength];
    int32_t filter2[kMaxBandFrameLength];
    const size_t band_length = in_data_length / 2;
    RTC_DCHECK_EQ(0, in_data_length % 2);
    RTC_DCHECK_LE(band_length, kMaxBandFrameLength);

    // Split even and odd samples. Also shift them to Q10.
    for (i = 0, k = 0; i < band_length; i++, k += 2) {
        half_in2[i] = ((int32_t) in_data[k]) * (1 << 10);
        half_in1[i] = ((int32_t) in_data[k + 1]) * (1 << 10);
    }

    // All pass filter even and odd samples, independently.
    WebRtcSpl_AllPassQMF(half_in1, band_length, filter1,
                         WebRtcSpl_kAllPassFilter1, filter_state1);
    WebRtcSpl_AllPassQMF(half_in2, band_length, filter2,
                         WebRtcSpl_kAllPassFilter2, filter_state2);

    // Take the sum and difference of filtered version of odd and even
    // branches to get upper & lower band.
    for (i = 0; i < band_length; i++) {
        tmp = (filter1[i] + filter2[i] + 1024) >> 11;
        low_band[i] = WebRtcSpl_SatW32ToW16(tmp);

        tmp = (filter1[i] - filter2[i] + 1024) >> 11;
        high_band[i] = WebRtcSpl_SatW32ToW16(tmp);
    }
}


// Recombines |low_band| and |high_band| (|band_length| samples each) into
// 2 * band_length interleaved output samples in |out_data|. |filter_state1|
// and |filter_state2| carry the all-pass filter states between calls.
void WebRtcSpl_SynthesisQMF(const int16_t *low_band, const int16_t *high_band,
                            size_t band_length, int16_t *out_data,
                            int32_t *filter_state1, int32_t *filter_state2) {
    int32_t tmp;
    int32_t half_in1[kMaxBandFrameLength];
    int32_t half_in2[kMaxBandFrameLength];
    int32_t filter1[kMaxBandFrameLength];
    int32_t filter2[kMaxBandFrameLength];
    size_t i;
    int16_t k;
    RTC_DCHECK_LE(band_length, kMaxBandFrameLength);

    // Obtain the sum and difference channels out of upper and lower-band channels.
    // Also shift to Q10 domain.
    for (i = 0; i < band_length; i++) {
        tmp = (int32_t) low_band[i] + (int32_t) high_band[i];
        half_in1[i] = tmp * (1 << 10);
        tmp = (int32_t) low_band[i] - (int32_t) high_band[i];
        half_in2[i] = tmp * (1 << 10);
    }

    // all-pass filter the sum and difference channels
    WebRtcSpl_AllPassQMF(half_in1, band_length, filter1,
                         WebRtcSpl_kAllPassFilter2, filter_state1);
    WebRtcSpl_AllPassQMF(half_in2, band_length, filter2,
                         WebRtcSpl_kAllPassFilter1, filter_state2);

    // The filtered signals are even and odd samples of the output. Combine
    // them. The signals are Q10 should shift them back to Q0 and take care of
    // saturation.
    for (i = 0, k = 0; i < band_length; i++) {
        tmp = (filter2[i] + 512) >> 10;
        out_data[k++] = WebRtcSpl_SatW32ToW16(tmp);

        tmp = (filter1[i] + 512) >> 10;
        out_data[k++] = WebRtcSpl_SatW32ToW16(tmp);
    }

}

--------------------------------------------------------------------------------
/ns/splitting_filter.cc:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "splitting_filter.h"

// NOTE(review): the target of this system include was lost when this text was
// extracted; <array> is what this file uses (std::array) - confirm.
#include <array>

#include "array_view.h"
#include "channel_buffer.h"
#include "checks.h"

namespace webrtc {
namespace {

constexpr size_t kSamplesPerBand = 160;
constexpr size_t kTwoBandFilterSamplesPerFrame = 320;

}  // namespace

SplittingFilter::SplittingFilter(size_t num_channels,
                                 size_t num_bands,
                                 size_t num_frames)
    : num_bands_(num_bands),
      two_bands_states_(num_bands_ == 2 ? num_channels : 0),
      three_band_filter_banks_(num_bands_ == 3 ?
num_channels : 0) { 33 | RTC_CHECK(num_bands_ == 2 || num_bands_ == 3); 34 | } 35 | 36 | SplittingFilter::~SplittingFilter() = default; 37 | 38 | void SplittingFilter::Analysis(const ChannelBuffer *data, 39 | ChannelBuffer *bands) { 40 | RTC_DCHECK_EQ(num_bands_, bands->num_bands()); 41 | RTC_DCHECK_EQ(data->num_channels(), bands->num_channels()); 42 | RTC_DCHECK_EQ(data->num_frames(), 43 | bands->num_frames_per_band() * bands->num_bands()); 44 | if (bands->num_bands() == 2) { 45 | TwoBandsAnalysis(data, bands); 46 | } else if (bands->num_bands() == 3) { 47 | ThreeBandsAnalysis(data, bands); 48 | } 49 | } 50 | 51 | void SplittingFilter::Synthesis(const ChannelBuffer *bands, 52 | ChannelBuffer *data) { 53 | RTC_DCHECK_EQ(num_bands_, bands->num_bands()); 54 | RTC_DCHECK_EQ(data->num_channels(), bands->num_channels()); 55 | RTC_DCHECK_EQ(data->num_frames(), 56 | bands->num_frames_per_band() * bands->num_bands()); 57 | if (bands->num_bands() == 2) { 58 | TwoBandsSynthesis(bands, data); 59 | } else if (bands->num_bands() == 3) { 60 | ThreeBandsSynthesis(bands, data); 61 | } 62 | } 63 | 64 | void SplittingFilter::TwoBandsAnalysis(const ChannelBuffer *data, 65 | ChannelBuffer *bands) { 66 | RTC_DCHECK_EQ(two_bands_states_.size(), data->num_channels()); 67 | RTC_DCHECK_EQ(data->num_frames(), kTwoBandFilterSamplesPerFrame); 68 | 69 | for (size_t i = 0; i < two_bands_states_.size(); ++i) { 70 | std::array, 2> bands16{}; 71 | std::array full_band16{}; 72 | FloatS16ToS16(data->channels(0)[i], full_band16.size(), full_band16.data()); 73 | WebRtcSpl_AnalysisQMF(full_band16.data(), data->num_frames(), 74 | bands16[0].data(), bands16[1].data(), 75 | two_bands_states_[i].analysis_state1, 76 | two_bands_states_[i].analysis_state2); 77 | S16ToFloatS16(bands16[0].data(), bands16[0].size(), bands->channels(0)[i]); 78 | S16ToFloatS16(bands16[1].data(), bands16[1].size(), bands->channels(1)[i]); 79 | } 80 | } 81 | 82 | void SplittingFilter::TwoBandsSynthesis(const ChannelBuffer *bands, 
83 | ChannelBuffer *data) { 84 | RTC_DCHECK_LE(data->num_channels(), two_bands_states_.size()); 85 | RTC_DCHECK_EQ(data->num_frames(), kTwoBandFilterSamplesPerFrame); 86 | for (size_t i = 0; i < data->num_channels(); ++i) { 87 | std::array, 2> bands16{}; 88 | std::array full_band16{}; 89 | FloatS16ToS16(bands->channels(0)[i], bands16[0].size(), bands16[0].data()); 90 | FloatS16ToS16(bands->channels(1)[i], bands16[1].size(), bands16[1].data()); 91 | WebRtcSpl_SynthesisQMF(bands16[0].data(), bands16[1].data(), 92 | bands->num_frames_per_band(), full_band16.data(), 93 | two_bands_states_[i].synthesis_state1, 94 | two_bands_states_[i].synthesis_state2); 95 | S16ToFloatS16(full_band16.data(), full_band16.size(), data->channels(0)[i]); 96 | } 97 | } 98 | 99 | void SplittingFilter::ThreeBandsAnalysis(const ChannelBuffer *data, 100 | ChannelBuffer *bands) { 101 | RTC_DCHECK_EQ(three_band_filter_banks_.size(), data->num_channels()); 102 | RTC_DCHECK_LE(data->num_channels(), three_band_filter_banks_.size()); 103 | RTC_DCHECK_LE(data->num_channels(), bands->num_channels()); 104 | RTC_DCHECK_EQ(data->num_frames(), ThreeBandFilterBank::kFullBandSize); 105 | RTC_DCHECK_EQ(bands->num_frames(), ThreeBandFilterBank::kFullBandSize); 106 | RTC_DCHECK_EQ(bands->num_bands(), ThreeBandFilterBank::kNumBands); 107 | RTC_DCHECK_EQ(bands->num_frames_per_band(), 108 | ThreeBandFilterBank::kSplitBandSize); 109 | 110 | for (size_t i = 0; i < three_band_filter_banks_.size(); ++i) { 111 | three_band_filter_banks_[i].Analysis( 112 | rtc::ArrayView( 113 | data->channels_view()[i].data(), 114 | ThreeBandFilterBank::kFullBandSize), 115 | rtc::ArrayView, 116 | ThreeBandFilterBank::kNumBands>( 117 | bands->bands_view(i).data(), ThreeBandFilterBank::kNumBands)); 118 | } 119 | } 120 | 121 | void SplittingFilter::ThreeBandsSynthesis(const ChannelBuffer *bands, 122 | ChannelBuffer *data) { 123 | RTC_DCHECK_LE(data->num_channels(), three_band_filter_banks_.size()); 124 | RTC_DCHECK_LE(data->num_channels(), 
bands->num_channels()); 125 | RTC_DCHECK_LE(data->num_channels(), three_band_filter_banks_.size()); 126 | RTC_DCHECK_EQ(data->num_frames(), ThreeBandFilterBank::kFullBandSize); 127 | RTC_DCHECK_EQ(bands->num_frames(), ThreeBandFilterBank::kFullBandSize); 128 | RTC_DCHECK_EQ(bands->num_bands(), ThreeBandFilterBank::kNumBands); 129 | RTC_DCHECK_EQ(bands->num_frames_per_band(), 130 | ThreeBandFilterBank::kSplitBandSize); 131 | 132 | for (size_t i = 0; i < data->num_channels(); ++i) { 133 | three_band_filter_banks_[i].Synthesis( 134 | rtc::ArrayView, 135 | ThreeBandFilterBank::kNumBands>( 136 | bands->bands_view(i).data(), ThreeBandFilterBank::kNumBands), 137 | rtc::ArrayView( 138 | data->channels_view()[i].data(), 139 | ThreeBandFilterBank::kFullBandSize)); 140 | } 141 | } 142 | 143 | } // namespace webrtc 144 | -------------------------------------------------------------------------------- /ns/splitting_filter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_ 12 | #define MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_ 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | #include "channel_buffer.h" 19 | #include "three_band_filter_bank.h" 20 | 21 | #ifdef __cplusplus 22 | extern "C" { 23 | #endif 24 | 25 | void WebRtcSpl_AnalysisQMF(const int16_t *in_data, 26 | size_t in_data_length, 27 | int16_t *low_band, 28 | int16_t *high_band, 29 | int32_t *filter_state1, 30 | int32_t *filter_state2); 31 | void WebRtcSpl_SynthesisQMF(const int16_t *low_band, 32 | const int16_t *high_band, 33 | size_t band_length, 34 | int16_t *out_data, 35 | int32_t *filter_state1, 36 | int32_t *filter_state2); 37 | #ifdef __cplusplus 38 | } 39 | #endif // __cplusplus 40 | namespace webrtc { 41 | 42 | struct TwoBandsStates { 43 | TwoBandsStates() { 44 | memset(analysis_state1, 0, sizeof(analysis_state1)); 45 | memset(analysis_state2, 0, sizeof(analysis_state2)); 46 | memset(synthesis_state1, 0, sizeof(synthesis_state1)); 47 | memset(synthesis_state2, 0, sizeof(synthesis_state2)); 48 | } 49 | 50 | static const int kStateSize = 6; 51 | int analysis_state1[kStateSize]{}; 52 | int analysis_state2[kStateSize]{}; 53 | int synthesis_state1[kStateSize]{}; 54 | int synthesis_state2[kStateSize]{}; 55 | }; 56 | 57 | // Splitting filter which is able to split into and merge from 2 or 3 frequency 58 | // bands. The number of channels needs to be provided at construction time. 59 | // 60 | // For each block, Analysis() is called to split into bands and then Synthesis() 61 | // to merge these bands again. The input and output signals are contained in 62 | // ChannelBuffers and for the different bands an array of ChannelBuffers is 63 | // used. 
64 | class SplittingFilter { 65 | public: 66 | SplittingFilter(size_t num_channels, size_t num_bands, size_t num_frames); 67 | 68 | ~SplittingFilter(); 69 | 70 | void Analysis(const ChannelBuffer *data, ChannelBuffer *bands); 71 | 72 | void Synthesis(const ChannelBuffer *bands, ChannelBuffer *data); 73 | 74 | private: 75 | // Two-band analysis and synthesis work for 640 samples or less. 76 | void TwoBandsAnalysis(const ChannelBuffer *data, 77 | ChannelBuffer *bands); 78 | 79 | void TwoBandsSynthesis(const ChannelBuffer *bands, 80 | ChannelBuffer *data); 81 | 82 | void ThreeBandsAnalysis(const ChannelBuffer *data, 83 | ChannelBuffer *bands); 84 | 85 | void ThreeBandsSynthesis(const ChannelBuffer *bands, 86 | ChannelBuffer *data); 87 | 88 | const size_t num_bands_; 89 | std::vector two_bands_states_; 90 | std::vector three_band_filter_banks_; 91 | }; 92 | 93 | } // namespace webrtc 94 | 95 | #endif // MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_ 96 | -------------------------------------------------------------------------------- /ns/suppression_params.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "suppression_params.h" 12 | 13 | #include "checks.h" 14 | 15 | namespace webrtc { 16 | 17 | SuppressionParams::SuppressionParams( 18 | NsConfig::SuppressionLevel suppression_level) { 19 | switch (suppression_level) { 20 | case NsConfig::SuppressionLevel::k6dB: 21 | over_subtraction_factor = 1.f; 22 | // 6 dB attenuation. 
23 | minimum_attenuating_gain = 0.5f; 24 | use_attenuation_adjustment = false; 25 | break; 26 | case NsConfig::SuppressionLevel::k12dB: 27 | over_subtraction_factor = 1.f; 28 | // 12 dB attenuation. 29 | minimum_attenuating_gain = 0.25f; 30 | use_attenuation_adjustment = true; 31 | break; 32 | case NsConfig::SuppressionLevel::k18dB: 33 | over_subtraction_factor = 1.1f; 34 | // 18 dB attenuation. 35 | minimum_attenuating_gain = 0.125f; 36 | use_attenuation_adjustment = true; 37 | break; 38 | case NsConfig::SuppressionLevel::k21dB: 39 | over_subtraction_factor = 1.25f; 40 | // 20.9 dB attenuation. 41 | minimum_attenuating_gain = 0.09f; 42 | use_attenuation_adjustment = true; 43 | break; 44 | default: 45 | RTC_NOTREACHED(); 46 | } 47 | } 48 | 49 | } // namespace webrtc 50 | -------------------------------------------------------------------------------- /ns/suppression_params.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_NS_SUPPRESSION_PARAMS_H_ 12 | #define MODULES_AUDIO_PROCESSING_NS_SUPPRESSION_PARAMS_H_ 13 | 14 | #include "ns_config.h" 15 | 16 | namespace webrtc { 17 | 18 | struct SuppressionParams { 19 | explicit SuppressionParams(NsConfig::SuppressionLevel suppression_level); 20 | 21 | SuppressionParams(const SuppressionParams &) = delete; 22 | 23 | SuppressionParams &operator=(const SuppressionParams &) = delete; 24 | 25 | float over_subtraction_factor; 26 | float minimum_attenuating_gain; 27 | bool use_attenuation_adjustment; 28 | }; 29 | 30 | } // namespace webrtc 31 | 32 | #endif // MODULES_AUDIO_PROCESSING_NS_SUPPRESSION_PARAMS_H_ 33 | -------------------------------------------------------------------------------- /ns/three_band_filter_bank.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | // An implementation of a 3-band FIR filter-bank with DCT modulation, similar to 12 | // the proposed in "Multirate Signal Processing for Communication Systems" by 13 | // Fredric J Harris. 14 | // 15 | // The idea is to take a heterodyne system and change the order of the 16 | // components to get something which is efficient to implement digitally. 
17 | // 18 | // It is possible to separate the filter using the noble identity as follows: 19 | // 20 | // H(z) = H0(z^3) + z^-1 * H1(z^3) + z^-2 * H2(z^3) 21 | // 22 | // This is used in the analysis stage to first downsample serial to parallel 23 | // and then filter each branch with one of these polyphase decompositions of the 24 | // lowpass prototype. Because each filter is only a modulation of the prototype, 25 | // it is enough to multiply each coefficient by the respective cosine value to 26 | // shift it to the desired band. But because the cosine period is 12 samples, 27 | // it requires separating the prototype even further using the noble identity. 28 | // After filtering and modulating for each band, the output of all filters is 29 | // accumulated to get the downsampled bands. 30 | // 31 | // A similar logic can be applied to the synthesis stage. 32 | 33 | #include "three_band_filter_bank.h" 34 | 35 | #include 36 | 37 | #include "checks.h" 38 | 39 | namespace webrtc { 40 | namespace { 41 | 42 | // Factors to take into account when choosing |kFilterSize|: 43 | // 1. Higher |kFilterSize|, means faster transition, which ensures less 44 | // aliasing. This is especially important when there is non-linear 45 | // processing between the splitting and merging. 46 | // 2. The delay that this filter bank introduces is 47 | // |kNumBands| * |kSparsity| * |kFilterSize| / 2, so it increases linearly 48 | // with |kFilterSize|. 49 | // 3. The computation complexity also increases linearly with |kFilterSize|. 50 | 51 | // The Matlab code to generate these |kFilterCoeffs| is: 52 | // 53 | // N = kNumBands * kSparsity * kFilterSize - 1; 54 | // h = fir1(N, 1 / (2 * kNumBands), kaiser(N + 1, 3.5)); 55 | // reshape(h, kNumBands * kSparsity, kFilterSize); 56 | // 57 | // The code below uses the values of kFilterSize, kNumBands and kSparsity 58 | // specified in the header. 
59 | 60 | // Because the total bandwidth of the lower and higher band is double the middle 61 | // one (because of the spectrum parity), the low-pass prototype is half the 62 | // bandwidth of 1 / (2 * |kNumBands|) and is then shifted with cosine modulation 63 | // to the right places. 64 | // A Kaiser window is used because of its flexibility and the alpha is set to 65 | // 3.5, since that sets a stop band attenuation of 40dB ensuring a fast 66 | // transition. 67 | 68 | constexpr int kSubSampling = ThreeBandFilterBank::kNumBands; 69 | constexpr int kDctSize = ThreeBandFilterBank::kNumBands; 70 | static_assert(ThreeBandFilterBank::kNumBands * 71 | ThreeBandFilterBank::kSplitBandSize == 72 | ThreeBandFilterBank::kFullBandSize, 73 | "The full band must be split in equally sized subbands"); 74 | 75 | const float 76 | kFilterCoeffs[ThreeBandFilterBank::kNumNonZeroFilters][kFilterSize] = { 77 | {-0.00047749f, -0.00496888f, +0.16547118f, +0.00425496f}, 78 | {-0.00173287f, -0.01585778f, +0.14989004f, +0.00994113f}, 79 | {-0.00304815f, -0.02536082f, +0.12154542f, +0.01157993f}, 80 | {-0.00346946f, -0.02587886f, +0.04760441f, +0.00607594f}, 81 | {-0.00154717f, -0.01136076f, +0.01387458f, +0.00186353f}, 82 | {+0.00186353f, +0.01387458f, -0.01136076f, -0.00154717f}, 83 | {+0.00607594f, +0.04760441f, -0.02587886f, -0.00346946f}, 84 | {+0.00983212f, +0.08543175f, -0.02982767f, -0.00383509f}, 85 | {+0.00994113f, +0.14989004f, -0.01585778f, -0.00173287f}, 86 | {+0.00425496f, +0.16547118f, -0.00496888f, -0.00047749f}}; 87 | 88 | constexpr int kZeroFilterIndex1 = 3; 89 | constexpr int kZeroFilterIndex2 = 9; 90 | 91 | const float kDctModulation[ThreeBandFilterBank::kNumNonZeroFilters][kDctSize] = 92 | {{2.f, 2.f, 2.f}, 93 | {1.73205077f, 0.f, -1.73205077f}, 94 | {1.f, -2.f, 1.f}, 95 | {-1.f, 2.f, -1.f}, 96 | {-1.73205077f, 0.f, 1.73205077f}, 97 | {-2.f, -2.f, -2.f}, 98 | {-1.73205077f, 0.f, 1.73205077f}, 99 | {-1.f, 2.f, -1.f}, 100 | {1.f, -2.f, 1.f}, 101 | {1.73205077f, 0.f, 
-1.73205077f}}; 102 | 103 | // Filters the input signal |in| with the filter |filter| using a shift by 104 | // |in_shift|, taking into account the previous state. 105 | void FilterCore( 106 | rtc::ArrayView filter, 107 | rtc::ArrayView in, 108 | const int in_shift, 109 | rtc::ArrayView out, 110 | rtc::ArrayView state) { 111 | constexpr int kMaxInShift = (kStride - 1); 112 | RTC_DCHECK_GE(in_shift, 0); 113 | RTC_DCHECK_LE(in_shift, kMaxInShift); 114 | std::fill(out.begin(), out.end(), 0.f); 115 | 116 | for (int k = 0; k < in_shift; ++k) { 117 | for (int i = 0, j = kMemorySize + k - in_shift; i < kFilterSize; 118 | ++i, j -= kStride) { 119 | out[k] += state[j] * filter[i]; 120 | } 121 | } 122 | 123 | for (int k = in_shift, shift = 0; k < kFilterSize * kStride; ++k, ++shift) { 124 | RTC_DCHECK_GE(shift, 0); 125 | const int loop_limit = std::min(kFilterSize, 1 + (shift >> kStrideLog2)); 126 | for (int i = 0, j = shift; i < loop_limit; ++i, j -= kStride) { 127 | out[k] += in[j] * filter[i]; 128 | } 129 | for (int i = loop_limit, j = kMemorySize + shift - loop_limit * kStride; 130 | i < kFilterSize; ++i, j -= kStride) { 131 | out[k] += state[j] * filter[i]; 132 | } 133 | } 134 | 135 | for (int k = kFilterSize * kStride, shift = kFilterSize * kStride - in_shift; 136 | k < ThreeBandFilterBank::kSplitBandSize; ++k, ++shift) { 137 | for (int i = 0, j = shift; i < kFilterSize; ++i, j -= kStride) { 138 | out[k] += in[j] * filter[i]; 139 | } 140 | } 141 | 142 | // Update current state. 143 | std::copy(in.begin() + ThreeBandFilterBank::kSplitBandSize - kMemorySize, 144 | in.end(), state.begin()); 145 | } 146 | 147 | } // namespace 148 | 149 | // Because the low-pass filter prototype has half bandwidth it is possible to 150 | // use a DCT to shift it in both directions at the same time, to the center 151 | // frequencies [1 / 12, 3 / 12, 5 / 12]. 
152 | ThreeBandFilterBank::ThreeBandFilterBank() { 153 | RTC_DCHECK_EQ(state_analysis_.size(), kNumNonZeroFilters); 154 | RTC_DCHECK_EQ(state_synthesis_.size(), kNumNonZeroFilters); 155 | for (int k = 0; k < kNumNonZeroFilters; ++k) { 156 | RTC_DCHECK_EQ(state_analysis_[k].size(), kMemorySize); 157 | RTC_DCHECK_EQ(state_synthesis_[k].size(), kMemorySize); 158 | 159 | state_analysis_[k].fill(0.f); 160 | state_synthesis_[k].fill(0.f); 161 | } 162 | } 163 | 164 | ThreeBandFilterBank::~ThreeBandFilterBank() = default; 165 | 166 | // The analysis can be separated in these steps: 167 | // 1. Serial to parallel downsampling by a factor of |kNumBands|. 168 | // 2. Filtering of |kSparsity| different delayed signals with polyphase 169 | // decomposition of the low-pass prototype filter and upsampled by a factor 170 | // of |kSparsity|. 171 | // 3. Modulating with cosines and accumulating to get the desired band. 172 | void ThreeBandFilterBank::Analysis( 173 | rtc::ArrayView in, 174 | rtc::ArrayView, ThreeBandFilterBank::kNumBands> 175 | out) { 176 | // Initialize the output to zero. 177 | for (int band = 0; band < ThreeBandFilterBank::kNumBands; ++band) { 178 | RTC_DCHECK_EQ(out[band].size(), kSplitBandSize); 179 | std::fill(out[band].begin(), out[band].end(), 0); 180 | } 181 | 182 | for (int downsampling_index = 0; downsampling_index < kSubSampling; 183 | ++downsampling_index) { 184 | // Downsample to form the filter input. 185 | std::array in_subsampled; 186 | for (int k = 0; k < kSplitBandSize; ++k) { 187 | in_subsampled[k] = 188 | in[(kSubSampling - 1) - downsampling_index + kSubSampling * k]; 189 | } 190 | 191 | for (int in_shift = 0; in_shift < kStride; ++in_shift) { 192 | // Choose filter, skip zero filters. 193 | const int index = downsampling_index + in_shift * kSubSampling; 194 | if (index == kZeroFilterIndex1 || index == kZeroFilterIndex2) { 195 | continue; 196 | } 197 | const int filter_index = 198 | index < kZeroFilterIndex1 199 | ? 
index 200 | : (index < kZeroFilterIndex2 ? index - 1 : index - 2); 201 | 202 | rtc::ArrayView filter( 203 | kFilterCoeffs[filter_index]); 204 | rtc::ArrayView dct_modulation( 205 | kDctModulation[filter_index]); 206 | rtc::ArrayView state(state_analysis_[filter_index]); 207 | 208 | // Filter. 209 | std::array out_subsampled; 210 | FilterCore(filter, in_subsampled, in_shift, out_subsampled, state); 211 | 212 | // Band and modulate the output. 213 | for (int band = 0; band < ThreeBandFilterBank::kNumBands; ++band) { 214 | for (int n = 0; n < kSplitBandSize; ++n) { 215 | out[band][n] += dct_modulation[band] * out_subsampled[n]; 216 | } 217 | } 218 | } 219 | } 220 | } 221 | 222 | // The synthesis can be separated in these steps: 223 | // 1. Modulating with cosines. 224 | // 2. Filtering each one with a polyphase decomposition of the low-pass 225 | // prototype filter upsampled by a factor of |kSparsity| and accumulating 226 | // |kSparsity| signals with different delays. 227 | // 3. Parallel to serial upsampling by a factor of |kNumBands|. 228 | void ThreeBandFilterBank::Synthesis( 229 | rtc::ArrayView, ThreeBandFilterBank::kNumBands> 230 | in, 231 | rtc::ArrayView out) { 232 | std::fill(out.begin(), out.end(), 0); 233 | for (int upsampling_index = 0; upsampling_index < kSubSampling; 234 | ++upsampling_index) { 235 | for (int in_shift = 0; in_shift < kStride; ++in_shift) { 236 | // Choose filter, skip zero filters. 237 | const int index = upsampling_index + in_shift * kSubSampling; 238 | if (index == kZeroFilterIndex1 || index == kZeroFilterIndex2) { 239 | continue; 240 | } 241 | const int filter_index = 242 | index < kZeroFilterIndex1 243 | ? index 244 | : (index < kZeroFilterIndex2 ? 
index - 1 : index - 2); 245 | 246 | rtc::ArrayView filter( 247 | kFilterCoeffs[filter_index]); 248 | rtc::ArrayView dct_modulation( 249 | kDctModulation[filter_index]); 250 | rtc::ArrayView state(state_synthesis_[filter_index]); 251 | 252 | // Prepare filter input by modulating the banded input. 253 | std::array in_subsampled; 254 | std::fill(in_subsampled.begin(), in_subsampled.end(), 0.f); 255 | for (int band = 0; band < ThreeBandFilterBank::kNumBands; ++band) { 256 | RTC_DCHECK_EQ(in[band].size(), kSplitBandSize); 257 | for (int n = 0; n < kSplitBandSize; ++n) { 258 | in_subsampled[n] += dct_modulation[band] * in[band][n]; 259 | } 260 | } 261 | 262 | // Filter. 263 | std::array out_subsampled; 264 | FilterCore(filter, in_subsampled, in_shift, out_subsampled, state); 265 | 266 | // Upsample. 267 | constexpr float kUpsamplingScaling = kSubSampling; 268 | for (int k = 0; k < kSplitBandSize; ++k) { 269 | out[upsampling_index + kSubSampling * k] += 270 | kUpsamplingScaling * out_subsampled[k]; 271 | } 272 | } 273 | } 274 | } 275 | 276 | } // namespace webrtc 277 | -------------------------------------------------------------------------------- /ns/three_band_filter_bank.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_ 12 | #define MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_ 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include "array_view.h" 20 | 21 | namespace webrtc { 22 | 23 | constexpr int kSparsity = 4; 24 | constexpr int kStrideLog2 = 2; 25 | constexpr int kStride = 1 << kStrideLog2; 26 | constexpr int kNumZeroFilters = 2; 27 | constexpr int kFilterSize = 4; 28 | constexpr int kMemorySize = kFilterSize * kStride - 1; 29 | static_assert(kMemorySize == 15, 30 | "The memory size must be sufficient to provide memory for the " 31 | "shifted filters"); 32 | 33 | // An implementation of a 3-band FIR filter-bank with DCT modulation, similar to 34 | // the proposed in "Multirate Signal Processing for Communication Systems" by 35 | // Fredric J Harris. 36 | // The low-pass filter prototype has these characteristics: 37 | // * Pass-band ripple = 0.3dB 38 | // * Pass-band frequency = 0.147 (7kHz at 48kHz) 39 | // * Stop-band attenuation = 40dB 40 | // * Stop-band frequency = 0.192 (9.2kHz at 48kHz) 41 | // * Delay = 24 samples (500us at 48kHz) 42 | // * Linear phase 43 | // This filter bank does not satisfy perfect reconstruction. The SNR after 44 | // analysis and synthesis (with no processing in between) is approximately 9.5dB 45 | // depending on the input signal after compensating for the delay. 46 | class ThreeBandFilterBank final { 47 | public: 48 | static const int kNumBands = 3; 49 | static const int kFullBandSize = 480; 50 | static const int kSplitBandSize = 51 | ThreeBandFilterBank::kFullBandSize / ThreeBandFilterBank::kNumBands; 52 | static const int kNumNonZeroFilters = 53 | kSparsity * ThreeBandFilterBank::kNumBands - kNumZeroFilters; 54 | 55 | ThreeBandFilterBank(); 56 | 57 | ~ThreeBandFilterBank(); 58 | 59 | // Splits |in| of size kFullBandSize into 3 downsampled frequency bands in 60 | // |out|, each of size 160. 
61 | void Analysis(rtc::ArrayView in, 62 | rtc::ArrayView, kNumBands> out); 63 | 64 | // Merges the 3 downsampled frequency bands in |in|, each of size 160, into 65 | // |out|, which is of size kFullBandSize. 66 | void Synthesis(rtc::ArrayView, kNumBands> in, 67 | rtc::ArrayView out); 68 | 69 | private: 70 | std::array, kNumNonZeroFilters> 71 | state_analysis_; 72 | std::array, kNumNonZeroFilters> 73 | state_synthesis_; 74 | }; 75 | 76 | } // namespace webrtc 77 | 78 | #endif // MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_ 79 | -------------------------------------------------------------------------------- /ns/type_traits.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 The WebRTC Project Authors. All rights reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef RTC_BASE_TYPE_TRAITS_H_ 12 | #define RTC_BASE_TYPE_TRAITS_H_ 13 | 14 | #include 15 | #include 16 | 17 | namespace rtc { 18 | 19 | // Determines if the given class has zero-argument .data() and .size() methods 20 | // whose return values are convertible to T* and size_t, respectively. 
21 | template 22 | class HasDataAndSize { 23 | private: 24 | template< 25 | typename C, 26 | typename std::enable_if< 27 | std::is_convertible().data()), T *>::value && 28 | std::is_convertible().size()), 29 | std::size_t>::value>::type * = nullptr> 30 | static int Test(int); 31 | 32 | template 33 | static char Test(...); 34 | 35 | public: 36 | static constexpr bool value = std::is_same(0)), int>::value; 37 | }; 38 | 39 | namespace test_has_data_and_size { 40 | 41 | template 42 | struct Test1 { 43 | DR data(); 44 | 45 | SR size(); 46 | }; 47 | 48 | static_assert(HasDataAndSize, int>::value, ""); 49 | static_assert(HasDataAndSize, const int>::value, ""); 50 | static_assert(HasDataAndSize, const int>::value, ""); 51 | static_assert(!HasDataAndSize, int>::value, 52 | "implicit cast of const int* to int*"); 53 | static_assert(!HasDataAndSize, int>::value, 54 | "implicit cast of char* to int*"); 55 | 56 | struct Test2 { 57 | int *data; 58 | size_t size; 59 | }; 60 | static_assert(!HasDataAndSize::value, 61 | ".data and .size aren't functions"); 62 | 63 | struct Test3 { 64 | int *data(); 65 | }; 66 | 67 | static_assert(!HasDataAndSize::value, ".size() is missing"); 68 | 69 | class Test4 { 70 | int *data(); 71 | 72 | size_t size(); 73 | }; 74 | 75 | static_assert(!HasDataAndSize::value, 76 | ".data() and .size() are private"); 77 | 78 | } // namespace test_has_data_and_size 79 | 80 | namespace type_traits_impl { 81 | 82 | // Determines if the given type is an enum that converts implicitly to 83 | // an integral type. 84 | template 85 | struct IsIntEnum { 86 | private: 87 | // This overload is used if the type is an enum, and unary plus 88 | // compiles and turns it into an integral type. 89 | template::value && 92 | std::is_integral())>::value>::type * = 93 | nullptr> 94 | static int Test(int); 95 | 96 | // Otherwise, this overload is used. 
97 | template 98 | static char Test(...); 99 | 100 | public: 101 | static constexpr bool value = 102 | std::is_same::type>(0)), 103 | int>::value; 104 | }; 105 | 106 | } // namespace type_traits_impl 107 | 108 | // Determines if the given type is integral, or an enum that 109 | // converts implicitly to an integral type. 110 | template 111 | struct IsIntlike { 112 | private: 113 | using X = typename std::remove_reference::type; 114 | 115 | public: 116 | static constexpr bool value = 117 | std::is_integral::value || type_traits_impl::IsIntEnum::value; 118 | }; 119 | 120 | namespace test_enum_intlike { 121 | 122 | enum E1 { 123 | e1 124 | }; 125 | enum { 126 | e2 127 | }; 128 | enum class E3 { 129 | e3 130 | }; 131 | struct S { 132 | }; 133 | 134 | static_assert(type_traits_impl::IsIntEnum::value, ""); 135 | static_assert(type_traits_impl::IsIntEnum::value, ""); 136 | static_assert(!type_traits_impl::IsIntEnum::value, ""); 137 | static_assert(!type_traits_impl::IsIntEnum::value, ""); 138 | static_assert(!type_traits_impl::IsIntEnum::value, ""); 139 | static_assert(!type_traits_impl::IsIntEnum::value, ""); 140 | 141 | static_assert(IsIntlike::value, ""); 142 | static_assert(IsIntlike::value, ""); 143 | static_assert(!IsIntlike::value, ""); 144 | static_assert(IsIntlike::value, ""); 145 | static_assert(!IsIntlike::value, ""); 146 | static_assert(!IsIntlike::value, ""); 147 | 148 | } // namespace test_enum_intlike 149 | 150 | } // namespace rtc 151 | 152 | #endif // RTC_BASE_TYPE_TRAITS_H_ 153 | -------------------------------------------------------------------------------- /ns/wiener_filter.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. 
An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "wiener_filter.h" 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "fast_math.h" 19 | #include "checks.h" 20 | 21 | namespace webrtc { 22 | 23 | WienerFilter::WienerFilter(const SuppressionParams &suppression_params) 24 | : suppression_params_(suppression_params) { 25 | filter_.fill(1.f); 26 | initial_spectral_estimate_.fill(0.f); 27 | spectrum_prev_process_.fill(0.f); 28 | } 29 | 30 | void WienerFilter::Update( 31 | int32_t num_analyzed_frames, 32 | rtc::ArrayView noise_spectrum, 33 | rtc::ArrayView prev_noise_spectrum, 34 | rtc::ArrayView parametric_noise_spectrum, 35 | rtc::ArrayView signal_spectrum) { 36 | for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { 37 | // Previous estimate based on previous frame with gain filter. 38 | float prev_tsa = spectrum_prev_process_[i] / 39 | (prev_noise_spectrum[i] + 0.0001f) * filter_[i]; 40 | 41 | // Current estimate. 42 | float current_tsa; 43 | if (signal_spectrum[i] > noise_spectrum[i]) { 44 | current_tsa = signal_spectrum[i] / (noise_spectrum[i] + 0.0001f) - 1.f; 45 | } else { 46 | current_tsa = 0.f; 47 | } 48 | 49 | // Directed decision estimate is sum of two terms: current estimate and 50 | // previous estimate. 
51 | float snr_prior = 0.98f * prev_tsa + (1.f - 0.98f) * current_tsa; 52 | filter_[i] = 53 | snr_prior / (suppression_params_.over_subtraction_factor + snr_prior); 54 | filter_[i] = std::max(std::min(filter_[i], 1.f), 55 | suppression_params_.minimum_attenuating_gain); 56 | } 57 | 58 | if (num_analyzed_frames < kShortStartupPhaseBlocks) { 59 | for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) { 60 | initial_spectral_estimate_[i] += signal_spectrum[i]; 61 | float filter_initial = initial_spectral_estimate_[i] - 62 | suppression_params_.over_subtraction_factor * 63 | parametric_noise_spectrum[i]; 64 | filter_initial /= initial_spectral_estimate_[i] + 0.0001f; 65 | 66 | filter_initial = std::max(std::min(filter_initial, 1.f), 67 | suppression_params_.minimum_attenuating_gain); 68 | 69 | // Weight the two suppression filters. 70 | constexpr float kOnyByShortStartupPhaseBlocks = 71 | 1.f / kShortStartupPhaseBlocks; 72 | filter_initial *= kShortStartupPhaseBlocks - num_analyzed_frames; 73 | filter_[i] *= num_analyzed_frames; 74 | filter_[i] += filter_initial; 75 | filter_[i] *= kOnyByShortStartupPhaseBlocks; 76 | } 77 | } 78 | 79 | std::copy(signal_spectrum.begin(), signal_spectrum.end(), 80 | spectrum_prev_process_.begin()); 81 | } 82 | 83 | float WienerFilter::ComputeOverallScalingFactor( 84 | int32_t num_analyzed_frames, 85 | float prior_speech_probability, 86 | float energy_before_filtering, 87 | float energy_after_filtering) const { 88 | if (!suppression_params_.use_attenuation_adjustment || 89 | num_analyzed_frames <= kLongStartupPhaseBlocks) { 90 | return 1.f; 91 | } 92 | 93 | float gain = SqrtFastApproximation(energy_after_filtering / 94 | (energy_before_filtering + 1.f)); 95 | 96 | // Scaling for new version. Threshold in final energy gain factor calculation. 
97 | constexpr float kBLim = 0.5f; 98 | float scale_factor1 = 1.f; 99 | if (gain > kBLim) { 100 | scale_factor1 = 1.f + 1.3f * (gain - kBLim); 101 | if (gain * scale_factor1 > 1.f) { 102 | scale_factor1 = 1.f / gain; 103 | } 104 | } 105 | 106 | float scale_factor2 = 1.f; 107 | if (gain < kBLim) { 108 | // Do not reduce scale too much for pause regions: attenuation here should 109 | // be controlled by flooring. 110 | gain = std::max(gain, suppression_params_.minimum_attenuating_gain); 111 | scale_factor2 = 1.f - 0.3f * (kBLim - gain); 112 | } 113 | 114 | // Combine both scales with speech/noise prob: note prior 115 | // (prior_speech_probability) is not frequency dependent. 116 | return prior_speech_probability * scale_factor1 + 117 | (1.f - prior_speech_probability) * scale_factor2; 118 | } 119 | 120 | } // namespace webrtc 121 | -------------------------------------------------------------------------------- /ns/wiener_filter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef MODULES_AUDIO_PROCESSING_NS_WIENER_FILTER_H_ 12 | #define MODULES_AUDIO_PROCESSING_NS_WIENER_FILTER_H_ 13 | 14 | #include 15 | 16 | #include "array_view.h" 17 | #include "ns_common.h" 18 | #include "suppression_params.h" 19 | 20 | namespace webrtc { 21 | 22 | // Estimates a Wiener-filter based frequency domain noise reduction filter. 
23 | class WienerFilter { 24 | public: 25 | explicit WienerFilter(const SuppressionParams &suppression_params); 26 | 27 | WienerFilter(const WienerFilter &) = delete; 28 | 29 | WienerFilter &operator=(const WienerFilter &) = delete; 30 | 31 | // Updates the filter estimate. 32 | void Update( 33 | int32_t num_analyzed_frames, 34 | rtc::ArrayView noise_spectrum, 35 | rtc::ArrayView prev_noise_spectrum, 36 | rtc::ArrayView parametric_noise_spectrum, 37 | rtc::ArrayView signal_spectrum); 38 | 39 | // Compute an overall gain scaling factor. 40 | float ComputeOverallScalingFactor(int32_t num_analyzed_frames, 41 | float prior_speech_probability, 42 | float energy_before_filtering, 43 | float energy_after_filtering) const; 44 | 45 | // Returns the filter. 46 | rtc::ArrayView get_filter() const { 47 | return filter_; 48 | } 49 | 50 | private: 51 | const SuppressionParams &suppression_params_; 52 | std::array spectrum_prev_process_; 53 | std::array initial_spectral_estimate_; 54 | std::array filter_; 55 | }; 56 | 57 | } // namespace webrtc 58 | 59 | #endif // MODULES_AUDIO_PROCESSING_NS_WIENER_FILTER_H_ 60 | -------------------------------------------------------------------------------- /timing.h: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #if defined(__APPLE__) 5 | # include 6 | #elif defined(_WIN32) 7 | # define WIN32_LEAN_AND_MEAN 8 | 9 | # include 10 | 11 | #else // __linux 12 | 13 | # include 14 | 15 | # ifndef CLOCK_MONOTONIC //_RAW 16 | # define CLOCK_MONOTONIC CLOCK_REALTIME 17 | # endif 18 | #endif 19 | 20 | static 21 | uint64_t nanotimer() { 22 | static int ever = 0; 23 | #if defined(__APPLE__) 24 | static mach_timebase_info_data_t frequency; 25 | if (!ever) { 26 | if (mach_timebase_info(&frequency) != KERN_SUCCESS) { 27 | return 0; 28 | } 29 | ever = 1; 30 | } 31 | return (mach_absolute_time() * frequency.numer / frequency.denom); 32 | #elif defined(_WIN32) 33 | static LARGE_INTEGER frequency; 34 | if 
(!ever) { 35 | QueryPerformanceFrequency(&frequency); 36 | ever = 1; 37 | } 38 | LARGE_INTEGER t; 39 | QueryPerformanceCounter(&t); 40 | return (t.QuadPart * (uint64_t) 1e9) / frequency.QuadPart; 41 | #else // __linux 42 | struct timespec t; 43 | if (!ever) { 44 | if (clock_gettime(CLOCK_MONOTONIC, &t) != 0) { 45 | return 0; 46 | } 47 | ever = 1; 48 | } 49 | clock_gettime(CLOCK_MONOTONIC, &t); 50 | return (t.tv_sec * (uint64_t) 1e9) + t.tv_nsec; 51 | #endif 52 | } 53 | 54 | 55 | static double now() { 56 | static uint64_t epoch = 0; 57 | if (!epoch) { 58 | epoch = nanotimer(); 59 | } 60 | return (nanotimer() - epoch) / 1e9; 61 | }; 62 | 63 | double calcElapsed(double start, double end) { 64 | double took = -start; 65 | return took + end; 66 | } --------------------------------------------------------------------------------