├── .gitignore ├── .gitmodules ├── .travis.yml ├── COPYING ├── Cargo.toml ├── README.md ├── examples ├── karaoke.rs ├── recording-configs │ ├── record-far-sample.json5 │ ├── record-pipeline.json5 │ └── record-sample.json5 ├── recording.rs └── simple.rs ├── rustfmt.toml ├── src ├── config.rs └── lib.rs └── webrtc-audio-processing-sys ├── COPYING ├── Cargo.toml ├── README.md ├── build.rs └── src ├── lib.rs ├── wrapper.cpp └── wrapper.hpp /.gitignore: -------------------------------------------------------------------------------- 1 | **/target 2 | **/*.rs.bk 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "webrtc-audio-processing-sys/webrtc-audio-processing"] 2 | path = webrtc-audio-processing-sys/webrtc-audio-processing 3 | url = https://github.com/tonarino/pulseaudio-webrtc-audio-processing.git 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: rust 3 | os: 4 | - linux 5 | - osx 6 | rust: 7 | - stable 8 | - beta 9 | - nightly 10 | matrix: 11 | allow_failures: 12 | - rust: nightly 13 | cache: cargo 14 | script: 15 | - cargo build --verbose --features "bundled" 16 | - cargo test --verbose --features "bundled" 17 | addons: 18 | homebrew: 19 | packages: 20 | - portaudio 21 | - pkg-config 22 | update: true 23 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011, Google Inc. All rights reserved. 
2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in 12 | the documentation and/or other materials provided with the 13 | distribution. 14 | 15 | * Neither the name of Google nor the names of its contributors may 16 | be used to endorse or promote products derived from this software 17 | without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "webrtc-audio-processing" 3 | version = "0.4.0" 4 | authors = ["Ryo Kawaguchi "] 5 | repository = "https://github.com/tonarino/webrtc-audio-processing" 6 | edition = "2018" 7 | description = "A wrapper for WebRTC's AudioProcessing module." 
8 | documentation = "https://docs.rs/webrtc-audio-processing" 9 | keywords = ["ffi"] 10 | categories = ["multimedia::audio"] 11 | license-file = "COPYING" 12 | 13 | [badges] 14 | travis-ci = { repository = "tonarino/webrtc-audio-processing", branch = "master" } 15 | maintenance = { status = "actively-developed" } 16 | 17 | [features] 18 | derive_serde = ["webrtc-audio-processing-sys/derive_serde", "serde"] 19 | bundled = ["webrtc-audio-processing-sys/bundled"] 20 | 21 | [dependencies] 22 | serde = { version = "1", features = ["derive"], optional = true } 23 | webrtc-audio-processing-sys = { path = "webrtc-audio-processing-sys", version = "0.4.0" } 24 | 25 | [[example]] 26 | name = "recording" 27 | required-features = ["derive_serde"] 28 | 29 | [dev-dependencies] 30 | anyhow = "1" 31 | crossbeam-channel = "0.5" 32 | ctrlc = { version = "3", features = ["termination"] } 33 | hound = "3.4" 34 | json5 = "0.3" 35 | portaudio = "0.7" 36 | regex = "1" 37 | serde = { version = "1", features = ["derive"] } 38 | structopt = "0.3" 39 | log = "0.4" 40 | 41 | [package.metadata.docs.rs] 42 | features = ["bundled"] 43 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # webrtc-audio-processing 2 | [![Crates.io](https://img.shields.io/crates/v/webrtc-audio-processing.svg)](https://crates.io/crates/webrtc-audio-processing) 3 | [![Docs.rs](https://docs.rs/webrtc-audio-processing/badge.svg)](https://docs.rs/webrtc-audio-processing) 4 | [![Build Status](https://travis-ci.org/tonarino/webrtc-audio-processing.svg?branch=master)](https://travis-ci.org/tonarino/webrtc-audio-processing) 5 | [![dependency status](https://deps.rs/repo/github/tonarino/webrtc-audio-processing/status.svg)](https://deps.rs/repo/github/tonarino/webrtc-audio-processing) 6 | 7 | A wrapper around [PulseAudio's repackaging of WebRTC's AudioProcessing 
module](https://www.freedesktop.org/software/pulseaudio/webrtc-audio-processing/). 8 | 9 | `webrtc-audio-processing` can remove echo from an audio input stream in the situation where a speaker is feeding back into a microphone, as well as noise-removal, auto-gain-control, voice-activity-detection, and more! 10 | 11 | ## Example Usage 12 | 13 | See `examples/simple.rs` for an example of how to use this crate. 14 | 15 | ## Building 16 | 17 | ### Feature Flags 18 | 19 | * `bundled` - Build `webrtc-audio-procesing` from the included C++ code 20 | * `derive_serde` - Derive `serialize` and `deserialize` traits for Serde use 21 | 22 | ### Dynamic linking 23 | 24 | By default the build will attempt to dynamically link with the library installed via your OS's package manager. 25 | 26 | You can specify an include path yourself by setting the environment variable `WEBRTC_AUDIO_PROCESSING_INCLUDE`. 27 | 28 | ### Packages 29 | 30 | ```sh 31 | sudo apt install libwebrtc-audio-processing-dev # Ubuntu/Debian 32 | sudo pacman -S webrtc-audio-processing # Arch 33 | ``` 34 | 35 | ### Build from source 36 | 37 | The webrtc source code is included as a git submodule. Be sure to clone this repo with the `--recursive` flag, or pull the submodule with `git submodule update --init`. 38 | 39 | Building from source and static linking can be enabled with the `bundled` feature flag. 
You need the following tools to build from source: 40 | 41 | * `clang` or `gcc` 42 | * `autotools` (MacOS: `brew install automake`, `brew install autoconf`) 43 | * `libtoolize` (typically `glibtoolize` on MacOS: `brew install libtool`) 44 | * `pkg-config` (MacOS: `brew install pkg-config`) 45 | * `automake` (MacOS: `brew install automake`) 46 | 47 | ## Publishing 48 | 49 | ```bash 50 | cargo login 51 | cd ./webrtc-audio-processing-sys 52 | cargo publish --features derive_serde --features bundled 53 | cd ../ 54 | cargo publish --features derive_serde --features bundled 55 | ``` 56 | 57 | ## Contributing 58 | 59 | ### Version increment 60 | 61 | We are using semantic versioning. When incrementing a version, please do so in a separate commit, and also mark it with a Github tag. 62 | -------------------------------------------------------------------------------- /examples/karaoke.rs: -------------------------------------------------------------------------------- 1 | // This example loops the microphone input back to the speakers, while applying echo cancellation, 2 | // creating an experience similar to Karaoke microphones. It uses PortAudio as an interface to the 3 | // underlying audio devices. 4 | use anyhow::Error; 5 | use std::{ 6 | sync::{ 7 | atomic::{AtomicBool, Ordering}, 8 | Arc, 9 | }, 10 | thread, 11 | time::Duration, 12 | }; 13 | use webrtc_audio_processing::*; 14 | 15 | // The highest sample rate that webrtc-audio-processing supports. 16 | const SAMPLE_RATE: f64 = 48_000.0; 17 | 18 | // webrtc-audio-processing expects a 10ms chunk for each process call. 19 | const FRAMES_PER_BUFFER: u32 = 480; 20 | 21 | fn create_processor( 22 | num_capture_channels: i32, 23 | num_render_channels: i32, 24 | ) -> Result { 25 | let mut processor = Processor::new(&InitializationConfig { 26 | num_capture_channels, 27 | num_render_channels, 28 | ..InitializationConfig::default() 29 | })?; 30 | 31 | // High pass filter is a prerequisite to running echo cancellation. 
32 | let config = Config { 33 | echo_cancellation: Some(EchoCancellation { 34 | suppression_level: EchoCancellationSuppressionLevel::Low, 35 | stream_delay_ms: Some(0), 36 | enable_delay_agnostic: true, 37 | enable_extended_filter: true, 38 | }), 39 | enable_high_pass_filter: true, 40 | ..Config::default() 41 | }; 42 | processor.set_config(config); 43 | 44 | Ok(processor) 45 | } 46 | 47 | fn wait_ctrlc() -> Result<(), Error> { 48 | let running = Arc::new(AtomicBool::new(true)); 49 | 50 | ctrlc::set_handler({ 51 | let running = running.clone(); 52 | move || { 53 | running.store(false, Ordering::SeqCst); 54 | } 55 | })?; 56 | 57 | while running.load(Ordering::SeqCst) { 58 | thread::sleep(Duration::from_millis(10)); 59 | } 60 | 61 | Ok(()) 62 | } 63 | 64 | fn main() -> Result<(), Error> { 65 | // Monoral microphone. 66 | let input_channels = 1; 67 | // Monoral speaker. 68 | let output_channels = 1; 69 | 70 | let mut processor = create_processor(input_channels, output_channels)?; 71 | 72 | let pa = portaudio::PortAudio::new()?; 73 | 74 | let stream_settings = pa.default_duplex_stream_settings( 75 | input_channels, 76 | output_channels, 77 | SAMPLE_RATE, 78 | FRAMES_PER_BUFFER, 79 | )?; 80 | 81 | // Memory allocation should not happen inside the audio loop. 82 | let mut processed = vec![0f32; FRAMES_PER_BUFFER as usize * input_channels as usize]; 83 | 84 | let mut stream = pa.open_non_blocking_stream( 85 | stream_settings, 86 | move |portaudio::DuplexStreamCallbackArgs { in_buffer, out_buffer, frames, .. }| { 87 | assert_eq!(frames as u32, FRAMES_PER_BUFFER); 88 | 89 | processed.copy_from_slice(in_buffer); 90 | processor.process_capture_frame(&mut processed).unwrap(); 91 | 92 | // Play back the processed audio capture. 
93 | out_buffer.copy_from_slice(&processed); 94 | processor.process_render_frame(out_buffer).unwrap(); 95 | 96 | portaudio::Continue 97 | }, 98 | )?; 99 | 100 | stream.start()?; 101 | 102 | wait_ctrlc()?; 103 | 104 | Ok(()) 105 | } 106 | -------------------------------------------------------------------------------- /examples/recording-configs/record-far-sample.json5: -------------------------------------------------------------------------------- 1 | { 2 | capture: { 3 | device_name: "^MacBook Pro Microphone$", 4 | num_channels: 1, 5 | preprocess_sink_path: "render.wav", 6 | }, 7 | render: { 8 | device_name: "^MacBook Pro Speakers$", 9 | num_channels: 1, 10 | }, 11 | config: { 12 | enable_transient_suppressor: false, 13 | enable_high_pass_filter: false, 14 | }, 15 | } 16 | -------------------------------------------------------------------------------- /examples/recording-configs/record-pipeline.json5: -------------------------------------------------------------------------------- 1 | { 2 | capture: { 3 | device_name: "^MacBook Pro Microphone$", 4 | num_channels: 1, 5 | source_path: "capture.wav", 6 | postprocess_sink_path: "capture-processed.wav", 7 | }, 8 | render: { 9 | device_name: "^MacBook Pro Speakers$", 10 | num_channels: 1, 11 | source_path: "render.wav", 12 | mute: true, 13 | }, 14 | config: { 15 | enable_transient_suppressor: false, 16 | enable_high_pass_filter: true, 17 | }, 18 | } 19 | -------------------------------------------------------------------------------- /examples/recording-configs/record-sample.json5: -------------------------------------------------------------------------------- 1 | { 2 | capture: { 3 | device_name: "^MacBook Pro Microphone$", 4 | num_channels: 1, 5 | preprocess_sink_path: "capture.wav", 6 | }, 7 | render: { 8 | device_name: "^MacBook Pro Speakers$", 9 | num_channels: 1, 10 | source_path: "render.wav", 11 | }, 12 | config: { 13 | enable_transient_suppressor: false, 14 | enable_high_pass_filter: false, 15 | }, 16 | } 17 
| -------------------------------------------------------------------------------- /examples/recording.rs: -------------------------------------------------------------------------------- 1 | /// An example binary to help evaluate webrtc audio processing pipeline, in particular its echo 2 | /// canceller. You can use it to record a sample with your audio setup, and you can run the 3 | /// pipeline repeatedly using the sampled audio, to test different configurations of the pipeline. 4 | /// 5 | /// # Record a sample 6 | /// 7 | /// Play back a pre-recorded audio stream from your speakers, while recording the microphone 8 | /// input as a WAV file. 9 | /// 10 | /// ``` 11 | /// $ cargo run --example recording --features bundled --features derive_serde -- --config-file \ 12 | /// examples/recording-configs/record-sample.json5 13 | /// ``` 14 | /// 15 | /// # Run the pipeline with the sample 16 | /// 17 | /// Run the audio processing pipeline with the recorded capture and render frames. You can then 18 | /// analyze the capture-processed.wav to understand the effect produced by the pipeline. 19 | /// 20 | /// ``` 21 | /// $ cargo run --example recording --features bundled --features derive_serde -- --config-file \ 22 | /// examples/recording-configs/record-pipeline.json5 23 | /// ``` 24 | use anyhow::{anyhow, Error}; 25 | use hound::{WavIntoSamples, WavReader, WavWriter}; 26 | use regex::Regex; 27 | use serde::{Deserialize, Serialize}; 28 | use std::{ 29 | fs::{self, File}, 30 | io::{BufReader, BufWriter}, 31 | path::{Path, PathBuf}, 32 | sync::{ 33 | atomic::{AtomicBool, Ordering}, 34 | Arc, 35 | }, 36 | thread, 37 | time::Duration, 38 | }; 39 | use structopt::StructOpt; 40 | use webrtc_audio_processing::*; 41 | 42 | const AUDIO_SAMPLE_RATE: u32 = 48_000; 43 | const AUDIO_INTERLEAVED: bool = true; 44 | 45 | #[derive(Debug, StructOpt)] 46 | struct Args { 47 | /// Configuration file that stores JSON serialization of [`Option`] struct. 
48 | #[structopt(short, long)] 49 | pub config_file: PathBuf, 50 | } 51 | 52 | #[derive(Deserialize, Serialize, Default, Clone, Debug)] 53 | struct CaptureOptions { 54 | /// Name of the audio capture device. 55 | device_name: String, 56 | /// The number of audio capture channels. 57 | num_channels: u16, 58 | /// If specified, it reads the capture stream from the WAV file instead of the device. 59 | source_path: Option, 60 | /// If specified, it writes the capture stream to the WAV file before applying the processing. 61 | preprocess_sink_path: Option, 62 | /// If specified, it writes the capture stream to the WAV file after applying the processing. 63 | postprocess_sink_path: Option, 64 | } 65 | 66 | #[derive(Deserialize, Serialize, Default, Clone, Debug)] 67 | struct RenderOptions { 68 | /// Name of the audio playback device. 69 | device_name: String, 70 | /// The number of audio playback channels. 71 | num_channels: u16, 72 | /// If specified, it plays back the audio stream from the WAV file. Otherwise, a stream of 73 | /// zeros are sent to the audio device. 74 | source_path: Option, 75 | /// If true, the output is muted. 76 | #[serde(default)] 77 | mute: bool, 78 | } 79 | 80 | #[derive(Deserialize, Serialize, Default, Clone, Debug)] 81 | struct Options { 82 | /// Options for audio capture / recording. 83 | capture: CaptureOptions, 84 | /// Options for audio render / playback. 85 | render: RenderOptions, 86 | /// Configurations of the audio processing pipeline. 
87 | config: Config, 88 | } 89 | 90 | fn match_device( 91 | pa: &portaudio::PortAudio, 92 | device_name: Regex, 93 | ) -> Result { 94 | for device in (pa.devices()?).flatten() { 95 | if device_name.is_match(device.1.name) { 96 | return Ok(device.0); 97 | } 98 | } 99 | Err(anyhow!("Audio device matching \"{}\" not found.", device_name)) 100 | } 101 | 102 | fn create_stream_settings( 103 | pa: &portaudio::PortAudio, 104 | opt: &Options, 105 | ) -> Result, Error> { 106 | let input_device = match_device(pa, Regex::new(&opt.capture.device_name)?)?; 107 | let input_device_info = &pa.device_info(input_device)?; 108 | let input_params = portaudio::StreamParameters::::new( 109 | input_device, 110 | opt.capture.num_channels as i32, 111 | AUDIO_INTERLEAVED, 112 | input_device_info.default_low_input_latency, 113 | ); 114 | 115 | let output_device = match_device(pa, Regex::new(&opt.render.device_name)?)?; 116 | let output_device_info = &pa.device_info(output_device)?; 117 | let output_params = portaudio::StreamParameters::::new( 118 | output_device, 119 | opt.render.num_channels as i32, 120 | AUDIO_INTERLEAVED, 121 | output_device_info.default_low_output_latency, 122 | ); 123 | 124 | pa.is_duplex_format_supported(input_params, output_params, f64::from(AUDIO_SAMPLE_RATE))?; 125 | 126 | Ok(portaudio::DuplexStreamSettings::new( 127 | input_params, 128 | output_params, 129 | f64::from(AUDIO_SAMPLE_RATE), 130 | NUM_SAMPLES_PER_FRAME as u32, 131 | )) 132 | } 133 | 134 | fn open_wav_writer(path: &Path, channels: u16) -> Result>, Error> { 135 | let sink = hound::WavWriter::>::create( 136 | path, 137 | hound::WavSpec { 138 | channels, 139 | sample_rate: AUDIO_SAMPLE_RATE, 140 | bits_per_sample: 32, 141 | sample_format: hound::SampleFormat::Float, 142 | }, 143 | )?; 144 | 145 | Ok(sink) 146 | } 147 | 148 | fn open_wav_reader(path: &Path) -> Result, f32>, Error> { 149 | let reader = WavReader::>::open(path)?; 150 | Ok(reader.into_samples()) 151 | } 152 | 153 | // The destination array is 
an interleaved audio stream. 154 | // Returns false if there are no more entries to read from the source. 155 | fn copy_stream(source: &mut WavIntoSamples, f32>, dest: &mut [f32]) -> bool { 156 | let mut dest_iter = dest.iter_mut(); 157 | 'outer: for sample in source { 158 | for channel in &sample { 159 | *dest_iter.next().unwrap() = *channel; 160 | if dest_iter.len() == 0 { 161 | break 'outer; 162 | } 163 | } 164 | } 165 | 166 | let source_eof = dest_iter.len() > 0; 167 | 168 | // Zero-fill the remainder of the destination array if we finish consuming 169 | // the source. 170 | for sample in dest_iter { 171 | *sample = 0.0; 172 | } 173 | 174 | !source_eof 175 | } 176 | 177 | fn main() -> Result<(), Error> { 178 | let args = Args::from_args(); 179 | let opt: Options = json5::from_str(&fs::read_to_string(&args.config_file)?)?; 180 | 181 | let pa = portaudio::PortAudio::new()?; 182 | 183 | let mut processor = Processor::new(&InitializationConfig { 184 | num_capture_channels: opt.capture.num_channels as i32, 185 | num_render_channels: opt.render.num_channels as i32, 186 | ..Default::default() 187 | })?; 188 | 189 | processor.set_config(opt.config.clone()); 190 | 191 | let running = Arc::new(AtomicBool::new(true)); 192 | 193 | let mut capture_source = 194 | if let Some(path) = &opt.capture.source_path { Some(open_wav_reader(path)?) } else { None }; 195 | let mut capture_preprocess_sink = if let Some(path) = &opt.capture.preprocess_sink_path { 196 | Some(open_wav_writer(path, opt.capture.num_channels)?) 197 | } else { 198 | None 199 | }; 200 | let mut capture_postprocess_sink = if let Some(path) = &opt.capture.postprocess_sink_path { 201 | Some(open_wav_writer(path, opt.capture.num_channels)?) 202 | } else { 203 | None 204 | }; 205 | let mut render_source = 206 | if let Some(path) = &opt.render.source_path { Some(open_wav_reader(path)?) } else { None }; 207 | 208 | let audio_callback = { 209 | // Allocate buffers outside the performance-sensitive audio loop. 
210 | let mut input_mut = 211 | vec![0f32; NUM_SAMPLES_PER_FRAME as usize * opt.capture.num_channels as usize]; 212 | 213 | let running = running.clone(); 214 | let mute = opt.render.mute; 215 | let mut processor = processor.clone(); 216 | move |portaudio::DuplexStreamCallbackArgs { in_buffer, out_buffer, frames, .. }| { 217 | assert_eq!(frames, NUM_SAMPLES_PER_FRAME as usize); 218 | 219 | let mut should_continue = true; 220 | 221 | if let Some(source) = &mut capture_source { 222 | if !copy_stream(source, &mut input_mut) { 223 | should_continue = false; 224 | } 225 | } else { 226 | input_mut.copy_from_slice(in_buffer); 227 | } 228 | 229 | if let Some(sink) = &mut capture_preprocess_sink { 230 | for sample in &input_mut { 231 | sink.write_sample(*sample).unwrap(); 232 | } 233 | } 234 | 235 | processor.process_capture_frame(&mut input_mut).unwrap(); 236 | 237 | if let Some(sink) = &mut capture_postprocess_sink { 238 | for sample in &input_mut { 239 | sink.write_sample(*sample).unwrap(); 240 | } 241 | } 242 | 243 | if let Some(source) = &mut render_source { 244 | if !copy_stream(source, out_buffer) { 245 | should_continue = false; 246 | } 247 | } else { 248 | out_buffer.iter_mut().for_each(|m| *m = 0.0) 249 | } 250 | 251 | processor.process_render_frame(out_buffer).unwrap(); 252 | 253 | if mute { 254 | out_buffer.iter_mut().for_each(|m| *m = 0.0) 255 | } 256 | 257 | if should_continue { 258 | portaudio::Continue 259 | } else { 260 | running.store(false, Ordering::SeqCst); 261 | portaudio::Complete 262 | } 263 | } 264 | }; 265 | 266 | let stream_settings = create_stream_settings(&pa, &opt)?; 267 | let mut stream = pa.open_non_blocking_stream(stream_settings, audio_callback)?; 268 | stream.start()?; 269 | 270 | ctrlc::set_handler({ 271 | let running = running.clone(); 272 | move || { 273 | running.store(false, Ordering::SeqCst); 274 | } 275 | })?; 276 | 277 | while running.load(Ordering::SeqCst) { 278 | thread::sleep(Duration::from_millis(10)); 279 | } 280 | 281 | 
println!("{:#?}", processor.get_stats()); 282 | 283 | Ok(()) 284 | } 285 | -------------------------------------------------------------------------------- /examples/simple.rs: -------------------------------------------------------------------------------- 1 | use webrtc_audio_processing::*; 2 | 3 | fn main() { 4 | let config = InitializationConfig { 5 | num_capture_channels: 2, // Stereo mic input 6 | num_render_channels: 2, // Stereo speaker output 7 | ..InitializationConfig::default() 8 | }; 9 | 10 | let mut ap = Processor::new(&config).unwrap(); 11 | 12 | let config = Config { 13 | echo_cancellation: Some(EchoCancellation { 14 | suppression_level: EchoCancellationSuppressionLevel::High, 15 | enable_delay_agnostic: false, 16 | enable_extended_filter: false, 17 | stream_delay_ms: None, 18 | }), 19 | ..Config::default() 20 | }; 21 | ap.set_config(config); 22 | 23 | // The render_frame is what is sent to the speakers, and 24 | // capture_frame is audio captured from a microphone. 25 | let (render_frame, capture_frame) = sample_stereo_frames(); 26 | 27 | let mut render_frame_output = render_frame.clone(); 28 | ap.process_render_frame(&mut render_frame_output).unwrap(); 29 | 30 | assert_eq!(render_frame, render_frame_output, "render_frame should not be modified."); 31 | 32 | let mut capture_frame_output = capture_frame.clone(); 33 | ap.process_capture_frame(&mut capture_frame_output).unwrap(); 34 | 35 | assert_ne!( 36 | capture_frame, capture_frame_output, 37 | "Echo cancellation should have modified capture_frame." 38 | ); 39 | 40 | // capture_frame_output is now ready to send to a remote peer. 41 | println!("Successfully processed a render and capture frame through WebRTC!"); 42 | } 43 | 44 | /// Generate example stereo frames that simulates a situation where the 45 | /// microphone (capture) would be picking up the speaker (render) output. 
46 | fn sample_stereo_frames() -> (Vec, Vec) { 47 | let num_samples_per_frame = NUM_SAMPLES_PER_FRAME as usize; 48 | 49 | let mut render_frame = Vec::with_capacity(num_samples_per_frame * 2); 50 | let mut capture_frame = Vec::with_capacity(num_samples_per_frame * 2); 51 | for i in 0..num_samples_per_frame { 52 | render_frame.push((i as f32 / 40.0).cos() * 0.4); 53 | render_frame.push((i as f32 / 40.0).cos() * 0.2); 54 | capture_frame.push((i as f32 / 20.0).sin() * 0.4 + render_frame[i * 2] * 0.2); 55 | capture_frame.push((i as f32 / 20.0).sin() * 0.2 + render_frame[i * 2 + 1] * 0.2); 56 | } 57 | 58 | (render_frame, capture_frame) 59 | } 60 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | indent_style = "Block" 2 | use_small_heuristics="Max" 3 | merge_imports = true 4 | match_block_trailing_comma = true 5 | reorder_impl_items = true 6 | use_field_init_shorthand = true 7 | use_try_shorthand = true 8 | -------------------------------------------------------------------------------- /src/config.rs: -------------------------------------------------------------------------------- 1 | use webrtc_audio_processing_sys as ffi; 2 | 3 | pub use ffi::InitializationConfig; 4 | 5 | #[cfg(feature = "derive_serde")] 6 | use serde::{Deserialize, Serialize}; 7 | 8 | /// A level of non-linear suppression during AEC (aka NLP). 9 | #[derive(Debug, Copy, Clone, PartialEq)] 10 | #[cfg_attr(feature = "derive_serde", derive(Serialize, Deserialize))] 11 | pub enum EchoCancellationSuppressionLevel { 12 | /// Lowest suppression level. 13 | /// Minimum overdrive exponent = 1.0 (zero suppression). 14 | Lowest, 15 | /// Lower suppression level. 16 | /// Minimum overdrive exponent = 2.0. 17 | Lower, 18 | /// Low suppression level. 19 | /// Minimum overdrive exponent = 3.0. 20 | Low, 21 | /// Moderate suppression level. 22 | /// Minimum overdrive exponent = 6.0. 
23 | Moderate, 24 | /// Higher suppression level. 25 | /// Minimum overdrive exponent = 15.0. 26 | High, 27 | } 28 | 29 | impl From for ffi::EchoCancellation_SuppressionLevel { 30 | fn from(other: EchoCancellationSuppressionLevel) -> ffi::EchoCancellation_SuppressionLevel { 31 | match other { 32 | EchoCancellationSuppressionLevel::Lowest => { 33 | ffi::EchoCancellation_SuppressionLevel::LOWEST 34 | }, 35 | EchoCancellationSuppressionLevel::Lower => { 36 | ffi::EchoCancellation_SuppressionLevel::LOWER 37 | }, 38 | EchoCancellationSuppressionLevel::Low => ffi::EchoCancellation_SuppressionLevel::LOW, 39 | EchoCancellationSuppressionLevel::Moderate => { 40 | ffi::EchoCancellation_SuppressionLevel::MODERATE 41 | }, 42 | EchoCancellationSuppressionLevel::High => ffi::EchoCancellation_SuppressionLevel::HIGH, 43 | } 44 | } 45 | } 46 | 47 | /// Echo cancellation configuration. 48 | #[derive(Debug, Clone, PartialEq)] 49 | #[cfg_attr(feature = "derive_serde", derive(Serialize, Deserialize))] 50 | pub struct EchoCancellation { 51 | /// Determines the aggressiveness of the suppressor. A higher level trades off 52 | /// double-talk performance for increased echo suppression. 53 | pub suppression_level: EchoCancellationSuppressionLevel, 54 | 55 | /// Use to enable the extended filter mode in the AEC, along with robustness 56 | /// measures around the reported system delays. It comes with a significant 57 | /// increase in AEC complexity, but is much more robust to unreliable reported 58 | /// delays. 59 | pub enable_extended_filter: bool, 60 | 61 | /// Enables delay-agnostic echo cancellation. This feature relies on internally 62 | /// estimated delays between the process and reverse streams, thus not relying 63 | /// on reported system delays. 64 | pub enable_delay_agnostic: bool, 65 | 66 | /// Sets the delay in ms between process_render_frame() receiving a far-end 67 | /// frame and process_capture_frame() receiving a near-end frame containing 68 | /// the corresponding echo. 
You should set this only if you are certain that 69 | /// the delay will be stable and constant. enable_delay_agnostic will be 70 | /// ignored when this option is set. 71 | pub stream_delay_ms: Option, 72 | } 73 | 74 | impl From for ffi::EchoCancellation { 75 | fn from(other: EchoCancellation) -> ffi::EchoCancellation { 76 | ffi::EchoCancellation { 77 | enable: true, 78 | suppression_level: other.suppression_level.into(), 79 | enable_extended_filter: other.enable_extended_filter, 80 | enable_delay_agnostic: other.enable_delay_agnostic, 81 | stream_delay_ms: other.stream_delay_ms.into(), 82 | } 83 | } 84 | } 85 | 86 | /// Mode of gain control. 87 | #[derive(Debug, Copy, Clone, PartialEq)] 88 | #[cfg_attr(feature = "derive_serde", derive(Serialize, Deserialize))] 89 | pub enum GainControlMode { 90 | /// Bring the signal to an appropriate range by applying an adaptive gain 91 | /// control. The volume is dynamically amplified with a microphone with 92 | /// small pickup and vice versa. 93 | AdaptiveDigital, 94 | 95 | /// Unlike ADAPTIVE_DIGITAL, it only compresses (i.e. gradually reduces 96 | /// gain with increasing level) the input signal when at higher levels. 97 | /// Use this where the capture signal level is predictable, so that a 98 | /// known gain can be applied. 99 | FixedDigital, 100 | } 101 | 102 | impl From for ffi::GainControl_Mode { 103 | fn from(other: GainControlMode) -> ffi::GainControl_Mode { 104 | match other { 105 | GainControlMode::AdaptiveDigital => ffi::GainControl_Mode::ADAPTIVE_DIGITAL, 106 | GainControlMode::FixedDigital => ffi::GainControl_Mode::FIXED_DIGITAL, 107 | } 108 | } 109 | } 110 | 111 | /// Gain control configuration. 112 | #[derive(Debug, Clone, PartialEq)] 113 | #[cfg_attr(feature = "derive_serde", derive(Serialize, Deserialize))] 114 | pub struct GainControl { 115 | /// Determines what type of gain control is applied. 
116 | pub mode: GainControlMode, 117 | 118 | /// Sets the target peak level (or envelope) of the AGC in dBFs (decibels from 119 | /// digital full-scale). The convention is to use positive values. 120 | /// For instance, passing in a value of 3 corresponds to -3 dBFs, or a target 121 | /// level 3 dB below full-scale. Limited to [0, 31]. 122 | pub target_level_dbfs: i32, 123 | 124 | /// Sets the maximum gain the digital compression stage may apply, in dB. A 125 | /// higher number corresponds to greater compression, while a value of 0 will 126 | /// leave the signal uncompressed. Limited to [0, 90]. 127 | pub compression_gain_db: i32, 128 | 129 | /// When enabled, the compression stage will hard limit the signal to the 130 | /// target level. Otherwise, the signal will be compressed but not limited 131 | /// above the target level. 132 | pub enable_limiter: bool, 133 | } 134 | 135 | impl From for ffi::GainControl { 136 | fn from(other: GainControl) -> ffi::GainControl { 137 | ffi::GainControl { 138 | enable: true, 139 | mode: other.mode.into(), 140 | target_level_dbfs: other.target_level_dbfs, 141 | compression_gain_db: other.compression_gain_db, 142 | enable_limiter: other.enable_limiter, 143 | } 144 | } 145 | } 146 | 147 | /// A level of noise suppression. 148 | #[derive(Debug, Copy, Clone, PartialEq)] 149 | #[cfg_attr(feature = "derive_serde", derive(Serialize, Deserialize))] 150 | pub enum NoiseSuppressionLevel { 151 | /// Lower suppression level. 152 | Low, 153 | /// Moderate suppression level. 154 | Moderate, 155 | /// Higher suppression level. 156 | High, 157 | /// Even higher suppression level. 
158 | VeryHigh, 159 | } 160 | 161 | impl From for ffi::NoiseSuppression_SuppressionLevel { 162 | fn from(other: NoiseSuppressionLevel) -> ffi::NoiseSuppression_SuppressionLevel { 163 | match other { 164 | NoiseSuppressionLevel::Low => ffi::NoiseSuppression_SuppressionLevel::LOW, 165 | NoiseSuppressionLevel::Moderate => ffi::NoiseSuppression_SuppressionLevel::MODERATE, 166 | NoiseSuppressionLevel::High => ffi::NoiseSuppression_SuppressionLevel::HIGH, 167 | NoiseSuppressionLevel::VeryHigh => ffi::NoiseSuppression_SuppressionLevel::VERY_HIGH, 168 | } 169 | } 170 | } 171 | 172 | /// Noise suppression configuration. 173 | #[derive(Debug, Clone, PartialEq)] 174 | #[cfg_attr(feature = "derive_serde", derive(Serialize, Deserialize))] 175 | pub struct NoiseSuppression { 176 | /// Determines the aggressiveness of the suppression. Increasing the level will 177 | /// reduce the noise level at the expense of a higher speech distortion. 178 | pub suppression_level: NoiseSuppressionLevel, 179 | } 180 | 181 | impl From for ffi::NoiseSuppression { 182 | fn from(other: NoiseSuppression) -> ffi::NoiseSuppression { 183 | ffi::NoiseSuppression { enable: true, suppression_level: other.suppression_level.into() } 184 | } 185 | } 186 | 187 | /// The sensitivity of the noise detector. 188 | #[derive(Debug, Copy, Clone, PartialEq)] 189 | #[cfg_attr(feature = "derive_serde", derive(Serialize, Deserialize))] 190 | pub enum VoiceDetectionLikelihood { 191 | /// Even lower detection likelihood. 192 | VeryLow, 193 | /// Lower detection likelihood. 194 | Low, 195 | /// Moderate detection likelihood. 196 | Moderate, 197 | /// Higher detection likelihood. 
198 | High, 199 | } 200 | 201 | impl From for ffi::VoiceDetection_DetectionLikelihood { 202 | fn from(other: VoiceDetectionLikelihood) -> ffi::VoiceDetection_DetectionLikelihood { 203 | match other { 204 | VoiceDetectionLikelihood::VeryLow => ffi::VoiceDetection_DetectionLikelihood::VERY_LOW, 205 | VoiceDetectionLikelihood::Low => ffi::VoiceDetection_DetectionLikelihood::LOW, 206 | VoiceDetectionLikelihood::Moderate => ffi::VoiceDetection_DetectionLikelihood::MODERATE, 207 | VoiceDetectionLikelihood::High => ffi::VoiceDetection_DetectionLikelihood::HIGH, 208 | } 209 | } 210 | } 211 | 212 | /// Voice detection configuration. 213 | #[derive(Debug, Clone, PartialEq)] 214 | #[cfg_attr(feature = "derive_serde", derive(Serialize, Deserialize))] 215 | pub struct VoiceDetection { 216 | /// Specifies the likelihood that a frame will be declared to contain voice. A 217 | /// higher value makes it more likely that speech will not be clipped, at the 218 | /// expense of more noise being detected as voice. 219 | pub detection_likelihood: VoiceDetectionLikelihood, 220 | } 221 | 222 | impl From for ffi::VoiceDetection { 223 | fn from(other: VoiceDetection) -> ffi::VoiceDetection { 224 | ffi::VoiceDetection { 225 | enable: true, 226 | detection_likelihood: other.detection_likelihood.into(), 227 | } 228 | } 229 | } 230 | 231 | /// Config that can be used mid-processing. 232 | #[derive(Debug, Default, Clone, PartialEq)] 233 | #[cfg_attr(feature = "derive_serde", derive(Serialize, Deserialize))] 234 | pub struct Config { 235 | /// Enable and configure AEC (acoustic echo cancellation). 236 | pub echo_cancellation: Option, 237 | 238 | /// Enable and configure AGC (automatic gain control). 239 | pub gain_control: Option, 240 | 241 | /// Enable and configure noise suppression. 242 | pub noise_suppression: Option, 243 | 244 | /// Enable and configure voice detection. 245 | pub voice_detection: Option, 246 | 247 | /// Use to enable experimental transient noise suppression. 
248 | #[cfg_attr(feature = "derive_serde", serde(default))] 249 | pub enable_transient_suppressor: bool, 250 | 251 | /// Use to enable a filtering component which removes DC offset and 252 | /// low-frequency noise. 253 | #[cfg_attr(feature = "derive_serde", serde(default))] 254 | pub enable_high_pass_filter: bool, 255 | } 256 | 257 | impl From for ffi::Config { 258 | fn from(other: Config) -> ffi::Config { 259 | let echo_cancellation = if let Some(enabled_value) = other.echo_cancellation { 260 | enabled_value.into() 261 | } else { 262 | ffi::EchoCancellation { enable: false, ..ffi::EchoCancellation::default() } 263 | }; 264 | 265 | let gain_control = if let Some(enabled_value) = other.gain_control { 266 | enabled_value.into() 267 | } else { 268 | ffi::GainControl { enable: false, ..ffi::GainControl::default() } 269 | }; 270 | 271 | let noise_suppression = if let Some(enabled_value) = other.noise_suppression { 272 | enabled_value.into() 273 | } else { 274 | ffi::NoiseSuppression { enable: false, ..ffi::NoiseSuppression::default() } 275 | }; 276 | 277 | let voice_detection = if let Some(enabled_value) = other.voice_detection { 278 | enabled_value.into() 279 | } else { 280 | ffi::VoiceDetection { enable: false, ..ffi::VoiceDetection::default() } 281 | }; 282 | 283 | ffi::Config { 284 | echo_cancellation, 285 | gain_control, 286 | noise_suppression, 287 | voice_detection, 288 | enable_transient_suppressor: other.enable_transient_suppressor, 289 | enable_high_pass_filter: other.enable_high_pass_filter, 290 | } 291 | } 292 | } 293 | 294 | /// Statistics about the processor state. 295 | #[derive(Debug, Clone)] 296 | #[cfg_attr(feature = "derive_serde", derive(Serialize, Deserialize))] 297 | pub struct Stats { 298 | /// True if voice is detected in the current frame. 299 | pub has_voice: Option, 300 | 301 | /// False if the current frame almost certainly contains no echo and true if it 302 | /// _might_ contain echo. 
303 | pub has_echo: Option, 304 | 305 | /// Root mean square (RMS) level in dBFs (decibels from digital full-scale), or 306 | /// alternately dBov. It is computed over all primary stream frames since the 307 | /// last call to |get_stats()|. The returned value is constrained to [-127, 0], 308 | /// where -127 indicates muted. 309 | pub rms_dbfs: Option, 310 | 311 | /// Prior speech probability of the current frame averaged over output 312 | /// channels, internally computed by noise suppressor. 313 | pub speech_probability: Option, 314 | 315 | /// RERL = ERL + ERLE 316 | pub residual_echo_return_loss: Option, 317 | 318 | /// ERL = 10log_10(P_far / P_echo) 319 | pub echo_return_loss: Option, 320 | 321 | /// ERLE = 10log_10(P_echo / P_out) 322 | pub echo_return_loss_enhancement: Option, 323 | 324 | /// (Pre non-linear processing suppression) A_NLP = 10log_10(P_echo / P_a) 325 | pub a_nlp: Option, 326 | 327 | /// Median of the measured delay in ms. The values are aggregated until the 328 | /// first call to |get_stats()| and afterwards aggregated and updated every 329 | /// second. 330 | pub delay_median_ms: Option, 331 | 332 | /// Standard deviation of the measured delay in ms. The values are aggregated 333 | /// until the first call to |get_stats()| and afterwards aggregated and updated 334 | /// every second. 335 | pub delay_standard_deviation_ms: Option, 336 | 337 | /// The fraction of delay estimates that can make the echo cancellation perform 338 | /// poorly. 
339 | pub delay_fraction_poor_delays: Option, 340 | } 341 | 342 | impl From for Stats { 343 | fn from(other: ffi::Stats) -> Stats { 344 | Stats { 345 | has_voice: other.has_voice.into(), 346 | has_echo: other.has_echo.into(), 347 | rms_dbfs: other.rms_dbfs.into(), 348 | speech_probability: other.speech_probability.into(), 349 | residual_echo_return_loss: other.residual_echo_return_loss.into(), 350 | echo_return_loss: other.echo_return_loss.into(), 351 | echo_return_loss_enhancement: other.echo_return_loss_enhancement.into(), 352 | a_nlp: other.a_nlp.into(), 353 | delay_median_ms: other.delay_median_ms.into(), 354 | delay_standard_deviation_ms: other.delay_standard_deviation_ms.into(), 355 | delay_fraction_poor_delays: other.delay_fraction_poor_delays.into(), 356 | } 357 | } 358 | } 359 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! This crate is a wrapper around [PulseAudio's repackaging of WebRTC's AudioProcessing module](https://www.freedesktop.org/software/pulseaudio/webrtc-audio-processing/). 2 | //! 3 | //! See `examples/simple.rs` for an example of how to use the library. 4 | 5 | #![warn(clippy::all)] 6 | #![warn(missing_docs)] 7 | 8 | mod config; 9 | 10 | use std::{error, fmt, sync::Arc}; 11 | use webrtc_audio_processing_sys as ffi; 12 | 13 | pub use config::*; 14 | pub use ffi::NUM_SAMPLES_PER_FRAME; 15 | 16 | /// Represents an error inside webrtc::AudioProcessing. 17 | /// See the documentation of [`webrtc::AudioProcessing::Error`](https://cgit.freedesktop.org/pulseaudio/webrtc-audio-processing/tree/webrtc/modules/audio_processing/include/audio_processing.h?id=9def8cf10d3c97640d32f1328535e881288f700f) 18 | /// for further details. 
#[derive(Debug)]
pub struct Error {
    /// webrtc::AudioProcessing::Error
    code: i32,
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "ffi::AudioProcessing::Error code: {}", self.code)
    }
}

impl error::Error for Error {}

/// `Processor` provides an access to webrtc's audio processing e.g. echo
/// cancellation and automatic gain control. It can be cloned, and cloned
/// instances share the same underlying processor module. It's the recommended
/// way to run the `Processor` in multi-threaded application.
#[derive(Clone)]
pub struct Processor {
    inner: Arc<AudioProcessing>,
    // TODO: Refactor. It's not necessary to have two frame buffers as
    // `Processor`s are cloned for each thread.
    deinterleaved_capture_frame: Vec<Vec<f32>>,
    deinterleaved_render_frame: Vec<Vec<f32>>,
}

impl Processor {
    /// Creates a new `Processor`. `InitializationConfig` is only used on
    /// instantiation, however new configs can be passed to `set_config()`
    /// at any time during processing.
    pub fn new(config: &ffi::InitializationConfig) -> Result<Self, Error> {
        Ok(Self {
            inner: Arc::new(AudioProcessing::new(config)?),
            // One scratch buffer per channel, each holding a full frame.
            deinterleaved_capture_frame: vec![
                vec![0f32; NUM_SAMPLES_PER_FRAME as usize];
                config.num_capture_channels as usize
            ],
            deinterleaved_render_frame: vec![
                vec![0f32; NUM_SAMPLES_PER_FRAME as usize];
                config.num_render_channels as usize
            ],
        })
    }

    /// Processes and modifies the audio frame from a capture device by applying
    /// signal processing as specified in the config. `frame` should hold an
    /// interleaved f32 audio frame, with NUM_SAMPLES_PER_FRAME samples.
67 | pub fn process_capture_frame(&mut self, frame: &mut [f32]) -> Result<(), Error> { 68 | Self::deinterleave(frame, &mut self.deinterleaved_capture_frame); 69 | self.inner.process_capture_frame(&mut self.deinterleaved_capture_frame)?; 70 | Self::interleave(&self.deinterleaved_capture_frame, frame); 71 | Ok(()) 72 | } 73 | 74 | /// Processes and modifies the audio frame from a capture device by applying 75 | /// signal processing as specified in the config. `frame` should be a Vec of 76 | /// length 'num_capture_channels', with each inner Vec representing a channel 77 | /// with NUM_SAMPLES_PER_FRAME samples. 78 | pub fn process_capture_frame_noninterleaved( 79 | &mut self, 80 | frame: &mut [Vec], 81 | ) -> Result<(), Error> { 82 | self.inner.process_capture_frame(frame) 83 | } 84 | 85 | /// Processes and optionally modifies the audio frame from a playback device. 86 | /// `frame` should hold an interleaved `f32` audio frame, with 87 | /// `NUM_SAMPLES_PER_FRAME` samples. 88 | pub fn process_render_frame(&mut self, frame: &mut [f32]) -> Result<(), Error> { 89 | Self::deinterleave(frame, &mut self.deinterleaved_render_frame); 90 | self.inner.process_render_frame(&mut self.deinterleaved_render_frame)?; 91 | Self::interleave(&self.deinterleaved_render_frame, frame); 92 | Ok(()) 93 | } 94 | 95 | /// Processes and optionally modifies the audio frame from a playback device. 96 | /// `frame` should be a Vec of length 'num_render_channels', with each inner Vec 97 | /// representing a channel with NUM_SAMPLES_PER_FRAME samples. 98 | pub fn process_render_frame_noninterleaved( 99 | &mut self, 100 | frame: &mut [Vec], 101 | ) -> Result<(), Error> { 102 | self.inner.process_render_frame(frame) 103 | } 104 | 105 | /// Returns statistics from the last `process_capture_frame()` call. 106 | pub fn get_stats(&self) -> Stats { 107 | self.inner.get_stats() 108 | } 109 | 110 | /// Immediately updates the configurations of the internal signal processor. 
111 | /// May be called multiple times after the initialization and during 112 | /// processing. 113 | pub fn set_config(&mut self, config: Config) { 114 | self.inner.set_config(config); 115 | } 116 | 117 | /// Signals the AEC and AGC that the audio output will be / is muted. 118 | /// They may use the hint to improve their parameter adaptation. 119 | pub fn set_output_will_be_muted(&self, muted: bool) { 120 | self.inner.set_output_will_be_muted(muted); 121 | } 122 | 123 | /// Signals the AEC and AGC that the next frame will contain key press sound 124 | pub fn set_stream_key_pressed(&self, pressed: bool) { 125 | self.inner.set_stream_key_pressed(pressed); 126 | } 127 | 128 | /// De-interleaves multi-channel frame `src` into `dst`. 129 | /// 130 | /// ```text 131 | /// e.g. A stereo frame with 3 samples: 132 | /// 133 | /// Interleaved 134 | /// +---+---+---+---+---+---+ 135 | /// |L0 |R0 |L1 |R1 |L2 |R2 | 136 | /// +---+---+---+---+---+---+ 137 | /// 138 | /// Non-interleaved 139 | /// +---+---+---+ 140 | /// |L0 |L1 |L2 | 141 | /// +---+---+---+ 142 | /// |R0 |R1 |R2 | 143 | /// +---+---+---+ 144 | /// ``` 145 | fn deinterleave>(src: &[f32], dst: &mut [T]) { 146 | let num_channels = dst.len(); 147 | let num_samples = dst[0].as_mut().len(); 148 | assert_eq!(src.len(), num_channels * num_samples); 149 | for channel_index in 0..num_channels { 150 | for sample_index in 0..num_samples { 151 | dst[channel_index].as_mut()[sample_index] = 152 | src[num_channels * sample_index + channel_index]; 153 | } 154 | } 155 | } 156 | 157 | /// Reverts the `deinterleave` operation. 
158 | fn interleave>(src: &[T], dst: &mut [f32]) { 159 | let num_channels = src.len(); 160 | let num_samples = src[0].as_ref().len(); 161 | assert_eq!(dst.len(), num_channels * num_samples); 162 | for channel_index in 0..num_channels { 163 | for sample_index in 0..num_samples { 164 | dst[num_channels * sample_index + channel_index] = 165 | src[channel_index].as_ref()[sample_index]; 166 | } 167 | } 168 | } 169 | } 170 | 171 | /// Minimal wrapper for safe and synchronized ffi. 172 | struct AudioProcessing { 173 | inner: *mut ffi::AudioProcessing, 174 | } 175 | 176 | impl AudioProcessing { 177 | fn new(config: &ffi::InitializationConfig) -> Result { 178 | let mut code = 0; 179 | let inner = unsafe { ffi::audio_processing_create(config, &mut code) }; 180 | if !inner.is_null() { 181 | Ok(Self { inner }) 182 | } else { 183 | Err(Error { code }) 184 | } 185 | } 186 | 187 | fn process_capture_frame(&self, frame: &mut [Vec]) -> Result<(), Error> { 188 | let mut frame_ptr = frame.iter_mut().map(|v| v.as_mut_ptr()).collect::>(); 189 | unsafe { 190 | let code = ffi::process_capture_frame(self.inner, frame_ptr.as_mut_ptr()); 191 | if ffi::is_success(code) { 192 | Ok(()) 193 | } else { 194 | Err(Error { code }) 195 | } 196 | } 197 | } 198 | 199 | fn process_render_frame(&self, frame: &mut [Vec]) -> Result<(), Error> { 200 | let mut frame_ptr = frame.iter_mut().map(|v| v.as_mut_ptr()).collect::>(); 201 | unsafe { 202 | let code = ffi::process_render_frame(self.inner, frame_ptr.as_mut_ptr()); 203 | if ffi::is_success(code) { 204 | Ok(()) 205 | } else { 206 | Err(Error { code }) 207 | } 208 | } 209 | } 210 | 211 | fn get_stats(&self) -> Stats { 212 | unsafe { ffi::get_stats(self.inner).into() } 213 | } 214 | 215 | fn set_config(&self, config: Config) { 216 | unsafe { 217 | ffi::set_config(self.inner, &config.into()); 218 | } 219 | } 220 | 221 | fn set_output_will_be_muted(&self, muted: bool) { 222 | unsafe { 223 | ffi::set_output_will_be_muted(self.inner, muted); 224 | } 225 | } 
226 | 227 | fn set_stream_key_pressed(&self, pressed: bool) { 228 | unsafe { 229 | ffi::set_stream_key_pressed(self.inner, pressed); 230 | } 231 | } 232 | } 233 | 234 | impl Drop for AudioProcessing { 235 | fn drop(&mut self) { 236 | unsafe { 237 | ffi::audio_processing_delete(self.inner); 238 | } 239 | } 240 | } 241 | 242 | // ffi::AudioProcessing provides thread safety with a few exceptions around 243 | // the concurrent usage of its getters and setters e.g. `set_stream_delay_ms()`. 244 | unsafe impl Sync for AudioProcessing {} 245 | unsafe impl Send for AudioProcessing {} 246 | 247 | #[cfg(test)] 248 | mod tests { 249 | use super::*; 250 | use std::{thread, time::Duration}; 251 | 252 | #[test] 253 | fn test_create_failure() { 254 | let config = 255 | InitializationConfig { num_capture_channels: 0, ..InitializationConfig::default() }; 256 | assert!(Processor::new(&config).is_err()); 257 | } 258 | 259 | #[test] 260 | fn test_create_drop() { 261 | let config = InitializationConfig { 262 | num_capture_channels: 1, 263 | num_render_channels: 1, 264 | ..InitializationConfig::default() 265 | }; 266 | let _p = Processor::new(&config).unwrap(); 267 | } 268 | 269 | #[test] 270 | fn test_deinterleave_interleave() { 271 | let num_channels = 2usize; 272 | let num_samples = 3usize; 273 | 274 | let interleaved = (0..num_channels * num_samples).map(|v| v as f32).collect::>(); 275 | let mut deinterleaved = vec![vec![-1f32; num_samples]; num_channels]; 276 | Processor::deinterleave(&interleaved, &mut deinterleaved); 277 | assert_eq!(vec![vec![0f32, 2f32, 4f32], vec![1f32, 3f32, 5f32]], deinterleaved); 278 | 279 | let mut interleaved_out = vec![-1f32; num_samples * num_channels]; 280 | Processor::interleave(&deinterleaved, &mut interleaved_out); 281 | assert_eq!(interleaved, interleaved_out); 282 | } 283 | 284 | fn sample_stereo_frames() -> (Vec, Vec) { 285 | let num_samples_per_frame = NUM_SAMPLES_PER_FRAME as usize; 286 | 287 | // Stereo frame with a lower frequency cosine wave. 
288 | let mut render_frame = Vec::with_capacity(num_samples_per_frame * 2); 289 | for i in 0..num_samples_per_frame { 290 | render_frame.push((i as f32 / 40.0).cos() * 0.4); 291 | render_frame.push((i as f32 / 40.0).cos() * 0.2); 292 | } 293 | 294 | // Stereo frame with a higher frequency sine wave, mixed with the cosine 295 | // wave from render frame. 296 | let mut capture_frame = Vec::with_capacity(num_samples_per_frame * 2); 297 | for i in 0..num_samples_per_frame { 298 | capture_frame.push((i as f32 / 20.0).sin() * 0.4 + render_frame[i * 2] * 0.2); 299 | capture_frame.push((i as f32 / 20.0).sin() * 0.2 + render_frame[i * 2 + 1] * 0.2); 300 | } 301 | 302 | (render_frame, capture_frame) 303 | } 304 | 305 | #[test] 306 | fn test_nominal() { 307 | let config = InitializationConfig { 308 | num_capture_channels: 2, 309 | num_render_channels: 2, 310 | ..InitializationConfig::default() 311 | }; 312 | let mut ap = Processor::new(&config).unwrap(); 313 | 314 | let config = Config { 315 | echo_cancellation: Some(EchoCancellation { 316 | suppression_level: EchoCancellationSuppressionLevel::High, 317 | stream_delay_ms: None, 318 | enable_delay_agnostic: false, 319 | enable_extended_filter: false, 320 | }), 321 | ..Config::default() 322 | }; 323 | ap.set_config(config); 324 | 325 | let (render_frame, capture_frame) = sample_stereo_frames(); 326 | 327 | let mut render_frame_output = render_frame.clone(); 328 | ap.process_render_frame(&mut render_frame_output).unwrap(); 329 | 330 | // Render frame should not be modified. 331 | assert_eq!(render_frame, render_frame_output); 332 | 333 | let mut capture_frame_output = capture_frame.clone(); 334 | ap.process_capture_frame(&mut capture_frame_output).unwrap(); 335 | 336 | // Echo cancellation should have modified the capture frame. 337 | // We don't validate how it's modified. Out of scope for this unit test. 
338 | assert_ne!(capture_frame, capture_frame_output); 339 | 340 | let stats = ap.get_stats(); 341 | assert!(stats.echo_return_loss.is_some()); 342 | println!("{:#?}", stats); 343 | } 344 | 345 | #[test] 346 | #[ignore] 347 | fn test_nominal_threaded() { 348 | let config = InitializationConfig { 349 | num_capture_channels: 2, 350 | num_render_channels: 2, 351 | ..InitializationConfig::default() 352 | }; 353 | let ap = Processor::new(&config).unwrap(); 354 | 355 | let (render_frame, capture_frame) = sample_stereo_frames(); 356 | 357 | let mut config_ap = ap.clone(); 358 | let config_thread = thread::spawn(move || { 359 | thread::sleep(Duration::from_millis(100)); 360 | 361 | let config = Config { 362 | echo_cancellation: Some(EchoCancellation { 363 | suppression_level: EchoCancellationSuppressionLevel::High, 364 | stream_delay_ms: None, 365 | enable_delay_agnostic: false, 366 | enable_extended_filter: false, 367 | }), 368 | ..Config::default() 369 | }; 370 | config_ap.set_config(config); 371 | }); 372 | 373 | let mut render_ap = ap.clone(); 374 | let render_thread = thread::spawn(move || { 375 | for _ in 0..100 { 376 | let mut render_frame_output = render_frame.clone(); 377 | render_ap.process_render_frame(&mut render_frame_output).unwrap(); 378 | 379 | thread::sleep(Duration::from_millis(10)); 380 | } 381 | }); 382 | 383 | let mut capture_ap = ap.clone(); 384 | let capture_thread = thread::spawn(move || { 385 | for i in 0..100 { 386 | let mut capture_frame_output = capture_frame.clone(); 387 | capture_ap.process_capture_frame(&mut capture_frame_output).unwrap(); 388 | 389 | let stats = capture_ap.get_stats(); 390 | if i < 5 { 391 | // first 50ms 392 | assert!(stats.echo_return_loss.is_none()); 393 | } else if i >= 95 { 394 | // last 50ms 395 | assert!(stats.echo_return_loss.is_some()); 396 | } 397 | 398 | thread::sleep(Duration::from_millis(10)); 399 | } 400 | }); 401 | 402 | config_thread.join().unwrap(); 403 | render_thread.join().unwrap(); 404 | 
capture_thread.join().unwrap(); 405 | } 406 | 407 | #[test] 408 | fn test_tweak_processor_params() { 409 | let config = InitializationConfig { 410 | num_capture_channels: 2, 411 | num_render_channels: 2, 412 | ..InitializationConfig::default() 413 | }; 414 | let mut ap = Processor::new(&config).unwrap(); 415 | 416 | // tweak params outside of config 417 | ap.set_output_will_be_muted(true); 418 | ap.set_stream_key_pressed(true); 419 | 420 | // test one process call 421 | let (render_frame, capture_frame) = sample_stereo_frames(); 422 | 423 | let mut render_frame_output = render_frame.clone(); 424 | ap.process_render_frame(&mut render_frame_output).unwrap(); 425 | let mut capture_frame_output = capture_frame.clone(); 426 | ap.process_capture_frame(&mut capture_frame_output).unwrap(); 427 | 428 | // it shouldn't crash 429 | } 430 | } 431 | -------------------------------------------------------------------------------- /webrtc-audio-processing-sys/COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011, Google Inc. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in 12 | the documentation and/or other materials provided with the 13 | distribution. 14 | 15 | * Neither the name of Google nor the names of its contributors may 16 | be used to endorse or promote products derived from this software 17 | without specific prior written permission. 
18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /webrtc-audio-processing-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "webrtc-audio-processing-sys" 3 | description = "A wrapper for WebRTC's AudioProcessing module." 
4 | documentation = "https://docs.rs/webrtc-audio-processing-sys" 5 | version = "0.4.0" 6 | authors = ["Ryo Kawaguchi "] 7 | repository = "https://github.com/tonarino/webrtc-audio-processing" 8 | edition = "2018" 9 | links = "webrtc-audio-processing" 10 | build = "build.rs" 11 | keywords = ["ffi"] 12 | categories = ["multimedia::audio"] 13 | license-file = "COPYING" 14 | 15 | [badges] 16 | travis-ci = { repository = "tonarino/webrtc-audio-processing", branch = "master" } 17 | maintenance = { status = "actively-developed" } 18 | 19 | [features] 20 | derive_serde = ["serde"] 21 | bundled = [] 22 | 23 | [build-dependencies] 24 | anyhow = "1" 25 | autotools = "0.2" 26 | bindgen = "0" 27 | cc = "1" 28 | fs_extra = "1" 29 | regex = "1" 30 | pkg-config = "0.3" 31 | 32 | [dependencies] 33 | serde = { version = "1", features = ["derive"], optional = true } 34 | 35 | [package.metadata.docs.rs] 36 | features = ["bundled"] 37 | -------------------------------------------------------------------------------- /webrtc-audio-processing-sys/README.md: -------------------------------------------------------------------------------- 1 | # webrtc-audio-processing-sys 2 | [![Crates.io](https://img.shields.io/crates/v/webrtc-audio-processing-sys.svg)](https://crates.io/crates/webrtc-audio-processing-sys) 3 | [![Docs.rs](https://docs.rs/webrtc-audio-processing-sys/badge.svg)](https://docs.rs/webrtc-audio-processing-sys) 4 | [![Build Status](https://travis-ci.org/tonarino/webrtc-audio-processing.svg?branch=master)](https://travis-ci.org/tonarino/webrtc-audio-processing) 5 | [![dependency status](https://deps.rs/repo/github/tonarino/webrtc-audio-processing/status.svg)](https://deps.rs/repo/github/tonarino/webrtc-audio-processing) 6 | 7 | A wrapper around [PulseAudio's repackaging of WebRTC's AudioProcessing module](https://www.freedesktop.org/software/pulseaudio/webrtc-audio-processing/). 
8 | 9 | ## Building 10 | 11 | ### Dynamic linking 12 | 13 | By default the build will attempt to dynamically link with the library installed via your OS's package manager. 14 | 15 | You can specify an include path yourself by setting the environment variable `WEBRTC_AUDIO_PROCESSING_INCLUDE`. 16 | 17 | ### Packages 18 | 19 | ```sh 20 | sudo apt install webrtc-audio-processing-dev # Ubuntu/Debian 21 | sudo pacman -S webrtc-audio-processing # Arch 22 | ``` 23 | 24 | ### Static linking 25 | 26 | Static linking can be enabled with the `bundled` feature flag. 27 | 28 | The following tools are needed in order to use the `bundled` feature flag: 29 | 30 | * libtool (`$ sudo apt install libtool`) 31 | * autotools (`$ sudo apt install autotools-dev`) 32 | -------------------------------------------------------------------------------- /webrtc-audio-processing-sys/build.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use regex::Regex; 3 | use std::{ 4 | env, 5 | fs::File, 6 | io::{Read, Write}, 7 | path::{Path, PathBuf}, 8 | }; 9 | 10 | const DEPLOYMENT_TARGET_VAR: &str = "MACOSX_DEPLOYMENT_TARGET"; 11 | 12 | fn out_dir() -> PathBuf { 13 | std::env::var("OUT_DIR").expect("OUT_DIR environment var not set.").into() 14 | } 15 | 16 | #[cfg(not(feature = "bundled"))] 17 | mod webrtc { 18 | use super::*; 19 | use anyhow::bail; 20 | 21 | const LIB_NAME: &str = "webrtc-audio-processing"; 22 | 23 | pub(super) fn get_build_paths() -> Result<(PathBuf, PathBuf)> { 24 | let (pkgconfig_include_path, pkgconfig_lib_path) = find_pkgconfig_paths()?; 25 | 26 | let include_path = std::env::var("WEBRTC_AUDIO_PROCESSING_INCLUDE") 27 | .ok() 28 | .map(|x| x.into()) 29 | .or(pkgconfig_include_path); 30 | let lib_path = std::env::var("WEBRTC_AUDIO_PROCESSING_LIB") 31 | .ok() 32 | .map(|x| x.into()) 33 | .or(pkgconfig_lib_path); 34 | 35 | println!("{:?}, {:?}", include_path, lib_path); 36 | 37 | match (include_path, lib_path) { 38 | 
(Some(include_path), Some(lib_path)) => Ok((include_path, lib_path)), 39 | _ => { 40 | eprintln!("Couldn't find either header or lib files for {}.", LIB_NAME); 41 | eprintln!("See the crate README for installation instructions, or use the 'bundled' feature to statically compile."); 42 | bail!("Aborting compilation due to linker failure."); 43 | }, 44 | } 45 | } 46 | 47 | pub(super) fn build_if_necessary() -> Result<()> { 48 | Ok(()) 49 | } 50 | 51 | fn find_pkgconfig_paths() -> Result<(Option, Option)> { 52 | Ok(pkg_config::Config::new() 53 | .probe(LIB_NAME) 54 | .and_then(|mut lib| Ok((lib.include_paths.pop(), lib.link_paths.pop())))?) 55 | } 56 | } 57 | 58 | #[cfg(feature = "bundled")] 59 | mod webrtc { 60 | use super::*; 61 | use anyhow::{anyhow, bail}; 62 | 63 | const BUNDLED_SOURCE_PATH: &str = "./webrtc-audio-processing"; 64 | 65 | pub(super) fn get_build_paths() -> Result<(PathBuf, PathBuf)> { 66 | let include_path = out_dir().join(BUNDLED_SOURCE_PATH); 67 | let lib_path = out_dir().join("lib"); 68 | Ok((include_path, lib_path)) 69 | } 70 | 71 | fn copy_source_to_out_dir() -> Result { 72 | use fs_extra::dir::CopyOptions; 73 | 74 | if Path::new(BUNDLED_SOURCE_PATH).read_dir()?.next().is_none() { 75 | eprintln!("The webrtc-audio-processing source directory is empty."); 76 | eprintln!("See the crate README for installation instructions."); 77 | eprintln!("Remember to clone the repo recursively if building from source."); 78 | bail!("Aborting compilation because bundled source directory is empty."); 79 | } 80 | 81 | let out_dir = out_dir(); 82 | let mut options = CopyOptions::new(); 83 | options.overwrite = true; 84 | 85 | fs_extra::dir::copy(BUNDLED_SOURCE_PATH, &out_dir, &options)?; 86 | 87 | Ok(out_dir.join(BUNDLED_SOURCE_PATH)) 88 | } 89 | 90 | pub(super) fn build_if_necessary() -> Result<()> { 91 | let build_dir = copy_source_to_out_dir()?; 92 | 93 | if cfg!(target_os = "macos") { 94 | run_command(&build_dir, "glibtoolize", None)?; 95 | } else { 96 | 
run_command(&build_dir, "libtoolize", None)?; 97 | } 98 | 99 | run_command(&build_dir, "aclocal", None)?; 100 | run_command(&build_dir, "automake", Some(&["--add-missing", "--copy"]))?; 101 | run_command(&build_dir, "autoconf", None)?; 102 | 103 | let target = std::env::var("TARGET").unwrap(); 104 | autotools::Config::new(build_dir) 105 | .cflag("-fPIC") 106 | .cxxflag("-fPIC") 107 | .config_option("host", Some(&target)) 108 | .disable_shared() 109 | .enable_static() 110 | .build(); 111 | 112 | Ok(()) 113 | } 114 | 115 | fn run_command>( 116 | curr_dir: P, 117 | cmd: &str, 118 | args_opt: Option<&[&str]>, 119 | ) -> Result<()> { 120 | let mut command = std::process::Command::new(cmd); 121 | 122 | command.current_dir(curr_dir); 123 | 124 | if let Some(args) = args_opt { 125 | command.args(args); 126 | } 127 | 128 | let _output = command.output().map_err(|e| { 129 | anyhow!("Error running command '{}' with args '{:?}' - {:?}", cmd, args_opt, e) 130 | })?; 131 | 132 | Ok(()) 133 | } 134 | } 135 | 136 | // TODO: Consider fixing this with the upstream. 137 | // https://github.com/rust-lang/rust-bindgen/issues/1089 138 | // https://github.com/rust-lang/rust-bindgen/issues/1301 139 | fn derive_serde(binding_file: &Path) -> Result<()> { 140 | let mut contents = String::new(); 141 | File::open(binding_file)?.read_to_string(&mut contents)?; 142 | 143 | let new_contents = format!( 144 | "use serde::{{Serialize, Deserialize}};\n{}", 145 | Regex::new(r"#\s*\[\s*derive\s*\((?P[^)]+)\)\s*\]\s*pub\s*(?Pstruct|enum)")? 
146 | .replace_all(&contents, "#[derive($d, Serialize, Deserialize)] pub $s") 147 | ); 148 | 149 | File::create(&binding_file)?.write_all(new_contents.as_bytes())?; 150 | 151 | Ok(()) 152 | } 153 | 154 | fn main() -> Result<()> { 155 | webrtc::build_if_necessary()?; 156 | let (webrtc_include, webrtc_lib) = webrtc::get_build_paths()?; 157 | 158 | let mut cc_build = cc::Build::new(); 159 | 160 | // set mac minimum version 161 | if cfg!(target_os = "macos") { 162 | let min_version = match env::var(DEPLOYMENT_TARGET_VAR) { 163 | Ok(ver) => ver, 164 | Err(_) => { 165 | String::from(match std::env::var("CARGO_CFG_TARGET_ARCH").unwrap().as_str() { 166 | "x86_64" => "10.10", // Using what I found here https://github.com/webrtc-uwp/chromium-build/blob/master/config/mac/mac_sdk.gni#L17 167 | "aarch64" => "11.0", // Apple silicon started here. 168 | arch => panic!("unknown arch: {}", arch), 169 | }) 170 | }, 171 | }; 172 | 173 | // `cc` doesn't try to pick up on this automatically, but `clang` needs it to 174 | // generate a "correct" Objective-C symbol table which better matches XCode. 175 | // See https://github.com/h4llow3En/mac-notification-sys/issues/45. 
176 | cc_build.flag(&format!("-mmacos-version-min={}", min_version)); 177 | } 178 | 179 | cc_build 180 | .cpp(true) 181 | .file("src/wrapper.cpp") 182 | .include(&webrtc_include) 183 | .flag("-Wno-unused-parameter") 184 | .flag("-Wno-deprecated-declarations") 185 | .flag("-std=c++11") 186 | .out_dir(&out_dir()) 187 | .compile("webrtc_audio_processing_wrapper"); 188 | 189 | println!("cargo:rustc-link-search=native={}", webrtc_lib.display()); 190 | println!("cargo:rustc-link-lib=static=webrtc_audio_processing_wrapper"); 191 | 192 | println!("cargo:rerun-if-env-changed={}", DEPLOYMENT_TARGET_VAR); 193 | 194 | if cfg!(feature = "bundled") { 195 | println!("cargo:rustc-link-lib=static=webrtc_audio_processing"); 196 | } else { 197 | println!("cargo:rustc-link-lib=dylib=webrtc_audio_processing"); 198 | } 199 | 200 | if cfg!(target_os = "macos") { 201 | println!("cargo:rustc-link-lib=dylib=c++"); 202 | } else { 203 | println!("cargo:rustc-link-lib=dylib=stdc++"); 204 | } 205 | 206 | let binding_file = out_dir().join("bindings.rs"); 207 | bindgen::Builder::default() 208 | .header("src/wrapper.hpp") 209 | .generate_comments(true) 210 | .rustified_enum(".*") 211 | .derive_debug(true) 212 | .derive_default(true) 213 | .derive_partialeq(true) 214 | .clang_arg(&format!("-I{}", &webrtc_include.display())) 215 | .disable_name_namespacing() 216 | .generate() 217 | .expect("Unable to generate bindings") 218 | .write_to_file(&binding_file) 219 | .expect("Couldn't write bindings!"); 220 | 221 | if cfg!(feature = "derive_serde") { 222 | derive_serde(&binding_file).expect("Failed to modify derive macros"); 223 | } 224 | 225 | Ok(()) 226 | } 227 | -------------------------------------------------------------------------------- /webrtc-audio-processing-sys/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_upper_case_globals)] 2 | #![allow(non_camel_case_types)] 3 | #![allow(non_snake_case)] 4 | 5 | 
include!(concat!(env!("OUT_DIR"), "/bindings.rs")); 6 | 7 | impl Into> for OptionalBool { 8 | fn into(self) -> Option { 9 | if self.has_value { 10 | Some(self.value) 11 | } else { 12 | None 13 | } 14 | } 15 | } 16 | 17 | impl From> for OptionalBool { 18 | fn from(other: Option) -> OptionalBool { 19 | if let Some(value) = other { 20 | OptionalBool { has_value: true, value } 21 | } else { 22 | OptionalBool { has_value: false, value: false } 23 | } 24 | } 25 | } 26 | 27 | impl Into> for OptionalInt { 28 | fn into(self) -> Option { 29 | if self.has_value { 30 | Some(self.value) 31 | } else { 32 | None 33 | } 34 | } 35 | } 36 | 37 | impl From> for OptionalInt { 38 | fn from(other: Option) -> OptionalInt { 39 | if let Some(value) = other { 40 | OptionalInt { has_value: true, value } 41 | } else { 42 | OptionalInt { has_value: false, value: 0 } 43 | } 44 | } 45 | } 46 | 47 | impl Into> for OptionalDouble { 48 | fn into(self) -> Option { 49 | if self.has_value { 50 | Some(self.value) 51 | } else { 52 | None 53 | } 54 | } 55 | } 56 | 57 | impl From> for OptionalDouble { 58 | fn from(other: Option) -> OptionalDouble { 59 | if let Some(value) = other { 60 | OptionalDouble { has_value: true, value } 61 | } else { 62 | OptionalDouble { has_value: false, value: 0.0 } 63 | } 64 | } 65 | } 66 | 67 | #[cfg(test)] 68 | mod tests { 69 | use super::*; 70 | 71 | fn init_config_with_all_enabled() -> InitializationConfig { 72 | InitializationConfig { 73 | num_capture_channels: 1, 74 | num_render_channels: 1, 75 | enable_experimental_agc: true, 76 | enable_intelligibility_enhancer: true, 77 | } 78 | } 79 | 80 | fn config_with_all_enabled() -> Config { 81 | Config { 82 | echo_cancellation: EchoCancellation { 83 | enable: true, 84 | suppression_level: EchoCancellation_SuppressionLevel::HIGH, 85 | }, 86 | gain_control: GainControl { 87 | enable: true, 88 | target_level_dbfs: 3, 89 | compression_gain_db: 3, 90 | enable_limiter: true, 91 | }, 92 | noise_suppression: NoiseSuppression { 93 | 
enable: true, 94 | suppression_level: NoiseSuppression_SuppressionLevel::HIGH, 95 | }, 96 | voice_detection: VoiceDetection { 97 | enable: true, 98 | detection_likelihood: VoiceDetection_DetectionLikelihood::HIGH, 99 | }, 100 | enable_extended_filter: true, 101 | enable_delay_agnostic: true, 102 | enable_transient_suppressor: true, 103 | enable_high_pass_filter: true, 104 | } 105 | } 106 | 107 | #[test] 108 | fn test_create_failure() { 109 | unsafe { 110 | let config = InitializationConfig::default(); 111 | let mut error = 0; 112 | let ap = audio_processing_create(&config, &mut error); 113 | assert!(ap.is_null()); 114 | assert!(!is_success(error)); 115 | } 116 | } 117 | 118 | #[test] 119 | fn test_create_delete() { 120 | unsafe { 121 | let config = InitializationConfig { 122 | num_capture_channels: 1, 123 | num_render_channels: 1, 124 | ..InitializationConfig::default() 125 | }; 126 | let mut error = 0; 127 | let ap = audio_processing_create(&config, &mut error); 128 | assert!(!ap.is_null()); 129 | assert!(is_success(error)); 130 | audio_processing_delete(ap); 131 | } 132 | } 133 | 134 | #[test] 135 | fn test_config() { 136 | unsafe { 137 | let mut error = 0; 138 | let ap = audio_processing_create(&init_config_with_all_enabled(), &mut error); 139 | assert!(!ap.is_null()); 140 | assert!(is_success(error)); 141 | 142 | let config = Config::default(); 143 | set_config(ap, &config); 144 | 145 | let config = config_with_all_enabled(); 146 | set_config(ap, &config); 147 | 148 | audio_processing_delete(ap); 149 | } 150 | } 151 | 152 | #[test] 153 | fn test_process() { 154 | unsafe { 155 | let mut error = 0; 156 | let ap = audio_processing_create(&init_config_with_all_enabled(), &mut error); 157 | assert!(!ap.is_null()); 158 | assert!(is_success(error)); 159 | 160 | let config = config_with_all_enabled(); 161 | set_config(ap, &config); 162 | 163 | let mut frame = vec![vec![0f32; NUM_SAMPLES_PER_FRAME as usize]; 1]; 164 | let mut frame_ptr = frame.iter_mut().map(|v| 
v.as_mut_ptr()).collect::>(); 165 | assert!(is_success(process_render_frame(ap, frame_ptr.as_mut_ptr()))); 166 | assert!(is_success(process_capture_frame(ap, frame_ptr.as_mut_ptr()))); 167 | 168 | audio_processing_delete(ap); 169 | } 170 | } 171 | 172 | #[test] 173 | fn test_empty_stats() { 174 | unsafe { 175 | let config = InitializationConfig { 176 | num_capture_channels: 1, 177 | num_render_channels: 1, 178 | ..InitializationConfig::default() 179 | }; 180 | let mut error = 0; 181 | let ap = audio_processing_create(&config, &mut error); 182 | assert!(!ap.is_null()); 183 | assert!(is_success(error)); 184 | 185 | let stats = get_stats(ap); 186 | println!("Stats:\n{:#?}", stats); 187 | assert!(!stats.has_voice.has_value); 188 | assert!(!stats.has_echo.has_value); 189 | assert!(!stats.rms_dbfs.has_value); 190 | assert!(!stats.speech_probability.has_value); 191 | assert!(!stats.residual_echo_return_loss.has_value); 192 | assert!(!stats.echo_return_loss.has_value); 193 | assert!(!stats.echo_return_loss_enhancement.has_value); 194 | assert!(!stats.a_nlp.has_value); 195 | assert!(!stats.delay_median_ms.has_value); 196 | assert!(!stats.delay_standard_deviation_ms.has_value); 197 | assert!(!stats.delay_fraction_poor_delays.has_value); 198 | 199 | audio_processing_delete(ap); 200 | } 201 | } 202 | 203 | #[test] 204 | fn test_some_stats() { 205 | unsafe { 206 | let mut error = 0; 207 | let ap = audio_processing_create(&init_config_with_all_enabled(), &mut error); 208 | assert!(!ap.is_null()); 209 | assert!(is_success(error)); 210 | 211 | let config = config_with_all_enabled(); 212 | set_config(ap, &config); 213 | 214 | let mut frame = vec![vec![0f32; NUM_SAMPLES_PER_FRAME as usize]; 1]; 215 | let mut frame_ptr = frame.iter_mut().map(|v| v.as_mut_ptr()).collect::>(); 216 | assert!(is_success(process_render_frame(ap, frame_ptr.as_mut_ptr()))); 217 | assert!(is_success(process_capture_frame(ap, frame_ptr.as_mut_ptr()))); 218 | let stats = get_stats(ap); 219 | 
println!("Stats:\n{:#?}", stats); 220 | assert!(stats.has_voice.has_value); 221 | assert!(stats.has_echo.has_value); 222 | assert!(stats.rms_dbfs.has_value); 223 | assert!(stats.speech_probability.has_value); 224 | assert!(stats.residual_echo_return_loss.has_value); 225 | assert!(stats.echo_return_loss.has_value); 226 | assert!(stats.echo_return_loss_enhancement.has_value); 227 | assert!(stats.a_nlp.has_value); 228 | assert!(stats.delay_median_ms.has_value); 229 | assert!(stats.delay_standard_deviation_ms.has_value); 230 | assert!(stats.delay_fraction_poor_delays.has_value); 231 | 232 | audio_processing_delete(ap); 233 | } 234 | } 235 | } 236 | -------------------------------------------------------------------------------- /webrtc-audio-processing-sys/src/wrapper.cpp: -------------------------------------------------------------------------------- 1 | // TODO(ryo): Add TraceCallback. 2 | 3 | #include "wrapper.hpp" 4 | 5 | #include 6 | #include 7 | 8 | #define WEBRTC_POSIX 9 | #define WEBRTC_AUDIO_PROCESSING_ONLY_BUILD 10 | 11 | #include 12 | #include 13 | 14 | namespace webrtc_audio_processing { 15 | namespace { 16 | 17 | // This is the default that Chromium uses. 
18 | const int AGC_STARTUP_MIN_VOLUME = 85; 19 | 20 | OptionalDouble make_optional_double(const double value) { 21 | OptionalDouble rv; 22 | rv.has_value = true; 23 | rv.value = value; 24 | return rv; 25 | } 26 | 27 | OptionalInt make_optional_int(const int value) { 28 | OptionalInt rv; 29 | rv.has_value = true; 30 | rv.value = value; 31 | return rv; 32 | } 33 | 34 | OptionalBool make_optional_bool(const bool value) { 35 | OptionalBool rv; 36 | rv.has_value = true; 37 | rv.value = value; 38 | return rv; 39 | } 40 | 41 | } // namespace 42 | 43 | struct AudioProcessing { 44 | std::unique_ptr processor; 45 | webrtc::StreamConfig capture_stream_config; 46 | webrtc::StreamConfig render_stream_config; 47 | OptionalInt stream_delay_ms; 48 | }; 49 | 50 | AudioProcessing* audio_processing_create( 51 | const InitializationConfig& init_config, 52 | int* error) { 53 | webrtc::Config config; 54 | if (init_config.enable_experimental_agc) { 55 | config.Set( 56 | new webrtc::ExperimentalAgc(true, AGC_STARTUP_MIN_VOLUME)); 57 | } 58 | if (init_config.enable_intelligibility_enhancer) { 59 | config.Set(new webrtc::Intelligibility(true)); 60 | } 61 | // TODO(ryo): Experiment with the webrtc's builtin beamformer. There are some 62 | // preconditions; see |ec_fixate_spec()| in the pulseaudio's example. 
63 | 64 | AudioProcessing* ap = new AudioProcessing; 65 | ap->processor.reset(webrtc::AudioProcessing::Create(config)); 66 | 67 | const bool has_keyboard = false; 68 | ap->capture_stream_config = webrtc::StreamConfig( 69 | SAMPLE_RATE_HZ, init_config.num_capture_channels, has_keyboard); 70 | ap->render_stream_config = webrtc::StreamConfig( 71 | SAMPLE_RATE_HZ, init_config.num_render_channels, has_keyboard); 72 | 73 | webrtc::ProcessingConfig pconfig = { 74 | ap->capture_stream_config, 75 | ap->capture_stream_config, 76 | ap->render_stream_config, 77 | ap->render_stream_config, 78 | }; 79 | const int code = ap->processor->Initialize(pconfig); 80 | if (code != webrtc::AudioProcessing::kNoError) { 81 | *error = code; 82 | delete ap; 83 | return nullptr; 84 | } 85 | 86 | return ap; 87 | } 88 | 89 | int process_capture_frame(AudioProcessing* ap, float** channels) { 90 | auto* p = ap->processor.get(); 91 | 92 | if (p->echo_cancellation()->is_enabled()) { 93 | p->set_stream_delay_ms( 94 | ap->stream_delay_ms.has_value ? 
ap->stream_delay_ms.value : 0); 95 | } 96 | 97 | return p->ProcessStream( 98 | channels, ap->capture_stream_config, ap->capture_stream_config, channels); 99 | } 100 | 101 | int process_render_frame(AudioProcessing* ap, float** channels) { 102 | return ap->processor->ProcessReverseStream( 103 | channels, ap->render_stream_config, ap->render_stream_config, channels); 104 | } 105 | 106 | Stats get_stats(AudioProcessing* ap) { 107 | auto* p = ap->processor.get(); 108 | 109 | Stats stats; 110 | if (p->voice_detection()->is_enabled()) { 111 | stats.has_voice = 112 | make_optional_bool(p->voice_detection()->stream_has_voice()); 113 | } 114 | if (p->echo_cancellation()->is_enabled()) { 115 | stats.has_echo = 116 | make_optional_bool(p->echo_cancellation()->stream_has_echo()); 117 | } 118 | if (p->level_estimator()->is_enabled()) { 119 | stats.rms_dbfs = make_optional_int(-1 * p->level_estimator()->RMS()); 120 | } 121 | if (p->noise_suppression()->is_enabled()) { 122 | if (p->noise_suppression()->speech_probability() 123 | != webrtc::AudioProcessing::kUnsupportedFunctionError) { 124 | stats.speech_probability = 125 | make_optional_double(p->noise_suppression()->speech_probability()); 126 | } 127 | // TODO(ryo): NoiseSuppression supports NoiseEstimate function in the latest 128 | // master. 129 | } 130 | 131 | // TODO(ryo): AudioProcessing supports useful GetStatistics function in the 132 | // latest master. 
133 | if (p->echo_cancellation()->is_enabled()) { 134 | webrtc::EchoCancellation::Metrics metrics; 135 | if (p->echo_cancellation()->GetMetrics(&metrics) 136 | == webrtc::AudioProcessing::kNoError) { 137 | stats.residual_echo_return_loss = 138 | make_optional_double(metrics.residual_echo_return_loss.instant); 139 | stats.echo_return_loss = 140 | make_optional_double(metrics.echo_return_loss.instant); 141 | stats.echo_return_loss_enhancement = 142 | make_optional_double(metrics.echo_return_loss_enhancement.instant); 143 | stats.a_nlp = make_optional_double(metrics.a_nlp.instant); 144 | } 145 | 146 | int delay_median_ms = -1; 147 | int delay_stddev_ms = -1; 148 | float fraction_poor_delays = -1; 149 | if (p->echo_cancellation()->GetDelayMetrics( 150 | &delay_median_ms, &delay_stddev_ms, &fraction_poor_delays) 151 | == webrtc::AudioProcessing::kNoError) { 152 | stats.delay_median_ms = make_optional_int(delay_median_ms); 153 | stats.delay_standard_deviation_ms = make_optional_int(delay_stddev_ms); 154 | stats.delay_fraction_poor_delays = 155 | make_optional_double(fraction_poor_delays); 156 | } 157 | } 158 | 159 | return stats; 160 | } 161 | 162 | void set_config(AudioProcessing* ap, const Config& config) { 163 | auto* p = ap->processor.get(); 164 | 165 | webrtc::Config extra_config; 166 | extra_config.Set( 167 | new webrtc::ExtendedFilter( 168 | config.echo_cancellation.enable_extended_filter)); 169 | extra_config.Set( 170 | new webrtc::DelayAgnostic( 171 | !config.echo_cancellation.stream_delay_ms.has_value && 172 | config.echo_cancellation.enable_delay_agnostic)); 173 | extra_config.Set( 174 | new webrtc::ExperimentalNs(config.enable_transient_suppressor)); 175 | // TODO(ryo): There is a new RefinedAdaptiveFilter in the latest master. 176 | p->SetExtraOptions(extra_config); 177 | 178 | // TODO(ryo): Look into EchoCanceller3. 
179 | if (config.echo_cancellation.enable) { 180 | ap->stream_delay_ms = config.echo_cancellation.stream_delay_ms; 181 | // According to the webrtc documentation, drift compensation should not be 182 | // necessary as long as we are using the same audio device for input and 183 | // output. 184 | p->echo_cancellation()->enable_drift_compensation(false); 185 | p->echo_cancellation()->enable_metrics(true); 186 | p->echo_cancellation()->enable_delay_logging(true); 187 | p->echo_cancellation()->set_suppression_level( 188 | static_cast( 189 | config.echo_cancellation.suppression_level)); 190 | p->echo_cancellation()->Enable(true); 191 | } else { 192 | p->echo_cancellation()->Enable(false); 193 | } 194 | 195 | if (config.gain_control.enable) { 196 | p->gain_control()->set_mode( 197 | static_cast(config.gain_control.mode)); 198 | p->gain_control()->set_target_level_dbfs( 199 | config.gain_control.target_level_dbfs); 200 | p->gain_control()->set_compression_gain_db( 201 | config.gain_control.compression_gain_db); 202 | p->gain_control()->enable_limiter(config.gain_control.enable_limiter); 203 | p->gain_control()->Enable(true); 204 | } else { 205 | p->gain_control()->Enable(false); 206 | } 207 | 208 | if (config.noise_suppression.enable) { 209 | p->noise_suppression()->set_level( 210 | static_cast( 211 | config.noise_suppression.suppression_level)); 212 | p->noise_suppression()->Enable(true); 213 | } else { 214 | p->noise_suppression()->Enable(false); 215 | } 216 | 217 | if (config.voice_detection.enable) { 218 | p->voice_detection()->set_likelihood( 219 | static_cast( 220 | config.voice_detection.detection_likelihood)); 221 | p->voice_detection()->set_frame_size_ms(FRAME_MS); 222 | p->voice_detection()->Enable(true); 223 | } else { 224 | p->voice_detection()->Enable(false); 225 | } 226 | 227 | p->high_pass_filter()->Enable(config.enable_high_pass_filter); 228 | 229 | p->level_estimator()->Enable(true); 230 | } 231 | 232 | void set_output_will_be_muted(AudioProcessing* ap, 
bool muted) { 233 | ap->processor->set_output_will_be_muted(muted); 234 | } 235 | 236 | void set_stream_key_pressed(AudioProcessing* ap, bool pressed) { 237 | ap->processor->set_stream_key_pressed(pressed); 238 | } 239 | 240 | void audio_processing_delete(AudioProcessing* ap) { 241 | delete ap; 242 | } 243 | 244 | bool is_success(const int code) { 245 | return code == webrtc::AudioProcessing::kNoError; 246 | } 247 | 248 | } // namespace webrtc_audio_processing 249 | -------------------------------------------------------------------------------- /webrtc-audio-processing-sys/src/wrapper.hpp: -------------------------------------------------------------------------------- 1 | // This is a c++ header file, but we are using minimal c++ constructs and not 2 | // including any complex header files to keep Rust interoperability simple. 3 | 4 | #ifndef WEBRTC_AUDIO_PROCESSING_WRAPPER_HPP_ 5 | #define WEBRTC_AUDIO_PROCESSING_WRAPPER_HPP_ 6 | 7 | namespace webrtc_audio_processing { 8 | 9 | // AudioProcessing accepts only one of 48000, 32000, 16000, and 8000 hz. 10 | // TODO: support multiple sample rates. 11 | const int SAMPLE_RATE_HZ = 48000; 12 | 13 | // AudioProcessing expects each frame to be of fixed 10 ms. 14 | const int FRAME_MS = 10; 15 | 16 | ///
The number of expected samples per frame.
17 | const int NUM_SAMPLES_PER_FRAME = SAMPLE_RATE_HZ * FRAME_MS / 1000; 18 | 19 | struct AudioProcessing; 20 | 21 | struct OptionalDouble { 22 | bool has_value = false; 23 | double value = 0.0; 24 | }; 25 | 26 | struct OptionalInt { 27 | bool has_value = false; 28 | int value = 0; 29 | }; 30 | 31 | struct OptionalBool { 32 | bool has_value = false; 33 | bool value = false; 34 | }; 35 | 36 | ///
A configuration used only when initializing a Processor.
37 | struct InitializationConfig { 38 | int num_capture_channels; 39 | int num_render_channels; 40 | 41 | // TODO: Investigate how it's different from the default gain control and the effect of using the two at the same time. 42 | bool enable_experimental_agc; 43 | 44 | bool enable_intelligibility_enhancer; 45 | }; 46 | 47 | ///
Echo cancellation configuration.
48 | struct EchoCancellation { 49 | ///
Whether to use echo cancellation.
50 | bool enable; 51 | 52 | ///
A level of echo suppression.
53 | enum SuppressionLevel { 54 | LOWEST, 55 | LOWER, 56 | LOW, 57 | MODERATE, 58 | HIGH, 59 | }; 60 | 61 | ///
62 | /// Determines the aggressiveness of the suppressor. A higher level trades off 63 | /// double-talk performance for increased echo suppression. 64 | ///
65 | SuppressionLevel suppression_level; 66 | 67 | ///
68 | /// Use to enable the extended filter mode in the AEC, along with robustness 69 | /// measures around the reported system delays. It comes with a significant 70 | /// increase in AEC complexity, but is much more robust to unreliable reported 71 | /// delays. 72 | ///
73 | bool enable_extended_filter; 74 | 75 | ///
76 | /// Enables delay-agnostic echo cancellation. This feature relies on internally 77 | /// estimated delays between the process and reverse streams, thus not relying 78 | /// on reported system delays. 79 | ///
80 | bool enable_delay_agnostic; 81 | 82 | ///
83 | /// Sets the delay in ms between process_render_frame() receiving a far-end 84 | /// frame and process_capture_frame() receiving a near-end frame containing 85 | /// the corresponding echo. You should set this only if you are certain that 86 | /// the delay will be stable and constant. enable_delay_agnostic will be 87 | /// ignored when this option is set. 88 | ///
89 | OptionalInt stream_delay_ms; 90 | }; 91 | 92 | ///
Gain control configuration.
93 | struct GainControl { 94 | ///
Whether to use gain control.
95 | bool enable; 96 | 97 | ///
Mode of gain control.
98 | enum Mode { 99 | ///
Not supported yet.
100 | /// TODO(skywhale): Expose set_stream_analog_level() and 101 | /// stream_analog_level(). 102 | ADAPTIVE_ANALOG, 103 | 104 | ///
105 | /// Bring the signal to an appropriate range by applying an adaptive gain 106 | /// control. The volume is dynamically amplified with a microphone with 107 | /// small pickup and vice versa. 108 | ///
109 | ADAPTIVE_DIGITAL, 110 | 111 | ///
112 | /// Unlike ADAPTIVE_DIGITAL, it only compresses (i.e. gradually reduces 113 | /// gain with increasing level) the input signal when at higher levels. 114 | /// Use this where the capture signal level is predictable, so that a 115 | /// known gain can be applied. 116 | ///
117 | FIXED_DIGITAL, 118 | }; 119 | 120 | ///
Determines what type of gain control is applied.
121 | Mode mode; 122 | 123 | ///
124 | /// Sets the target peak level (or envelope) of the AGC in dBFs (decibels from 125 | /// digital full-scale). The convention is to use positive values. 126 | /// For instance, passing in a value of 3 corresponds to -3 dBFs, or a target 127 | /// level 3 dB below full-scale. Limited to [0, 31]. 128 | ///
129 | int target_level_dbfs; 130 | 131 | ///
132 | /// Sets the maximum gain the digital compression stage may apply, in dB. A 133 | /// higher number corresponds to greater compression, while a value of 0 will 134 | /// leave the signal uncompressed. Limited to [0, 90]. 135 | ///
136 | int compression_gain_db; 137 | 138 | ///
139 | /// When enabled, the compression stage will hard limit the signal to the 140 | /// target level. Otherwise, the signal will be compressed but not limited 141 | /// above the target level. 142 | ///
143 | bool enable_limiter; 144 | }; 145 | 146 | ///
Noise suppression configuration.
147 | struct NoiseSuppression { 148 | ///
Whether to use noise suppression.
149 | bool enable; 150 | 151 | ///
A level of noise suppression.
152 | enum SuppressionLevel { 153 | LOW, 154 | MODERATE, 155 | HIGH, 156 | VERY_HIGH, 157 | }; 158 | 159 | ///
160 | /// Determines the aggressiveness of the suppression. Increasing the level will 161 | /// reduce the noise level at the expense of a higher speech distortion. 162 | ///
163 | SuppressionLevel suppression_level; 164 | }; 165 | 166 | ///
Voice detection configuration.
167 | struct VoiceDetection { 168 | ///
Whether to use voice detection.
169 | bool enable; 170 | 171 | ///
The sensitivity of the voice detector.
172 | enum DetectionLikelihood { 173 | VERY_LOW, 174 | LOW, 175 | MODERATE, 176 | HIGH, 177 | }; 178 | 179 | ///
180 | /// Specifies the likelihood that a frame will be declared to contain voice. A 181 | /// higher value makes it more likely that speech will not be clipped, at the 182 | /// expense of more noise being detected as voice. 183 | ///
184 | DetectionLikelihood detection_likelihood; 185 | }; 186 | 187 | ///
Config that can be used mid-processing.
188 | struct Config { 189 | EchoCancellation echo_cancellation; 190 | GainControl gain_control; 191 | NoiseSuppression noise_suppression; 192 | VoiceDetection voice_detection; 193 | 194 | ///
195 | /// Use to enable experimental transient noise suppression. 196 | ///
197 | bool enable_transient_suppressor; 198 | 199 | ///
200 | /// Use to enable a filtering component which removes DC offset and 201 | /// low-frequency noise. 202 | ///
203 | bool enable_high_pass_filter; 204 | }; 205 | 206 | ///
Statistics about the processor state.
207 | struct Stats { 208 | ///
209 | /// True if voice is detected in the current frame. 210 | ///
211 | OptionalBool has_voice; 212 | 213 | ///
214 | /// False if the current frame almost certainly contains no echo and true if it 215 | /// _might_ contain echo. 216 | ///
217 | OptionalBool has_echo; 218 | 219 | ///
220 | /// Root mean square (RMS) level in dBFs (decibels from digital full-scale), or 221 | /// alternately dBov. It is computed over all primary stream frames since the 222 | /// last call to |get_stats()|. The returned value is constrained to [-127, 0], 223 | /// where -127 indicates muted. 224 | ///
225 | OptionalInt rms_dbfs; 226 | 227 | ///
228 | /// Prior speech probability of the current frame averaged over output 229 | /// channels, internally computed by noise suppressor. 230 | ///
231 | OptionalDouble speech_probability; 232 | 233 | ///
234 | /// RERL = ERL + ERLE 235 | ///
236 | OptionalDouble residual_echo_return_loss; 237 | 238 | ///
239 | /// ERL = 10log_10(P_far / P_echo) 240 | ///
241 | OptionalDouble echo_return_loss; 242 | 243 | ///
244 | /// ERLE = 10log_10(P_echo / P_out) 245 | ///
246 | OptionalDouble echo_return_loss_enhancement; 247 | 248 | ///
249 | /// (Pre non-linear processing suppression) A_NLP = 10log_10(P_echo / P_a) 250 | ///
251 | OptionalDouble a_nlp; 252 | 253 | ///
254 | /// Median of the measured delay in ms. The values are aggregated until the 255 | /// first call to |get_stats()| and afterwards aggregated and updated every 256 | /// second. 257 | ///
258 | OptionalInt delay_median_ms; 259 | 260 | ///
261 | /// Standard deviation of the measured delay in ms. The values are aggregated 262 | /// until the first call to |get_stats()| and afterwards aggregated and updated 263 | /// every second. 264 | ///
265 | OptionalInt delay_standard_deviation_ms; 266 | 267 | ///
268 | /// The fraction of delay estimates that can make the echo cancellation perform 269 | /// poorly. 270 | ///
271 | OptionalDouble delay_fraction_poor_delays; 272 | }; 273 | 274 | // Creates a new instance of the signal processor. 275 | AudioProcessing* audio_processing_create(const InitializationConfig& init_config, int* error); 276 | 277 | // Processes and modifies the audio frame from a capture device. Each element in 278 | // |channels| is an array of float representing a single-channel frame of 10 ms 279 | // length. Returns an error code or |kNoError|. 280 | int process_capture_frame(AudioProcessing* ap, float** channels); 281 | 282 | // Processes and optionally modifies the audio frame from a playback device. 283 | // Each element in |channels| is an array of float representing a single-channel 284 | // frame of 10 ms length. Returns an error code or |kNoError|. 285 | int process_render_frame(AudioProcessing* ap, float** channels); 286 | 287 | // Returns statistics from the last |process_capture_frame()| call. 288 | Stats get_stats(AudioProcessing* ap); 289 | 290 | // Immediately updates the configurations of the signal processor. 291 | // May be called multiple times after the initialization and during processing. 292 | void set_config(AudioProcessing* ap, const Config& config); 293 | 294 | // Signals the AEC and AGC that the audio output will be / is muted. 295 | // They may use the hint to improve their parameter adaptation. 296 | void set_output_will_be_muted(AudioProcessing* ap, bool muted); 297 | 298 | // Signals the AEC and AGC that the next frame will contain a key press sound. 299 | void set_stream_key_pressed(AudioProcessing* ap, bool pressed); 300 | 301 | // Every processor created by |audio_processing_create()| needs to be destroyed by 302 | // this function. 303 | void audio_processing_delete(AudioProcessing* ap); 304 | 305 | // Returns true iff the code indicates a successful operation. 
306 | bool is_success(int code); 307 | 308 | } // namespace webrtc_audio_processing 309 | 310 | #endif // WEBRTC_AUDIO_PROCESSING_WRAPPER_HPP_ 311 | --------------------------------------------------------------------------------