├── .clang-format ├── examples ├── devices.js ├── live.js ├── record.js └── analyze-files.js ├── lib ├── resources │ └── vad.onnx ├── 3rd_party │ ├── vcruntime │ │ ├── x64 │ │ │ ├── vcruntime140.dll │ │ │ └── vcruntime140_1.dll │ │ └── x86 │ │ │ └── vcruntime140.dll │ └── webrtcvad │ │ └── webrtc │ │ ├── rtc_base │ │ ├── system │ │ │ ├── inline.h │ │ │ └── arch.h │ │ ├── compile_assert_c.h │ │ ├── type_traits.h │ │ ├── sanitizer.h │ │ ├── checks.cc │ │ └── numerics │ │ │ └── safe_compare.h │ │ ├── common_audio │ │ ├── third_party │ │ │ └── spl_sqrt_floor │ │ │ │ ├── spl_sqrt_floor.h │ │ │ │ └── spl_sqrt_floor.c │ │ ├── signal_processing │ │ │ ├── spl_inl.c │ │ │ ├── energy.c │ │ │ ├── cross_correlation.c │ │ │ ├── get_scaling_square.c │ │ │ ├── dot_product_with_scale.h │ │ │ ├── resample_by_2_internal.h │ │ │ ├── downsample_fast.c │ │ │ ├── division_operations.c │ │ │ ├── include │ │ │ │ ├── real_fft.h │ │ │ │ └── spl_inl.h │ │ │ ├── complex_bit_reverse.c │ │ │ ├── spl_init.c │ │ │ ├── vector_scaling_operations.c │ │ │ ├── spl_sqrt.c │ │ │ ├── min_max_operations.c │ │ │ ├── resample_48khz.c │ │ │ ├── resample_fractional.c │ │ │ ├── complex_fft_tables.h │ │ │ └── complex_fft.c │ │ └── vad │ │ │ ├── vad_gmm.h │ │ │ ├── vad_filterbank.h │ │ │ ├── vad_sp.h │ │ │ ├── vad_gmm.c │ │ │ ├── include │ │ │ └── webrtc_vad.h │ │ │ ├── webrtc_vad.c │ │ │ ├── vad_core.h │ │ │ └── vad_sp.c │ │ ├── system_wrappers │ │ └── include │ │ │ └── cpu_features_wrapper.h │ │ └── typedefs.h ├── include │ ├── webrtcvad.h │ ├── aligned.h │ ├── devices.h │ ├── microphone.h │ └── chunk_processor.h ├── src │ ├── webrtcvad.cpp │ ├── devices.cpp │ ├── microphone.cpp │ └── chunk_processor.cpp ├── test │ └── main.cpp └── CMakeLists.txt ├── .npmignore ├── .gitignore ├── package.json ├── LICENSE ├── include └── speech_recorder.h ├── setup.sh ├── src ├── index.js └── speech_recorder.cpp ├── README.md └── binding.gyp /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | IncludeBlocks: Preserve 3 | -------------------------------------------------------------------------------- /examples/devices.js: -------------------------------------------------------------------------------- 1 | const { devices } = require("../src/index"); 2 | 3 | console.log(devices()); 4 | -------------------------------------------------------------------------------- /lib/resources/vad.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/serenadeai/speech-recorder/HEAD/lib/resources/vad.onnx -------------------------------------------------------------------------------- /lib/3rd_party/vcruntime/x64/vcruntime140.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/serenadeai/speech-recorder/HEAD/lib/3rd_party/vcruntime/x64/vcruntime140.dll -------------------------------------------------------------------------------- /lib/3rd_party/vcruntime/x86/vcruntime140.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/serenadeai/speech-recorder/HEAD/lib/3rd_party/vcruntime/x86/vcruntime140.dll -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | build 2 | prebuilds 3 | lib/3rd_party 4 | lib/build 5 | lib/CMakeLists.txt 6 | lib/include 7 | lib/src 8 | lib/test 9 | 
lib/install 10 | -------------------------------------------------------------------------------- /lib/3rd_party/vcruntime/x64/vcruntime140_1.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/serenadeai/speech-recorder/HEAD/lib/3rd_party/vcruntime/x64/vcruntime140_1.dll -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | .DS_Store 3 | *.log 4 | *.wav 5 | *.raw 6 | *.map 7 | bin 8 | build 9 | dist 10 | node_modules 11 | lib/3rd_party/portaudio 12 | lib/3rd_party/onnxruntime 13 | lib/install 14 | prebuilds 15 | -------------------------------------------------------------------------------- /lib/include/webrtcvad.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | extern "C" { 4 | #include "webrtc/common_audio/vad/include/webrtc_vad.h" 5 | } 6 | 7 | namespace speechrecorder { 8 | 9 | class WebrtcVad { 10 | private: 11 | VadInst* instance_ = nullptr; 12 | int level_; 13 | int sampleRate_; 14 | 15 | public: 16 | WebrtcVad(int level, int sampleRate); 17 | ~WebrtcVad(); 18 | bool Process(int16_t* buffer, size_t size); 19 | void Reset(); 20 | }; 21 | 22 | } // namespace speechrecorder 23 | -------------------------------------------------------------------------------- /lib/include/aligned.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifndef ALIGNED 4 | #if defined(_WIN32) 5 | #define ALIGNED \ 6 | void* operator new(size_t i) { return _aligned_malloc(i, 64); } \ 7 | void operator delete(void* p) { _aligned_free(p); } 8 | #elif defined(__linux__) 9 | #define ALIGNED \ 10 | void* operator new(size_t i) { return aligned_alloc(64, i); } \ 11 | void operator delete(void* p) { free(p); } 12 | #else 13 | #define ALIGNED 14 | #endif 15 | #endif 16 | -------------------------------------------------------------------------------- /examples/live.js: -------------------------------------------------------------------------------- 1 | const { SpeechRecorder } = require("../src/index.js"); 2 | 3 | const recorder = new SpeechRecorder({ 4 | onChunkStart: () => { 5 | console.log(Date.now(), "Chunk start"); 6 | }, 7 | onAudio: ({ speaking, probability, volume }) => { 8 | console.log(Date.now(), speaking, probability, volume); 9 | }, 10 | onChunkEnd: () => { 11 | console.log(Date.now(), "Chunk end"); 12 | }, 13 | }); 14 | 15 | console.log("Recording..."); 16 | recorder.start(); 17 | setTimeout(() => { 18 | console.log("Done!"); 19 | recorder.stop(); 20 | }, 60000); 21 | -------------------------------------------------------------------------------- /lib/src/webrtcvad.cpp: -------------------------------------------------------------------------------- 1 | #include "microphone.h" 2 | #include "webrtcvad.h" 3 | 4 | namespace speechrecorder { 5 | 6 | WebrtcVad::WebrtcVad(int level, int sampleRate) 7 | : level_(level), sampleRate_(sampleRate) { 8 | Reset(); 9 | } 10 | 11 | WebrtcVad::~WebrtcVad() { 12 | if (instance_ != nullptr) { 13 | WebRtcVad_Free(instance_); 14 | } 15 | } 16 | 17 | bool WebrtcVad::Process(int16_t* buffer, size_t size) { 18 | return WebRtcVad_Process(instance_, sampleRate_, buffer, size) == 1; 19 | } 20 | 21 | void WebrtcVad::Reset() { 22 | if (instance_ != nullptr) { 23 | WebRtcVad_Free(instance_); 24 | } 25 | 26 | instance_ = WebRtcVad_Create(); 27 | 
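  // WebRtcVad_Create() returns an uninitialized detector; it still has to be
  // initialized and given an aggressiveness mode (0-3) before Process() will
  // accept frames.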
WebRtcVad_Init(instance_); 28 | WebRtcVad_set_mode(instance_, level_); 29 | } 30 | 31 | } // namespace speechrecorder 32 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "speech-recorder", 3 | "version": "2.1.0", 4 | "description": "A node.js library for streaming audio and speech from the microphone.", 5 | "main": "src/index.js", 6 | "repository": "https://github.com/serenadeai/speech-recorder", 7 | "author": "Serenade", 8 | "license": "MIT", 9 | "gypfile": true, 10 | "binary": { 11 | "napi_versions": [6] 12 | }, 13 | "scripts": { 14 | "build": "bash build.sh", 15 | "clean": "rm -rf build prebuilds lib/build lib/install", 16 | "install": "prebuild-install -r napi || node-gyp rebuild" 17 | }, 18 | "dependencies": { 19 | "bindings": "^1.5.0", 20 | "node-addon-api": "^4.2.0", 21 | "prebuild-install": "^7.0.0" 22 | }, 23 | "devDependencies": { 24 | "prebuild": "^11.0.0", 25 | "wavefile": "^11.0.0" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /examples/record.js: -------------------------------------------------------------------------------- 1 | const fs = require("fs"); 2 | const { SpeechRecorder } = require("../src/index"); 3 | const { WaveFile } = require("wavefile"); 4 | 5 | if (process.argv.length < 3) { 6 | console.log("Usage: node record.js /path/to/output.wav"); 7 | process.exit(1); 8 | } 9 | 10 | let buffer = []; 11 | const sampleRate = 16000; 12 | const recorder = new SpeechRecorder({ 13 | onAudio: ({ audio }) => { 14 | for (let i = 0; i < audio.length; i++) { 15 | buffer.push(audio[i]); 16 | } 17 | 18 | if (buffer.length >= sampleRate * 5) { 19 | let wav = new WaveFile(); 20 | wav.fromScratch(1, sampleRate, "16", buffer); 21 | fs.writeFileSync(process.argv[2], wav.toBuffer()); 22 | process.exit(1); 23 | } 24 | }, 25 | }); 26 | 27 | console.log("Ready..."); 28 | setTimeout(() => { 29 | console.log("Go!"); 30 | recorder.start(); 31 | }, 1000); 32 | -------------------------------------------------------------------------------- /lib/include/devices.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace speechrecorder { 6 | 7 | struct Device { 8 | int id; 9 | std::string name; 10 | std::string apiName; 11 | int maxInputChannels; 12 | int maxOutputChannels; 13 | double defaultSampleRate; 14 | bool isDefaultInput; 15 | bool isDefaultOutput; 16 | 17 | Device(int id, std::string name, std::string apiName, int maxInputChannels, 18 | int maxOutputChannels, double defaultSampleRate, bool isDefaultInput, 19 | bool isDefaultOutput) 20 | : id(id), 21 | name(name), 22 | apiName(apiName), 23 | maxInputChannels(maxInputChannels), 24 | maxOutputChannels(maxOutputChannels), 25 | defaultSampleRate(defaultSampleRate), 26 | isDefaultInput(isDefaultInput), 27 | isDefaultOutput(isDefaultOutput) {} 28 | }; 29 | 30 | std::vector GetDevices(); 31 | 32 | } // namespace speechrecorder 33 | -------------------------------------------------------------------------------- /lib/include/microphone.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "webrtcvad.h" 11 | 12 | using namespace moodycamel; 13 | 14 | namespace speechrecorder { 15 | 16 | struct MicrophoneCallbackData { 17 | std::vector* 
buffer; 18 | int bufferIndex = 0; 19 | BlockingReaderWriterQueue* queue; 20 | }; 21 | 22 | class Microphone { 23 | private: 24 | std::vector buffer_; 25 | MicrophoneCallbackData callbackData_; 26 | int device_; 27 | int samplesPerFrame_; 28 | int sampleRate_; 29 | PaStream* stream_; 30 | 31 | void HandleError(PaError error, const std::string& message); 32 | 33 | public: 34 | Microphone(int device, int samplesPerFrame, int sampleRate, 35 | BlockingReaderWriterQueue* queue); 36 | void Start(); 37 | void Stop(); 38 | }; 39 | 40 | } // namespace speechrecorder 41 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/rtc_base/system/inline.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef RTC_BASE_SYSTEM_INLINE_H_ 12 | #define RTC_BASE_SYSTEM_INLINE_H_ 13 | 14 | #if defined(_MSC_VER) 15 | 16 | #define RTC_FORCE_INLINE __forceinline 17 | #define RTC_NO_INLINE __declspec(noinline) 18 | 19 | #elif defined(__GNUC__) 20 | 21 | #define RTC_FORCE_INLINE __attribute__((__always_inline__)) 22 | #define RTC_NO_INLINE __attribute__((__noinline__)) 23 | 24 | #else 25 | 26 | #define RTC_FORCE_INLINE 27 | #define RTC_NO_INLINE 28 | 29 | #endif 30 | 31 | #endif // RTC_BASE_SYSTEM_INLINE_H_ 32 | -------------------------------------------------------------------------------- /lib/test/main.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "chunk_processor.h" 8 | 9 | int main(int argc, char** argv) { 10 | speechrecorder::ChunkProcessorOptions options; 11 | options.onChunkStart = [](std::vector audio) { 12 | std::cout << "Chunk start" << std::endl; 13 | }; 14 | options.onAudio = [](std::vector audio, bool speaking, double volume, 15 | bool speech, double probability, 16 | int consecutiveSilence) { 17 | std::cout << "Speaking: " << speaking << " Volume: " << volume 18 | << " Probability: " << probability << std::endl; 19 | }; 20 | options.onChunkEnd = []() { 21 | std::cout << "Chunk end" << std::endl; 22 | }; 23 | 24 | speechrecorder::ChunkProcessor processor(argv[1], options); 25 | processor.Start(); 26 | std::this_thread::sleep_for(std::chrono::milliseconds(3000)); 27 | processor.Stop(); 28 | std::cout << "Done" << std::endl; 29 | } 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2021 Serenade Labs, Inc. 
2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /lib/src/devices.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include "devices.h" 7 | 8 | namespace speechrecorder { 9 | 10 | std::vector GetDevices() { 11 | Pa_Initialize(); 12 | std::vector result; 13 | 14 | int count = Pa_GetDeviceCount(); 15 | for (int i = 0; i < count; i++) { 16 | const PaDeviceInfo* info = Pa_GetDeviceInfo(i); 17 | bool include = info->maxInputChannels > 0; 18 | 19 | #ifdef WIN32 20 | if (strcmp(Pa_GetHostApiInfo(info->hostApi)->name, "MME") != 0) { 21 | include = false; 22 | } 23 | #endif 24 | 25 | if (include) { 26 | result.emplace_back(i, info->name, Pa_GetHostApiInfo(info->hostApi)->name, 27 | info->maxInputChannels, info->maxOutputChannels, 28 | info->defaultSampleRate, 29 | i == Pa_GetDefaultInputDevice(), 30 | i == Pa_GetDefaultOutputDevice()); 31 | } 32 | } 33 | 34 | return result; 35 | } 36 | 37 | } // namespace speechrecorder 38 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/rtc_base/compile_assert_c.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef RTC_BASE_COMPILE_ASSERT_C_H_ 12 | #define RTC_BASE_COMPILE_ASSERT_C_H_ 13 | 14 | // Use this macro to verify at compile time that certain restrictions are met. 15 | // The argument is the boolean expression to evaluate. 16 | // Example: 17 | // RTC_COMPILE_ASSERT(sizeof(foo) < 128); 18 | // Note: In C++, use static_assert instead! 
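// (The trick: when |expression| evaluates to 0, the switch ends up with two
// identical "case 0" labels, which is rejected at compile time.)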
19 | #define RTC_COMPILE_ASSERT(expression) \ 20 | switch (0) { \ 21 | case 0: \ 22 | case expression:; \ 23 | } 24 | 25 | #endif // RTC_BASE_COMPILE_ASSERT_C_H_ 26 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include 12 | 13 | // 14 | // WebRtcSpl_SqrtFloor(...) 15 | // 16 | // Returns the square root of the input value |value|. The precision of this 17 | // function is rounding down integer precision, i.e., sqrt(8) gives 2 as answer. 18 | // If |value| is a negative number then 0 is returned. 19 | // 20 | // Algorithm: 21 | // 22 | // An iterative 4 cylce/bit routine 23 | // 24 | // Input: 25 | // - value : Value to calculate sqrt of 26 | // 27 | // Return value : Result of the sqrt calculation 28 | // 29 | int32_t WebRtcSpl_SqrtFloor(int32_t value); 30 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/spl_inl.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include 12 | 13 | #include "webrtc/common_audio/signal_processing/include/spl_inl.h" 14 | 15 | // Table used by WebRtcSpl_CountLeadingZeros32_NotBuiltin. For each uint32_t n 16 | // that's a sequence of 0 bits followed by a sequence of 1 bits, the entry at 17 | // index (n * 0x8c0b2891) >> 26 in this table gives the number of zero bits in 18 | // n. 19 | const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64] = { 20 | 32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18, 21 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24, 22 | 4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9, 23 | -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12, 24 | }; 25 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/energy.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains the function WebRtcSpl_Energy(). 
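 * WebRtcSpl_Energy() returns sum(vector[i]^2) >> *scale_factor, so the full
 * energy is roughly (return value) << *scale_factor.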
14 | * The description header can be found in signal_processing_library.h 15 | * 16 | */ 17 | 18 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 19 | 20 | int32_t WebRtcSpl_Energy(int16_t* vector, 21 | size_t vector_length, 22 | int* scale_factor) 23 | { 24 | int32_t en = 0; 25 | size_t i; 26 | int scaling = 27 | WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length); 28 | size_t looptimes = vector_length; 29 | int16_t *vectorptr = vector; 30 | 31 | for (i = 0; i < looptimes; i++) 32 | { 33 | en += (*vectorptr * *vectorptr) >> scaling; 34 | vectorptr++; 35 | } 36 | *scale_factor = scaling; 37 | 38 | return en; 39 | } 40 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/cross_correlation.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 12 | 13 | /* C version of WebRtcSpl_CrossCorrelation() for generic platforms. */ 14 | void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation, 15 | const int16_t* seq1, 16 | const int16_t* seq2, 17 | size_t dim_seq, 18 | size_t dim_cross_correlation, 19 | int right_shifts, 20 | int step_seq2) { 21 | size_t i = 0, j = 0; 22 | 23 | for (i = 0; i < dim_cross_correlation; i++) { 24 | int32_t corr = 0; 25 | for (j = 0; j < dim_seq; j++) 26 | corr += (seq1[j] * seq2[j]) >> right_shifts; 27 | seq2 += step_seq2; 28 | *cross_correlation++ = corr; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /include/speech_recorder.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "aligned.h" 10 | #include "chunk_processor.h" 11 | 12 | struct SpeechRecorderCallbackData { 13 | std::string event = ""; 14 | std::vector audio; 15 | bool speaking = false; 16 | double volume = 0.0; 17 | bool speech = false; 18 | double probability = 0.0; 19 | int consecutiveSilence = 0; 20 | }; 21 | 22 | class SpeechRecorder : public Napi::ObjectWrap { 23 | private: 24 | std::thread thread_; 25 | Napi::ThreadSafeFunction threadSafeFunction_; 26 | std::atomic stopped_; 27 | BlockingReaderWriterQueue queue_; 28 | Napi::FunctionReference callback_; 29 | std::function 30 | threadSafeFunctionCallback_; 31 | std::string modelPath_; 32 | speechrecorder::ChunkProcessorOptions options_; 33 | speechrecorder::ChunkProcessor processor_; 34 | std::unique_ptr processFileProcessor_; 35 | 36 | void ProcessFile(const Napi::CallbackInfo& info); 37 | void Start(const Napi::CallbackInfo& info); 38 | void Stop(const Napi::CallbackInfo& info); 39 | 40 | public: 41 | SpeechRecorder(const Napi::CallbackInfo& info); 42 | static Napi::Object Init(Napi::Env env, Napi::Object exports); 43 | 44 | ALIGNED 45 | }; 46 | 47 | Napi::Value GetDevices(const Napi::CallbackInfo& info); 48 | Napi::Object Init(Napi::Env env, Napi::Object exports); 49 | 
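The SpeechRecorder wrapper declared above bridges the audio thread and the JavaScript event loop: ChunkProcessor callbacks push SpeechRecorderCallbackData through the queue, and a Napi::ThreadSafeFunction marshals each event to the JS callback. The actual implementation lives in src/speech_recorder.cpp; the snippet below is only a minimal sketch of that pattern, and EmitEvent plus the payload fields shown are illustrative names, not the library's code.

// Sketch: forwarding an event from a worker thread to the registered JS
// callback via Napi::ThreadSafeFunction (hypothetical helper, for illustration).
#include <napi.h>

#include "speech_recorder.h"

void EmitEvent(Napi::ThreadSafeFunction& tsfn, SpeechRecorderCallbackData* data) {
  // BlockingCall queues work onto the JS event loop; the lambda runs on the
  // main thread, builds the payload object, and then frees |data|.
  tsfn.BlockingCall(data, [](Napi::Env env, Napi::Function jsCallback,
                             SpeechRecorderCallbackData* data) {
    Napi::Object payload = Napi::Object::New(env);
    payload.Set("speaking", Napi::Boolean::New(env, data->speaking));
    payload.Set("probability", Napi::Number::New(env, data->probability));
    payload.Set("volume", Napi::Number::New(env, data->volume));
    payload.Set("consecutiveSilence",
                Napi::Number::New(env, data->consecutiveSilence));
    // A real payload would also carry the audio samples and the raw speech flag.
    jsCallback.Call({Napi::String::New(env, data->event), payload});
    delete data;
  });
}

This matches the (event, data) shape that src/index.js expects when it dispatches to onChunkStart, onAudio, and onChunkEnd.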
-------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/get_scaling_square.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains the function WebRtcSpl_GetScalingSquare(). 14 | * The description header can be found in signal_processing_library.h 15 | * 16 | */ 17 | 18 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 19 | 20 | int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector, 21 | size_t in_vector_length, 22 | size_t times) 23 | { 24 | int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t)times); 25 | size_t i; 26 | int16_t smax = -1; 27 | int16_t sabs; 28 | int16_t *sptr = in_vector; 29 | int16_t t; 30 | size_t looptimes = in_vector_length; 31 | 32 | for (i = looptimes; i > 0; i--) 33 | { 34 | sabs = (*sptr > 0 ? *sptr++ : -*sptr++); 35 | smax = (sabs > smax ? sabs : smax); 36 | } 37 | t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax)); 38 | 39 | if (smax == 0) 40 | { 41 | return 0; // Since norm(0) returns 0 42 | } else 43 | { 44 | return (t > nbits) ? 0 : nbits - t; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/vad/vad_gmm.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | // Gaussian probability calculations internally used in vad_core.c. 12 | 13 | #ifndef COMMON_AUDIO_VAD_VAD_GMM_H_ 14 | #define COMMON_AUDIO_VAD_VAD_GMM_H_ 15 | 16 | #include 17 | 18 | // Calculates the probability for |input|, given that |input| comes from a 19 | // normal distribution with mean and standard deviation (|mean|, |std|). 20 | // 21 | // Inputs: 22 | // - input : input sample in Q4. 23 | // - mean : mean input in the statistical model, Q7. 24 | // - std : standard deviation, Q7. 25 | // 26 | // Output: 27 | // 28 | // - delta : input used when updating the model, Q11. 29 | // |delta| = (|input| - |mean|) / |std|^2. 30 | // 31 | // Return: 32 | // (probability for |input|) = 33 | // 1 / |std| * exp(-(|input| - |mean|)^2 / (2 * |std|^2)); 34 | int32_t WebRtcVad_GaussianProbability(int16_t input, 35 | int16_t mean, 36 | int16_t std, 37 | int16_t* delta); 38 | 39 | #endif // COMMON_AUDIO_VAD_VAD_GMM_H_ 40 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/dot_product_with_scale.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 The WebRTC project authors. 
All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_ 12 | #define COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_ 13 | 14 | #include 15 | #include 16 | 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | // Calculates the dot product between two (int16_t) vectors. 22 | // 23 | // Input: 24 | // - vector1 : Vector 1 25 | // - vector2 : Vector 2 26 | // - vector_length : Number of samples used in the dot product 27 | // - scaling : The number of right bit shifts to apply on each term 28 | // during calculation to avoid overflow, i.e., the 29 | // output will be in Q(-|scaling|) 30 | // 31 | // Return value : The dot product in Q(-scaling) 32 | int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1, 33 | const int16_t* vector2, 34 | size_t length, 35 | int scaling); 36 | 37 | #ifdef __cplusplus 38 | } 39 | #endif // __cplusplus 40 | #endif // COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_ 41 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/system_wrappers/include/cpu_features_wrapper.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef SYSTEM_WRAPPERS_INCLUDE_CPU_FEATURES_WRAPPER_H_ 12 | #define SYSTEM_WRAPPERS_INCLUDE_CPU_FEATURES_WRAPPER_H_ 13 | 14 | #include 15 | 16 | #if defined(__cplusplus) || defined(c_plusplus) 17 | extern "C" { 18 | #endif 19 | 20 | // List of features in x86. 21 | typedef enum { kSSE2, kSSE3 } CPUFeature; 22 | 23 | // List of features in ARM. 24 | enum { 25 | kCPUFeatureARMv7 = (1 << 0), 26 | kCPUFeatureVFPv3 = (1 << 1), 27 | kCPUFeatureNEON = (1 << 2), 28 | kCPUFeatureLDREXSTREX = (1 << 3) 29 | }; 30 | 31 | typedef int (*WebRtc_CPUInfo)(CPUFeature feature); 32 | 33 | // Returns true if the CPU supports the feature. 34 | extern WebRtc_CPUInfo WebRtc_GetCPUInfo; 35 | 36 | // No CPU feature is available => straight C path. 37 | extern WebRtc_CPUInfo WebRtc_GetCPUInfoNoASM; 38 | 39 | // Return the features in an ARM device. 40 | // It detects the features in the hardware platform, and returns supported 41 | // values in the above enum definition as a bitmask. 
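// For example, a result with both kCPUFeatureARMv7 and kCPUFeatureNEON set
// indicates an ARMv7 core with NEON support.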
42 | extern uint64_t WebRtc_GetCPUFeaturesARM(void); 43 | 44 | #if defined(__cplusplus) || defined(c_plusplus) 45 | } // extern "C" 46 | #endif 47 | 48 | #endif // SYSTEM_WRAPPERS_INCLUDE_CPU_FEATURES_WRAPPER_H_ 49 | -------------------------------------------------------------------------------- /lib/include/chunk_processor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "aligned.h" 12 | #include "microphone.h" 13 | #include "onnxruntime_cxx_api.h" 14 | #include "webrtcvad.h" 15 | 16 | namespace speechrecorder { 17 | 18 | struct ChunkProcessorOptions { 19 | int consecutiveFramesForSilence = 5; 20 | int consecutiveFramesForSpeaking = 1; 21 | int device = -1; 22 | int leadingBufferFrames = 10; 23 | std::function)> onChunkStart = nullptr; 24 | std::function, bool, double, bool, double, int)> 25 | onAudio = nullptr; 26 | std::function onChunkEnd = nullptr; 27 | int samplesPerFrame = 480; 28 | int sampleRate = 16000; 29 | int sileroVadBufferSize = 2000; 30 | int sileroVadRateLimit = 3; 31 | double sileroVadSilenceThreshold = 0.1; 32 | double sileroVadSpeakingThreshold = 0.3; 33 | int webrtcVadLevel = 3; 34 | int webrtcVadBufferSize = 480; 35 | int webrtcVadResultsSize = 10; 36 | }; 37 | 38 | class ChunkProcessor { 39 | private: 40 | std::vector leadingBuffer_; 41 | int consecutiveSilence_ = 0; 42 | int consecutiveSpeaking_ = 0; 43 | int framesUntilSileroVad_ = 0; 44 | Microphone microphone_; 45 | BlockingReaderWriterQueue queue_; 46 | std::vector sileroBuffer_; 47 | double sileroVadProbability_ = 0.0; 48 | bool speaking_ = false; 49 | std::atomic stopped_; 50 | std::mutex toggleLock_; 51 | std::thread startThread_; 52 | std::thread stopThread_; 53 | std::thread queueThread_; 54 | WebrtcVad webrtcVad_; 55 | std::vector webrtcVadBuffer_; 56 | std::vector webrtcVadResults_; 57 | 58 | public: 59 | ChunkProcessorOptions options_; 60 | ChunkProcessor(std::string modelPath, ChunkProcessorOptions options); 61 | ~ChunkProcessor(); 62 | void Process(short* audio); 63 | void Reset(); 64 | void Start(); 65 | void Stop(); 66 | 67 | ALIGNED 68 | }; 69 | 70 | } // namespace speechrecorder 71 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/vad/vad_filterbank.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | /* 12 | * This file includes feature calculating functionality used in vad_core.c. 
13 | */ 14 | 15 | #ifndef COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ 16 | #define COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ 17 | 18 | #include "webrtc/common_audio/vad/vad_core.h" 19 | 20 | // Takes |data_length| samples of |data_in| and calculates the logarithm of the 21 | // energy of each of the |kNumChannels| = 6 frequency bands used by the VAD: 22 | // 80 Hz - 250 Hz 23 | // 250 Hz - 500 Hz 24 | // 500 Hz - 1000 Hz 25 | // 1000 Hz - 2000 Hz 26 | // 2000 Hz - 3000 Hz 27 | // 3000 Hz - 4000 Hz 28 | // 29 | // The values are given in Q4 and written to |features|. Further, an approximate 30 | // overall energy is returned. The return value is used in 31 | // WebRtcVad_GmmProbability() as a signal indicator, hence it is arbitrary above 32 | // the threshold |kMinEnergy|. 33 | // 34 | // - self [i/o] : State information of the VAD. 35 | // - data_in [i] : Input audio data, for feature extraction. 36 | // - data_length [i] : Audio data size, in number of samples. 37 | // - features [o] : 10 * log10(energy in each frequency band), Q4. 38 | // - returns : Total energy of the signal (NOTE! This value is not 39 | // exact. It is only used in a comparison.) 40 | int16_t WebRtcVad_CalculateFeatures(VadInstT* self, 41 | const int16_t* data_in, 42 | size_t data_length, 43 | int16_t* features); 44 | 45 | #endif // COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ 46 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/rtc_base/system/arch.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | // This file contains platform-specific typedefs and defines. 12 | // Much of it is derived from Chromium's build/build_config.h. 13 | 14 | #ifndef RTC_BASE_SYSTEM_ARCH_H_ 15 | #define RTC_BASE_SYSTEM_ARCH_H_ 16 | 17 | // Processor architecture detection. 
For more info on what's defined, see: 18 | // http://msdn.microsoft.com/en-us/library/b0084kay.aspx 19 | // http://www.agner.org/optimize/calling_conventions.pdf 20 | // or with gcc, run: "echo | gcc -E -dM -" 21 | #if defined(_M_X64) || defined(__x86_64__) 22 | #define WEBRTC_ARCH_X86_FAMILY 23 | #define WEBRTC_ARCH_X86_64 24 | #define WEBRTC_ARCH_64_BITS 25 | #define WEBRTC_ARCH_LITTLE_ENDIAN 26 | #elif defined(__aarch64__) 27 | #define WEBRTC_ARCH_ARM_FAMILY 28 | #define WEBRTC_ARCH_64_BITS 29 | #define WEBRTC_ARCH_LITTLE_ENDIAN 30 | #elif defined(_M_IX86) || defined(__i386__) 31 | #define WEBRTC_ARCH_X86_FAMILY 32 | #define WEBRTC_ARCH_X86 33 | #define WEBRTC_ARCH_32_BITS 34 | #define WEBRTC_ARCH_LITTLE_ENDIAN 35 | #elif defined(__ARMEL__) 36 | #define WEBRTC_ARCH_ARM_FAMILY 37 | #define WEBRTC_ARCH_32_BITS 38 | #define WEBRTC_ARCH_LITTLE_ENDIAN 39 | #elif defined(__MIPSEL__) 40 | #define WEBRTC_ARCH_MIPS_FAMILY 41 | #if defined(__LP64__) 42 | #define WEBRTC_ARCH_64_BITS 43 | #else 44 | #define WEBRTC_ARCH_32_BITS 45 | #endif 46 | #define WEBRTC_ARCH_LITTLE_ENDIAN 47 | #elif defined(__pnacl__) 48 | #define WEBRTC_ARCH_32_BITS 49 | #define WEBRTC_ARCH_LITTLE_ENDIAN 50 | #else 51 | #error Please add support for your architecture in typedefs.h 52 | #endif 53 | 54 | #if !(defined(WEBRTC_ARCH_LITTLE_ENDIAN) ^ defined(WEBRTC_ARCH_BIG_ENDIAN)) 55 | #error Define either WEBRTC_ARCH_LITTLE_ENDIAN or WEBRTC_ARCH_BIG_ENDIAN 56 | #endif 57 | 58 | #endif // RTC_BASE_SYSTEM_ARCH_H_ 59 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/vad/vad_sp.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | // This file includes specific signal processing tools used in vad_core.c. 12 | 13 | #ifndef COMMON_AUDIO_VAD_VAD_SP_H_ 14 | #define COMMON_AUDIO_VAD_VAD_SP_H_ 15 | 16 | #include "webrtc/common_audio/vad/vad_core.h" 17 | 18 | // Downsamples the signal by a factor 2, eg. 32->16 or 16->8. 19 | // 20 | // Inputs: 21 | // - signal_in : Input signal. 22 | // - in_length : Length of input signal in samples. 23 | // 24 | // Input & Output: 25 | // - filter_state : Current filter states of the two all-pass filters. The 26 | // |filter_state| is updated after all samples have been 27 | // processed. 28 | // 29 | // Output: 30 | // - signal_out : Downsampled signal (of length |in_length| / 2). 31 | void WebRtcVad_Downsampling(const int16_t* signal_in, 32 | int16_t* signal_out, 33 | int32_t* filter_state, 34 | size_t in_length); 35 | 36 | // Updates and returns the smoothed feature minimum. As minimum we use the 37 | // median of the five smallest feature values in a 100 frames long window. 38 | // As long as |handle->frame_counter| is zero, that is, we haven't received any 39 | // "valid" data, FindMinimum() outputs the default value of 1600. 40 | // 41 | // Inputs: 42 | // - feature_value : New feature value to update with. 43 | // - channel : Channel number. 44 | // 45 | // Input & Output: 46 | // - handle : State information of the VAD. 
47 | // 48 | // Returns: 49 | // : Smoothed minimum value for a moving window. 50 | int16_t WebRtcVad_FindMinimum(VadInstT* handle, 51 | int16_t feature_value, 52 | int channel); 53 | 54 | #endif // COMMON_AUDIO_VAD_VAD_SP_H_ 55 | -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 5 | pushd "$HERE" &> /dev/null 6 | 7 | if [[ -z "$1" ]] ; then 8 | echo "Usage: setup.sh x86|x64|arm64" 9 | exit 1 10 | fi 11 | 12 | rm -rf tmp lib/3rd_party/portaudio lib/3rd_party/onnxruntime 13 | 14 | mkdir -p tmp/portaudio 15 | cd tmp/portaudio 16 | curl -Lo portaudio.tgz http://files.portaudio.com/archives/pa_stable_v190700_20210406.tgz 17 | tar xvf portaudio.tgz 18 | 19 | cd portaudio 20 | mkdir dist install 21 | cd dist 22 | 23 | portaudio_cmake="cmake" 24 | if [[ `uname -s` == "MINGW"* ]] ; then 25 | if [[ "$1" == "x86" ]] ; then 26 | portaudio_cmake+=" -A Win32" 27 | elif [[ "$1" == "x64" ]] ; then 28 | portaudio_cmake+=" -A x64" 29 | fi 30 | elif [[ `uname -s` == "Darwin" ]] ; then 31 | portaudio_cmake+=" -DCMAKE_OSX_DEPLOYMENT_TARGET=10.14" 32 | if [[ "$1" == "x64" ]] ; then 33 | portaudio_cmake+=" -DCMAKE_OSX_ARCHITECTURES=x86_64" 34 | elif [[ "$1" == "arm64" ]] ; then 35 | portaudio_cmake+=" -DCMAKE_OSX_ARCHITECTURES=arm64" 36 | fi 37 | fi 38 | 39 | portaudio_cmake+=" .." 40 | eval $portaudio_cmake 41 | cmake --build . --config Release 42 | cmake --install . --prefix ../install 43 | cp -r ../install ../../../../lib/3rd_party/portaudio 44 | 45 | cd ../../.. 46 | mkdir onnxruntime 47 | cd onnxruntime 48 | 49 | if [[ `uname -s` == "MINGW"* ]] ; then 50 | mkdir -p ../../lib/3rd_party/onnxruntime/lib 51 | curl -Lo onnxruntime.zip https://www.nuget.org/api/v2/package/Microsoft.ML.OnnxRuntime/1.10.0 52 | unzip onnxruntime.zip 53 | cp -r build/native/include ../../lib/3rd_party/onnxruntime 54 | 55 | path="win-x86" 56 | if [[ "$1" == "x64" ]] ; then 57 | path="win-x64" 58 | fi 59 | 60 | cp runtimes/$path/native/*.dll ../../lib/3rd_party/onnxruntime/lib 61 | cp runtimes/$path/native/*.lib ../../lib/3rd_party/onnxruntime/lib 62 | else 63 | path="onnxruntime-linux-x64-1.10.0" 64 | if [[ `uname -s` == "Darwin" ]] ; then 65 | if [[ "$1" == "x64" ]] ; then 66 | path="onnxruntime-osx-x86_64-1.10.0" 67 | elif [[ "$1" == "arm64" ]] ; then 68 | path="onnxruntime-osx-arm64-1.10.0" 69 | fi 70 | fi 71 | 72 | curl -Lo onnxruntime.tgz https://github.com/microsoft/onnxruntime/releases/download/v1.10.0/$path.tgz 73 | tar xvf onnxruntime.tgz 74 | cp -r $path ../../lib/3rd_party/onnxruntime 75 | fi 76 | 77 | cd ../.. 78 | rm -rf tmp 79 | popd &> /dev/null 80 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/resample_by_2_internal.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | /* 12 | * This header file contains some internal resampling functions. 
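 * Naming: DownBy2/UpBy2 halve or double the sampling rate, LPBy2 applies the
 * low-pass filtering only; ShortToInt/IntToShort/IntToInt give the input and
 * output sample formats, and |state| carries filter memory across calls.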
13 | * 14 | */ 15 | 16 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_ 17 | #define COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_ 18 | 19 | #include 20 | 21 | /******************************************************************* 22 | * resample_by_2_fast.c 23 | * Functions for internal use in the other resample functions 24 | ******************************************************************/ 25 | void WebRtcSpl_DownBy2IntToShort(int32_t* in, 26 | int32_t len, 27 | int16_t* out, 28 | int32_t* state); 29 | 30 | void WebRtcSpl_DownBy2ShortToInt(const int16_t* in, 31 | int32_t len, 32 | int32_t* out, 33 | int32_t* state); 34 | 35 | void WebRtcSpl_UpBy2ShortToInt(const int16_t* in, 36 | int32_t len, 37 | int32_t* out, 38 | int32_t* state); 39 | 40 | void WebRtcSpl_UpBy2IntToInt(const int32_t* in, 41 | int32_t len, 42 | int32_t* out, 43 | int32_t* state); 44 | 45 | void WebRtcSpl_UpBy2IntToShort(const int32_t* in, 46 | int32_t len, 47 | int16_t* out, 48 | int32_t* state); 49 | 50 | void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, 51 | int32_t len, 52 | int32_t* out, 53 | int32_t* state); 54 | 55 | void WebRtcSpl_LPBy2IntToInt(const int32_t* in, 56 | int32_t len, 57 | int32_t* out, 58 | int32_t* state); 59 | 60 | #endif // COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_ 61 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Written by Wilco Dijkstra, 1996. The following email exchange establishes the 3 | * license. 4 | * 5 | * From: Wilco Dijkstra 6 | * Date: Fri, Jun 24, 2011 at 3:20 AM 7 | * Subject: Re: sqrt routine 8 | * To: Kevin Ma 9 | * Hi Kevin, 10 | * Thanks for asking. Those routines are public domain (originally posted to 11 | * comp.sys.arm a long time ago), so you can use them freely for any purpose. 12 | * Cheers, 13 | * Wilco 14 | * 15 | * ----- Original Message ----- 16 | * From: "Kevin Ma" 17 | * To: 18 | * Sent: Thursday, June 23, 2011 11:44 PM 19 | * Subject: Fwd: sqrt routine 20 | * Hi Wilco, 21 | * I saw your sqrt routine from several web sites, including 22 | * http://www.finesse.demon.co.uk/steven/sqrt.html. 23 | * Just wonder if there's any copyright information with your Successive 24 | * approximation routines, or if I can freely use it for any purpose. 25 | * Thanks. 26 | * Kevin 27 | */ 28 | 29 | // Minor modifications in code style for WebRTC, 2012. 30 | 31 | #include "webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h" 32 | 33 | /* 34 | * Algorithm: 35 | * Successive approximation of the equation (root + delta) ^ 2 = N 36 | * until delta < 1. If delta < 1 we have the integer part of SQRT (N). 37 | * Use delta = 2^i for i = 15 .. 0. 38 | * 39 | * Output precision is 16 bits. Note for large input values (close to 40 | * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word) 41 | * contains the MSB information (a non-sign value). Do with caution 42 | * if you need to cast the output to int16_t type. 43 | * 44 | * If the input value is negative, it returns 0. 
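 * Examples: WebRtcSpl_SqrtFloor(8) == 2 and WebRtcSpl_SqrtFloor(1 << 30) == 32768.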
45 | */ 46 | 47 | #define WEBRTC_SPL_SQRT_ITER(N) \ 48 | try1 = root + (1 << (N)); \ 49 | if (value >= try1 << (N)) \ 50 | { \ 51 | value -= try1 << (N); \ 52 | root |= 2 << (N); \ 53 | } 54 | 55 | int32_t WebRtcSpl_SqrtFloor(int32_t value) 56 | { 57 | int32_t root = 0, try1; 58 | 59 | WEBRTC_SPL_SQRT_ITER (15); 60 | WEBRTC_SPL_SQRT_ITER (14); 61 | WEBRTC_SPL_SQRT_ITER (13); 62 | WEBRTC_SPL_SQRT_ITER (12); 63 | WEBRTC_SPL_SQRT_ITER (11); 64 | WEBRTC_SPL_SQRT_ITER (10); 65 | WEBRTC_SPL_SQRT_ITER ( 9); 66 | WEBRTC_SPL_SQRT_ITER ( 8); 67 | WEBRTC_SPL_SQRT_ITER ( 7); 68 | WEBRTC_SPL_SQRT_ITER ( 6); 69 | WEBRTC_SPL_SQRT_ITER ( 5); 70 | WEBRTC_SPL_SQRT_ITER ( 4); 71 | WEBRTC_SPL_SQRT_ITER ( 3); 72 | WEBRTC_SPL_SQRT_ITER ( 2); 73 | WEBRTC_SPL_SQRT_ITER ( 1); 74 | WEBRTC_SPL_SQRT_ITER ( 0); 75 | 76 | return root >> 1; 77 | } 78 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/downsample_fast.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 12 | 13 | #include "webrtc/rtc_base/checks.h" 14 | #include "webrtc/rtc_base/sanitizer.h" 15 | 16 | // TODO(Bjornv): Change the function parameter order to WebRTC code style. 17 | // C version of WebRtcSpl_DownsampleFast() for generic platforms. 18 | int WebRtcSpl_DownsampleFastC(const int16_t* data_in, 19 | size_t data_in_length, 20 | int16_t* data_out, 21 | size_t data_out_length, 22 | const int16_t* __restrict coefficients, 23 | size_t coefficients_length, 24 | int factor, 25 | size_t delay) { 26 | int16_t* const original_data_out = data_out; 27 | size_t i = 0; 28 | size_t j = 0; 29 | int32_t out_s32 = 0; 30 | size_t endpos = delay + factor * (data_out_length - 1) + 1; 31 | 32 | // Return error if any of the running conditions doesn't meet. 33 | if (data_out_length == 0 || coefficients_length == 0 34 | || data_in_length < endpos) { 35 | return -1; 36 | } 37 | 38 | rtc_MsanCheckInitialized(coefficients, sizeof(coefficients[0]), 39 | coefficients_length); 40 | 41 | for (i = delay; i < endpos; i += factor) { 42 | out_s32 = 2048; // Round value, 0.5 in Q12. 43 | 44 | for (j = 0; j < coefficients_length; j++) { 45 | // Negative overflow is permitted here, because this is 46 | // auto-regressive filters, and the state for each batch run is 47 | // stored in the "negative" positions of the output vector. 48 | rtc_MsanCheckInitialized(&data_in[(ptrdiff_t) i - (ptrdiff_t) j], 49 | sizeof(data_in[0]), 1); 50 | // out_s32 is in Q12 domain. 51 | out_s32 += coefficients[j] * data_in[(ptrdiff_t) i - (ptrdiff_t) j]; 52 | } 53 | 54 | out_s32 >>= 12; // Q0. 55 | 56 | // Saturate and store the output. 
57 | *data_out++ = WebRtcSpl_SatW32ToW16(out_s32); 58 | } 59 | 60 | RTC_DCHECK_EQ(original_data_out + data_out_length, data_out); 61 | rtc_MsanCheckInitialized(original_data_out, sizeof(original_data_out[0]), 62 | data_out_length); 63 | 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /lib/src/microphone.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "microphone.h" 9 | #include "webrtcvad.h" 10 | 11 | using namespace moodycamel; 12 | 13 | namespace speechrecorder { 14 | 15 | int callback(const void* input, void* output, unsigned long samplesPerFrame, 16 | const PaStreamCallbackTimeInfo* timeInfo, 17 | PaStreamCallbackFlags statusFlags, void* callbackData) { 18 | if (input == nullptr || callbackData == nullptr) { 19 | return paContinue; 20 | } 21 | 22 | MicrophoneCallbackData* data = (MicrophoneCallbackData*)callbackData; 23 | short* audio = (short*)input; 24 | for (int i = 0; i < samplesPerFrame; i++) { 25 | data->buffer->at((data->bufferIndex + i) % data->buffer->size()) = audio[i]; 26 | } 27 | 28 | data->queue->enqueue(data->buffer->data() + data->bufferIndex); 29 | data->bufferIndex = 30 | (data->bufferIndex + samplesPerFrame) % data->buffer->size(); 31 | return paContinue; 32 | } 33 | 34 | Microphone::Microphone(int device, int samplesPerFrame, int sampleRate, 35 | BlockingReaderWriterQueue* queue) 36 | : device_(device), 37 | samplesPerFrame_(samplesPerFrame), 38 | sampleRate_(sampleRate) { 39 | for (int i = 0; i < samplesPerFrame * 10; i++) { 40 | buffer_.push_back(0); 41 | } 42 | 43 | callbackData_ = {&buffer_, 0, queue}; 44 | PaError error = Pa_Initialize(); 45 | if (error != paNoError) { 46 | HandleError(error, "Initialize"); 47 | } 48 | 49 | if (device_ == -1) { 50 | device_ = Pa_GetDefaultInputDevice(); 51 | } 52 | } 53 | 54 | void Microphone::HandleError(PaError error, const std::string& message) { 55 | Pa_Terminate(); 56 | std::cerr << "PortAudio Error: " << message << std::endl 57 | << "Error number: " << error << std::endl 58 | << "Error message: " << Pa_GetErrorText(error) << std::endl; 59 | exit(error); 60 | } 61 | 62 | void Microphone::Start() { 63 | PaError error = paNoError; 64 | PaStreamParameters parameters; 65 | parameters.channelCount = 1; 66 | parameters.sampleFormat = paInt16; 67 | parameters.device = device_; 68 | parameters.suggestedLatency = 69 | Pa_GetDeviceInfo(parameters.device)->defaultLowInputLatency; 70 | parameters.hostApiSpecificStreamInfo = 0; 71 | 72 | error = Pa_OpenStream(&stream_, ¶meters, 0, sampleRate_, samplesPerFrame_, 73 | paClipOff, callback, &callbackData_); 74 | if (error != paNoError) { 75 | HandleError(error, "Open Stream"); 76 | } 77 | 78 | Pa_StartStream(stream_); 79 | if (error != paNoError) { 80 | HandleError(error, "Start Stream"); 81 | } 82 | } 83 | 84 | void Microphone::Stop() { 85 | Pa_AbortStream(stream_); 86 | Pa_CloseStream(stream_); 87 | } 88 | 89 | } // namespace speechrecorder 90 | -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | const path = require("path"); 2 | const { SpeechRecorder, devices } = require("bindings")("speechrecorder.node"); 3 | 4 | class Wrapper { 5 | constructor(options, model) { 6 | options = options ? 
options : {}; 7 | options.consecutiveFramesForSilence = 8 | options.consecutiveFramesForSilence !== undefined ? options.consecutiveFramesForSilence : 10; 9 | options.consecutiveFramesForSpeaking = 10 | options.consecutiveFramesForSpeaking !== undefined ? options.consecutiveFramesForSpeaking : 1; 11 | options.device = options.device !== undefined ? options.device : -1; 12 | options.leadingBufferFrames = 13 | options.leadingBufferFrames !== undefined ? options.leadingBufferFrames : 10; 14 | options.onChunkStart = options.onChunkStart !== undefined ? options.onChunkStart : (data) => {}; 15 | options.onAudio = 16 | options.onAudio !== undefined 17 | ? options.onAudio 18 | : (audio, speaking, volume, speech, probability) => {}; 19 | options.onChunkEnd = options.onChunkEnd !== undefined ? options.onChunkEnd : (data) => {}; 20 | options.samplesPerFrame = options.samplesPerFrame !== undefined ? options.samplesPerFrame : 480; 21 | options.sampleRate = options.sampleRate !== undefined ? options.sampleRate : 16000; 22 | options.sileroVadBufferSize = 23 | options.sileroVadBufferSize !== undefined ? options.sileroVadBufferSize : 2000; 24 | options.sileroVadRateLimit = 25 | options.sileroVadRateLimit !== undefined ? options.sileroVadRateLimit : 3; 26 | options.sileroVadSilenceThreshold = 27 | options.sileroVadSilenceThreshold !== undefined ? options.sileroVadSilenceThreshold : 0.1; 28 | options.sileroVadSpeakingThreshold = 29 | options.sileroVadSpeakingThreshold !== undefined ? options.sileroVadSpeakingThreshold : 0.3; 30 | options.webrtcVadLevel = options.webrtcVadLevel !== undefined ? options.webrtcVadLevel : 3; 31 | options.webrtcVadBufferSize = 32 | options.webrtcVadBufferSize !== undefined ? options.webrtcVadBufferSize : 480; 33 | options.webrtcVadResultsSize = 34 | options.webrtcVadResultsSize !== undefined ? options.webrtcVadResultsSize : 10; 35 | 36 | this.inner = new SpeechRecorder( 37 | model !== undefined ? model : path.join(__dirname, "..", "lib", "resources", "vad.onnx"), 38 | (event, data) => { 39 | if (event == "chunkStart") { 40 | options.onChunkStart({ audio: data.audio }); 41 | } else if (event == "audio") { 42 | options.onAudio({ 43 | audio: data.audio, 44 | speaking: data.speaking, 45 | probability: data.probability, 46 | volume: data.volume, 47 | speech: data.speech, 48 | consecutiveSilence: data.consecutiveSilence, 49 | }); 50 | } else if (event == "chunkEnd") { 51 | options.onChunkEnd(); 52 | } 53 | }, 54 | options 55 | ); 56 | } 57 | 58 | processFile(file) { 59 | this.inner.processFile(path.resolve(file)); 60 | } 61 | 62 | start() { 63 | this.inner.start(); 64 | } 65 | 66 | stop() { 67 | this.inner.stop(); 68 | } 69 | } 70 | 71 | exports.SpeechRecorder = Wrapper; 72 | exports.devices = devices; 73 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/vad/vad_gmm.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #include "webrtc/common_audio/vad/vad_gmm.h" 12 | 13 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 14 | 15 | static const int32_t kCompVar = 22005; 16 | static const int16_t kLog2Exp = 5909; // log2(exp(1)) in Q12. 17 | 18 | // For a normal distribution, the probability of |input| is calculated and 19 | // returned (in Q20). The formula for normal distributed probability is 20 | // 21 | // 1 / s * exp(-(x - m)^2 / (2 * s^2)) 22 | // 23 | // where the parameters are given in the following Q domains: 24 | // m = |mean| (Q7) 25 | // s = |std| (Q7) 26 | // x = |input| (Q4) 27 | // in addition to the probability we output |delta| (in Q11) used when updating 28 | // the noise/speech model. 29 | int32_t WebRtcVad_GaussianProbability(int16_t input, 30 | int16_t mean, 31 | int16_t std, 32 | int16_t* delta) { 33 | int16_t tmp16, inv_std, inv_std2, exp_value = 0; 34 | int32_t tmp32; 35 | 36 | // Calculate |inv_std| = 1 / s, in Q10. 37 | // 131072 = 1 in Q17, and (|std| >> 1) is for rounding instead of truncation. 38 | // Q-domain: Q17 / Q7 = Q10. 39 | tmp32 = (int32_t) 131072 + (int32_t) (std >> 1); 40 | inv_std = (int16_t) WebRtcSpl_DivW32W16(tmp32, std); 41 | 42 | // Calculate |inv_std2| = 1 / s^2, in Q14. 43 | tmp16 = (inv_std >> 2); // Q10 -> Q8. 44 | // Q-domain: (Q8 * Q8) >> 2 = Q14. 45 | inv_std2 = (int16_t)((tmp16 * tmp16) >> 2); 46 | // TODO(bjornv): Investigate if changing to 47 | // inv_std2 = (int16_t)((inv_std * inv_std) >> 6); 48 | // gives better accuracy. 49 | 50 | tmp16 = (input << 3); // Q4 -> Q7 51 | tmp16 = tmp16 - mean; // Q7 - Q7 = Q7 52 | 53 | // To be used later, when updating noise/speech model. 54 | // |delta| = (x - m) / s^2, in Q11. 55 | // Q-domain: (Q14 * Q7) >> 10 = Q11. 56 | *delta = (int16_t)((inv_std2 * tmp16) >> 10); 57 | 58 | // Calculate the exponent |tmp32| = (x - m)^2 / (2 * s^2), in Q10. Replacing 59 | // division by two with one shift. 60 | // Q-domain: (Q11 * Q7) >> 8 = Q10. 61 | tmp32 = (*delta * tmp16) >> 9; 62 | 63 | // If the exponent is small enough to give a non-zero probability we calculate 64 | // |exp_value| ~= exp(-(x - m)^2 / (2 * s^2)) 65 | // ~= exp2(-log2(exp(1)) * |tmp32|). 66 | if (tmp32 < kCompVar) { 67 | // Calculate |tmp16| = log2(exp(1)) * |tmp32|, in Q10. 68 | // Q-domain: (Q12 * Q10) >> 12 = Q10. 69 | tmp16 = (int16_t)((kLog2Exp * tmp32) >> 12); 70 | tmp16 = -tmp16; 71 | exp_value = (0x0400 | (tmp16 & 0x03FF)); 72 | tmp16 ^= 0xFFFF; 73 | tmp16 >>= 10; 74 | tmp16 += 1; 75 | // Get |exp_value| = exp(-|tmp32|) in Q10. 76 | exp_value >>= tmp16; 77 | } 78 | 79 | // Calculate and return (1 / s) * exp(-(x - m)^2 / (2 * s^2)), in Q20. 80 | // Q-domain: Q10 * Q10 = Q20. 81 | return inv_std * exp_value; 82 | } 83 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/vad/include/webrtc_vad.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | /* 12 | * This header file includes the VAD API calls. Specific function calls are 13 | * given below. 
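 * Typical call sequence: WebRtcVad_Create(), WebRtcVad_Init() and
 * WebRtcVad_set_mode() once, then WebRtcVad_Process() for each 10, 20 or 30 ms
 * frame, and finally WebRtcVad_Free().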
14 | */ 15 | 16 | #ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT 17 | #define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ 18 | 19 | #include <stddef.h> 20 | #include <stdint.h> 21 | 22 | typedef struct WebRtcVadInst VadInst; 23 | 24 | #ifdef __cplusplus 25 | extern "C" { 26 | #endif 27 | 28 | // Creates an instance to the VAD structure. 29 | VadInst* WebRtcVad_Create(void); 30 | 31 | // Frees the dynamic memory of a specified VAD instance. 32 | // 33 | // - handle [i] : Pointer to VAD instance that should be freed. 34 | void WebRtcVad_Free(VadInst* handle); 35 | 36 | // Initializes a VAD instance. 37 | // 38 | // - handle [i/o] : Instance that should be initialized. 39 | // 40 | // returns : 0 - (OK), 41 | // -1 - (null pointer or Default mode could not be set). 42 | int WebRtcVad_Init(VadInst* handle); 43 | 44 | // Sets the VAD operating mode. A more aggressive (higher mode) VAD is more 45 | // restrictive in reporting speech. Put in other words the probability of being 46 | // speech when the VAD returns 1 is increased with increasing mode. As a 47 | // consequence also the missed detection rate goes up. 48 | // 49 | // - handle [i/o] : VAD instance. 50 | // - mode [i] : Aggressiveness mode (0, 1, 2, or 3). 51 | // 52 | // returns : 0 - (OK), 53 | // -1 - (null pointer, mode could not be set or the VAD instance 54 | // has not been initialized). 55 | int WebRtcVad_set_mode(VadInst* handle, int mode); 56 | 57 | // Calculates a VAD decision for the |audio_frame|. For valid sampling rates and 58 | // frame lengths, see the description of WebRtcVad_ValidRateAndFrameLength(). 59 | // 60 | // - handle [i/o] : VAD Instance. Needs to be initialized by 61 | // WebRtcVad_Init() before call. 62 | // - fs [i] : Sampling frequency (Hz): 8000, 16000, or 32000 63 | // - audio_frame [i] : Audio frame buffer. 64 | // - frame_length [i] : Length of audio frame buffer in number of samples. 65 | // 66 | // returns : 1 - (Active Voice), 67 | // 0 - (Non-active Voice), 68 | // -1 - (Error) 69 | int WebRtcVad_Process(VadInst* handle, 70 | int fs, 71 | const int16_t* audio_frame, 72 | size_t frame_length); 73 | 74 | // Checks for valid combinations of |rate| and |frame_length|. We support 10, 75 | // 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz. 76 | // 77 | // - rate [i] : Sampling frequency (Hz). 78 | // - frame_length [i] : Speech frame buffer length in number of samples. 79 | // 80 | // returns : 0 - (valid combination), -1 - (invalid combination) 81 | int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length); 82 | 83 | #ifdef __cplusplus 84 | } 85 | #endif 86 | 87 | #endif // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT 88 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/vad/webrtc_vad.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree.
9 | */ 10 | 11 | #include "webrtc/common_audio/vad/include/webrtc_vad.h" 12 | 13 | #include 14 | #include 15 | 16 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 17 | #include "webrtc/common_audio/vad/vad_core.h" 18 | 19 | static const int kInitCheck = 42; 20 | static const int kValidRates[] = { 8000, 16000, 32000, 48000 }; 21 | static const size_t kRatesSize = sizeof(kValidRates) / sizeof(*kValidRates); 22 | static const int kMaxFrameLengthMs = 30; 23 | 24 | VadInst* WebRtcVad_Create() { 25 | VadInstT* self = (VadInstT*)malloc(sizeof(VadInstT)); 26 | 27 | WebRtcSpl_Init(); 28 | self->init_flag = 0; 29 | 30 | return (VadInst*)self; 31 | } 32 | 33 | void WebRtcVad_Free(VadInst* handle) { 34 | free(handle); 35 | } 36 | 37 | // TODO(bjornv): Move WebRtcVad_InitCore() code here. 38 | int WebRtcVad_Init(VadInst* handle) { 39 | // Initialize the core VAD component. 40 | return WebRtcVad_InitCore((VadInstT*) handle); 41 | } 42 | 43 | // TODO(bjornv): Move WebRtcVad_set_mode_core() code here. 44 | int WebRtcVad_set_mode(VadInst* handle, int mode) { 45 | VadInstT* self = (VadInstT*) handle; 46 | 47 | if (handle == NULL) { 48 | return -1; 49 | } 50 | if (self->init_flag != kInitCheck) { 51 | return -1; 52 | } 53 | 54 | return WebRtcVad_set_mode_core(self, mode); 55 | } 56 | 57 | int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame, 58 | size_t frame_length) { 59 | int vad = -1; 60 | VadInstT* self = (VadInstT*) handle; 61 | 62 | if (handle == NULL) { 63 | return -1; 64 | } 65 | 66 | if (self->init_flag != kInitCheck) { 67 | return -1; 68 | } 69 | if (audio_frame == NULL) { 70 | return -1; 71 | } 72 | if (WebRtcVad_ValidRateAndFrameLength(fs, frame_length) != 0) { 73 | return -1; 74 | } 75 | 76 | if (fs == 48000) { 77 | vad = WebRtcVad_CalcVad48khz(self, audio_frame, frame_length); 78 | } else if (fs == 32000) { 79 | vad = WebRtcVad_CalcVad32khz(self, audio_frame, frame_length); 80 | } else if (fs == 16000) { 81 | vad = WebRtcVad_CalcVad16khz(self, audio_frame, frame_length); 82 | } else if (fs == 8000) { 83 | vad = WebRtcVad_CalcVad8khz(self, audio_frame, frame_length); 84 | } 85 | 86 | if (vad > 0) { 87 | vad = 1; 88 | } 89 | return vad; 90 | } 91 | 92 | int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length) { 93 | int return_value = -1; 94 | size_t i; 95 | int valid_length_ms; 96 | size_t valid_length; 97 | 98 | // We only allow 10, 20 or 30 ms frames. Loop through valid frame rates and 99 | // see if we have a matching pair. 
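  // (Editorial note, not in the original source: for example, at fs == 16000
  // the only accepted frame_length values are 160, 320 and 480 samples,
  // i.e. 10, 20 and 30 ms.)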
100 | for (i = 0; i < kRatesSize; i++) { 101 | if (kValidRates[i] == rate) { 102 | for (valid_length_ms = 10; valid_length_ms <= kMaxFrameLengthMs; 103 | valid_length_ms += 10) { 104 | valid_length = (size_t)(kValidRates[i] / 1000 * valid_length_ms); 105 | if (frame_length == valid_length) { 106 | return_value = 0; 107 | break; 108 | } 109 | } 110 | break; 111 | } 112 | } 113 | 114 | return return_value; 115 | } 116 | -------------------------------------------------------------------------------- /examples/analyze-files.js: -------------------------------------------------------------------------------- 1 | const fs = require("fs"); 2 | const path = require("path"); 3 | const { SpeechRecorder } = require("../src/index"); 4 | 5 | const quantile = (elements, q) => { 6 | const sorted = elements.sort((a, b) => a - b); 7 | const p = (sorted.length - 1) * q; 8 | const base = Math.floor(p); 9 | const rest = p - base; 10 | if (sorted[base + 1] !== undefined) { 11 | return sorted[base] + rest * (sorted[base + 1] - sorted[base]); 12 | } else { 13 | return sorted[base]; 14 | } 15 | }; 16 | 17 | if (process.argv.length < 4) { 18 | console.log("Usage: node analyze-files.js /path/to/wav/files /path/to/labels"); 19 | process.exit(1); 20 | } 21 | 22 | let currentFile; 23 | let samples = 0; 24 | const leadingBufferFrames = 10; 25 | const sampleRate = 16000; 26 | const samplesPerFrame = 480; 27 | let results = {}; 28 | let labels = JSON.parse(fs.readFileSync(process.argv[3], "utf8")); 29 | 30 | const recorder = new SpeechRecorder({ 31 | leadingBufferFrames, 32 | samplesPerFrame, 33 | sampleRate, 34 | onAudio: ({ audio, probability, volume }) => { 35 | samples += audio.length; 36 | }, 37 | 38 | onChunkStart: ({ audio }) => { 39 | results[currentFile].speech.push([]); 40 | results[currentFile].speech[results[currentFile].speech.length - 1].push(samples / sampleRate); 41 | }, 42 | 43 | onChunkEnd: () => { 44 | results[currentFile].speech[results[currentFile].speech.length - 1].push(samples / sampleRate); 45 | }, 46 | }); 47 | 48 | fs.readdir(process.argv[2], async (error, files) => { 49 | for (const file of files) { 50 | if (!file.endsWith(".wav")) { 51 | continue; 52 | } 53 | 54 | currentFile = file; 55 | samples = 0; 56 | results[file] = { speech: [] }; 57 | console.log(`Processing ${file}...`); 58 | recorder.processFile(path.join(process.argv[2], file)); 59 | } 60 | 61 | let speechWindowTooSmall = []; 62 | let noiseWasSpeech = []; 63 | let noise = 0; 64 | let speech = 0; 65 | let extra = []; 66 | for (const i of Object.keys(results)) { 67 | const label = labels[i].speech; 68 | const result = results[i].speech; 69 | 70 | if (label.length == 0) { 71 | noise++; 72 | } else { 73 | speech++; 74 | } 75 | 76 | if (label.length == 0 && result.length > 0) { 77 | console.log("Noise was speech:", i); 78 | console.log("VAD:", result); 79 | noiseWasSpeech.push(i); 80 | } 81 | 82 | if (label.length > 0 && result.length > 0) { 83 | const start = Math.min(...result.map((e) => e[0])); 84 | const stop = Math.max(...result.map((e) => e[1])); 85 | if (isNaN(start) || isNaN(stop)) { 86 | continue; 87 | } 88 | 89 | const tolerance = 0.05; 90 | if ( 91 | start - (leadingBufferFrames * samplesPerFrame) / sampleRate > label[0] + tolerance || 92 | stop < label[1] - tolerance 93 | ) { 94 | console.log("Speech window too small:", i); 95 | console.log("Label:", label); 96 | console.log("VAD:", result, start, stop); 97 | speechWindowTooSmall.push(i); 98 | } else if (stop > label[1]) { 99 | extra.push(stop - label[1]); 100 | } 101 | } 
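    // Note (editorial addition): the subtraction of
    // (leadingBufferFrames * samplesPerFrame) / sampleRate in the check above accounts
    // for the leading buffer, which with the constants defined at the top of this
    // script spans 10 * 480 / 16000 = 0.3 s of audio prepended to each chunk.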
102 | } 103 | 104 | console.log( 105 | `\nSpeech window too small: ${(speechWindowTooSmall.length / speech).toFixed(2)} (${ 106 | speechWindowTooSmall.length 107 | } / ${speech})` 108 | ); 109 | 110 | console.log( 111 | `Noise was speech: ${noise > 0 ? (noiseWasSpeech.length / noise).toFixed(2) : 0} (${ 112 | noiseWasSpeech.length 113 | } / ${noise})` 114 | ); 115 | 116 | if (extra.length > 0) { 117 | console.log( 118 | `Average extra speech: ${(extra.reduce((a, b) => a + b) / extra.length).toFixed(2)}` 119 | ); 120 | console.log(`p50 extra speech: ${quantile(extra, 0.5).toFixed(2)}`); 121 | console.log(`p90 extra speech: ${quantile(extra, 0.9).toFixed(2)}`); 122 | console.log(`Max extra speech: ${Math.max(...extra).toFixed(2)}`); 123 | } 124 | }); 125 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/division_operations.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains implementations of the divisions 14 | * WebRtcSpl_DivU32U16() 15 | * WebRtcSpl_DivW32W16() 16 | * WebRtcSpl_DivW32W16ResW16() 17 | * WebRtcSpl_DivResultInQ31() 18 | * WebRtcSpl_DivW32HiLow() 19 | * 20 | * The description header can be found in signal_processing_library.h 21 | * 22 | */ 23 | 24 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 25 | #include "webrtc/rtc_base/sanitizer.h" 26 | 27 | uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den) 28 | { 29 | // Guard against division with 0 30 | if (den != 0) 31 | { 32 | return (uint32_t)(num / den); 33 | } else 34 | { 35 | return (uint32_t)0xFFFFFFFF; 36 | } 37 | } 38 | 39 | int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den) 40 | { 41 | // Guard against division with 0 42 | if (den != 0) 43 | { 44 | return (int32_t)(num / den); 45 | } else 46 | { 47 | return (int32_t)0x7FFFFFFF; 48 | } 49 | } 50 | 51 | int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den) 52 | { 53 | // Guard against division with 0 54 | if (den != 0) 55 | { 56 | return (int16_t)(num / den); 57 | } else 58 | { 59 | return (int16_t)0x7FFF; 60 | } 61 | } 62 | 63 | int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den) 64 | { 65 | int32_t L_num = num; 66 | int32_t L_den = den; 67 | int32_t div = 0; 68 | int k = 31; 69 | int change_sign = 0; 70 | 71 | if (num == 0) 72 | return 0; 73 | 74 | if (num < 0) 75 | { 76 | change_sign++; 77 | L_num = -num; 78 | } 79 | if (den < 0) 80 | { 81 | change_sign++; 82 | L_den = -den; 83 | } 84 | while (k--) 85 | { 86 | div <<= 1; 87 | L_num <<= 1; 88 | if (L_num >= L_den) 89 | { 90 | L_num -= L_den; 91 | div++; 92 | } 93 | } 94 | if (change_sign == 1) 95 | { 96 | div = -div; 97 | } 98 | return div; 99 | } 100 | 101 | int32_t RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486 102 | WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low) 103 | { 104 | int16_t approx, tmp_hi, tmp_low, num_hi, num_low; 105 | int32_t tmpW32; 106 | 107 | approx = (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi);
108 | // result in Q14 (Note: 3FFFFFFF = 0.5 in Q30) 109 | 110 | // tmpW32 = 1/den = approx * (2.0 - den * approx) (in Q30) 111 | tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1); 112 | // tmpW32 = den * approx 113 | 114 | tmpW32 = (int32_t)0x7fffffffL - tmpW32; // result in Q30 (tmpW32 = 2.0-(den*approx)) 115 | // UBSan: 2147483647 - -2 cannot be represented in type 'int' 116 | 117 | // Store tmpW32 in hi and low format 118 | tmp_hi = (int16_t)(tmpW32 >> 16); 119 | tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1); 120 | 121 | // tmpW32 = 1/den in Q29 122 | tmpW32 = (tmp_hi * approx + (tmp_low * approx >> 15)) << 1; 123 | 124 | // 1/den in hi and low format 125 | tmp_hi = (int16_t)(tmpW32 >> 16); 126 | tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1); 127 | 128 | // Store num in hi and low format 129 | num_hi = (int16_t)(num >> 16); 130 | num_low = (int16_t)((num - ((int32_t)num_hi << 16)) >> 1); 131 | 132 | // num * (1/den) by 32 bit multiplication (result in Q28) 133 | 134 | tmpW32 = num_hi * tmp_hi + (num_hi * tmp_low >> 15) + 135 | (num_low * tmp_hi >> 15); 136 | 137 | // Put result in Q31 (convert from Q28) 138 | tmpW32 = WEBRTC_SPL_LSHIFT_W32(tmpW32, 3); 139 | 140 | return tmpW32; 141 | } 142 | -------------------------------------------------------------------------------- /lib/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.15) 2 | project(speechrecorder) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | set(CMAKE_OSX_DEPLOYMENT_TARGET 10.14) 6 | set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) 7 | 8 | if(NOT APPLE AND NOT WIN32) 9 | set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) 10 | set(CMAKE_INSTALL_RPATH "$ORIGIN/") 11 | endif() 12 | 13 | option(BUILD_SHARED_LIBS "Build using shared libraries" ON) 14 | 15 | if(WIN32) 16 | add_compile_options( 17 | -DWEBRTC_WIN 18 | ) 19 | else() 20 | add_compile_options( 21 | -DWEBRTC_POSIX 22 | ) 23 | endif() 24 | 25 | include(FetchContent) 26 | set(FETCHCONTENT_UPDATES_DISCONNECTED ON) 27 | 28 | FetchContent_Declare(drwav 29 | GIT_REPOSITORY https://github.com/mackron/dr_libs 30 | GIT_TAG 9497270f581f43e6b795ce5d98d8764861fb6a50 31 | ) 32 | 33 | FetchContent_Declare(readerwriterqueue 34 | GIT_REPOSITORY https://github.com/cameron314/readerwriterqueue 35 | GIT_TAG v1.0.6 36 | ) 37 | 38 | FetchContent_MakeAvailable(drwav readerwriterqueue) 39 | 40 | include_directories( 41 | include 42 | 3rd_party/webrtcvad 43 | 3rd_party/portaudio/include 44 | 3rd_party/onnxruntime/include 45 | ) 46 | 47 | link_directories( 48 | ${CMAKE_SOURCE_DIR}/3rd_party/portaudio/lib 49 | ${CMAKE_SOURCE_DIR}/3rd_party/onnxruntime/lib 50 | ) 51 | 52 | file(GLOB_RECURSE SOURCES 53 | src/*.cpp 54 | 3rd_party/webrtcvad/*.c 55 | 3rd_party/webrtcvad/*.cc 56 | ) 57 | 58 | set(LIBRARIES 59 | readerwriterqueue 60 | ) 61 | 62 | if(APPLE) 63 | list(APPEND LIBRARIES 64 | "-framework AudioToolbox" 65 | "-framework AudioUnit" 66 | "-framework CoreAudio" 67 | "-framework CoreFoundation" 68 | "-framework CoreServices" 69 | portaudio 70 | onnxruntime.1.10.0 71 | ) 72 | elseif(WIN32) 73 | list(APPEND LIBRARIES 74 | onnxruntime 75 | ) 76 | 77 | if("${CMAKE_GENERATOR_PLATFORM}" STREQUAL "Win32") 78 | list(APPEND LIBRARIES 79 | portaudio_x86 80 | ) 81 | else() 82 | list(APPEND LIBRARIES 83 | portaudio_x64 84 | ) 85 | endif() 86 | else() 87 | list(APPEND LIBRARIES 88 | portaudio 89 | onnxruntime 90 | pthread 91 | ) 92 | endif() 93 | 94 | add_library(speechrecorder ${SOURCES}) 95 | 
target_link_libraries(speechrecorder ${LIBRARIES}) 96 | 97 | add_executable(main test/main.cpp) 98 | target_link_libraries(main speechrecorder) 99 | 100 | install(TARGETS speechrecorder DESTINATION lib) 101 | if (WIN32) 102 | install( 103 | FILES 104 | 3rd_party/onnxruntime/lib/onnxruntime.dll 105 | 3rd_party/onnxruntime/lib/onnxruntime.lib 106 | 3rd_party/onnxruntime/lib/onnxruntime_providers_shared.dll 107 | 3rd_party/onnxruntime/lib/onnxruntime_providers_shared.lib 108 | DESTINATION lib 109 | ) 110 | if("${CMAKE_GENERATOR_PLATFORM}" STREQUAL "Win32") 111 | install( 112 | FILES 113 | 3rd_party/portaudio/bin/portaudio_x86.dll 114 | 3rd_party/portaudio/lib/portaudio_x86.lib 115 | DESTINATION lib 116 | ) 117 | else() 118 | install( 119 | FILES 120 | 3rd_party/portaudio/bin/portaudio_x64.dll 121 | 3rd_party/portaudio/lib/portaudio_x64.lib 122 | DESTINATION lib 123 | ) 124 | endif() 125 | elseif(APPLE) 126 | install( 127 | FILES 128 | 3rd_party/onnxruntime/lib/libonnxruntime.1.10.0.dylib 129 | 3rd_party/portaudio/lib/libportaudio.dylib 130 | PERMISSIONS 131 | OWNER_READ OWNER_WRITE OWNER_EXECUTE 132 | GROUP_READ GROUP_EXECUTE 133 | WORLD_READ WORLD_EXECUTE 134 | DESTINATION lib 135 | ) 136 | else() 137 | install( 138 | FILES 139 | 3rd_party/onnxruntime/lib/libonnxruntime.so 140 | 3rd_party/onnxruntime/lib/libonnxruntime.so.1.10.0 141 | 3rd_party/portaudio/lib/libportaudio.so 142 | PERMISSIONS 143 | OWNER_READ OWNER_WRITE OWNER_EXECUTE 144 | GROUP_READ GROUP_EXECUTE 145 | WORLD_READ WORLD_EXECUTE 146 | DESTINATION lib 147 | ) 148 | endif() 149 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/vad/vad_core.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | /* 12 | * This header file includes the descriptions of the core VAD calls. 13 | */ 14 | 15 | #ifndef COMMON_AUDIO_VAD_VAD_CORE_H_ 16 | #define COMMON_AUDIO_VAD_VAD_CORE_H_ 17 | 18 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 19 | 20 | enum { kNumChannels = 6 }; // Number of frequency bands (named channels). 21 | enum { kNumGaussians = 2 }; // Number of Gaussians per channel in the GMM. 22 | enum { kTableSize = kNumChannels * kNumGaussians }; 23 | enum { kMinEnergy = 10 }; // Minimum energy required to trigger audio signal. 24 | 25 | typedef struct VadInstT_ { 26 | int vad; 27 | int32_t downsampling_filter_states[4]; 28 | WebRtcSpl_State48khzTo8khz state_48_to_8; 29 | int16_t noise_means[kTableSize]; 30 | int16_t speech_means[kTableSize]; 31 | int16_t noise_stds[kTableSize]; 32 | int16_t speech_stds[kTableSize]; 33 | // TODO(bjornv): Change to |frame_count|. 34 | int32_t frame_counter; 35 | int16_t over_hang; // Over Hang 36 | int16_t num_of_speech; 37 | // TODO(bjornv): Change to |age_vector|. 38 | int16_t index_vector[16 * kNumChannels]; 39 | int16_t low_value_vector[16 * kNumChannels]; 40 | // TODO(bjornv): Change to |median|. 
41 | int16_t mean_value[kNumChannels]; 42 | int16_t upper_state[5]; 43 | int16_t lower_state[5]; 44 | int16_t hp_filter_state[4]; 45 | int16_t over_hang_max_1[3]; 46 | int16_t over_hang_max_2[3]; 47 | int16_t individual[3]; 48 | int16_t total[3]; 49 | 50 | int init_flag; 51 | } VadInstT; 52 | 53 | // Initializes the core VAD component. The default aggressiveness mode is 54 | // controlled by |kDefaultMode| in vad_core.c. 55 | // 56 | // - self [i/o] : Instance that should be initialized 57 | // 58 | // returns : 0 (OK), -1 (null pointer in or if the default mode can't be 59 | // set) 60 | int WebRtcVad_InitCore(VadInstT* self); 61 | 62 | /**************************************************************************** 63 | * WebRtcVad_set_mode_core(...) 64 | * 65 | * This function changes the VAD settings 66 | * 67 | * Input: 68 | * - inst : VAD instance 69 | * - mode : Aggressiveness degree 70 | * 0 (High quality) - 3 (Highly aggressive) 71 | * 72 | * Output: 73 | * - inst : Changed instance 74 | * 75 | * Return value : 0 - Ok 76 | * -1 - Error 77 | */ 78 | 79 | int WebRtcVad_set_mode_core(VadInstT* self, int mode); 80 | 81 | /**************************************************************************** 82 | * WebRtcVad_CalcVad48khz(...) 83 | * WebRtcVad_CalcVad32khz(...) 84 | * WebRtcVad_CalcVad16khz(...) 85 | * WebRtcVad_CalcVad8khz(...) 86 | * 87 | * Calculate probability for active speech and make VAD decision. 88 | * 89 | * Input: 90 | * - inst : Instance that should be initialized 91 | * - speech_frame : Input speech frame 92 | * - frame_length : Number of input samples 93 | * 94 | * Output: 95 | * - inst : Updated filter states etc. 96 | * 97 | * Return value : VAD decision 98 | * 0 - No active speech 99 | * 1-6 - Active speech 100 | */ 101 | int WebRtcVad_CalcVad48khz(VadInstT* inst, 102 | const int16_t* speech_frame, 103 | size_t frame_length); 104 | int WebRtcVad_CalcVad32khz(VadInstT* inst, 105 | const int16_t* speech_frame, 106 | size_t frame_length); 107 | int WebRtcVad_CalcVad16khz(VadInstT* inst, 108 | const int16_t* speech_frame, 109 | size_t frame_length); 110 | int WebRtcVad_CalcVad8khz(VadInstT* inst, 111 | const int16_t* speech_frame, 112 | size_t frame_length); 113 | 114 | #endif // COMMON_AUDIO_VAD_VAD_CORE_H_ 115 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/include/real_fft.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_ 12 | #define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_ 13 | 14 | #include 15 | 16 | // For ComplexFFT(), the maximum fft order is 10; 17 | // WebRTC APM uses orders of only 7 and 8. 
18 | enum { kMaxFFTOrder = 10 }; 19 | 20 | struct RealFFT; 21 | 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | struct RealFFT* WebRtcSpl_CreateRealFFT(int order); 27 | void WebRtcSpl_FreeRealFFT(struct RealFFT* self); 28 | 29 | // Compute an FFT for a real-valued signal of length of 2^order, 30 | // where 1 < order <= MAX_FFT_ORDER. Transform length is determined by the 31 | // specification structure, which must be initialized prior to calling the FFT 32 | // function with WebRtcSpl_CreateRealFFT(). 33 | // The relationship between the input and output sequences can 34 | // be expressed in terms of the DFT, i.e.: 35 | // x[n] = (2^(-scalefactor)/N) . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N) 36 | // n=0,1,2,...N-1 37 | // N=2^order. 38 | // The conjugate-symmetric output sequence is represented using a CCS vector, 39 | // which is of length N+2, and is organized as follows: 40 | // Index: 0 1 2 3 4 5 . . . N-2 N-1 N N+1 41 | // Component: R0 0 R1 I1 R2 I2 . . . R[N/2-1] I[N/2-1] R[N/2] 0 42 | // where R[n] and I[n], respectively, denote the real and imaginary components 43 | // for FFT bin 'n'. Bins are numbered from 0 to N/2, where N is the FFT length. 44 | // Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to 45 | // the foldover frequency. 46 | // 47 | // Input Arguments: 48 | // self - pointer to preallocated and initialized FFT specification structure. 49 | // real_data_in - the input signal. For an ARM Neon platform, it must be 50 | // aligned on a 32-byte boundary. 51 | // 52 | // Output Arguments: 53 | // complex_data_out - the output complex signal with (2^order + 2) 16-bit 54 | // elements. For an ARM Neon platform, it must be different 55 | // from real_data_in, and aligned on a 32-byte boundary. 56 | // 57 | // Return Value: 58 | // 0 - FFT calculation is successful. 59 | // -1 - Error with bad arguments (null pointers). 60 | int WebRtcSpl_RealForwardFFT(struct RealFFT* self, 61 | const int16_t* real_data_in, 62 | int16_t* complex_data_out); 63 | 64 | // Compute the inverse FFT for a conjugate-symmetric input sequence of length of 65 | // 2^order, where 1 < order <= MAX_FFT_ORDER. Transform length is determined by 66 | // the specification structure, which must be initialized prior to calling the 67 | // FFT function with WebRtcSpl_CreateRealFFT(). 68 | // For a transform of length M, the input sequence is represented using a packed 69 | // CCS vector of length M+2, which is explained in the comments for 70 | // WebRtcSpl_RealForwardFFTC above. 71 | // 72 | // Input Arguments: 73 | // self - pointer to preallocated and initialized FFT specification structure. 74 | // complex_data_in - the input complex signal with (2^order + 2) 16-bit 75 | // elements. For an ARM Neon platform, it must be aligned on 76 | // a 32-byte boundary. 77 | // 78 | // Output Arguments: 79 | // real_data_out - the output real signal. For an ARM Neon platform, it must 80 | // be different to complex_data_in, and aligned on a 32-byte 81 | // boundary. 82 | // 83 | // Return Value: 84 | // 0 or a positive number - a value that the elements in the |real_data_out| 85 | // should be shifted left with in order to get 86 | // correct physical values. 87 | // -1 - Error with bad arguments (null pointers). 
88 | int WebRtcSpl_RealInverseFFT(struct RealFFT* self, 89 | const int16_t* complex_data_in, 90 | int16_t* real_data_out); 91 | 92 | #ifdef __cplusplus 93 | } 94 | #endif 95 | 96 | #endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_ 97 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/rtc_base/type_traits.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 The WebRTC Project Authors. All rights reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef RTC_BASE_TYPE_TRAITS_H_ 12 | #define RTC_BASE_TYPE_TRAITS_H_ 13 | 14 | #include 15 | #include 16 | 17 | namespace rtc { 18 | 19 | // Determines if the given class has zero-argument .data() and .size() methods 20 | // whose return values are convertible to T* and size_t, respectively. 21 | template 22 | class HasDataAndSize { 23 | private: 24 | template < 25 | typename C, 26 | typename std::enable_if< 27 | std::is_convertible().data()), T*>::value && 28 | std::is_convertible().size()), 29 | std::size_t>::value>::type* = nullptr> 30 | static int Test(int); 31 | 32 | template 33 | static char Test(...); 34 | 35 | public: 36 | static constexpr bool value = std::is_same(0)), int>::value; 37 | }; 38 | 39 | namespace test_has_data_and_size { 40 | 41 | template 42 | struct Test1 { 43 | DR data(); 44 | SR size(); 45 | }; 46 | static_assert(HasDataAndSize, int>::value, ""); 47 | static_assert(HasDataAndSize, const int>::value, ""); 48 | static_assert(HasDataAndSize, const int>::value, ""); 49 | static_assert(!HasDataAndSize, int>::value, 50 | "implicit cast of const int* to int*"); 51 | static_assert(!HasDataAndSize, int>::value, 52 | "implicit cast of char* to int*"); 53 | 54 | struct Test2 { 55 | int* data; 56 | size_t size; 57 | }; 58 | static_assert(!HasDataAndSize::value, 59 | ".data and .size aren't functions"); 60 | 61 | struct Test3 { 62 | int* data(); 63 | }; 64 | static_assert(!HasDataAndSize::value, ".size() is missing"); 65 | 66 | class Test4 { 67 | int* data(); 68 | size_t size(); 69 | }; 70 | static_assert(!HasDataAndSize::value, 71 | ".data() and .size() are private"); 72 | 73 | } // namespace test_has_data_and_size 74 | 75 | namespace type_traits_impl { 76 | 77 | // Determines if the given type is an enum that converts implicitly to 78 | // an integral type. 79 | template 80 | struct IsIntEnum { 81 | private: 82 | // This overload is used if the type is an enum, and unary plus 83 | // compiles and turns it into an integral type. 84 | template ::value && 87 | std::is_integral())>::value>::type* = 88 | nullptr> 89 | static int Test(int); 90 | 91 | // Otherwise, this overload is used. 92 | template 93 | static char Test(...); 94 | 95 | public: 96 | static constexpr bool value = 97 | std::is_same::type>(0)), 98 | int>::value; 99 | }; 100 | 101 | } // namespace type_traits_impl 102 | 103 | // Determines if the given type is integral, or an enum that 104 | // converts implicitly to an integral type. 
105 | template 106 | struct IsIntlike { 107 | private: 108 | using X = typename std::remove_reference::type; 109 | 110 | public: 111 | static constexpr bool value = 112 | std::is_integral::value || type_traits_impl::IsIntEnum::value; 113 | }; 114 | 115 | namespace test_enum_intlike { 116 | 117 | enum E1 { e1 }; 118 | enum { e2 }; 119 | enum class E3 { e3 }; 120 | struct S {}; 121 | 122 | static_assert(type_traits_impl::IsIntEnum::value, ""); 123 | static_assert(type_traits_impl::IsIntEnum::value, ""); 124 | static_assert(!type_traits_impl::IsIntEnum::value, ""); 125 | static_assert(!type_traits_impl::IsIntEnum::value, ""); 126 | static_assert(!type_traits_impl::IsIntEnum::value, ""); 127 | static_assert(!type_traits_impl::IsIntEnum::value, ""); 128 | 129 | static_assert(IsIntlike::value, ""); 130 | static_assert(IsIntlike::value, ""); 131 | static_assert(!IsIntlike::value, ""); 132 | static_assert(IsIntlike::value, ""); 133 | static_assert(!IsIntlike::value, ""); 134 | static_assert(!IsIntlike::value, ""); 135 | 136 | } // namespace test_enum_intlike 137 | 138 | } // namespace rtc 139 | 140 | #endif // RTC_BASE_TYPE_TRAITS_H_ 141 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/complex_bit_reverse.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 12 | 13 | /* Tables for data buffer indexes that are bit reversed and thus need to be 14 | * swapped. Note that, index_7[{0, 2, 4, ...}] are for the left side of the swap 15 | * operations, while index_7[{1, 3, 5, ...}] are for the right side of the 16 | * operation. Same for index_8. 17 | */ 18 | 19 | /* Indexes for the case of stages == 7. */ 20 | static const int16_t index_7[112] = { 21 | 1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104, 22 | 12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52, 23 | 23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98, 24 | 37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70, 25 | 51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69, 26 | 81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125, 27 | 103, 115, 111, 123 28 | }; 29 | 30 | /* Indexes for the case of stages == 8. 
*/ 31 | static const int16_t index_8[240] = { 32 | 1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80, 33 | 11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20, 34 | 40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184, 35 | 30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41, 36 | 148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76, 37 | 51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62, 38 | 124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82, 39 | 75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87, 40 | 234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101, 41 | 166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142, 42 | 115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131, 43 | 193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201, 44 | 149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171, 45 | 213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227, 46 | 203, 211, 207, 243, 215, 235, 223, 251, 239, 247 47 | }; 48 | 49 | void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) { 50 | /* For any specific value of stages, we know exactly the indexes that are 51 | * bit reversed. Currently (Feb. 2012) in WebRTC the only possible values of 52 | * stages are 7 and 8, so we use tables to save unnecessary iterations and 53 | * calculations for these two cases. 54 | */ 55 | if (stages == 7 || stages == 8) { 56 | int m = 0; 57 | int length = 112; 58 | const int16_t* index = index_7; 59 | 60 | if (stages == 8) { 61 | length = 240; 62 | index = index_8; 63 | } 64 | 65 | /* Decimation in time. Swap the elements with bit-reversed indexes. */ 66 | for (m = 0; m < length; m += 2) { 67 | /* We declare a int32_t* type pointer, to load both the 16-bit real 68 | * and imaginary elements from complex_data in one instruction, reducing 69 | * complexity. 70 | */ 71 | int32_t* complex_data_ptr = (int32_t*)complex_data; 72 | int32_t temp = 0; 73 | 74 | temp = complex_data_ptr[index[m]]; /* Real and imaginary */ 75 | complex_data_ptr[index[m]] = complex_data_ptr[index[m + 1]]; 76 | complex_data_ptr[index[m + 1]] = temp; 77 | } 78 | } 79 | else { 80 | int m = 0, mr = 0, l = 0; 81 | int n = 1 << stages; 82 | int nn = n - 1; 83 | 84 | /* Decimation in time - re-order data */ 85 | for (m = 1; m <= nn; ++m) { 86 | int32_t* complex_data_ptr = (int32_t*)complex_data; 87 | int32_t temp = 0; 88 | 89 | /* Find out indexes that are bit-reversed. */ 90 | l = n; 91 | do { 92 | l >>= 1; 93 | } while (l > nn - mr); 94 | mr = (mr & (l - 1)) + l; 95 | 96 | if (mr <= m) { 97 | continue; 98 | } 99 | 100 | /* Swap the elements with bit-reversed indexes. 101 | * This is similar to the loop in the stages == 7 or 8 cases. 102 | */ 103 | temp = complex_data_ptr[m]; /* Real and imaginary */ 104 | complex_data_ptr[m] = complex_data_ptr[mr]; 105 | complex_data_ptr[mr] = temp; 106 | } 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Speech Recorder 2 | 3 | speech-recorder is a cross-platform, native [node.js](https://nodejs.org) [addon](http://nodejs.org/api/addons.html) for getting a stream of audio from a device's microphone. 
Using speech-recorder, you can also get only the audio that corresponds to someone speaking. 4 | 5 | This module is used for speech recognition in [Serenade](https://serenade.ai). Serenade enables you to write code through natural speech, rather than typing. 6 | 7 | ## Installation 8 | 9 | speech-recorder has been tested on Windows 10, macOS 10.14+, and Ubuntu 18.04+ (and may work on other platforms as well). 10 | 11 | To install speech-recorder, run: 12 | 13 | yarn add speech-recorder 14 | 15 | If you're using this library with Electron, you should probably use [electron-rebuild](https://github.com/electron/electron-rebuild). 16 | 17 | ## Usage 18 | 19 | This library uses two voice activity detection mechanisms: a fast first pass (the WebRTC VAD), and a slightly slower, but much more accurate, second pass (the Silero VAD). See below for the various options you can supply to each. 20 | 21 | ### Streaming 22 | 23 | When you start recording, you can register various callbacks. `onAudio` is called when any audio comes in from the microphone. `onChunkStart` is called when a chunk of speech begins, and `onChunkEnd` is called when speech ends. 24 | 25 | const { SpeechRecorder } = require("speech-recorder"); 26 | 27 | const recorder = new SpeechRecorder({ 28 | onChunkStart: ({ audio }) => { 29 | console.log(Date.now(), "Chunk start"); 30 | }, 31 | onAudio: ({ speaking, probability, volume }) => { 32 | console.log(Date.now(), speaking, probability, volume); 33 | }, 34 | onChunkEnd: () => { 35 | console.log(Date.now(), "Chunk end"); 36 | }, 37 | }); 38 | 39 | console.log("Recording for 5 seconds..."); 40 | recorder.start(); 41 | setTimeout(() => { 42 | console.log("Done!"); 43 | recorder.stop(); 44 | }, 5000); 45 | 46 | You can write all audio from the microphone to a file with: 47 | 48 | const { SpeechRecorder } = require("speech-recorder"); 49 | 50 | const writeStream = fs.createWriteStream("audio.raw"); 51 | const recorder = new SpeechRecorder({ 52 | onAudio: ({ audio }) => { 53 | writeStream.write(audio); 54 | } 55 | }); 56 | 57 | Or, just the speech with: 58 | 59 | const { SpeechRecorder } = require("speech-recorder"); 60 | 61 | const writeStream = fs.createWriteStream("audio.raw"); 62 | const recorder = new SpeechRecorder({ 63 | onAudio: ({ audio, speech }) => { 64 | if (speech) { 65 | writeStream.write(audio); 66 | } 67 | } 68 | }); 69 | 70 | ### Devices 71 | 72 | You can get a list of supported devices with: 73 | 74 | const { devices } = require("speech-recorder"); 75 | 76 | console.log(devices()); 77 | 78 | ### Options 79 | 80 | * `consecutiveFramesForSilence`: How many frames of audio must be silent before `onChunkEnd` is fired. Default `10`. 81 | * `consecutiveFramesForSpeaking`: How many frames of audio must be speech before `onChunkStart` is fired. Default `1`. 82 | * `device`: ID of the device to use for input (i.e., from the example above). Specify `-1` to use the system default. Default `-1`. 83 | * `leadingBufferFrames`: How many frames of audio to keep in a buffer that's included in `onChunkStart`. Default `10`. 84 | * `onChunkStart`: Callback to be executed when speech starts. 85 | * `onAudio`: Callback to be executed when any audio comes in. 86 | * `onChunkEnd`: Callback to be executed when speech ends. 87 | * `samplesPerFrame`: How many audio samples to be included in each frame from the microphone. Default `480`. 88 | * `sampleRate`: Audio sample rate. Default `16000`. 89 | * `sileroVadBufferSize`: How many audio samples to pass to the VAD. Default `2000`. 
90 | * `sileroVadRateLimit`: Rate limit, in frames, for how frequently to call the VAD. Default `3`. 91 | * `sileroVadSilenceThreshold`: Probability threshold for speech to transition to silence. Default `0.1`. 92 | * `sileroVadSpeakingThreshold`: Probability threshold for silence to transition to speech. Default `0.3`. 93 | * `webrtcVadLevel`: Aggressiveness for the first-pass VAD filter. `0` is least aggressive, and `3` is most aggressive. Default `3`. 94 | * `webrtcVadBufferSize`: How many audio samples to pass to the first-pass VAD filter. Default `480`. Can only be `160`, `320`, or `480`. 95 | * `webrtcVadResultsSize`: How many first-pass VAD filter results to keep in history. Default `10`. 96 | 97 | ## Building SpeechRecorder 98 | 99 | If you want to build speech-recorder from source, first install the necessary dependencies by running: 100 | 101 | ./setup.sh <arch> 102 | 103 | Where `<arch>` specifies the architecture you'd like to build for and is one of `x86`, `x64`, or `arm64`. If you're not sure, you probably want `x64`. 104 | 105 | Then, you can build speech-recorder with: 106 | 107 | ./build.sh 108 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/rtc_base/sanitizer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 The WebRTC Project Authors. All rights reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #ifndef RTC_BASE_SANITIZER_H_ 12 | #define RTC_BASE_SANITIZER_H_ 13 | 14 | #include <stddef.h> // For size_t. 15 | 16 | #ifdef __cplusplus 17 | #include <type_traits> 18 | #endif 19 | 20 | #if defined(__has_feature) 21 | #if __has_feature(address_sanitizer) 22 | #define RTC_HAS_ASAN 1 23 | #endif 24 | #if __has_feature(memory_sanitizer) 25 | #define RTC_HAS_MSAN 1 26 | #endif 27 | #endif 28 | #ifndef RTC_HAS_ASAN 29 | #define RTC_HAS_ASAN 0 30 | #endif 31 | #ifndef RTC_HAS_MSAN 32 | #define RTC_HAS_MSAN 0 33 | #endif 34 | 35 | #if RTC_HAS_ASAN 36 | #include <sanitizer/asan_interface.h> 37 | #endif 38 | #if RTC_HAS_MSAN 39 | #include <sanitizer/msan_interface.h> 40 | #endif 41 | 42 | #ifdef __has_attribute 43 | #if __has_attribute(no_sanitize) 44 | #define RTC_NO_SANITIZE(what) __attribute__((no_sanitize(what))) 45 | #endif 46 | #endif 47 | #ifndef RTC_NO_SANITIZE 48 | #define RTC_NO_SANITIZE(what) 49 | #endif 50 | 51 | // Ask ASan to mark the memory range [ptr, ptr + element_size * num_elements) 52 | // as being unaddressable, so that reads and writes are not allowed. ASan may 53 | // narrow the range to the nearest alignment boundaries. 54 | static inline void rtc_AsanPoison(const volatile void* ptr, 55 | size_t element_size, 56 | size_t num_elements) { 57 | #if RTC_HAS_ASAN 58 | ASAN_POISON_MEMORY_REGION(ptr, element_size * num_elements); 59 | #endif 60 | } 61 | 62 | // Ask ASan to mark the memory range [ptr, ptr + element_size * num_elements) 63 | // as being addressable, so that reads and writes are allowed. ASan may widen 64 | // the range to the nearest alignment boundaries.
65 | static inline void rtc_AsanUnpoison(const volatile void* ptr, 66 | size_t element_size, 67 | size_t num_elements) { 68 | #if RTC_HAS_ASAN 69 | ASAN_UNPOISON_MEMORY_REGION(ptr, element_size * num_elements); 70 | #endif 71 | } 72 | 73 | // Ask MSan to mark the memory range [ptr, ptr + element_size * num_elements) 74 | // as being uninitialized. 75 | static inline void rtc_MsanMarkUninitialized(const volatile void* ptr, 76 | size_t element_size, 77 | size_t num_elements) { 78 | #if RTC_HAS_MSAN 79 | __msan_poison(ptr, element_size * num_elements); 80 | #endif 81 | } 82 | 83 | // Force an MSan check (if any bits in the memory range [ptr, ptr + 84 | // element_size * num_elements) are uninitialized the call will crash with an 85 | // MSan report). 86 | static inline void rtc_MsanCheckInitialized(const volatile void* ptr, 87 | size_t element_size, 88 | size_t num_elements) { 89 | #if RTC_HAS_MSAN 90 | __msan_check_mem_is_initialized(ptr, element_size * num_elements); 91 | #endif 92 | } 93 | 94 | #ifdef __cplusplus 95 | 96 | namespace rtc { 97 | namespace sanitizer_impl { 98 | 99 | template 100 | constexpr bool IsTriviallyCopyable() { 101 | return static_cast(std::is_trivially_copy_constructible::value && 102 | (std::is_trivially_copy_assignable::value || 103 | !std::is_copy_assignable::value) && 104 | std::is_trivially_destructible::value); 105 | } 106 | 107 | } // namespace sanitizer_impl 108 | 109 | template 110 | inline void AsanPoison(const T& mem) { 111 | rtc_AsanPoison(mem.data(), sizeof(mem.data()[0]), mem.size()); 112 | } 113 | 114 | template 115 | inline void AsanUnpoison(const T& mem) { 116 | rtc_AsanUnpoison(mem.data(), sizeof(mem.data()[0]), mem.size()); 117 | } 118 | 119 | template 120 | inline void MsanMarkUninitialized(const T& mem) { 121 | rtc_MsanMarkUninitialized(mem.data(), sizeof(mem.data()[0]), mem.size()); 122 | } 123 | 124 | template 125 | inline T MsanUninitialized(T t) { 126 | #if RTC_HAS_MSAN 127 | // TODO(bugs.webrtc.org/8762): Switch to std::is_trivially_copyable when it 128 | // becomes available in downstream projects. 129 | static_assert(sanitizer_impl::IsTriviallyCopyable(), ""); 130 | #endif 131 | rtc_MsanMarkUninitialized(&t, sizeof(T), 1); 132 | return t; 133 | } 134 | 135 | template 136 | inline void MsanCheckInitialized(const T& mem) { 137 | rtc_MsanCheckInitialized(mem.data(), sizeof(mem.data()[0]), mem.size()); 138 | } 139 | 140 | } // namespace rtc 141 | 142 | #endif // __cplusplus 143 | 144 | #endif // RTC_BASE_SANITIZER_H_ 145 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/spl_init.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | /* The global function contained in this file initializes SPL function 12 | * pointers, currently only for ARM platforms. 13 | * 14 | * Some code came from common/rtcd.c in the WebM project. 
15 | */ 16 | 17 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 18 | #include "webrtc/system_wrappers/include/cpu_features_wrapper.h" 19 | 20 | /* Declare function pointers. */ 21 | MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16; 22 | MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32; 23 | MaxValueW16 WebRtcSpl_MaxValueW16; 24 | MaxValueW32 WebRtcSpl_MaxValueW32; 25 | MinValueW16 WebRtcSpl_MinValueW16; 26 | MinValueW32 WebRtcSpl_MinValueW32; 27 | CrossCorrelation WebRtcSpl_CrossCorrelation; 28 | DownsampleFast WebRtcSpl_DownsampleFast; 29 | ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound; 30 | 31 | #if (!defined(WEBRTC_HAS_NEON)) && !defined(MIPS32_LE) 32 | /* Initialize function pointers to the generic C version. */ 33 | static void InitPointersToC(void) { 34 | WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C; 35 | WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C; 36 | WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C; 37 | WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C; 38 | WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C; 39 | WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C; 40 | WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC; 41 | WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC; 42 | WebRtcSpl_ScaleAndAddVectorsWithRound = 43 | WebRtcSpl_ScaleAndAddVectorsWithRoundC; 44 | } 45 | #endif 46 | 47 | #if defined(WEBRTC_HAS_NEON) 48 | /* Initialize function pointers to the Neon version. */ 49 | static void InitPointersToNeon(void) { 50 | WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon; 51 | WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon; 52 | WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon; 53 | WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon; 54 | WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon; 55 | WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon; 56 | WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationNeon; 57 | WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon; 58 | WebRtcSpl_ScaleAndAddVectorsWithRound = 59 | WebRtcSpl_ScaleAndAddVectorsWithRoundC; 60 | } 61 | #endif 62 | 63 | #if defined(MIPS32_LE) 64 | /* Initialize function pointers to the MIPS version. 
*/ 65 | static void InitPointersToMIPS(void) { 66 | WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips; 67 | WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips; 68 | WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips; 69 | WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips; 70 | WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips; 71 | WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelation_mips; 72 | WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips; 73 | #if defined(MIPS_DSP_R1_LE) 74 | WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips; 75 | WebRtcSpl_ScaleAndAddVectorsWithRound = 76 | WebRtcSpl_ScaleAndAddVectorsWithRound_mips; 77 | #else 78 | WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C; 79 | WebRtcSpl_ScaleAndAddVectorsWithRound = 80 | WebRtcSpl_ScaleAndAddVectorsWithRoundC; 81 | #endif 82 | } 83 | #endif 84 | 85 | static void InitFunctionPointers(void) { 86 | #if defined(WEBRTC_HAS_NEON) 87 | InitPointersToNeon(); 88 | #elif defined(MIPS32_LE) 89 | InitPointersToMIPS(); 90 | #else 91 | InitPointersToC(); 92 | #endif /* WEBRTC_HAS_NEON */ 93 | } 94 | 95 | #if defined(WEBRTC_POSIX) 96 | #include 97 | 98 | static void once(void (*func)(void)) { 99 | static pthread_once_t lock = PTHREAD_ONCE_INIT; 100 | pthread_once(&lock, func); 101 | } 102 | 103 | #elif defined(_WIN32) 104 | #include 105 | 106 | static void once(void (*func)(void)) { 107 | /* Didn't use InitializeCriticalSection() since there's no race-free context 108 | * in which to execute it. 109 | * 110 | * TODO(kma): Change to different implementation (e.g. 111 | * InterlockedCompareExchangePointer) to avoid issues similar to 112 | * http://code.google.com/p/webm/issues/detail?id=467. 113 | */ 114 | static CRITICAL_SECTION lock = {(void *)((size_t)-1), -1, 0, 0, 0, 0}; 115 | static int done = 0; 116 | 117 | EnterCriticalSection(&lock); 118 | if (!done) { 119 | func(); 120 | done = 1; 121 | } 122 | LeaveCriticalSection(&lock); 123 | } 124 | 125 | /* There's no fallback version as an #else block here to ensure thread safety. 126 | * In case of neither pthread for WEBRTC_POSIX nor _WIN32 is present, build 127 | * system should pick it up. 128 | */ 129 | #endif /* WEBRTC_POSIX */ 130 | 131 | void WebRtcSpl_Init(void) { 132 | once(InitFunctionPointers); 133 | } 134 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/typedefs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | // This file contains platform-specific typedefs and defines. 12 | // Much of it is derived from Chromium's build/build_config.h. 13 | 14 | #ifndef WEBRTC_TYPEDEFS_H_ 15 | #define WEBRTC_TYPEDEFS_H_ 16 | 17 | // Processor architecture detection. 
For more info on what's defined, see: 18 | // http://msdn.microsoft.com/en-us/library/b0084kay.aspx 19 | // http://www.agner.org/optimize/calling_conventions.pdf 20 | // or with gcc, run: "echo | gcc -E -dM -" 21 | #if defined(_M_X64) || defined(__x86_64__) 22 | #define WEBRTC_ARCH_X86_FAMILY 23 | #define WEBRTC_ARCH_X86_64 24 | #define WEBRTC_ARCH_64_BITS 25 | #define WEBRTC_ARCH_LITTLE_ENDIAN 26 | #elif defined(__aarch64__) 27 | #define WEBRTC_ARCH_64_BITS 28 | #define WEBRTC_ARCH_LITTLE_ENDIAN 29 | #elif defined(_M_IX86) || defined(__i386__) 30 | #define WEBRTC_ARCH_X86_FAMILY 31 | #define WEBRTC_ARCH_X86 32 | #define WEBRTC_ARCH_32_BITS 33 | #define WEBRTC_ARCH_LITTLE_ENDIAN 34 | #elif defined(__ARMEL__) 35 | // TODO(ajm): We'd prefer to control platform defines here, but this is 36 | // currently provided by the Android makefiles. Commented to avoid duplicate 37 | // definition warnings. 38 | //#define WEBRTC_ARCH_ARM 39 | // TODO(ajm): Chromium uses the following two defines. Should we switch? 40 | //#define WEBRTC_ARCH_ARM_FAMILY 41 | //#define WEBRTC_ARCH_ARMEL 42 | #define WEBRTC_ARCH_32_BITS 43 | #define WEBRTC_ARCH_LITTLE_ENDIAN 44 | #elif defined(__MIPSEL__) 45 | #define WEBRTC_ARCH_32_BITS 46 | #define WEBRTC_ARCH_LITTLE_ENDIAN 47 | #elif defined(__pnacl__) 48 | #define WEBRTC_ARCH_32_BITS 49 | #define WEBRTC_ARCH_LITTLE_ENDIAN 50 | #else 51 | #error Please add support for your architecture in typedefs.h 52 | #endif 53 | 54 | #if !(defined(WEBRTC_ARCH_LITTLE_ENDIAN) ^ defined(WEBRTC_ARCH_BIG_ENDIAN)) 55 | #error Define either WEBRTC_ARCH_LITTLE_ENDIAN or WEBRTC_ARCH_BIG_ENDIAN 56 | #endif 57 | 58 | #if (defined(WEBRTC_ARCH_X86_FAMILY) && !defined(__SSE2__)) || \ 59 | (defined(WEBRTC_ARCH_ARM_V7) && !defined(WEBRTC_ARCH_ARM_NEON)) 60 | #define WEBRTC_CPU_DETECTION 61 | #endif 62 | 63 | #if !defined(_MSC_VER) 64 | #include 65 | #else 66 | // Define C99 equivalent types, since pre-2010 MSVC doesn't provide stdint.h. 67 | typedef signed char int8_t; 68 | typedef signed short int16_t; 69 | typedef signed int int32_t; 70 | typedef __int64 int64_t; 71 | typedef unsigned char uint8_t; 72 | typedef unsigned short uint16_t; 73 | typedef unsigned int uint32_t; 74 | typedef unsigned __int64 uint64_t; 75 | #endif 76 | 77 | // Borrowed from Chromium's base/compiler_specific.h. 78 | // Annotate a virtual method indicating it must be overriding a virtual 79 | // method in the parent class. 80 | // Use like: 81 | // virtual void foo() OVERRIDE; 82 | #if defined(_MSC_VER) 83 | #define OVERRIDE override 84 | #elif defined(__clang__) 85 | // Clang defaults to C++03 and warns about using override. Squelch that. 86 | // Intentionally no push/pop here so all users of OVERRIDE ignore the warning 87 | // too. This is like passing -Wno-c++11-extensions, except that GCC won't die 88 | // (because it won't see this pragma). 89 | #pragma clang diagnostic ignored "-Wc++11-extensions" 90 | #define OVERRIDE override 91 | #elif defined(__GNUC__) && __cplusplus >= 201103 && \ 92 | (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40700 93 | // GCC 4.7 supports explicit virtual overrides when C++11 support is enabled. 94 | #define OVERRIDE override 95 | #else 96 | #define OVERRIDE 97 | #endif 98 | 99 | // Annotate a function indicating the caller must examine the return value. 100 | // Use like: 101 | // int foo() WARN_UNUSED_RESULT; 102 | // TODO(ajm): Hack to avoid multiple definitions until the base/ of webrtc and 103 | // libjingle are merged. 
104 | #if !defined(WARN_UNUSED_RESULT) 105 | #if defined(__GNUC__) 106 | #define WARN_UNUSED_RESULT __attribute__((warn_unused_result)) 107 | #else 108 | #define WARN_UNUSED_RESULT 109 | #endif 110 | #endif // WARN_UNUSED_RESULT 111 | 112 | // Put after a variable that might not be used, to prevent compiler warnings: 113 | // int result ATTRIBUTE_UNUSED = DoSomething(); 114 | // assert(result == 17); 115 | #ifndef ATTRIBUTE_UNUSED 116 | #if defined(__GNUC__) || defined(__clang__) 117 | #define ATTRIBUTE_UNUSED __attribute__((unused)) 118 | #else 119 | #define ATTRIBUTE_UNUSED 120 | #endif 121 | #endif 122 | 123 | // Macro to be used for switch-case fallthrough (required for enabling 124 | // -Wimplicit-fallthrough warning on Clang). 125 | #ifndef FALLTHROUGH 126 | #if defined(__clang__) 127 | #define FALLTHROUGH() [[clang::fallthrough]] 128 | #else 129 | #define FALLTHROUGH() do { } while (0) 130 | #endif 131 | #endif 132 | 133 | // Annotate a function that will not return control flow to the caller. 134 | #if defined(_MSC_VER) 135 | #define NO_RETURN __declspec(noreturn) 136 | #elif defined(__GNUC__) 137 | #define NO_RETURN __attribute__((noreturn)) 138 | #else 139 | #define NO_RETURN 140 | #endif 141 | 142 | #endif // WEBRTC_TYPEDEFS_H_ 143 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/rtc_base/checks.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2006 The WebRTC Project Authors. All rights reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | // Most of this was borrowed (with minor modifications) from V8's and Chromium's 12 | // src/base/logging.cc. 13 | 14 | #include <cstdarg> 15 | #include <cstdio> 16 | #include <cstring> 17 | 18 | #if defined(WEBRTC_ANDROID) 19 | #define RTC_LOG_TAG_ANDROID "rtc" 20 | #include <android/log.h> // NOLINT 21 | #endif 22 | 23 | #if defined(WEBRTC_WIN) 24 | #include <windows.h> 25 | #endif 26 | 27 | #if defined(WEBRTC_WIN) 28 | #define LAST_SYSTEM_ERROR (::GetLastError()) 29 | #elif defined(__native_client__) && __native_client__ 30 | #define LAST_SYSTEM_ERROR (0) 31 | #elif defined(WEBRTC_POSIX) 32 | #include <errno.h> 33 | #define LAST_SYSTEM_ERROR (errno) 34 | #endif // WEBRTC_WIN 35 | 36 | #include "webrtc/rtc_base/checks.h" 37 | 38 | namespace { 39 | #if defined(__GNUC__) 40 | __attribute__((__format__(__printf__, 2, 3))) 41 | #endif 42 | void AppendFormat(std::string* s, const char* fmt, ...) { 43 | va_list args, copy; 44 | va_start(args, fmt); 45 | va_copy(copy, args); 46 | const int predicted_length = std::vsnprintf(nullptr, 0, fmt, copy); 47 | va_end(copy); 48 | 49 | if (predicted_length > 0) { 50 | const size_t size = s->size(); 51 | s->resize(size + predicted_length); 52 | // Pass "+ 1" to vsnprintf to include space for the '\0'. 53 | std::vsnprintf(&((*s)[size]), predicted_length + 1, fmt, args); 54 | } 55 | va_end(args); 56 | } 57 | } 58 | 59 | namespace rtc { 60 | namespace webrtc_checks_impl { 61 | 62 | // Reads one argument from args, appends it to s and advances fmt. 63 | // Returns true iff an argument was successfully parsed.
64 | bool ParseArg(va_list* args, const CheckArgType** fmt, std::string* s) { 65 | if (**fmt == CheckArgType::kEnd) 66 | return false; 67 | 68 | switch (**fmt) { 69 | case CheckArgType::kInt: 70 | AppendFormat(s, "%d", va_arg(*args, int)); 71 | break; 72 | case CheckArgType::kLong: 73 | AppendFormat(s, "%ld", va_arg(*args, long)); 74 | break; 75 | case CheckArgType::kLongLong: 76 | AppendFormat(s, "%lld", va_arg(*args, long long)); 77 | break; 78 | case CheckArgType::kUInt: 79 | AppendFormat(s, "%u", va_arg(*args, unsigned)); 80 | break; 81 | case CheckArgType::kULong: 82 | AppendFormat(s, "%lu", va_arg(*args, unsigned long)); 83 | break; 84 | case CheckArgType::kULongLong: 85 | AppendFormat(s, "%llu", va_arg(*args, unsigned long long)); 86 | break; 87 | case CheckArgType::kDouble: 88 | AppendFormat(s, "%g", va_arg(*args, double)); 89 | break; 90 | case CheckArgType::kLongDouble: 91 | AppendFormat(s, "%Lg", va_arg(*args, long double)); 92 | break; 93 | case CheckArgType::kCharP: 94 | s->append(va_arg(*args, const char*)); 95 | break; 96 | case CheckArgType::kStdString: 97 | s->append(*va_arg(*args, const std::string*)); 98 | break; 99 | case CheckArgType::kVoidP: 100 | AppendFormat(s, "%p", va_arg(*args, const void*)); 101 | break; 102 | default: 103 | s->append("[Invalid CheckArgType]"); 104 | return false; 105 | } 106 | (*fmt)++; 107 | return true; 108 | } 109 | 110 | RTC_NORETURN void FatalLog(const char* file, 111 | int line, 112 | const char* message, 113 | const CheckArgType* fmt, 114 | ...) { 115 | va_list args; 116 | va_start(args, fmt); 117 | 118 | std::string s; 119 | AppendFormat(&s, 120 | "\n\n" 121 | "#\n" 122 | "# Fatal error in: %s, line %d\n" 123 | "# last system error: %u\n" 124 | "# Check failed: %s", 125 | file, line, LAST_SYSTEM_ERROR, message); 126 | 127 | if (*fmt == CheckArgType::kCheckOp) { 128 | // This log message was generated by RTC_CHECK_OP, so we have to complete 129 | // the error message using the operands that have been passed as the first 130 | // two arguments. 131 | fmt++; 132 | 133 | std::string s1, s2; 134 | if (ParseArg(&args, &fmt, &s1) && ParseArg(&args, &fmt, &s2)) 135 | AppendFormat(&s, " (%s vs. %s)\n# ", s1.c_str(), s2.c_str()); 136 | } else { 137 | s.append("\n# "); 138 | } 139 | 140 | // Append all the user-supplied arguments to the message. 141 | while (ParseArg(&args, &fmt, &s)) 142 | ; 143 | 144 | va_end(args); 145 | 146 | const char* output = s.c_str(); 147 | 148 | #if defined(WEBRTC_ANDROID) 149 | __android_log_print(ANDROID_LOG_ERROR, RTC_LOG_TAG_ANDROID, "%s\n", output); 150 | #endif 151 | 152 | fflush(stdout); 153 | fprintf(stderr, "%s", output); 154 | fflush(stderr); 155 | abort(); 156 | } 157 | 158 | } // namespace webrtc_checks_impl 159 | } // namespace rtc 160 | 161 | // Function to call from the C version of the RTC_CHECK and RTC_DCHECK macros. 162 | RTC_NORETURN void rtc_FatalMessage(const char* file, int line, 163 | const char* msg) { 164 | static constexpr rtc::webrtc_checks_impl::CheckArgType t[] = { 165 | rtc::webrtc_checks_impl::CheckArgType::kEnd}; 166 | FatalLog(file, line, msg, t); 167 | } 168 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/vector_scaling_operations.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains implementations of the functions 14 | * WebRtcSpl_VectorBitShiftW16() 15 | * WebRtcSpl_VectorBitShiftW32() 16 | * WebRtcSpl_VectorBitShiftW32ToW16() 17 | * WebRtcSpl_ScaleVector() 18 | * WebRtcSpl_ScaleVectorWithSat() 19 | * WebRtcSpl_ScaleAndAddVectors() 20 | * WebRtcSpl_ScaleAndAddVectorsWithRoundC() 21 | */ 22 | 23 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 24 | 25 | void WebRtcSpl_VectorBitShiftW16(int16_t *res, size_t length, 26 | const int16_t *in, int16_t right_shifts) 27 | { 28 | size_t i; 29 | 30 | if (right_shifts > 0) 31 | { 32 | for (i = length; i > 0; i--) 33 | { 34 | (*res++) = ((*in++) >> right_shifts); 35 | } 36 | } else 37 | { 38 | for (i = length; i > 0; i--) 39 | { 40 | (*res++) = ((*in++) * (1 << (-right_shifts))); 41 | } 42 | } 43 | } 44 | 45 | void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector, 46 | size_t vector_length, 47 | const int32_t *in_vector, 48 | int16_t right_shifts) 49 | { 50 | size_t i; 51 | 52 | if (right_shifts > 0) 53 | { 54 | for (i = vector_length; i > 0; i--) 55 | { 56 | (*out_vector++) = ((*in_vector++) >> right_shifts); 57 | } 58 | } else 59 | { 60 | for (i = vector_length; i > 0; i--) 61 | { 62 | (*out_vector++) = ((*in_vector++) << (-right_shifts)); 63 | } 64 | } 65 | } 66 | 67 | void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length, 68 | const int32_t* in, int right_shifts) { 69 | size_t i; 70 | int32_t tmp_w32; 71 | 72 | if (right_shifts >= 0) { 73 | for (i = length; i > 0; i--) { 74 | tmp_w32 = (*in++) >> right_shifts; 75 | (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32); 76 | } 77 | } else { 78 | int left_shifts = -right_shifts; 79 | for (i = length; i > 0; i--) { 80 | tmp_w32 = (*in++) << left_shifts; 81 | (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32); 82 | } 83 | } 84 | } 85 | 86 | void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector, 87 | int16_t gain, size_t in_vector_length, 88 | int16_t right_shifts) 89 | { 90 | // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts 91 | size_t i; 92 | const int16_t *inptr; 93 | int16_t *outptr; 94 | 95 | inptr = in_vector; 96 | outptr = out_vector; 97 | 98 | for (i = 0; i < in_vector_length; i++) 99 | { 100 | *outptr++ = (int16_t)((*inptr++ * gain) >> right_shifts); 101 | } 102 | } 103 | 104 | void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector, 105 | int16_t gain, size_t in_vector_length, 106 | int16_t right_shifts) 107 | { 108 | // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts 109 | size_t i; 110 | const int16_t *inptr; 111 | int16_t *outptr; 112 | 113 | inptr = in_vector; 114 | outptr = out_vector; 115 | 116 | for (i = 0; i < in_vector_length; i++) { 117 | *outptr++ = WebRtcSpl_SatW32ToW16((*inptr++ * gain) >> right_shifts); 118 | } 119 | } 120 | 121 | void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1, 122 | const int16_t *in2, int16_t gain2, int shift2, 123 | int16_t *out, size_t vector_length) 124 | { 125 | // Performs vector operation: out = (gain1*in1)>>shift1 + (gain2*in2)>>shift2 126 | size_t i; 127 | const int16_t *in1ptr; 128 | const 
int16_t *in2ptr; 129 | int16_t *outptr; 130 | 131 | in1ptr = in1; 132 | in2ptr = in2; 133 | outptr = out; 134 | 135 | for (i = 0; i < vector_length; i++) 136 | { 137 | *outptr++ = (int16_t)((gain1 * *in1ptr++) >> shift1) + 138 | (int16_t)((gain2 * *in2ptr++) >> shift2); 139 | } 140 | } 141 | 142 | // C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms. 143 | int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1, 144 | int16_t in_vector1_scale, 145 | const int16_t* in_vector2, 146 | int16_t in_vector2_scale, 147 | int right_shifts, 148 | int16_t* out_vector, 149 | size_t length) { 150 | size_t i = 0; 151 | int round_value = (1 << right_shifts) >> 1; 152 | 153 | if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL || 154 | length == 0 || right_shifts < 0) { 155 | return -1; 156 | } 157 | 158 | for (i = 0; i < length; i++) { 159 | out_vector[i] = (int16_t)(( 160 | in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale + 161 | round_value) >> right_shifts); 162 | } 163 | 164 | return 0; 165 | } 166 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/spl_sqrt.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains the function WebRtcSpl_Sqrt(). 
14 | * The description header can be found in signal_processing_library.h 15 | * 16 | */ 17 | 18 | #include "webrtc/rtc_base/checks.h" 19 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 20 | 21 | int32_t WebRtcSpl_SqrtLocal(int32_t in); 22 | 23 | int32_t WebRtcSpl_SqrtLocal(int32_t in) 24 | { 25 | 26 | int16_t x_half, t16; 27 | int32_t A, B, x2; 28 | 29 | /* The following block performs: 30 | y=in/2 31 | x=y-2^30 32 | x_half=x/2^31 33 | t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4) 34 | + 0.875*((x_half)^5) 35 | */ 36 | 37 | B = in / 2; 38 | 39 | B = B - ((int32_t)0x40000000); // B = in/2 - 1/2 40 | x_half = (int16_t)(B >> 16); // x_half = x/2 = (in-1)/2 41 | B = B + ((int32_t)0x40000000); // B = 1 + x/2 42 | B = B + ((int32_t)0x40000000); // Add 0.5 twice (since 1.0 does not exist in Q31) 43 | 44 | x2 = ((int32_t)x_half) * ((int32_t)x_half) * 2; // A = (x/2)^2 45 | A = -x2; // A = -(x/2)^2 46 | B = B + (A >> 1); // B = 1 + x/2 - 0.5*(x/2)^2 47 | 48 | A >>= 16; 49 | A = A * A * 2; // A = (x/2)^4 50 | t16 = (int16_t)(A >> 16); 51 | B += -20480 * t16 * 2; // B = B - 0.625*A 52 | // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 53 | 54 | A = x_half * t16 * 2; // A = (x/2)^5 55 | t16 = (int16_t)(A >> 16); 56 | B += 28672 * t16 * 2; // B = B + 0.875*A 57 | // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + 0.875*(x/2)^5 58 | 59 | t16 = (int16_t)(x2 >> 16); 60 | A = x_half * t16 * 2; // A = x/2^3 61 | 62 | B = B + (A >> 1); // B = B + 0.5*A 63 | // After this, B = 1 + x/2 - 0.5*(x/2)^2 + 0.5*(x/2)^3 - 0.625*(x/2)^4 + 0.875*(x/2)^5 64 | 65 | B = B + ((int32_t)32768); // Round off bit 66 | 67 | return B; 68 | } 69 | 70 | int32_t WebRtcSpl_Sqrt(int32_t value) 71 | { 72 | /* 73 | Algorithm: 74 | 75 | Six term Taylor Series is used here to compute the square root of a number 76 | y^0.5 = (1+x)^0.5 where x = y-1 77 | = 1+(x/2)-0.5*((x/2)^2+0.5*((x/2)^3-0.625*((x/2)^4+0.875*((x/2)^5) 78 | 0.5 <= x < 1 79 | 80 | Example of how the algorithm works, with ut=sqrt(in), and 81 | with in=73632 and ut=271 (even shift value case): 82 | 83 | in=73632 84 | y= in/131072 85 | x=y-1 86 | t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5) 87 | ut=t*(1/sqrt(2))*512 88 | 89 | or: 90 | 91 | in=73632 92 | in2=73632*2^14 93 | y= in2/2^31 94 | x=y-1 95 | t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5) 96 | ut=t*(1/sqrt(2)) 97 | ut2=ut*2^9 98 | 99 | which gives: 100 | 101 | in = 73632 102 | in2 = 1206386688 103 | y = 0.56176757812500 104 | x = -0.43823242187500 105 | t = 0.74973506527313 106 | ut = 0.53014274874797 107 | ut2 = 2.714330873589594e+002 108 | 109 | or: 110 | 111 | in=73632 112 | in2=73632*2^14 113 | y=in2/2 114 | x=y-2^30 115 | x_half=x/2^31 116 | t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4) 117 | + 0.875*((x_half)^5) 118 | ut=t*(1/sqrt(2)) 119 | ut2=ut*2^9 120 | 121 | which gives: 122 | 123 | in = 73632 124 | in2 = 1206386688 125 | y = 603193344 126 | x = -470548480 127 | x_half = -0.21911621093750 128 | t = 0.74973506527313 129 | ut = 0.53014274874797 130 | ut2 = 2.714330873589594e+002 131 | 132 | */ 133 | 134 | int16_t x_norm, nshift, t16, sh; 135 | int32_t A; 136 | 137 | int16_t k_sqrt_2 = 23170; // 1/sqrt2 (==5a82) 138 | 139 | A = value; 140 | 141 | // The convention in this function is to calculate sqrt(abs(A)). Negate the 142 | // input if it is negative. 
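// Tracing the worked example above: WebRtcSpl_Sqrt(73632) normalizes the input
// (sh = 14, an even shift), runs the Taylor-series core WebRtcSpl_SqrtLocal(),
// scales by 1/sqrt(2) because the shift count is even, then de-normalizes and
// returns 271 (the true square root is ~271.35).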
143 | if (A < 0) { 144 | if (A == WEBRTC_SPL_WORD32_MIN) { 145 | // This number cannot be held in an int32_t after negating. 146 | // Map it to the maximum positive value. 147 | A = WEBRTC_SPL_WORD32_MAX; 148 | } else { 149 | A = -A; 150 | } 151 | } else if (A == 0) { 152 | return 0; // sqrt(0) = 0 153 | } 154 | 155 | sh = WebRtcSpl_NormW32(A); // # shifts to normalize A 156 | A = WEBRTC_SPL_LSHIFT_W32(A, sh); // Normalize A 157 | if (A < (WEBRTC_SPL_WORD32_MAX - 32767)) 158 | { 159 | A = A + ((int32_t)32768); // Round off bit 160 | } else 161 | { 162 | A = WEBRTC_SPL_WORD32_MAX; 163 | } 164 | 165 | x_norm = (int16_t)(A >> 16); // x_norm = AH 166 | 167 | nshift = (sh / 2); 168 | RTC_DCHECK_GE(nshift, 0); 169 | 170 | A = (int32_t)WEBRTC_SPL_LSHIFT_W32((int32_t)x_norm, 16); 171 | A = WEBRTC_SPL_ABS_W32(A); // A = abs(x_norm<<16) 172 | A = WebRtcSpl_SqrtLocal(A); // A = sqrt(A) 173 | 174 | if (2 * nshift == sh) { 175 | // Even shift value case 176 | 177 | t16 = (int16_t)(A >> 16); // t16 = AH 178 | 179 | A = k_sqrt_2 * t16 * 2; // A = 1/sqrt(2)*t16 180 | A = A + ((int32_t)32768); // Round off 181 | A = A & ((int32_t)0x7fff0000); // Round off 182 | 183 | A >>= 15; // A = A>>16 184 | 185 | } else 186 | { 187 | A >>= 16; // A = A>>16 188 | } 189 | 190 | A = A & ((int32_t)0x0000ffff); 191 | A >>= nshift; // De-normalize the result. 192 | 193 | return A; 194 | } 195 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/include/spl_inl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | // This header file includes the inline functions in 12 | // the fix point signal processing library. 13 | 14 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_ 15 | #define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_ 16 | 17 | #include "webrtc/rtc_base/compile_assert_c.h" 18 | 19 | extern const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64]; 20 | 21 | // Don't call this directly except in tests! 22 | static __inline int WebRtcSpl_CountLeadingZeros32_NotBuiltin(uint32_t n) { 23 | // Normalize n by rounding up to the nearest number that is a sequence of 0 24 | // bits followed by a sequence of 1 bits. This number has the same number of 25 | // leading zeros as the original n. There are exactly 33 such values. 26 | n |= n >> 1; 27 | n |= n >> 2; 28 | n |= n >> 4; 29 | n |= n >> 8; 30 | n |= n >> 16; 31 | 32 | // Multiply the modified n with a constant selected (by exhaustive search) 33 | // such that each of the 33 possible values of n give a product whose 6 most 34 | // significant bits are unique. Then look up the answer in the table. 35 | return kWebRtcSpl_CountLeadingZeros32_Table[(n * 0x8c0b2891) >> 26]; 36 | } 37 | 38 | // Don't call this directly except in tests! 39 | static __inline int WebRtcSpl_CountLeadingZeros64_NotBuiltin(uint64_t n) { 40 | const int leading_zeros = n >> 32 == 0 ? 
32 : 0; 41 | return leading_zeros + WebRtcSpl_CountLeadingZeros32_NotBuiltin( 42 | (uint32_t)(n >> (32 - leading_zeros))); 43 | } 44 | 45 | // Returns the number of leading zero bits in the argument. 46 | static __inline int WebRtcSpl_CountLeadingZeros32(uint32_t n) { 47 | #ifdef __GNUC__ 48 | RTC_COMPILE_ASSERT(sizeof(unsigned int) == sizeof(uint32_t)); 49 | return n == 0 ? 32 : __builtin_clz(n); 50 | #else 51 | return WebRtcSpl_CountLeadingZeros32_NotBuiltin(n); 52 | #endif 53 | } 54 | 55 | // Returns the number of leading zero bits in the argument. 56 | static __inline int WebRtcSpl_CountLeadingZeros64(uint64_t n) { 57 | #ifdef __GNUC__ 58 | RTC_COMPILE_ASSERT(sizeof(unsigned long long) == sizeof(uint64_t)); // NOLINT 59 | return n == 0 ? 64 : __builtin_clzll(n); 60 | #else 61 | return WebRtcSpl_CountLeadingZeros64_NotBuiltin(n); 62 | #endif 63 | } 64 | 65 | #ifdef WEBRTC_ARCH_ARM_V7 66 | #include "webrtc/common_audio/signal_processing/include/spl_inl_armv7.h" 67 | #else 68 | 69 | #if defined(MIPS32_LE) 70 | #include "webrtc/common_audio/signal_processing/include/spl_inl_mips.h" 71 | #endif 72 | 73 | #if !defined(MIPS_DSP_R1_LE) 74 | static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) { 75 | int16_t out16 = (int16_t)value32; 76 | 77 | if (value32 > 32767) 78 | out16 = 32767; 79 | else if (value32 < -32768) 80 | out16 = -32768; 81 | 82 | return out16; 83 | } 84 | 85 | static __inline int32_t WebRtcSpl_AddSatW32(int32_t a, int32_t b) { 86 | // Do the addition in unsigned numbers, since signed overflow is undefined 87 | // behavior. 88 | const int32_t sum = (int32_t)((uint32_t)a + (uint32_t)b); 89 | 90 | // a + b can't overflow if a and b have different signs. If they have the 91 | // same sign, a + b also has the same sign iff it didn't overflow. 92 | if ((a < 0) == (b < 0) && (a < 0) != (sum < 0)) { 93 | // The direction of the overflow is obvious from the sign of a + b. 94 | return sum < 0 ? INT32_MAX : INT32_MIN; 95 | } 96 | return sum; 97 | } 98 | 99 | static __inline int32_t WebRtcSpl_SubSatW32(int32_t a, int32_t b) { 100 | // Do the subtraction in unsigned numbers, since signed overflow is undefined 101 | // behavior. 102 | const int32_t diff = (int32_t)((uint32_t)a - (uint32_t)b); 103 | 104 | // a - b can't overflow if a and b have the same sign. If they have different 105 | // signs, a - b has the same sign as a iff it didn't overflow. 106 | if ((a < 0) != (b < 0) && (a < 0) != (diff < 0)) { 107 | // The direction of the overflow is obvious from the sign of a - b. 108 | return diff < 0 ? INT32_MAX : INT32_MIN; 109 | } 110 | return diff; 111 | } 112 | 113 | static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) { 114 | return WebRtcSpl_SatW32ToW16((int32_t)a + (int32_t)b); 115 | } 116 | 117 | static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) { 118 | return WebRtcSpl_SatW32ToW16((int32_t)var1 - (int32_t)var2); 119 | } 120 | #endif // #if !defined(MIPS_DSP_R1_LE) 121 | 122 | #if !defined(MIPS32_LE) 123 | static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) { 124 | return 32 - WebRtcSpl_CountLeadingZeros32(n); 125 | } 126 | 127 | // Return the number of steps a can be left-shifted without overflow, 128 | // or 0 if a == 0. 129 | static __inline int16_t WebRtcSpl_NormW32(int32_t a) { 130 | return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? ~a : a) - 1; 131 | } 132 | 133 | // Return the number of steps a can be left-shifted without overflow, 134 | // or 0 if a == 0. 
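// For example, WebRtcSpl_NormU32(1) == 31 and WebRtcSpl_NormU32(0x80000000u) == 0:
// for unsigned input the shift headroom is exactly the number of leading zero bits.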
135 | static __inline int16_t WebRtcSpl_NormU32(uint32_t a) { 136 | return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a); 137 | } 138 | 139 | // Return the number of steps a can be left-shifted without overflow, 140 | // or 0 if a == 0. 141 | static __inline int16_t WebRtcSpl_NormW16(int16_t a) { 142 | const int32_t a32 = a; 143 | return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? ~a32 : a32) - 17; 144 | } 145 | 146 | static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) { 147 | return (a * b + c); 148 | } 149 | #endif // #if !defined(MIPS32_LE) 150 | 151 | #endif // WEBRTC_ARCH_ARM_V7 152 | 153 | #endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_ 154 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/min_max_operations.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | /* 12 | * This file contains the implementation of functions 13 | * WebRtcSpl_MaxAbsValueW16C() 14 | * WebRtcSpl_MaxAbsValueW32C() 15 | * WebRtcSpl_MaxValueW16C() 16 | * WebRtcSpl_MaxValueW32C() 17 | * WebRtcSpl_MinValueW16C() 18 | * WebRtcSpl_MinValueW32C() 19 | * WebRtcSpl_MaxAbsIndexW16() 20 | * WebRtcSpl_MaxIndexW16() 21 | * WebRtcSpl_MaxIndexW32() 22 | * WebRtcSpl_MinIndexW16() 23 | * WebRtcSpl_MinIndexW32() 24 | * 25 | */ 26 | 27 | #include 28 | 29 | #include "webrtc/rtc_base/checks.h" 30 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 31 | 32 | // TODO(bjorn/kma): Consolidate function pairs (e.g. combine 33 | // WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.) 34 | // TODO(kma): Move the next six functions into min_max_operations_c.c. 35 | 36 | // Maximum absolute value of word16 vector. C version for generic platforms. 37 | int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length) { 38 | size_t i = 0; 39 | int absolute = 0, maximum = 0; 40 | 41 | RTC_DCHECK_GT(length, 0); 42 | 43 | for (i = 0; i < length; i++) { 44 | absolute = abs((int)vector[i]); 45 | 46 | if (absolute > maximum) { 47 | maximum = absolute; 48 | } 49 | } 50 | 51 | // Guard the case for abs(-32768). 52 | if (maximum > WEBRTC_SPL_WORD16_MAX) { 53 | maximum = WEBRTC_SPL_WORD16_MAX; 54 | } 55 | 56 | return (int16_t)maximum; 57 | } 58 | 59 | // Maximum absolute value of word32 vector. C version for generic platforms. 60 | int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length) { 61 | // Use uint32_t for the local variables, to accommodate the return value 62 | // of abs(0x80000000), which is 0x80000000. 63 | 64 | uint32_t absolute = 0, maximum = 0; 65 | size_t i = 0; 66 | 67 | RTC_DCHECK_GT(length, 0); 68 | 69 | for (i = 0; i < length; i++) { 70 | absolute = abs((int)vector[i]); 71 | if (absolute > maximum) { 72 | maximum = absolute; 73 | } 74 | } 75 | 76 | maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX); 77 | 78 | return (int32_t)maximum; 79 | } 80 | 81 | // Maximum value of word16 vector. C version for generic platforms. 
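// Note that, unlike WebRtcSpl_MaxAbsValueW16C() above, this returns the largest
// signed element itself: for {-9, -5, -3} the maximum value is -3, while the
// maximum absolute value is 9.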
82 | int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length) { 83 | int16_t maximum = WEBRTC_SPL_WORD16_MIN; 84 | size_t i = 0; 85 | 86 | RTC_DCHECK_GT(length, 0); 87 | 88 | for (i = 0; i < length; i++) { 89 | if (vector[i] > maximum) 90 | maximum = vector[i]; 91 | } 92 | return maximum; 93 | } 94 | 95 | // Maximum value of word32 vector. C version for generic platforms. 96 | int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length) { 97 | int32_t maximum = WEBRTC_SPL_WORD32_MIN; 98 | size_t i = 0; 99 | 100 | RTC_DCHECK_GT(length, 0); 101 | 102 | for (i = 0; i < length; i++) { 103 | if (vector[i] > maximum) 104 | maximum = vector[i]; 105 | } 106 | return maximum; 107 | } 108 | 109 | // Minimum value of word16 vector. C version for generic platforms. 110 | int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length) { 111 | int16_t minimum = WEBRTC_SPL_WORD16_MAX; 112 | size_t i = 0; 113 | 114 | RTC_DCHECK_GT(length, 0); 115 | 116 | for (i = 0; i < length; i++) { 117 | if (vector[i] < minimum) 118 | minimum = vector[i]; 119 | } 120 | return minimum; 121 | } 122 | 123 | // Minimum value of word32 vector. C version for generic platforms. 124 | int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length) { 125 | int32_t minimum = WEBRTC_SPL_WORD32_MAX; 126 | size_t i = 0; 127 | 128 | RTC_DCHECK_GT(length, 0); 129 | 130 | for (i = 0; i < length; i++) { 131 | if (vector[i] < minimum) 132 | minimum = vector[i]; 133 | } 134 | return minimum; 135 | } 136 | 137 | // Index of maximum absolute value in a word16 vector. 138 | size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length) { 139 | // Use type int for local variables, to accomodate the value of abs(-32768). 140 | 141 | size_t i = 0, index = 0; 142 | int absolute = 0, maximum = 0; 143 | 144 | RTC_DCHECK_GT(length, 0); 145 | 146 | for (i = 0; i < length; i++) { 147 | absolute = abs((int)vector[i]); 148 | 149 | if (absolute > maximum) { 150 | maximum = absolute; 151 | index = i; 152 | } 153 | } 154 | 155 | return index; 156 | } 157 | 158 | // Index of maximum value in a word16 vector. 159 | size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length) { 160 | size_t i = 0, index = 0; 161 | int16_t maximum = WEBRTC_SPL_WORD16_MIN; 162 | 163 | RTC_DCHECK_GT(length, 0); 164 | 165 | for (i = 0; i < length; i++) { 166 | if (vector[i] > maximum) { 167 | maximum = vector[i]; 168 | index = i; 169 | } 170 | } 171 | 172 | return index; 173 | } 174 | 175 | // Index of maximum value in a word32 vector. 176 | size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length) { 177 | size_t i = 0, index = 0; 178 | int32_t maximum = WEBRTC_SPL_WORD32_MIN; 179 | 180 | RTC_DCHECK_GT(length, 0); 181 | 182 | for (i = 0; i < length; i++) { 183 | if (vector[i] > maximum) { 184 | maximum = vector[i]; 185 | index = i; 186 | } 187 | } 188 | 189 | return index; 190 | } 191 | 192 | // Index of minimum value in a word16 vector. 193 | size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length) { 194 | size_t i = 0, index = 0; 195 | int16_t minimum = WEBRTC_SPL_WORD16_MAX; 196 | 197 | RTC_DCHECK_GT(length, 0); 198 | 199 | for (i = 0; i < length; i++) { 200 | if (vector[i] < minimum) { 201 | minimum = vector[i]; 202 | index = i; 203 | } 204 | } 205 | 206 | return index; 207 | } 208 | 209 | // Index of minimum value in a word32 vector. 
210 | size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) { 211 | size_t i = 0, index = 0; 212 | int32_t minimum = WEBRTC_SPL_WORD32_MAX; 213 | 214 | RTC_DCHECK_GT(length, 0); 215 | 216 | for (i = 0; i < length; i++) { 217 | if (vector[i] < minimum) { 218 | minimum = vector[i]; 219 | index = i; 220 | } 221 | } 222 | 223 | return index; 224 | } 225 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/vad/vad_sp.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | #include "webrtc/common_audio/vad/vad_sp.h" 12 | 13 | #include "webrtc/rtc_base/checks.h" 14 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 15 | #include "webrtc/common_audio/vad/vad_core.h" 16 | 17 | // Allpass filter coefficients, upper and lower, in Q13. 18 | // Upper: 0.64, Lower: 0.17. 19 | static const int16_t kAllPassCoefsQ13[2] = { 5243, 1392 }; // Q13. 20 | static const int16_t kSmoothingDown = 6553; // 0.2 in Q15. 21 | static const int16_t kSmoothingUp = 32439; // 0.99 in Q15. 22 | 23 | // TODO(bjornv): Move this function to vad_filterbank.c. 24 | // Downsampling filter based on splitting filter and allpass functions. 25 | void WebRtcVad_Downsampling(const int16_t* signal_in, 26 | int16_t* signal_out, 27 | int32_t* filter_state, 28 | size_t in_length) { 29 | int16_t tmp16_1 = 0, tmp16_2 = 0; 30 | int32_t tmp32_1 = filter_state[0]; 31 | int32_t tmp32_2 = filter_state[1]; 32 | size_t n = 0; 33 | // Downsampling by 2 gives half length. 34 | size_t half_length = (in_length >> 1); 35 | 36 | // Filter coefficients in Q13, filter state in Q0. 37 | for (n = 0; n < half_length; n++) { 38 | // All-pass filtering upper branch. 39 | tmp16_1 = (int16_t) ((tmp32_1 >> 1) + 40 | ((kAllPassCoefsQ13[0] * *signal_in) >> 14)); 41 | *signal_out = tmp16_1; 42 | tmp32_1 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[0] * tmp16_1) >> 12); 43 | 44 | // All-pass filtering lower branch. 45 | tmp16_2 = (int16_t) ((tmp32_2 >> 1) + 46 | ((kAllPassCoefsQ13[1] * *signal_in) >> 14)); 47 | *signal_out++ += tmp16_2; 48 | tmp32_2 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[1] * tmp16_2) >> 12); 49 | } 50 | // Store the filter states. 51 | filter_state[0] = tmp32_1; 52 | filter_state[1] = tmp32_2; 53 | } 54 | 55 | // Inserts |feature_value| into |low_value_vector|, if it is one of the 16 56 | // smallest values the last 100 frames. Then calculates and returns the median 57 | // of the five smallest values. 58 | int16_t WebRtcVad_FindMinimum(VadInstT* self, 59 | int16_t feature_value, 60 | int channel) { 61 | int i = 0, j = 0; 62 | int position = -1; 63 | // Offset to beginning of the 16 minimum values in memory. 64 | const int offset = (channel << 4); 65 | int16_t current_median = 1600; 66 | int16_t alpha = 0; 67 | int32_t tmp32 = 0; 68 | // Pointer to memory for the 16 minimum values and the age of each value of 69 | // the |channel|. 
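// Each channel owns a contiguous block of 16 slots, so |offset| == channel * 16
// indexes that channel's block in both arrays: |index_vector| holds the age (in
// frames) of each stored value and |low_value_vector| holds the 16 smallest
// feature values seen over roughly the last 100 frames.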
70 | int16_t* age = &self->index_vector[offset]; 71 | int16_t* smallest_values = &self->low_value_vector[offset]; 72 | 73 | RTC_DCHECK_LT(channel, kNumChannels); 74 | 75 | // Each value in |smallest_values| is getting 1 loop older. Update |age|, and 76 | // remove old values. 77 | for (i = 0; i < 16; i++) { 78 | if (age[i] != 100) { 79 | age[i]++; 80 | } else { 81 | // Too old value. Remove from memory and shift larger values downwards. 82 | for (j = i; j < 16; j++) { 83 | smallest_values[j] = smallest_values[j + 1]; 84 | age[j] = age[j + 1]; 85 | } 86 | age[15] = 101; 87 | smallest_values[15] = 10000; 88 | } 89 | } 90 | 91 | // Check if |feature_value| is smaller than any of the values in 92 | // |smallest_values|. If so, find the |position| where to insert the new value 93 | // (|feature_value|). 94 | if (feature_value < smallest_values[7]) { 95 | if (feature_value < smallest_values[3]) { 96 | if (feature_value < smallest_values[1]) { 97 | if (feature_value < smallest_values[0]) { 98 | position = 0; 99 | } else { 100 | position = 1; 101 | } 102 | } else if (feature_value < smallest_values[2]) { 103 | position = 2; 104 | } else { 105 | position = 3; 106 | } 107 | } else if (feature_value < smallest_values[5]) { 108 | if (feature_value < smallest_values[4]) { 109 | position = 4; 110 | } else { 111 | position = 5; 112 | } 113 | } else if (feature_value < smallest_values[6]) { 114 | position = 6; 115 | } else { 116 | position = 7; 117 | } 118 | } else if (feature_value < smallest_values[15]) { 119 | if (feature_value < smallest_values[11]) { 120 | if (feature_value < smallest_values[9]) { 121 | if (feature_value < smallest_values[8]) { 122 | position = 8; 123 | } else { 124 | position = 9; 125 | } 126 | } else if (feature_value < smallest_values[10]) { 127 | position = 10; 128 | } else { 129 | position = 11; 130 | } 131 | } else if (feature_value < smallest_values[13]) { 132 | if (feature_value < smallest_values[12]) { 133 | position = 12; 134 | } else { 135 | position = 13; 136 | } 137 | } else if (feature_value < smallest_values[14]) { 138 | position = 14; 139 | } else { 140 | position = 15; 141 | } 142 | } 143 | 144 | // If we have detected a new small value, insert it at the correct position 145 | // and shift larger values up. 146 | if (position > -1) { 147 | for (i = 15; i > position; i--) { 148 | smallest_values[i] = smallest_values[i - 1]; 149 | age[i] = age[i - 1]; 150 | } 151 | smallest_values[position] = feature_value; 152 | age[position] = 1; 153 | } 154 | 155 | // Get |current_median|. 156 | if (self->frame_counter > 2) { 157 | current_median = smallest_values[2]; 158 | } else if (self->frame_counter > 0) { 159 | current_median = smallest_values[0]; 160 | } 161 | 162 | // Smooth the median value. 163 | if (self->frame_counter > 0) { 164 | if (current_median < self->mean_value[channel]) { 165 | alpha = kSmoothingDown; // 0.2 in Q15. 166 | } else { 167 | alpha = kSmoothingUp; // 0.99 in Q15. 168 | } 169 | } 170 | tmp32 = (alpha + 1) * self->mean_value[channel]; 171 | tmp32 += (WEBRTC_SPL_WORD16_MAX - alpha) * current_median; 172 | tmp32 += 16384; 173 | self->mean_value[channel] = (int16_t) (tmp32 >> 15); 174 | 175 | return self->mean_value[channel]; 176 | } 177 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/resample_48khz.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains resampling functions between 48 kHz and nb/wb. 14 | * The description header can be found in signal_processing_library.h 15 | * 16 | */ 17 | 18 | #include 19 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 20 | #include "webrtc/common_audio/signal_processing/resample_by_2_internal.h" 21 | 22 | //////////////////////////// 23 | ///// 48 kHz -> 16 kHz ///// 24 | //////////////////////////// 25 | 26 | // 48 -> 16 resampler 27 | void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out, 28 | WebRtcSpl_State48khzTo16khz* state, int32_t* tmpmem) 29 | { 30 | ///// 48 --> 48(LP) ///// 31 | // int16_t in[480] 32 | // int32_t out[480] 33 | ///// 34 | WebRtcSpl_LPBy2ShortToInt(in, 480, tmpmem + 16, state->S_48_48); 35 | 36 | ///// 48 --> 32 ///// 37 | // int32_t in[480] 38 | // int32_t out[320] 39 | ///// 40 | // copy state to and from input array 41 | memcpy(tmpmem + 8, state->S_48_32, 8 * sizeof(int32_t)); 42 | memcpy(state->S_48_32, tmpmem + 488, 8 * sizeof(int32_t)); 43 | WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 160); 44 | 45 | ///// 32 --> 16 ///// 46 | // int32_t in[320] 47 | // int16_t out[160] 48 | ///// 49 | WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16); 50 | } 51 | 52 | // initialize state of 48 -> 16 resampler 53 | void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state) 54 | { 55 | memset(state->S_48_48, 0, 16 * sizeof(int32_t)); 56 | memset(state->S_48_32, 0, 8 * sizeof(int32_t)); 57 | memset(state->S_32_16, 0, 8 * sizeof(int32_t)); 58 | } 59 | 60 | //////////////////////////// 61 | ///// 16 kHz -> 48 kHz ///// 62 | //////////////////////////// 63 | 64 | // 16 -> 48 resampler 65 | void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out, 66 | WebRtcSpl_State16khzTo48khz* state, int32_t* tmpmem) 67 | { 68 | ///// 16 --> 32 ///// 69 | // int16_t in[160] 70 | // int32_t out[320] 71 | ///// 72 | WebRtcSpl_UpBy2ShortToInt(in, 160, tmpmem + 16, state->S_16_32); 73 | 74 | ///// 32 --> 24 ///// 75 | // int32_t in[320] 76 | // int32_t out[240] 77 | // copy state to and from input array 78 | ///// 79 | memcpy(tmpmem + 8, state->S_32_24, 8 * sizeof(int32_t)); 80 | memcpy(state->S_32_24, tmpmem + 328, 8 * sizeof(int32_t)); 81 | WebRtcSpl_Resample32khzTo24khz(tmpmem + 8, tmpmem, 80); 82 | 83 | ///// 24 --> 48 ///// 84 | // int32_t in[240] 85 | // int16_t out[480] 86 | ///// 87 | WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48); 88 | } 89 | 90 | // initialize state of 16 -> 48 resampler 91 | void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state) 92 | { 93 | memset(state->S_16_32, 0, 8 * sizeof(int32_t)); 94 | memset(state->S_32_24, 0, 8 * sizeof(int32_t)); 95 | memset(state->S_24_48, 0, 8 * sizeof(int32_t)); 96 | } 97 | 98 | //////////////////////////// 99 | ///// 48 kHz -> 8 kHz ///// 100 | //////////////////////////// 101 | 102 | // 48 -> 8 resampler 103 | void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out, 104 | WebRtcSpl_State48khzTo8khz* state, int32_t* tmpmem) 105 | { 106 | ///// 48 --> 24 ///// 107 | // int16_t in[480] 108 | // int32_t out[240] 109 | 
///// 110 | WebRtcSpl_DownBy2ShortToInt(in, 480, tmpmem + 256, state->S_48_24); 111 | 112 | ///// 24 --> 24(LP) ///// 113 | // int32_t in[240] 114 | // int32_t out[240] 115 | ///// 116 | WebRtcSpl_LPBy2IntToInt(tmpmem + 256, 240, tmpmem + 16, state->S_24_24); 117 | 118 | ///// 24 --> 16 ///// 119 | // int32_t in[240] 120 | // int32_t out[160] 121 | ///// 122 | // copy state to and from input array 123 | memcpy(tmpmem + 8, state->S_24_16, 8 * sizeof(int32_t)); 124 | memcpy(state->S_24_16, tmpmem + 248, 8 * sizeof(int32_t)); 125 | WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 80); 126 | 127 | ///// 16 --> 8 ///// 128 | // int32_t in[160] 129 | // int16_t out[80] 130 | ///// 131 | WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8); 132 | } 133 | 134 | // initialize state of 48 -> 8 resampler 135 | void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state) 136 | { 137 | memset(state->S_48_24, 0, 8 * sizeof(int32_t)); 138 | memset(state->S_24_24, 0, 16 * sizeof(int32_t)); 139 | memset(state->S_24_16, 0, 8 * sizeof(int32_t)); 140 | memset(state->S_16_8, 0, 8 * sizeof(int32_t)); 141 | } 142 | 143 | //////////////////////////// 144 | ///// 8 kHz -> 48 kHz ///// 145 | //////////////////////////// 146 | 147 | // 8 -> 48 resampler 148 | void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out, 149 | WebRtcSpl_State8khzTo48khz* state, int32_t* tmpmem) 150 | { 151 | ///// 8 --> 16 ///// 152 | // int16_t in[80] 153 | // int32_t out[160] 154 | ///// 155 | WebRtcSpl_UpBy2ShortToInt(in, 80, tmpmem + 264, state->S_8_16); 156 | 157 | ///// 16 --> 12 ///// 158 | // int32_t in[160] 159 | // int32_t out[120] 160 | ///// 161 | // copy state to and from input array 162 | memcpy(tmpmem + 256, state->S_16_12, 8 * sizeof(int32_t)); 163 | memcpy(state->S_16_12, tmpmem + 416, 8 * sizeof(int32_t)); 164 | WebRtcSpl_Resample32khzTo24khz(tmpmem + 256, tmpmem + 240, 40); 165 | 166 | ///// 12 --> 24 ///// 167 | // int32_t in[120] 168 | // int16_t out[240] 169 | ///// 170 | WebRtcSpl_UpBy2IntToInt(tmpmem + 240, 120, tmpmem, state->S_12_24); 171 | 172 | ///// 24 --> 48 ///// 173 | // int32_t in[240] 174 | // int16_t out[480] 175 | ///// 176 | WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48); 177 | } 178 | 179 | // initialize state of 8 -> 48 resampler 180 | void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state) 181 | { 182 | memset(state->S_8_16, 0, 8 * sizeof(int32_t)); 183 | memset(state->S_16_12, 0, 8 * sizeof(int32_t)); 184 | memset(state->S_12_24, 0, 8 * sizeof(int32_t)); 185 | memset(state->S_24_48, 0, 8 * sizeof(int32_t)); 186 | } 187 | -------------------------------------------------------------------------------- /binding.gyp: -------------------------------------------------------------------------------- 1 | { 2 | "targets": [ 3 | { 4 | "target_name": "speechrecorder", 5 | "sources": ["src/speech_recorder.cpp"], 6 | "cflags!": [ 7 | "-fno-exceptions", 8 | "-fno-rtti", 9 | ], 10 | "cflags_cc!": [ 11 | "-fno-exceptions", 12 | "-fno-rtti", 13 | ], 14 | "include_dirs": [ 15 | " 18 | // rtc::SafeGe // >= 19 | // 20 | // They each accept two arguments of arbitrary types, and in almost all cases, 21 | // they simply call the appropriate comparison operator. However, if both 22 | // arguments are integers, they don't compare them using C++'s quirky rules, 23 | // but instead adhere to the true mathematical definitions. 
It is as if the 24 | // arguments were first converted to infinite-range signed integers, and then 25 | // compared, although of course nothing expensive like that actually takes 26 | // place. In practice, for signed/signed and unsigned/unsigned comparisons and 27 | // some mixed-signed comparisons with a compile-time constant, the overhead is 28 | // zero; in the remaining cases, it is just a few machine instructions (no 29 | // branches). 30 | 31 | #ifndef RTC_BASE_NUMERICS_SAFE_COMPARE_H_ 32 | #define RTC_BASE_NUMERICS_SAFE_COMPARE_H_ 33 | 34 | #include 35 | #include 36 | 37 | #include 38 | #include 39 | 40 | #include "webrtc/rtc_base/type_traits.h" 41 | 42 | namespace rtc { 43 | 44 | namespace safe_cmp_impl { 45 | 46 | template 47 | struct LargerIntImpl : std::false_type {}; 48 | template <> 49 | struct LargerIntImpl : std::true_type { 50 | using type = int16_t; 51 | }; 52 | template <> 53 | struct LargerIntImpl : std::true_type { 54 | using type = int32_t; 55 | }; 56 | template <> 57 | struct LargerIntImpl : std::true_type { 58 | using type = int64_t; 59 | }; 60 | 61 | // LargerInt::value is true iff there's a signed type that's larger 62 | // than T1 (and no larger than the larger of T2 and int*, for performance 63 | // reasons); and if there is such a type, LargerInt::type is an alias 64 | // for it. 65 | template 66 | struct LargerInt 67 | : LargerIntImpl {}; 70 | 71 | template 72 | constexpr typename std::make_unsigned::type MakeUnsigned(T a) { 73 | return static_cast::type>(a); 74 | } 75 | 76 | // Overload for when both T1 and T2 have the same signedness. 77 | template ::value == 81 | std::is_signed::value>::type* = nullptr> 82 | constexpr bool Cmp(T1 a, T2 b) { 83 | return Op::Op(a, b); 84 | } 85 | 86 | // Overload for signed - unsigned comparison that can be promoted to a bigger 87 | // signed type. 88 | template ::value && 92 | std::is_unsigned::value && 93 | LargerInt::value>::type* = nullptr> 94 | constexpr bool Cmp(T1 a, T2 b) { 95 | return Op::Op(a, static_cast::type>(b)); 96 | } 97 | 98 | // Overload for unsigned - signed comparison that can be promoted to a bigger 99 | // signed type. 100 | template ::value && 104 | std::is_signed::value && 105 | LargerInt::value>::type* = nullptr> 106 | constexpr bool Cmp(T1 a, T2 b) { 107 | return Op::Op(static_cast::type>(a), b); 108 | } 109 | 110 | // Overload for signed - unsigned comparison that can't be promoted to a bigger 111 | // signed type. 112 | template ::value && 116 | std::is_unsigned::value && 117 | !LargerInt::value>::type* = nullptr> 118 | constexpr bool Cmp(T1 a, T2 b) { 119 | return a < 0 ? Op::Op(-1, 0) : Op::Op(safe_cmp_impl::MakeUnsigned(a), b); 120 | } 121 | 122 | // Overload for unsigned - signed comparison that can't be promoted to a bigger 123 | // signed type. 124 | template ::value && 128 | std::is_signed::value && 129 | !LargerInt::value>::type* = nullptr> 130 | constexpr bool Cmp(T1 a, T2 b) { 131 | return b < 0 ? 
Op::Op(0, -1) : Op::Op(a, safe_cmp_impl::MakeUnsigned(b)); 132 | } 133 | 134 | #define RTC_SAFECMP_MAKE_OP(name, op) \ 135 | struct name { \ 136 | template \ 137 | static constexpr bool Op(T1 a, T2 b) { \ 138 | return a op b; \ 139 | } \ 140 | }; 141 | RTC_SAFECMP_MAKE_OP(EqOp, ==) 142 | RTC_SAFECMP_MAKE_OP(NeOp, !=) 143 | RTC_SAFECMP_MAKE_OP(LtOp, <) 144 | RTC_SAFECMP_MAKE_OP(LeOp, <=) 145 | RTC_SAFECMP_MAKE_OP(GtOp, >) 146 | RTC_SAFECMP_MAKE_OP(GeOp, >=) 147 | #undef RTC_SAFECMP_MAKE_OP 148 | 149 | } // namespace safe_cmp_impl 150 | 151 | #define RTC_SAFECMP_MAKE_FUN(name) \ 152 | template \ 153 | constexpr \ 154 | typename std::enable_if::value && IsIntlike::value, \ 155 | bool>::type Safe##name(T1 a, T2 b) { \ 156 | /* Unary plus here turns enums into real integral types. */ \ 157 | return safe_cmp_impl::Cmp(+a, +b); \ 158 | } \ 159 | template \ 160 | constexpr \ 161 | typename std::enable_if::value || !IsIntlike::value, \ 162 | bool>::type Safe##name(const T1& a, \ 163 | const T2& b) { \ 164 | return safe_cmp_impl::name##Op::Op(a, b); \ 165 | } 166 | RTC_SAFECMP_MAKE_FUN(Eq) 167 | RTC_SAFECMP_MAKE_FUN(Ne) 168 | RTC_SAFECMP_MAKE_FUN(Lt) 169 | RTC_SAFECMP_MAKE_FUN(Le) 170 | RTC_SAFECMP_MAKE_FUN(Gt) 171 | RTC_SAFECMP_MAKE_FUN(Ge) 172 | #undef RTC_SAFECMP_MAKE_FUN 173 | 174 | } // namespace rtc 175 | 176 | #endif // RTC_BASE_NUMERICS_SAFE_COMPARE_H_ 177 | -------------------------------------------------------------------------------- /lib/src/chunk_processor.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "chunk_processor.h" 9 | 10 | namespace speechrecorder { 11 | 12 | static std::mutex ortMutex_; 13 | static std::unique_ptr ortEnv_; 14 | static std::unique_ptr ortMemory_; 15 | static std::unique_ptr ortSession_; 16 | 17 | ChunkProcessor::ChunkProcessor(std::string modelPath, 18 | ChunkProcessorOptions options) 19 | : options_(options), 20 | queue_(), 21 | stopped_(false), 22 | microphone_(options.device, options.samplesPerFrame, options.sampleRate, 23 | &queue_), 24 | webrtcVad_(options.webrtcVadLevel, options.sampleRate) { 25 | queueThread_ = std::thread([&, modelPath] { 26 | ortMutex_.lock(); 27 | if (!ortSession_) { 28 | ortEnv_ = std::make_unique(ORT_LOGGING_LEVEL_WARNING, 29 | "SpeechRecorder::ChunkProcessor"); 30 | ortMemory_ = std::make_unique(Ort::MemoryInfo::CreateCpu( 31 | OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault)); 32 | 33 | Ort::SessionOptions sessionOptions; 34 | sessionOptions.SetIntraOpNumThreads(1); 35 | #ifdef _WIN32 36 | std::wstring wstring(modelPath.begin(), modelPath.end()); 37 | ortSession_ = std::make_unique(*ortEnv_, wstring.c_str(), 38 | sessionOptions); 39 | 40 | #else 41 | ortSession_ = std::make_unique(*ortEnv_, modelPath.c_str(), 42 | sessionOptions); 43 | #endif 44 | } 45 | ortMutex_.unlock(); 46 | while (true) { 47 | short* audio; 48 | queue_.wait_dequeue(audio); 49 | // null pointer means the destructor wants us to stop the thread. 50 | if (audio == nullptr) { 51 | return; 52 | } 53 | if (!stopped_) { 54 | Process(audio); 55 | } 56 | } 57 | }); 58 | } 59 | 60 | ChunkProcessor::~ChunkProcessor() { 61 | // shutdown the queue thread. 
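// Enqueueing a null pointer wakes the worker thread's blocking wait_dequeue()
// call; the worker treats nullptr as a stop sentinel and returns, so the join()
// below cannot block forever even when no audio is being captured.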
62 | stopped_ = true; 63 | queue_.enqueue(nullptr); 64 | queueThread_.join(); 65 | 66 | if (stopThread_.joinable()) { 67 | stopThread_.join(); 68 | } 69 | if (startThread_.joinable()) { 70 | startThread_.join(); 71 | } 72 | } 73 | 74 | void ChunkProcessor::Process(short* input) { 75 | std::vector frame; 76 | const short* iterator = (const short*)input; 77 | unsigned long long sum = 0; 78 | for (unsigned long i = 0; i < options_.samplesPerFrame; i++) { 79 | const short value = *iterator++; 80 | frame.push_back(value); 81 | leadingBuffer_.push_back(value); 82 | sileroBuffer_.push_back((float)value / (float)SHRT_MAX); 83 | webrtcVadBuffer_.push_back(value); 84 | sum += value * value; 85 | } 86 | 87 | double volume = sqrt((double)sum / (double)options_.samplesPerFrame); 88 | if (leadingBuffer_.size() > 89 | options_.leadingBufferFrames * options_.samplesPerFrame) { 90 | leadingBuffer_.erase( 91 | leadingBuffer_.begin(), 92 | leadingBuffer_.begin() + 93 | (leadingBuffer_.size() - 94 | (options_.leadingBufferFrames * options_.samplesPerFrame))); 95 | } 96 | 97 | if (sileroBuffer_.size() > options_.sileroVadBufferSize) { 98 | sileroBuffer_.erase(sileroBuffer_.begin(), 99 | sileroBuffer_.begin() + (sileroBuffer_.size() - 100 | options_.sileroVadBufferSize)); 101 | } 102 | 103 | // typically, the number of samples per frame will be larger than the 104 | // webrtcvad buffer size, so continually append the new audio to the end of 105 | // the buffer, and process the buffer from left to right until it's too small 106 | // for a webrtcvad call 107 | while (webrtcVadBuffer_.size() >= options_.webrtcVadBufferSize) { 108 | std::vector buffer( 109 | webrtcVadBuffer_.begin(), 110 | webrtcVadBuffer_.begin() + options_.webrtcVadBufferSize); 111 | webrtcVadResults_.push_back( 112 | webrtcVad_.Process(buffer.data(), options_.webrtcVadBufferSize)); 113 | webrtcVadBuffer_.erase( 114 | webrtcVadBuffer_.begin(), 115 | webrtcVadBuffer_.begin() + options_.webrtcVadBufferSize); 116 | } 117 | 118 | if (webrtcVadResults_.size() > options_.webrtcVadResultsSize) { 119 | webrtcVadResults_.erase( 120 | webrtcVadResults_.begin(), 121 | webrtcVadResults_.begin() + 122 | (webrtcVadResults_.size() - options_.webrtcVadResultsSize)); 123 | } 124 | 125 | if (framesUntilSileroVad_ > 0) { 126 | framesUntilSileroVad_--; 127 | } 128 | 129 | // if we're speaking or any past webrtcvad result within the window is true, 130 | // then use the result from the silero vad 131 | double probability = 0.0; 132 | if (speaking_ || webrtcVadResults_.size() != options_.webrtcVadResultsSize || 133 | std::any_of(webrtcVadResults_.begin(), webrtcVadResults_.end(), 134 | [](bool e) { return e; })) { 135 | if (framesUntilSileroVad_ == 0) { 136 | framesUntilSileroVad_ = options_.sileroVadRateLimit; 137 | 138 | std::vector inputDimensions; 139 | inputDimensions.push_back(1); 140 | inputDimensions.push_back(sileroBuffer_.size()); 141 | 142 | std::vector inputTensors; 143 | inputTensors.push_back(Ort::Value::CreateTensor( 144 | *ortMemory_, sileroBuffer_.data(), sileroBuffer_.size(), 145 | inputDimensions.data(), inputDimensions.size())); 146 | 147 | std::vector outputTensorValues(2); 148 | std::vector outputDimensions; 149 | outputDimensions.push_back(1); 150 | outputDimensions.push_back(2); 151 | 152 | std::vector outputTensors; 153 | outputTensors.push_back(Ort::Value::CreateTensor( 154 | *ortMemory_, outputTensorValues.data(), outputTensorValues.size(), 155 | outputDimensions.data(), outputDimensions.size())); 156 | 157 | std::vector inputNames{"input"}; 
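// The VAD model (see the sileroVad* options) is fed a single [1, N] float tensor
// named "input", holding the samples normalized by SHRT_MAX above, and writes a
// [1, 2] float tensor named "output"; element [1] is read back below as the
// speech probability.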
158 | std::vector outputNames{"output"}; 159 | ortSession_->Run(Ort::RunOptions{nullptr}, inputNames.data(), 160 | inputTensors.data(), 1, outputNames.data(), 161 | outputTensors.data(), 1); 162 | 163 | sileroVadProbability_ = outputTensorValues[1]; 164 | } 165 | 166 | probability = sileroVadProbability_; 167 | } 168 | 169 | bool speaking = speaking_ ? probability > options_.sileroVadSilenceThreshold 170 | : probability > options_.sileroVadSpeakingThreshold; 171 | if (speaking) { 172 | consecutiveSilence_ = 0; 173 | consecutiveSpeaking_++; 174 | } else { 175 | consecutiveSilence_++; 176 | consecutiveSpeaking_ = 0; 177 | } 178 | 179 | if (!speaking_ && 180 | consecutiveSpeaking_ == options_.consecutiveFramesForSpeaking) { 181 | speaking_ = true; 182 | if (options_.onChunkStart != nullptr) { 183 | options_.onChunkStart(leadingBuffer_); 184 | } 185 | } 186 | 187 | if (options_.onAudio != nullptr) { 188 | options_.onAudio(frame, speaking_, volume, speaking, probability, 189 | consecutiveSilence_); 190 | } 191 | 192 | if (speaking_ && 193 | consecutiveSilence_ == options_.consecutiveFramesForSilence) { 194 | speaking_ = false; 195 | leadingBuffer_.clear(); 196 | if (options_.onChunkEnd != nullptr) { 197 | options_.onChunkEnd(); 198 | } 199 | } 200 | } 201 | 202 | void ChunkProcessor::Reset() { 203 | consecutiveSilence_ = 0; 204 | consecutiveSpeaking_ = 0; 205 | framesUntilSileroVad_ = 0; 206 | leadingBuffer_.clear(); 207 | speaking_ = false; 208 | webrtcVad_.Reset(); 209 | webrtcVadBuffer_.clear(); 210 | webrtcVadResults_.clear(); 211 | short* audio; 212 | while (queue_.try_dequeue(audio)) { 213 | } 214 | } 215 | 216 | void ChunkProcessor::Start() { 217 | toggleLock_.lock(); 218 | startThread_ = std::thread([&] { 219 | Reset(); 220 | microphone_.Start(); 221 | stopped_ = false; 222 | toggleLock_.unlock(); 223 | }); 224 | } 225 | 226 | void ChunkProcessor::Stop() { 227 | toggleLock_.lock(); 228 | stopThread_ = std::thread([&] { 229 | stopped_ = true; 230 | microphone_.Stop(); 231 | toggleLock_.unlock(); 232 | }); 233 | } 234 | 235 | } // namespace speechrecorder 236 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/resample_fractional.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains the resampling functions between 48, 44, 32 and 24 kHz. 
14 | * The description headers can be found in signal_processing_library.h 15 | * 16 | */ 17 | 18 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 19 | 20 | // interpolation coefficients 21 | static const int16_t kCoefficients48To32[2][8] = { 22 | {778, -2050, 1087, 23285, 12903, -3783, 441, 222}, 23 | {222, 441, -3783, 12903, 23285, 1087, -2050, 778} 24 | }; 25 | 26 | static const int16_t kCoefficients32To24[3][8] = { 27 | {767, -2362, 2434, 24406, 10620, -3838, 721, 90}, 28 | {386, -381, -2646, 19062, 19062, -2646, -381, 386}, 29 | {90, 721, -3838, 10620, 24406, 2434, -2362, 767} 30 | }; 31 | 32 | static const int16_t kCoefficients44To32[4][9] = { 33 | {117, -669, 2245, -6183, 26267, 13529, -3245, 845, -138}, 34 | {-101, 612, -2283, 8532, 29790, -5138, 1789, -524, 91}, 35 | {50, -292, 1016, -3064, 32010, 3933, -1147, 315, -53}, 36 | {-156, 974, -3863, 18603, 21691, -6246, 2353, -712, 126} 37 | }; 38 | 39 | // Resampling ratio: 2/3 40 | // input: int32_t (normalized, not saturated) :: size 3 * K 41 | // output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 2 * K 42 | // K: number of blocks 43 | 44 | void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K) 45 | { 46 | ///////////////////////////////////////////////////////////// 47 | // Filter operation: 48 | // 49 | // Perform resampling (3 input samples -> 2 output samples); 50 | // process in sub blocks of size 3 samples. 51 | int32_t tmp; 52 | size_t m; 53 | 54 | for (m = 0; m < K; m++) 55 | { 56 | tmp = 1 << 14; 57 | tmp += kCoefficients48To32[0][0] * In[0]; 58 | tmp += kCoefficients48To32[0][1] * In[1]; 59 | tmp += kCoefficients48To32[0][2] * In[2]; 60 | tmp += kCoefficients48To32[0][3] * In[3]; 61 | tmp += kCoefficients48To32[0][4] * In[4]; 62 | tmp += kCoefficients48To32[0][5] * In[5]; 63 | tmp += kCoefficients48To32[0][6] * In[6]; 64 | tmp += kCoefficients48To32[0][7] * In[7]; 65 | Out[0] = tmp; 66 | 67 | tmp = 1 << 14; 68 | tmp += kCoefficients48To32[1][0] * In[1]; 69 | tmp += kCoefficients48To32[1][1] * In[2]; 70 | tmp += kCoefficients48To32[1][2] * In[3]; 71 | tmp += kCoefficients48To32[1][3] * In[4]; 72 | tmp += kCoefficients48To32[1][4] * In[5]; 73 | tmp += kCoefficients48To32[1][5] * In[6]; 74 | tmp += kCoefficients48To32[1][6] * In[7]; 75 | tmp += kCoefficients48To32[1][7] * In[8]; 76 | Out[1] = tmp; 77 | 78 | // update pointers 79 | In += 3; 80 | Out += 2; 81 | } 82 | } 83 | 84 | // Resampling ratio: 3/4 85 | // input: int32_t (normalized, not saturated) :: size 4 * K 86 | // output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 3 * K 87 | // K: number of blocks 88 | 89 | void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K) 90 | { 91 | ///////////////////////////////////////////////////////////// 92 | // Filter operation: 93 | // 94 | // Perform resampling (4 input samples -> 3 output samples); 95 | // process in sub blocks of size 4 samples. 
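// Each output sample below is an 8-tap FIR over the (normalized) input using one
// of the three kCoefficients32To24 phases, started from the 1 << 14 rounding
// offset; the pointers then advance by 4 input and 3 output samples per block.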
96 | size_t m; 97 | int32_t tmp; 98 | 99 | for (m = 0; m < K; m++) 100 | { 101 | tmp = 1 << 14; 102 | tmp += kCoefficients32To24[0][0] * In[0]; 103 | tmp += kCoefficients32To24[0][1] * In[1]; 104 | tmp += kCoefficients32To24[0][2] * In[2]; 105 | tmp += kCoefficients32To24[0][3] * In[3]; 106 | tmp += kCoefficients32To24[0][4] * In[4]; 107 | tmp += kCoefficients32To24[0][5] * In[5]; 108 | tmp += kCoefficients32To24[0][6] * In[6]; 109 | tmp += kCoefficients32To24[0][7] * In[7]; 110 | Out[0] = tmp; 111 | 112 | tmp = 1 << 14; 113 | tmp += kCoefficients32To24[1][0] * In[1]; 114 | tmp += kCoefficients32To24[1][1] * In[2]; 115 | tmp += kCoefficients32To24[1][2] * In[3]; 116 | tmp += kCoefficients32To24[1][3] * In[4]; 117 | tmp += kCoefficients32To24[1][4] * In[5]; 118 | tmp += kCoefficients32To24[1][5] * In[6]; 119 | tmp += kCoefficients32To24[1][6] * In[7]; 120 | tmp += kCoefficients32To24[1][7] * In[8]; 121 | Out[1] = tmp; 122 | 123 | tmp = 1 << 14; 124 | tmp += kCoefficients32To24[2][0] * In[2]; 125 | tmp += kCoefficients32To24[2][1] * In[3]; 126 | tmp += kCoefficients32To24[2][2] * In[4]; 127 | tmp += kCoefficients32To24[2][3] * In[5]; 128 | tmp += kCoefficients32To24[2][4] * In[6]; 129 | tmp += kCoefficients32To24[2][5] * In[7]; 130 | tmp += kCoefficients32To24[2][6] * In[8]; 131 | tmp += kCoefficients32To24[2][7] * In[9]; 132 | Out[2] = tmp; 133 | 134 | // update pointers 135 | In += 4; 136 | Out += 3; 137 | } 138 | } 139 | 140 | // 141 | // fractional resampling filters 142 | // Fout = 11/16 * Fin 143 | // Fout = 8/11 * Fin 144 | // 145 | 146 | // compute two inner-products and store them to output array 147 | static void WebRtcSpl_ResampDotProduct(const int32_t *in1, const int32_t *in2, 148 | const int16_t *coef_ptr, int32_t *out1, 149 | int32_t *out2) 150 | { 151 | int32_t tmp1 = 16384; 152 | int32_t tmp2 = 16384; 153 | int16_t coef; 154 | 155 | coef = coef_ptr[0]; 156 | tmp1 += coef * in1[0]; 157 | tmp2 += coef * in2[-0]; 158 | 159 | coef = coef_ptr[1]; 160 | tmp1 += coef * in1[1]; 161 | tmp2 += coef * in2[-1]; 162 | 163 | coef = coef_ptr[2]; 164 | tmp1 += coef * in1[2]; 165 | tmp2 += coef * in2[-2]; 166 | 167 | coef = coef_ptr[3]; 168 | tmp1 += coef * in1[3]; 169 | tmp2 += coef * in2[-3]; 170 | 171 | coef = coef_ptr[4]; 172 | tmp1 += coef * in1[4]; 173 | tmp2 += coef * in2[-4]; 174 | 175 | coef = coef_ptr[5]; 176 | tmp1 += coef * in1[5]; 177 | tmp2 += coef * in2[-5]; 178 | 179 | coef = coef_ptr[6]; 180 | tmp1 += coef * in1[6]; 181 | tmp2 += coef * in2[-6]; 182 | 183 | coef = coef_ptr[7]; 184 | tmp1 += coef * in1[7]; 185 | tmp2 += coef * in2[-7]; 186 | 187 | coef = coef_ptr[8]; 188 | *out1 = tmp1 + coef * in1[8]; 189 | *out2 = tmp2 + coef * in2[-8]; 190 | } 191 | 192 | // Resampling ratio: 8/11 193 | // input: int32_t (normalized, not saturated) :: size 11 * K 194 | // output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 8 * K 195 | // K: number of blocks 196 | 197 | void WebRtcSpl_Resample44khzTo32khz(const int32_t *In, int32_t *Out, size_t K) 198 | { 199 | ///////////////////////////////////////////////////////////// 200 | // Filter operation: 201 | // 202 | // Perform resampling (11 input samples -> 8 output samples); 203 | // process in sub blocks of size 11 samples. 
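// Out[0] is simply In[3] scaled up (plus the rounding offset); Out[4] uses the
// kCoefficients44To32[3] phase directly, and WebRtcSpl_ResampDotProduct() fills
// the remaining pairs two at a time by running one input pointer forward and the
// other backward over the same 9-tap coefficient row.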
204 | int32_t tmp; 205 | size_t m; 206 | 207 | for (m = 0; m < K; m++) 208 | { 209 | tmp = 1 << 14; 210 | 211 | // first output sample 212 | Out[0] = ((int32_t)In[3] << 15) + tmp; 213 | 214 | // sum and accumulate filter coefficients and input samples 215 | tmp += kCoefficients44To32[3][0] * In[5]; 216 | tmp += kCoefficients44To32[3][1] * In[6]; 217 | tmp += kCoefficients44To32[3][2] * In[7]; 218 | tmp += kCoefficients44To32[3][3] * In[8]; 219 | tmp += kCoefficients44To32[3][4] * In[9]; 220 | tmp += kCoefficients44To32[3][5] * In[10]; 221 | tmp += kCoefficients44To32[3][6] * In[11]; 222 | tmp += kCoefficients44To32[3][7] * In[12]; 223 | tmp += kCoefficients44To32[3][8] * In[13]; 224 | Out[4] = tmp; 225 | 226 | // sum and accumulate filter coefficients and input samples 227 | WebRtcSpl_ResampDotProduct(&In[0], &In[17], kCoefficients44To32[0], &Out[1], &Out[7]); 228 | 229 | // sum and accumulate filter coefficients and input samples 230 | WebRtcSpl_ResampDotProduct(&In[2], &In[15], kCoefficients44To32[1], &Out[2], &Out[6]); 231 | 232 | // sum and accumulate filter coefficients and input samples 233 | WebRtcSpl_ResampDotProduct(&In[3], &In[14], kCoefficients44To32[2], &Out[3], &Out[5]); 234 | 235 | // update pointers 236 | In += 11; 237 | Out += 8; 238 | } 239 | } 240 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/complex_fft_tables.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 
9 | */ 10 | 11 | #ifndef COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_ 12 | #define COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_ 13 | 14 | #include 15 | 16 | static const int16_t kSinTable1024[] = { 17 | 0, 201, 402, 603, 804, 1005, 1206, 1406, 1607, 18 | 1808, 2009, 2209, 2410, 2610, 2811, 3011, 3211, 3411, 19 | 3611, 3811, 4011, 4210, 4409, 4608, 4807, 5006, 5205, 20 | 5403, 5601, 5799, 5997, 6195, 6392, 6589, 6786, 6982, 21 | 7179, 7375, 7571, 7766, 7961, 8156, 8351, 8545, 8739, 22 | 8932, 9126, 9319, 9511, 9703, 9895, 10087, 10278, 10469, 23 | 10659, 10849, 11038, 11227, 11416, 11604, 11792, 11980, 12166, 24 | 12353, 12539, 12724, 12909, 13094, 13278, 13462, 13645, 13827, 25 | 14009, 14191, 14372, 14552, 14732, 14911, 15090, 15268, 15446, 26 | 15623, 15799, 15975, 16150, 16325, 16499, 16672, 16845, 17017, 27 | 17189, 17360, 17530, 17699, 17868, 18036, 18204, 18371, 18537, 28 | 18702, 18867, 19031, 19194, 19357, 19519, 19680, 19840, 20000, 29 | 20159, 20317, 20474, 20631, 20787, 20942, 21096, 21249, 21402, 30 | 21554, 21705, 21855, 22004, 22153, 22301, 22448, 22594, 22739, 31 | 22883, 23027, 23169, 23311, 23452, 23592, 23731, 23869, 24006, 32 | 24143, 24278, 24413, 24546, 24679, 24811, 24942, 25072, 25201, 33 | 25329, 25456, 25582, 25707, 25831, 25954, 26077, 26198, 26318, 34 | 26437, 26556, 26673, 26789, 26905, 27019, 27132, 27244, 27355, 35 | 27466, 27575, 27683, 27790, 27896, 28001, 28105, 28208, 28309, 36 | 28410, 28510, 28608, 28706, 28802, 28897, 28992, 29085, 29177, 37 | 29268, 29358, 29446, 29534, 29621, 29706, 29790, 29873, 29955, 38 | 30036, 30116, 30195, 30272, 30349, 30424, 30498, 30571, 30643, 39 | 30713, 30783, 30851, 30918, 30984, 31049, 31113, 31175, 31236, 40 | 31297, 31356, 31413, 31470, 31525, 31580, 31633, 31684, 31735, 41 | 31785, 31833, 31880, 31926, 31970, 32014, 32056, 32097, 32137, 42 | 32176, 32213, 32249, 32284, 32318, 32350, 32382, 32412, 32441, 43 | 32468, 32495, 32520, 32544, 32567, 32588, 32609, 32628, 32646, 44 | 32662, 32678, 32692, 32705, 32717, 32727, 32736, 32744, 32751, 45 | 32757, 32761, 32764, 32766, 32767, 32766, 32764, 32761, 32757, 46 | 32751, 32744, 32736, 32727, 32717, 32705, 32692, 32678, 32662, 47 | 32646, 32628, 32609, 32588, 32567, 32544, 32520, 32495, 32468, 48 | 32441, 32412, 32382, 32350, 32318, 32284, 32249, 32213, 32176, 49 | 32137, 32097, 32056, 32014, 31970, 31926, 31880, 31833, 31785, 50 | 31735, 31684, 31633, 31580, 31525, 31470, 31413, 31356, 31297, 51 | 31236, 31175, 31113, 31049, 30984, 30918, 30851, 30783, 30713, 52 | 30643, 30571, 30498, 30424, 30349, 30272, 30195, 30116, 30036, 53 | 29955, 29873, 29790, 29706, 29621, 29534, 29446, 29358, 29268, 54 | 29177, 29085, 28992, 28897, 28802, 28706, 28608, 28510, 28410, 55 | 28309, 28208, 28105, 28001, 27896, 27790, 27683, 27575, 27466, 56 | 27355, 27244, 27132, 27019, 26905, 26789, 26673, 26556, 26437, 57 | 26318, 26198, 26077, 25954, 25831, 25707, 25582, 25456, 25329, 58 | 25201, 25072, 24942, 24811, 24679, 24546, 24413, 24278, 24143, 59 | 24006, 23869, 23731, 23592, 23452, 23311, 23169, 23027, 22883, 60 | 22739, 22594, 22448, 22301, 22153, 22004, 21855, 21705, 21554, 61 | 21402, 21249, 21096, 20942, 20787, 20631, 20474, 20317, 20159, 62 | 20000, 19840, 19680, 19519, 19357, 19194, 19031, 18867, 18702, 63 | 18537, 18371, 18204, 18036, 17868, 17699, 17530, 17360, 17189, 64 | 17017, 16845, 16672, 16499, 16325, 16150, 15975, 15799, 15623, 65 | 15446, 15268, 15090, 14911, 14732, 14552, 14372, 14191, 14009, 66 | 13827, 13645, 13462, 13278, 13094, 12909, 12724, 12539, 
12353, 67 | 12166, 11980, 11792, 11604, 11416, 11227, 11038, 10849, 10659, 68 | 10469, 10278, 10087, 9895, 9703, 9511, 9319, 9126, 8932, 69 | 8739, 8545, 8351, 8156, 7961, 7766, 7571, 7375, 7179, 70 | 6982, 6786, 6589, 6392, 6195, 5997, 5799, 5601, 5403, 71 | 5205, 5006, 4807, 4608, 4409, 4210, 4011, 3811, 3611, 72 | 3411, 3211, 3011, 2811, 2610, 2410, 2209, 2009, 1808, 73 | 1607, 1406, 1206, 1005, 804, 603, 402, 201, 0, 74 | -201, -402, -603, -804, -1005, -1206, -1406, -1607, -1808, 75 | -2009, -2209, -2410, -2610, -2811, -3011, -3211, -3411, -3611, 76 | -3811, -4011, -4210, -4409, -4608, -4807, -5006, -5205, -5403, 77 | -5601, -5799, -5997, -6195, -6392, -6589, -6786, -6982, -7179, 78 | -7375, -7571, -7766, -7961, -8156, -8351, -8545, -8739, -8932, 79 | -9126, -9319, -9511, -9703, -9895, -10087, -10278, -10469, -10659, 80 | -10849, -11038, -11227, -11416, -11604, -11792, -11980, -12166, -12353, 81 | -12539, -12724, -12909, -13094, -13278, -13462, -13645, -13827, -14009, 82 | -14191, -14372, -14552, -14732, -14911, -15090, -15268, -15446, -15623, 83 | -15799, -15975, -16150, -16325, -16499, -16672, -16845, -17017, -17189, 84 | -17360, -17530, -17699, -17868, -18036, -18204, -18371, -18537, -18702, 85 | -18867, -19031, -19194, -19357, -19519, -19680, -19840, -20000, -20159, 86 | -20317, -20474, -20631, -20787, -20942, -21096, -21249, -21402, -21554, 87 | -21705, -21855, -22004, -22153, -22301, -22448, -22594, -22739, -22883, 88 | -23027, -23169, -23311, -23452, -23592, -23731, -23869, -24006, -24143, 89 | -24278, -24413, -24546, -24679, -24811, -24942, -25072, -25201, -25329, 90 | -25456, -25582, -25707, -25831, -25954, -26077, -26198, -26318, -26437, 91 | -26556, -26673, -26789, -26905, -27019, -27132, -27244, -27355, -27466, 92 | -27575, -27683, -27790, -27896, -28001, -28105, -28208, -28309, -28410, 93 | -28510, -28608, -28706, -28802, -28897, -28992, -29085, -29177, -29268, 94 | -29358, -29446, -29534, -29621, -29706, -29790, -29873, -29955, -30036, 95 | -30116, -30195, -30272, -30349, -30424, -30498, -30571, -30643, -30713, 96 | -30783, -30851, -30918, -30984, -31049, -31113, -31175, -31236, -31297, 97 | -31356, -31413, -31470, -31525, -31580, -31633, -31684, -31735, -31785, 98 | -31833, -31880, -31926, -31970, -32014, -32056, -32097, -32137, -32176, 99 | -32213, -32249, -32284, -32318, -32350, -32382, -32412, -32441, -32468, 100 | -32495, -32520, -32544, -32567, -32588, -32609, -32628, -32646, -32662, 101 | -32678, -32692, -32705, -32717, -32727, -32736, -32744, -32751, -32757, 102 | -32761, -32764, -32766, -32767, -32766, -32764, -32761, -32757, -32751, 103 | -32744, -32736, -32727, -32717, -32705, -32692, -32678, -32662, -32646, 104 | -32628, -32609, -32588, -32567, -32544, -32520, -32495, -32468, -32441, 105 | -32412, -32382, -32350, -32318, -32284, -32249, -32213, -32176, -32137, 106 | -32097, -32056, -32014, -31970, -31926, -31880, -31833, -31785, -31735, 107 | -31684, -31633, -31580, -31525, -31470, -31413, -31356, -31297, -31236, 108 | -31175, -31113, -31049, -30984, -30918, -30851, -30783, -30713, -30643, 109 | -30571, -30498, -30424, -30349, -30272, -30195, -30116, -30036, -29955, 110 | -29873, -29790, -29706, -29621, -29534, -29446, -29358, -29268, -29177, 111 | -29085, -28992, -28897, -28802, -28706, -28608, -28510, -28410, -28309, 112 | -28208, -28105, -28001, -27896, -27790, -27683, -27575, -27466, -27355, 113 | -27244, -27132, -27019, -26905, -26789, -26673, -26556, -26437, -26318, 114 | -26198, -26077, -25954, -25831, -25707, -25582, -25456, -25329, -25201, 115 | 
-25072, -24942, -24811, -24679, -24546, -24413, -24278, -24143, -24006, 116 | -23869, -23731, -23592, -23452, -23311, -23169, -23027, -22883, -22739, 117 | -22594, -22448, -22301, -22153, -22004, -21855, -21705, -21554, -21402, 118 | -21249, -21096, -20942, -20787, -20631, -20474, -20317, -20159, -20000, 119 | -19840, -19680, -19519, -19357, -19194, -19031, -18867, -18702, -18537, 120 | -18371, -18204, -18036, -17868, -17699, -17530, -17360, -17189, -17017, 121 | -16845, -16672, -16499, -16325, -16150, -15975, -15799, -15623, -15446, 122 | -15268, -15090, -14911, -14732, -14552, -14372, -14191, -14009, -13827, 123 | -13645, -13462, -13278, -13094, -12909, -12724, -12539, -12353, -12166, 124 | -11980, -11792, -11604, -11416, -11227, -11038, -10849, -10659, -10469, 125 | -10278, -10087, -9895, -9703, -9511, -9319, -9126, -8932, -8739, 126 | -8545, -8351, -8156, -7961, -7766, -7571, -7375, -7179, -6982, 127 | -6786, -6589, -6392, -6195, -5997, -5799, -5601, -5403, -5205, 128 | -5006, -4807, -4608, -4409, -4210, -4011, -3811, -3611, -3411, 129 | -3211, -3011, -2811, -2610, -2410, -2209, -2009, -1808, -1607, 130 | -1406, -1206, -1005, -804, -603, -402, -201}; 131 | 132 | #endif // COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_ 133 | -------------------------------------------------------------------------------- /lib/3rd_party/webrtcvad/webrtc/common_audio/signal_processing/complex_fft.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 | * 4 | * Use of this source code is governed by a BSD-style license 5 | * that can be found in the LICENSE file in the root of the source 6 | * tree. An additional intellectual property rights grant can be found 7 | * in the file PATENTS. All contributing project authors may 8 | * be found in the AUTHORS file in the root of the source tree. 9 | */ 10 | 11 | 12 | /* 13 | * This file contains the function WebRtcSpl_ComplexFFT(). 14 | * The description header can be found in signal_processing_library.h 15 | * 16 | */ 17 | 18 | #include "webrtc/common_audio/signal_processing/complex_fft_tables.h" 19 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 20 | #include "webrtc/rtc_base/system/arch.h" 21 | 22 | #define CFFTSFT 14 23 | #define CFFTRND 1 24 | #define CFFTRND2 16384 25 | 26 | #define CIFFTSFT 14 27 | #define CIFFTRND 1 28 | 29 | 30 | int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) 31 | { 32 | int i, j, l, k, istep, n, m; 33 | int16_t wr, wi; 34 | int32_t tr32, ti32, qr32, qi32; 35 | 36 | /* The 1024-value is a constant given from the size of kSinTable1024[], 37 | * and should not be changed depending on the input parameter 'stages' 38 | */ 39 | n = 1 << stages; 40 | if (n > 1024) 41 | return -1; 42 | 43 | l = 1; 44 | k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change 45 | depending on the input parameter 'stages' */ 46 | 47 | if (mode == 0) 48 | { 49 | // mode==0: Low-complexity and Low-accuracy mode 50 | while (l < n) 51 | { 52 | istep = l << 1; 53 | 54 | for (m = 0; m < l; ++m) 55 | { 56 | j = m << k; 57 | 58 | /* The 256-value is a constant given as 1/4 of the size of 59 | * kSinTable1024[], and should not be changed depending on the input 60 | * parameter 'stages'. 
It will result in 0 <= j < N_SINE_WAVE/2 61 | */ 62 | wr = kSinTable1024[j + 256]; 63 | wi = -kSinTable1024[j]; 64 | 65 | for (i = m; i < n; i += istep) 66 | { 67 | j = i + l; 68 | 69 | tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15; 70 | 71 | ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15; 72 | 73 | qr32 = (int32_t)frfi[2 * i]; 74 | qi32 = (int32_t)frfi[2 * i + 1]; 75 | frfi[2 * j] = (int16_t)((qr32 - tr32) >> 1); 76 | frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> 1); 77 | frfi[2 * i] = (int16_t)((qr32 + tr32) >> 1); 78 | frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> 1); 79 | } 80 | } 81 | 82 | --k; 83 | l = istep; 84 | 85 | } 86 | 87 | } else 88 | { 89 | // mode==1: High-complexity and High-accuracy mode 90 | while (l < n) 91 | { 92 | istep = l << 1; 93 | 94 | for (m = 0; m < l; ++m) 95 | { 96 | j = m << k; 97 | 98 | /* The 256-value is a constant given as 1/4 of the size of 99 | * kSinTable1024[], and should not be changed depending on the input 100 | * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 101 | */ 102 | wr = kSinTable1024[j + 256]; 103 | wi = -kSinTable1024[j]; 104 | 105 | #ifdef WEBRTC_ARCH_ARM_V7 106 | int32_t wri = 0; 107 | __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) : 108 | "r"((int32_t)wr), "r"((int32_t)wi)); 109 | #endif 110 | 111 | for (i = m; i < n; i += istep) 112 | { 113 | j = i + l; 114 | 115 | #ifdef WEBRTC_ARCH_ARM_V7 116 | register int32_t frfi_r; 117 | __asm __volatile( 118 | "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd]," 119 | " lsl #16\n\t" 120 | "smlsd %[tr32], %[wri], %[frfi_r], %[cfftrnd]\n\t" 121 | "smladx %[ti32], %[wri], %[frfi_r], %[cfftrnd]\n\t" 122 | :[frfi_r]"=&r"(frfi_r), 123 | [tr32]"=&r"(tr32), 124 | [ti32]"=r"(ti32) 125 | :[frfi_even]"r"((int32_t)frfi[2*j]), 126 | [frfi_odd]"r"((int32_t)frfi[2*j +1]), 127 | [wri]"r"(wri), 128 | [cfftrnd]"r"(CFFTRND)); 129 | #else 130 | tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CFFTRND; 131 | 132 | ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CFFTRND; 133 | #endif 134 | 135 | tr32 >>= 15 - CFFTSFT; 136 | ti32 >>= 15 - CFFTSFT; 137 | 138 | qr32 = ((int32_t)frfi[2 * i]) * (1 << CFFTSFT); 139 | qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CFFTSFT); 140 | 141 | frfi[2 * j] = (int16_t)( 142 | (qr32 - tr32 + CFFTRND2) >> (1 + CFFTSFT)); 143 | frfi[2 * j + 1] = (int16_t)( 144 | (qi32 - ti32 + CFFTRND2) >> (1 + CFFTSFT)); 145 | frfi[2 * i] = (int16_t)( 146 | (qr32 + tr32 + CFFTRND2) >> (1 + CFFTSFT)); 147 | frfi[2 * i + 1] = (int16_t)( 148 | (qi32 + ti32 + CFFTRND2) >> (1 + CFFTSFT)); 149 | } 150 | } 151 | 152 | --k; 153 | l = istep; 154 | } 155 | } 156 | return 0; 157 | } 158 | 159 | int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode) 160 | { 161 | size_t i, j, l, istep, n, m; 162 | int k, scale, shift; 163 | int16_t wr, wi; 164 | int32_t tr32, ti32, qr32, qi32; 165 | int32_t tmp32, round2; 166 | 167 | /* The 1024-value is a constant given from the size of kSinTable1024[], 168 | * and should not be changed depending on the input parameter 'stages' 169 | */ 170 | n = ((size_t)1) << stages; 171 | if (n > 1024) 172 | return -1; 173 | 174 | scale = 0; 175 | 176 | l = 1; 177 | k = 10 - 1; /* Constant for given kSinTable1024[]. 
Do not change 178 | depending on the input parameter 'stages' */ 179 | 180 | while (l < n) 181 | { 182 | // variable scaling, depending upon data 183 | shift = 0; 184 | round2 = 8192; 185 | 186 | tmp32 = WebRtcSpl_MaxAbsValueW16(frfi, 2 * n); 187 | if (tmp32 > 13573) 188 | { 189 | shift++; 190 | scale++; 191 | round2 <<= 1; 192 | } 193 | if (tmp32 > 27146) 194 | { 195 | shift++; 196 | scale++; 197 | round2 <<= 1; 198 | } 199 | 200 | istep = l << 1; 201 | 202 | if (mode == 0) 203 | { 204 | // mode==0: Low-complexity and Low-accuracy mode 205 | for (m = 0; m < l; ++m) 206 | { 207 | j = m << k; 208 | 209 | /* The 256-value is a constant given as 1/4 of the size of 210 | * kSinTable1024[], and should not be changed depending on the input 211 | * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 212 | */ 213 | wr = kSinTable1024[j + 256]; 214 | wi = kSinTable1024[j]; 215 | 216 | for (i = m; i < n; i += istep) 217 | { 218 | j = i + l; 219 | 220 | tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15; 221 | 222 | ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15; 223 | 224 | qr32 = (int32_t)frfi[2 * i]; 225 | qi32 = (int32_t)frfi[2 * i + 1]; 226 | frfi[2 * j] = (int16_t)((qr32 - tr32) >> shift); 227 | frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> shift); 228 | frfi[2 * i] = (int16_t)((qr32 + tr32) >> shift); 229 | frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> shift); 230 | } 231 | } 232 | } else 233 | { 234 | // mode==1: High-complexity and High-accuracy mode 235 | 236 | for (m = 0; m < l; ++m) 237 | { 238 | j = m << k; 239 | 240 | /* The 256-value is a constant given as 1/4 of the size of 241 | * kSinTable1024[], and should not be changed depending on the input 242 | * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 243 | */ 244 | wr = kSinTable1024[j + 256]; 245 | wi = kSinTable1024[j]; 246 | 247 | #ifdef WEBRTC_ARCH_ARM_V7 248 | int32_t wri = 0; 249 | __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) : 250 | "r"((int32_t)wr), "r"((int32_t)wi)); 251 | #endif 252 | 253 | for (i = m; i < n; i += istep) 254 | { 255 | j = i + l; 256 | 257 | #ifdef WEBRTC_ARCH_ARM_V7 258 | register int32_t frfi_r; 259 | __asm __volatile( 260 | "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t" 261 | "smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t" 262 | "smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t" 263 | :[frfi_r]"=&r"(frfi_r), 264 | [tr32]"=&r"(tr32), 265 | [ti32]"=r"(ti32) 266 | :[frfi_even]"r"((int32_t)frfi[2*j]), 267 | [frfi_odd]"r"((int32_t)frfi[2*j +1]), 268 | [wri]"r"(wri), 269 | [cifftrnd]"r"(CIFFTRND) 270 | ); 271 | #else 272 | 273 | tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CIFFTRND; 274 | 275 | ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CIFFTRND; 276 | #endif 277 | tr32 >>= 15 - CIFFTSFT; 278 | ti32 >>= 15 - CIFFTSFT; 279 | 280 | qr32 = ((int32_t)frfi[2 * i]) * (1 << CIFFTSFT); 281 | qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CIFFTSFT); 282 | 283 | frfi[2 * j] = (int16_t)( 284 | (qr32 - tr32 + round2) >> (shift + CIFFTSFT)); 285 | frfi[2 * j + 1] = (int16_t)( 286 | (qi32 - ti32 + round2) >> (shift + CIFFTSFT)); 287 | frfi[2 * i] = (int16_t)( 288 | (qr32 + tr32 + round2) >> (shift + CIFFTSFT)); 289 | frfi[2 * i + 1] = (int16_t)( 290 | (qi32 + ti32 + round2) >> (shift + CIFFTSFT)); 291 | } 292 | } 293 | 294 | } 295 | --k; 296 | l = istep; 297 | } 298 | return scale; 299 | } 300 | -------------------------------------------------------------------------------- /src/speech_recorder.cpp: 
-------------------------------------------------------------------------------- 1 | #include <napi.h> 2 | 3 | #include <chrono> 4 | #include <memory> 5 | #include <string> 6 | #include <thread> 7 | #include <vector> 8 | 9 | #include "chunk_processor.h" 10 | #include "devices.h" 11 | #include "portaudio.h" 12 | #include "speech_recorder.h" 13 | 14 | #define DR_WAV_IMPLEMENTATION 15 | #include "dr_wav.h" 16 | 17 | Napi::Object SpeechRecorder::Init(Napi::Env env, Napi::Object exports) { 18 | Napi::Function f = DefineClass( 19 | env, "SpeechRecorder", 20 | { 21 | InstanceMethod<&SpeechRecorder::ProcessFile>( 22 | "processFile", static_cast<napi_property_attributes>( 23 | napi_writable | napi_configurable)), 24 | InstanceMethod<&SpeechRecorder::Start>( 25 | "start", static_cast<napi_property_attributes>( 26 | napi_writable | napi_configurable)), 27 | InstanceMethod<&SpeechRecorder::Stop>( 28 | "stop", static_cast<napi_property_attributes>(napi_writable | 29 | napi_configurable)), 30 | }); 31 | 32 | Napi::FunctionReference* constructor = new Napi::FunctionReference(); 33 | *constructor = Napi::Persistent(f); 34 | 35 | exports.Set("SpeechRecorder", f); 36 | env.SetInstanceData(constructor); 37 | 38 | exports.Set(Napi::String::New(env, "devices"), 39 | Napi::Function::New(env, GetDevices)); 40 | return exports; 41 | } 42 | 43 | SpeechRecorder::SpeechRecorder(const Napi::CallbackInfo& info) 44 | : Napi::ObjectWrap<SpeechRecorder>(info), 45 | stopped_(true), 46 | queue_(), 47 | callback_(Napi::Persistent(info[1].As<Napi::Function>())), 48 | threadSafeFunctionCallback_([&](Napi::Env env, Napi::Function jsCallback, 49 | SpeechRecorderCallbackData* data) { 50 | Napi::Object object = Napi::Object::New(env); 51 | object.Set("speaking", Napi::Boolean::New(env, data->speaking)); 52 | object.Set("volume", Napi::Number::New(env, data->volume)); 53 | object.Set("speech", Napi::Boolean::New(env, data->speech)); 54 | object.Set("probability", Napi::Number::New(env, data->probability)); 55 | object.Set("consecutiveSilence", 56 | Napi::Number::New(env, (double)data->consecutiveSilence)); 57 | 58 | if (data->audio.size() > 0) { 59 | Napi::Int16Array buffer = 60 | Napi::Int16Array::New(env, data->audio.size()); 61 | for (size_t i = 0; i < data->audio.size(); i++) { 62 | buffer[i] = data->audio[i]; 63 | } 64 | 65 | object.Set("audio", buffer); 66 | } 67 | 68 | jsCallback.Call({Napi::String::New(env, data->event), object}); 69 | delete data; 70 | }), 71 | modelPath_(info[0].As<Napi::String>().Utf8Value()), 72 | options_({ 73 | info[2] 74 | .As<Napi::Object>() 75 | .Get("consecutiveFramesForSilence") 76 | .As<Napi::Number>() 77 | .Int32Value(), 78 | info[2] 79 | .As<Napi::Object>() 80 | .Get("consecutiveFramesForSpeaking") 81 | .As<Napi::Number>() 82 | .Int32Value(), 83 | info[2] 84 | .As<Napi::Object>() 85 | .Get("device") 86 | .As<Napi::Number>() 87 | .Int32Value(), 88 | info[2] 89 | .As<Napi::Object>() 90 | .Get("leadingBufferFrames") 91 | .As<Napi::Number>() 92 | .Int32Value(), 93 | [&](std::vector<short> audio) { 94 | SpeechRecorderCallbackData* data = new SpeechRecorderCallbackData(); 95 | data->event = "chunkStart"; 96 | data->audio = audio; 97 | queue_.enqueue(data); 98 | }, 99 | [&](std::vector<short> audio, bool speaking, double volume, 100 | bool speech, double probability, int consecutiveSilence) { 101 | SpeechRecorderCallbackData* data = new SpeechRecorderCallbackData(); 102 | data->event = "audio"; 103 | data->audio = audio; 104 | data->speaking = speaking; 105 | data->volume = volume; 106 | data->speech = speech; 107 | data->probability = probability; 108 | data->consecutiveSilence = consecutiveSilence; 109 | queue_.enqueue(data); 110 | }, 111 | [&]() { 112 | SpeechRecorderCallbackData* data = new SpeechRecorderCallbackData(); 113 | data->event = "chunkEnd"; 114 | queue_.enqueue(data); 115 | }, 116 |
info[2] 117 | .As<Napi::Object>() 118 | .Get("samplesPerFrame") 119 | .As<Napi::Number>() 120 | .Int32Value(), 121 | info[2] 122 | .As<Napi::Object>() 123 | .Get("sampleRate") 124 | .As<Napi::Number>() 125 | .Int32Value(), 126 | info[2] 127 | .As<Napi::Object>() 128 | .Get("sileroVadBufferSize") 129 | .As<Napi::Number>() 130 | .Int32Value(), 131 | info[2] 132 | .As<Napi::Object>() 133 | .Get("sileroVadRateLimit") 134 | .As<Napi::Number>() 135 | .Int32Value(), 136 | info[2] 137 | .As<Napi::Object>() 138 | .Get("sileroVadSilenceThreshold") 139 | .As<Napi::Number>() 140 | .DoubleValue(), 141 | info[2] 142 | .As<Napi::Object>() 143 | .Get("sileroVadSpeakingThreshold") 144 | .As<Napi::Number>() 145 | .DoubleValue(), 146 | info[2] 147 | .As<Napi::Object>() 148 | .Get("webrtcVadLevel") 149 | .As<Napi::Number>() 150 | .Int32Value(), 151 | info[2] 152 | .As<Napi::Object>() 153 | .Get("webrtcVadBufferSize") 154 | .As<Napi::Number>() 155 | .Int32Value(), 156 | info[2] 157 | .As<Napi::Object>() 158 | .Get("webrtcVadResultsSize") 159 | .As<Napi::Number>() 160 | .Int32Value(), 161 | }), 162 | processor_(modelPath_, options_) {} 163 | 164 | void SpeechRecorder::ProcessFile(const Napi::CallbackInfo& info) { 165 | Napi::Env env = info.Env(); 166 | std::string path = info[0].As<Napi::String>().Utf8Value(); 167 | 168 | // we don't want to create two processors on startup, because loading the 169 | // silero model is expensive, so lazily create this instance only if this 170 | // method is actually called (which is probably not common) 171 | if (!processFileProcessor_) { 172 | speechrecorder::ChunkProcessorOptions options = options_; 173 | 174 | options.onChunkStart = [&](std::vector<short> audio) { 175 | Napi::Object object = Napi::Object::New(env); 176 | if (audio.size() > 0) { 177 | Napi::Int16Array buffer = Napi::Int16Array::New(env, audio.size()); 178 | for (size_t i = 0; i < audio.size(); i++) { 179 | buffer[i] = audio[i]; 180 | } 181 | 182 | object.Set("audio", buffer); 183 | } 184 | 185 | callback_.Value().Call({Napi::String::New(env, "chunkStart"), object}); 186 | }; 187 | 188 | options.onAudio = [&](std::vector<short> audio, bool speaking, 189 | double volume, bool speech, double probability, 190 | int consecutiveSilence) { 191 | Napi::Object object = Napi::Object::New(env); 192 | object.Set("speaking", Napi::Boolean::New(env, speaking)); 193 | object.Set("volume", Napi::Number::New(env, volume)); 194 | object.Set("speech", Napi::Boolean::New(env, speech)); 195 | object.Set("probability", Napi::Number::New(env, probability)); 196 | object.Set("consecutiveSilence", 197 | Napi::Number::New(env, (double)consecutiveSilence)); 198 | 199 | if (audio.size() > 0) { 200 | Napi::Int16Array buffer = Napi::Int16Array::New(env, audio.size()); 201 | for (size_t i = 0; i < audio.size(); i++) { 202 | buffer[i] = audio[i]; 203 | } 204 | 205 | object.Set("audio", buffer); 206 | callback_.Value().Call({Napi::String::New(env, "audio"), object}); 207 | } 208 | }; 209 | 210 | options.onChunkEnd = [&] { 211 | callback_.Value().Call({Napi::String::New(env, "chunkEnd")}); 212 | }; 213 | 214 | processFileProcessor_ = 215 | std::make_unique<speechrecorder::ChunkProcessor>(modelPath_, options); 216 | } 217 | 218 | unsigned int channels; 219 | unsigned int sampleRate; 220 | drwav_uint64 frames; 221 | short* data = drwav_open_file_and_read_pcm_frames_s16( 222 | path.c_str(), &channels, &sampleRate, &frames, nullptr); 223 | 224 | processFileProcessor_->Reset(); 225 | int size = (int)frames; 226 | for (int i = 0; i < size; i += options_.samplesPerFrame) { 227 | std::vector<short> buffer; 228 | for (int j = 0; j < options_.samplesPerFrame; j++) { 229 | if (i + j < size) { 230 | buffer.push_back(data[i + j]); 231 | } 232 | } 233 | 234 | if (buffer.size() == (size_t)options_.samplesPerFrame) { 235 | processFileProcessor_->Process(buffer.data());
236 | } 237 | } 238 | 239 | drwav_free(data, nullptr); 240 | } 241 | 242 | void SpeechRecorder::Start(const Napi::CallbackInfo& info) { 243 | stopped_ = false; 244 | threadSafeFunction_ = Napi::ThreadSafeFunction::New( 245 | info.Env(), callback_.Value(), "Speech Recorder Start", 0, 1, 246 | [&](Napi::Env env) { 247 | thread_.join(); 248 | }); 249 | 250 | thread_ = std::thread([&] { 251 | while (!stopped_) { 252 | SpeechRecorderCallbackData* data; 253 | bool element = queue_.try_dequeue(data); 254 | if (element) { 255 | threadSafeFunction_.BlockingCall(data, threadSafeFunctionCallback_); 256 | } 257 | 258 | std::this_thread::sleep_for(std::chrono::milliseconds(10)); 259 | } 260 | 261 | threadSafeFunction_.Release(); 262 | }); 263 | 264 | processor_.Start(); 265 | } 266 | 267 | void SpeechRecorder::Stop(const Napi::CallbackInfo& info) { 268 | stopped_ = true; 269 | processor_.Stop(); 270 | } 271 | 272 | Napi::Value GetDevices(const Napi::CallbackInfo& info) { 273 | Napi::Env env = info.Env(); 274 | 275 | std::vector<speechrecorder::Device> devices = speechrecorder::GetDevices(); 276 | Napi::Array result = Napi::Array::New(env, devices.size()); 277 | for (size_t i = 0; i < devices.size(); i++) { 278 | Napi::Object e = Napi::Object::New(env); 279 | e.Set("id", devices[i].id); 280 | e.Set("name", devices[i].name); 281 | e.Set("apiName", devices[i].apiName); 282 | e.Set("maxInputChannels", devices[i].maxInputChannels); 283 | e.Set("maxOutputChannels", devices[i].maxOutputChannels); 284 | e.Set("defaultSampleRate", devices[i].defaultSampleRate); 285 | e.Set("isDefaultInput", devices[i].isDefaultInput); 286 | e.Set("isDefaultOutput", devices[i].isDefaultOutput); 287 | result[i] = e; 288 | } 289 | 290 | return result; 291 | } 292 | 293 | Napi::Object Init(Napi::Env env, Napi::Object exports) { 294 | SpeechRecorder::Init(env, exports); 295 | return exports; 296 | } 297 | 298 | NODE_API_MODULE(addon, Init); 299 | --------------------------------------------------------------------------------
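The Start()/Stop() implementation above follows a common node-addon-api pattern: a native worker thread drains a queue of events and hands each one to a Napi::ThreadSafeFunction, which marshals the call back onto the JavaScript thread before invoking the user's callback, while the finalizer joins the worker once the function has been released. The sketch below is a minimal, self-contained illustration of that same pattern, assuming only napi.h (node-addon-api); the Ticker class, the "tick" event name, the InitTicker helper, and the module name are hypothetical and are not part of speech-recorder.

#include <napi.h>

#include <atomic>
#include <chrono>
#include <thread>

// Illustrative sketch only (not part of speech-recorder): a worker thread
// produces integer "ticks" and a Napi::ThreadSafeFunction forwards each one
// to a JavaScript callback on the main thread.
class Ticker : public Napi::ObjectWrap<Ticker> {
 public:
  static Napi::Object Init(Napi::Env env, Napi::Object exports) {
    Napi::Function f = DefineClass(env, "Ticker",
                                   {InstanceMethod("start", &Ticker::Start),
                                    InstanceMethod("stop", &Ticker::Stop)});
    exports.Set("Ticker", f);
    return exports;
  }

  Ticker(const Napi::CallbackInfo& info)
      : Napi::ObjectWrap<Ticker>(info),
        stopped_(true),
        callback_(Napi::Persistent(info[0].As<Napi::Function>())) {}

  void Start(const Napi::CallbackInfo& info) {
    stopped_ = false;

    // The finalizer runs on the JavaScript thread after Release() has been
    // called and all pending calls have drained, so joining here is safe.
    tsfn_ = Napi::ThreadSafeFunction::New(
        info.Env(), callback_.Value(), "Ticker", 0, 1,
        [this](Napi::Env) { thread_.join(); });

    thread_ = std::thread([this] {
      int count = 0;
      while (!stopped_) {
        int* data = new int(count++);
        // BlockingCall queues the payload; the lambda runs on the JS thread.
        tsfn_.BlockingCall(data, [](Napi::Env env, Napi::Function cb, int* n) {
          cb.Call({Napi::String::New(env, "tick"), Napi::Number::New(env, *n)});
          delete n;
        });
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
      }

      tsfn_.Release();
    });
  }

  void Stop(const Napi::CallbackInfo&) { stopped_ = true; }

 private:
  std::atomic<bool> stopped_;
  std::thread thread_;
  Napi::FunctionReference callback_;
  Napi::ThreadSafeFunction tsfn_;
};

Napi::Object InitTicker(Napi::Env env, Napi::Object exports) {
  return Ticker::Init(env, exports);
}

NODE_API_MODULE(ticker_sketch, InitTicker)

As in SpeechRecorder::Start above, Release() is invoked from the worker thread and the join happens in the finalizer, which only runs on the JavaScript thread after every queued call has been delivered.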