├── .github └── FUNDING.yml ├── AUTHORS ├── CMakeLists.txt ├── COPYING ├── README.md ├── doc └── Doxyfile.in ├── examples ├── CMakeLists.txt ├── dr_mp3.h ├── dr_wav.h ├── rnnoise_demo.c ├── rnnoise_demo_simple.c └── sample.wav ├── include └── rnnoise.h ├── m4 └── attributes.m4 ├── src ├── CMakeLists.txt ├── arch.h ├── celt_lpc.c ├── celt_lpc.h ├── common.h ├── denoise.c ├── noise.wav ├── opus_types.h ├── pitch.c ├── pitch.h ├── rnn.c ├── rnn.h ├── rnn_data.c ├── rnn_data.h ├── rnn_train.py ├── speech.wav ├── stb_fft.h └── train_compile.sh └── training ├── bin2hdf5.py ├── dump_rnn.py └── rnn_train.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: https://www.paypal.com/paypalme/cpuimage/ 13 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Jean-Marc Valin 2 | -------------------------------------------------------------------------------- /CMakeLists.txt: --------------------------------------------------------------------------------
# Top-level build script: builds the rnnoise library (src/) and the demo
# programs (examples/).
cmake_minimum_required(VERSION 3.5...3.29)
project(RnNoise LANGUAGES C)

# The same sources are compiled into both a shared and a static library, so
# every object file is built as position-independent code.
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# Default to an optimized build, but only when the user did not pick a build
# type and the generator is single-config. Multi-config generators
# (Visual Studio, Xcode) ignore CMAKE_BUILD_TYPE and must not be forced.
if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Release" CACHE STRING
      "Build type: Debug, Release, RelWithDebInfo or MinSizeRel" FORCE)
endif()

# Strip symbols from Release binaries. "-s" is a *linker* option of GNU-style
# toolchains, so it is added to the Release linker flags (not the compile
# flags) and only where it is understood. Turn the option OFF to keep symbols.
option(RNNOISE_STRIP_RELEASE
       "Strip symbols from Release binaries (GNU-style linkers only)" ON)
if(RNNOISE_STRIP_RELEASE AND NOT APPLE AND NOT MSVC
   AND CMAKE_C_COMPILER_ID MATCHES "^(GNU|Clang)$")
  foreach(kind EXE SHARED MODULE)
    set(CMAKE_${kind}_LINKER_FLAGS_RELEASE
        "${CMAKE_${kind}_LINKER_FLAGS_RELEASE} -s")
  endforeach()
endif()

# Cross-compiling (e.g. for an ARM sys-root): do not edit this file. Put the
# compiler paths, CMAKE_FIND_ROOT_PATH* settings and target-specific flags
# (such as -mfpu=neon) in a toolchain file and configure with
#   cmake -DCMAKE_TOOLCHAIN_FILE=<path/to/toolchain.cmake> ..

# src/ must come first: examples/ links against the library defined there.
add_subdirectory(src)
add_subdirectory(examples)

-------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017, Mozilla 2 | Copyright (c) 2007-2017, Jean-Marc Valin 3 | Copyright (c) 2005-2017, Xiph.Org Foundation 4 | Copyright (c) 2003-2004, Mark Borgerding 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions 8 | are met: 9 | 10 | - Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | 13 | - Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution.
16 | 17 | - Neither the name of the Xiph.Org Foundation nor the names of its 18 | contributors may be used to endorse or promote products derived from 19 | this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION 25 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RNNoise is a noise suppression library based on a recurrent neural network 2 | 3 | 4 | ## Quick Demo application 5 | While it is meant to be used as a library, a simple command-line tool is 6 | provided as an example. 7 | 8 | ### build librnnoise & rnnoise_demo with CMake 9 | 10 | ```shell 11 | # mkdir build 12 | # cd build 13 | # cmake .. 
14 | # make 15 | ``` 16 | 17 | It operates on WAV and MP3 files and can be used as follows: 18 | ```shell 19 | # ./rnnoise_demo input.wav 20 | # ./rnnoise_demo input.mp3 21 | ``` 22 | 23 | By default the output is written next to the input as "input_out.wav". 24 | 25 | 26 | Alternatively, specify the output filename explicitly: 27 | ```shell 28 | # ./rnnoise_demo input.wav output.wav 29 | # ./rnnoise_demo input.mp3 output.wav 30 | ``` 31 | 32 | ## Training Process 33 | 34 | ### Audio feature extraction 35 | Build the audio feature extraction tool: 36 | ```shell 37 | # cd src 38 | # ./train_compile.sh 39 | ``` 40 | Use the generated "denoise_training" binary to extract the audio feature array from a speech clip and a noise clip: 41 | ```shell 42 | # ./denoise_training 43 | usage: ./denoise_training 44 | # ./denoise_training speech.wav noise.wav 50000 feature.dat 45 | matrix size: 50000 x 87 46 | ``` 47 | 48 | ### RNN model training 49 | Move the feature array into the "training" directory and run the training scripts: 50 | ```shell 51 | # cd training 52 | # mv ../src/feature.dat . 53 | # python bin2hdf5.py --bin_file feature.dat --matrix_shape 50000x87 54 | # python rnn_train.py 55 | # python dump_rnn.py 56 | ``` 57 | The training process generates the RNN model weight source file (default: rnn_data.c) and the layer definition header (default: rnn_data.h). Use them to replace "src/rnn_data.c" and "src/rnn_data.h", then rebuild the rnnoise library and demo app. 
58 | 59 | # References and Resources: 60 | - [david8862/rnnoise](https://github.com/david8862/rnnoise) 61 | - [RNNoise: Learning Noise Suppression](https://people.xiph.org/~jm/demo/rnnoise/) 62 | - [RNNoise: Learning Noise Suppression(深度学习噪声抑制)](https://blog.csdn.net/dakeboy/article/details/88039977) 63 | - [基于RNN的音频降噪算法](https://cloud.tencent.com/developer/article/1094567) 64 | 65 | # Donating 66 | 67 | If you found this project useful, consider buying me a coffee 68 | 69 | Buy Me A Coffee 70 | -------------------------------------------------------------------------------- /doc/Doxyfile.in: -------------------------------------------------------------------------------- 1 | # Process with doxygen to generate API documentation 2 | 3 | PROJECT_NAME = @PACKAGE_NAME@ 4 | PROJECT_NUMBER = @PACKAGE_VERSION@ 5 | PROJECT_BRIEF = "RNN-based noise suppressor." 6 | INPUT = @top_srcdir@/include/rnnoise.h 7 | OPTIMIZE_OUTPUT_FOR_C = YES 8 | 9 | QUIET = YES 10 | WARNINGS = YES 11 | WARN_IF_UNDOCUMENTED = YES 12 | WARN_IF_DOC_ERROR = YES 13 | WARN_NO_PARAMDOC = YES 14 | 15 | JAVADOC_AUTOBRIEF = YES 16 | SORT_MEMBER_DOCS = NO 17 | 18 | HAVE_DOT = @HAVE_DOT@ 19 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: --------------------------------------------------------------------------------
# Demo programs for the rnnoise library. Each demo is a single C file and is
# placed in <build>/bin.
cmake_minimum_required(VERSION 3.5)

add_executable(rnnoise_demo rnnoise_demo.c)
add_executable(rnnoise_demo_simple rnnoise_demo_simple.c)

# librt only exists on some Unix systems (older glibc needs it for
# clock_gettime); macOS and Windows have no such library, so it is linked
# only when it is actually found.
find_library(RNNOISE_RT_LIBRARY rt)

foreach(demo rnnoise_demo rnnoise_demo_simple)
  # Public rnnoise API header (rnnoise.h); absolute path so it does not
  # depend on the directory CMake happens to be invoked from.
  target_include_directories(${demo} PRIVATE
    "${CMAKE_CURRENT_SOURCE_DIR}/../include")

  # Link the `rnnoise` *target* from src/ rather than a raw "-lrnnoise":
  # this gives the build a real dependency edge (the library is built before
  # the demo is linked) and removes the need for link_directories().
  target_link_libraries(${demo} PRIVATE rnnoise)

  # The math library is a separate library only on Unix-like systems.
  if(UNIX)
    target_link_libraries(${demo} PRIVATE m)
  endif()
  if(RNNOISE_RT_LIBRARY)
    target_link_libraries(${demo} PRIVATE "${RNNOISE_RT_LIBRARY}")
  endif()

  set_target_properties(${demo} PROPERTIES
    RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin")
endforeach()
--------------------------------------------------------------------------------
/examples/rnnoise_demo.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include "rnnoise.h" 4 | #include 5 | #include 6 | 7 | #define DR_MP3_IMPLEMENTATION 8 | 9 | #include "dr_mp3.h" 10 | 11 | #define DR_WAV_IMPLEMENTATION 12 | 13 | #include "dr_wav.h" 14 | 15 | #if defined(__APPLE__) 16 | # include 17 | #elif defined(_WIN32) 18 | # define WIN32_LEAN_AND_MEAN 19 | 20 | # include 21 | 22 | #else // __linux 23 | 24 | # include 25 | 26 | # ifndef CLOCK_MONOTONIC //_RAW 27 | # define CLOCK_MONOTONIC CLOCK_REALTIME 28 | # endif 29 | #endif 30 | 31 | static 32 | uint64_t nanotimer() { 33 | static int ever = 0; 34 | #if defined(__APPLE__) 35 | static mach_timebase_info_data_t frequency; 36 | if (!ever) { 37 | if (mach_timebase_info(&frequency) != KERN_SUCCESS) { 38 | return 0; 39 | } 40 | ever = 1; 41 | } 42 | return (mach_absolute_time() * frequency.numer / frequency.denom); 43 | #elif defined(_WIN32) 44 | static LARGE_INTEGER frequency; 45 | if (!ever) { 46 | QueryPerformanceFrequency(&frequency); 47 | ever = 1; 48 | } 49 | LARGE_INTEGER t; 50 | QueryPerformanceCounter(&t); 51 | return (t.QuadPart * (uint64_t) 1e9) / frequency.QuadPart; 52 | #else // __linux 53 | struct timespec t = {0}; 54 | if (!ever) { 55 | if (clock_gettime(CLOCK_MONOTONIC, &t) != 0) { 56 | return 0; 57 | } 58 | ever = 1; 59 | } 60 | clock_gettime(CLOCK_MONOTONIC, &t); 61 | return (t.tv_sec * (uint64_t) 1e9) + t.tv_nsec; 62 | #endif 63 | } 64 | 65 | static double now() { 66 | static uint64_t epoch = 0; 67 | if (!epoch) { 68 | epoch = nanotimer(); 69 | } 70 | return (nanotimer() - epoch) / 1e9; 71 | }; 72 | 73 | static double calcElapsed(double start, double end) { 74 | double took = -start; 75 | return took + end; 76 | } 77 | 78 | void wavWrite_s16(char *filename, float *buffer, int sampleRate, uint32_t totalSampleCount, uint32_t channels) { 79 | drwav_data_format format; 80 | format.container = drwav_container_riff; 81 | format.format = 
DR_WAVE_FORMAT_PCM; 82 | format.channels = channels; 83 | format.sampleRate = (drwav_uint32) sampleRate; 84 | format.bitsPerSample = 16; 85 | short *buffer_16= (short*) buffer; 86 | for (int32_t i = 0; i < totalSampleCount; ++i) { 87 | buffer_16[i] = drwav_clamp(buffer[i], -32768, 32767); 88 | } 89 | drwav *pWav = drwav_open_file_write(filename, &format); 90 | if (pWav) { 91 | drwav_uint64 samplesWritten = drwav_write(pWav, totalSampleCount, buffer); 92 | drwav_uninit(pWav); 93 | if (samplesWritten != totalSampleCount) { 94 | fprintf(stderr, "write file [%s] error.\n", filename); 95 | exit(1); 96 | } 97 | } 98 | } 99 | 100 | void wavWrite_f32(char *filename, float *buffer, int sampleRate, uint32_t totalSampleCount, uint32_t channels) { 101 | drwav_data_format format; 102 | format.container = drwav_container_riff; 103 | format.format = DR_WAVE_FORMAT_IEEE_FLOAT; 104 | format.channels = channels; 105 | format.sampleRate = (drwav_uint32) sampleRate; 106 | format.bitsPerSample = 32; 107 | for (int32_t i = 0; i < totalSampleCount; ++i) { 108 | buffer[i] = drwav_clamp(buffer[i], -32768, 32767) * (1.0f / 32768.0f); 109 | } 110 | drwav *pWav = drwav_open_file_write(filename, &format); 111 | if (pWav) { 112 | drwav_uint64 samplesWritten = drwav_write(pWav, totalSampleCount, buffer); 113 | drwav_uninit(pWav); 114 | if (samplesWritten != totalSampleCount) { 115 | fprintf(stderr, "write file [%s] error.\n", filename); 116 | exit(1); 117 | } 118 | } 119 | } 120 | 121 | float *wavRead_f32(const char *filename, uint32_t *sampleRate, uint64_t *sampleCount, uint32_t *channels) { 122 | drwav_uint64 totalSampleCount = 0; 123 | float *input = drwav_open_file_and_read_pcm_frames_f32(filename, channels, sampleRate, &totalSampleCount); 124 | if (input == NULL) { 125 | drmp3_config pConfig; 126 | input = drmp3_open_file_and_read_f32(filename, &pConfig, &totalSampleCount); 127 | if (input != NULL) { 128 | *channels = pConfig.outputChannels; 129 | *sampleRate = pConfig.outputSampleRate; 130 
| } 131 | } 132 | if (input == NULL) { 133 | fprintf(stderr, "read file [%s] error.\n", filename); 134 | exit(1); 135 | } 136 | *sampleCount = totalSampleCount * (*channels); 137 | for (int32_t i = 0; i < *sampleCount; ++i) { 138 | input[i] = input[i] * 32768.0f; 139 | } 140 | return input; 141 | } 142 | 143 | 144 | void splitpath(const char *path, char *drv, char *dir, char *name, char *ext) { 145 | const char *end; 146 | const char *p; 147 | const char *s; 148 | if (path[0] && path[1] == ':') { 149 | if (drv) { 150 | *drv++ = *path++; 151 | *drv++ = *path++; 152 | *drv = '\0'; 153 | } 154 | } else if (drv) 155 | *drv = '\0'; 156 | for (end = path; *end && *end != ':';) 157 | end++; 158 | for (p = end; p > path && *--p != '\\' && *p != '/';) 159 | if (*p == '.') { 160 | end = p; 161 | break; 162 | } 163 | if (ext) 164 | for (s = end; (*ext = *s++);) 165 | ext++; 166 | for (p = end; p > path;) 167 | if (*--p == '\\' || *p == '/') { 168 | p++; 169 | break; 170 | } 171 | if (name) { 172 | for (s = p; s < end;) 173 | *name++ = *s++; 174 | *name = '\0'; 175 | } 176 | if (dir) { 177 | for (s = path; s < p;) 178 | *dir++ = *s++; 179 | *dir = '\0'; 180 | } 181 | } 182 | 183 | 184 | uint64_t Resample_f32(const float *input, float *output, int inSampleRate, int outSampleRate, uint64_t inputSize, 185 | uint32_t channels 186 | ) { 187 | if (input == NULL) 188 | return 0; 189 | uint64_t outputSize = inputSize * outSampleRate / inSampleRate; 190 | if (output == NULL) 191 | return outputSize; 192 | double stepDist = ((double) inSampleRate / (double) outSampleRate); 193 | const uint64_t fixedFraction = (1LL << 32); 194 | const double normFixed = (1.0 / (1LL << 32)); 195 | uint64_t step = ((uint64_t) (stepDist * fixedFraction + 0.5)); 196 | uint64_t curOffset = 0; 197 | for (uint32_t i = 0; i < outputSize; i += 1) { 198 | for (uint32_t c = 0; c < channels; c += 1) { 199 | *output++ = (float) (input[c] + (input[c + channels] - input[c]) * ( 200 | (double) (curOffset >> 32) + 
((curOffset & (fixedFraction - 1)) * normFixed) 201 | ) 202 | ); 203 | } 204 | curOffset += step; 205 | input += (curOffset >> 32) * channels; 206 | curOffset &= (fixedFraction - 1); 207 | } 208 | return outputSize; 209 | } 210 | 211 | void denoise_proc(float *input, uint64_t sampleCount, uint32_t sampleRate, uint32_t channels) { 212 | uint32_t targetFrameSize = 480; 213 | uint32_t targetSampleRate = 48000; 214 | uint32_t perFrameSize = sampleRate / 100; 215 | float *frameBuffer = (float *) malloc(sizeof(*frameBuffer) * (channels + 1) * targetFrameSize); 216 | float *processBuffer = frameBuffer + targetFrameSize * channels; 217 | DenoiseState **sts = malloc(channels * sizeof(DenoiseState *)); 218 | if (sts == NULL || frameBuffer == NULL) { 219 | if (sts) 220 | free(sts); 221 | if (frameBuffer) 222 | free(frameBuffer); 223 | fprintf(stderr, "malloc error.\n"); 224 | return; 225 | } 226 | for (int i = 0; i < channels; i++) { 227 | sts[i] = rnnoise_create(); 228 | if (sts[i] == NULL) { 229 | for (int x = 0; x < i; x++) { 230 | if (sts[x]) { 231 | rnnoise_destroy(sts[x]); 232 | } 233 | } 234 | free(sts); 235 | free(frameBuffer); 236 | return; 237 | } 238 | } 239 | size_t frameStep = channels * perFrameSize; 240 | uint64_t frames = sampleCount / frameStep; 241 | uint64_t lastFrameSize = (sampleCount % frameStep) / channels; 242 | for (int i = 0; i < frames; ++i) { 243 | Resample_f32(input, frameBuffer, sampleRate, targetSampleRate, 244 | perFrameSize, channels); 245 | for (int c = 0; c < channels; c++) { 246 | for (int k = 0; k < targetFrameSize; k++) 247 | processBuffer[k] = frameBuffer[k * channels + c]; 248 | rnnoise_process_frame(sts[c], processBuffer, processBuffer); 249 | for (int k = 0; k < targetFrameSize; k++) 250 | frameBuffer[k * channels + c] = processBuffer[k]; 251 | } 252 | Resample_f32(frameBuffer, input, targetSampleRate, sampleRate, targetFrameSize, channels); 253 | input += frameStep; 254 | } 255 | if (lastFrameSize != 0) { 256 | memset(frameBuffer, 0, 
targetFrameSize * channels * sizeof(float)); 257 | uint64_t lastReasmpleSize = Resample_f32(input, frameBuffer, sampleRate, 258 | targetSampleRate, 259 | lastFrameSize, channels); 260 | for (int c = 0; c < channels; c++) { 261 | for (int k = 0; k < targetFrameSize; k++) 262 | processBuffer[k] = frameBuffer[k * channels + c]; 263 | rnnoise_process_frame(sts[c], processBuffer, processBuffer); 264 | for (int k = 0; k < targetFrameSize; k++) 265 | frameBuffer[k * channels + c] = processBuffer[k]; 266 | } 267 | Resample_f32(frameBuffer, input, targetSampleRate, sampleRate, lastReasmpleSize, 268 | channels); 269 | } 270 | for (int i = 0; i < channels; i++) { 271 | if (sts[i]) { 272 | rnnoise_destroy(sts[i]); 273 | } 274 | } 275 | free(sts); 276 | free(frameBuffer); 277 | } 278 | 279 | void rnnDeNoise(char *in_file, char *out_file) { 280 | uint32_t sampleRate = 0; 281 | uint64_t sampleCount = 0; 282 | uint32_t channels = 0; 283 | float *buffer = wavRead_f32(in_file, &sampleRate, &sampleCount, &channels); 284 | if (buffer != NULL) { 285 | double startTime = now(); 286 | denoise_proc(buffer, sampleCount, sampleRate, channels); 287 | double time_interval = calcElapsed(startTime, now()); 288 | printf("time interval: %f ms\n ", (time_interval * 1000)); 289 | wavWrite_s16(out_file, buffer, sampleRate, (uint32_t) sampleCount, channels); 290 | free(buffer); 291 | } 292 | } 293 | 294 | 295 | int main(int argc, char **argv) { 296 | printf("Audio Noise Reduction\n"); 297 | printf("blog:http://cpuimage.cnblogs.com/\n"); 298 | printf("e-mail:gaozhihan@vip.qq.com\n"); 299 | 300 | if (argc < 2) { 301 | printf("usage:\n"); 302 | printf("./rnnoise input.wav\n"); 303 | printf("./rnnoise input.mp3\n"); 304 | printf("or\n"); 305 | printf("./rnnoise input.wav output.wav\n"); 306 | printf("./rnnoise input.mp3 output.wav\n"); 307 | return -1; 308 | } 309 | char *in_file = argv[1]; 310 | if (argc > 2) { 311 | char *out_file = argv[2]; 312 | rnnDeNoise(in_file, out_file); 313 | } else { 314 | 
char drive[3]; 315 | char dir[256]; 316 | char fname[256]; 317 | char ext[256]; 318 | char out_file[1024]; 319 | splitpath(in_file, drive, dir, fname, ext); 320 | sprintf(out_file, "%s%s%s_out.wav", drive, dir, fname); 321 | rnnDeNoise(in_file, out_file); 322 | } 323 | printf("press any key to exit.\n"); 324 | getchar(); 325 | return 0; 326 | } 327 | -------------------------------------------------------------------------------- /examples/rnnoise_demo_simple.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2017 Mozilla */ 2 | /* 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | 7 | - Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | - Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 15 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 16 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 17 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 22 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #include 28 | #include "rnnoise.h" 29 | 30 | #define FRAME_SIZE 480 31 | 32 | int main(int argc, char **argv) { 33 | int i; 34 | int first = 1; 35 | float x[FRAME_SIZE]; 36 | FILE *f1, *fout; 37 | DenoiseState *st; 38 | st = rnnoise_create(); 39 | if (argc!=3) { 40 | fprintf(stderr, "usage: %s \n", argv[0]); 41 | return 1; 42 | } 43 | f1 = fopen(argv[1], "r"); 44 | fout = fopen(argv[2], "w"); 45 | while (1) { 46 | short tmp[FRAME_SIZE]; 47 | fread(tmp, sizeof(short), FRAME_SIZE, f1); 48 | if (feof(f1)) break; 49 | for (i=0;i 3 | dnl Copyright (c) 2006-2007 xine project 4 | dnl 5 | dnl This program is free software; you can redistribute it and/or modify 6 | dnl it under the terms of the GNU General Public License as published by 7 | dnl the Free Software Foundation; either version 2, or (at your option) 8 | dnl any later version. 9 | dnl 10 | dnl This program is distributed in the hope that it will be useful, 11 | dnl but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | dnl GNU General Public License for more details. 14 | dnl 15 | dnl You should have received a copy of the GNU General Public License 16 | dnl along with this program; if not, write to the Free Software 17 | dnl Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 18 | dnl 02110-1301, USA. 19 | dnl 20 | dnl As a special exception, the copyright owners of the 21 | dnl macro gives unlimited permission to copy, distribute and modify the 22 | dnl configure scripts that are the output of Autoconf when processing the 23 | dnl Macro. You need not follow the terms of the GNU General Public 24 | dnl License when using or distributing such scripts, even though portions 25 | dnl of the text of the Macro appear in them. The GNU General Public 26 | dnl License (GPL) does govern all other use of the material that 27 | dnl constitutes the Autoconf Macro. 
28 | dnl 29 | dnl This special exception to the GPL applies to versions of the 30 | dnl Autoconf Macro released by this project. When you make and 31 | dnl distribute a modified version of the Autoconf Macro, you may extend 32 | dnl this special exception to the GPL to apply to your modified version as 33 | dnl well. 34 | 35 | dnl Check if the flag is supported by compiler 36 | dnl CC_CHECK_CFLAGS_SILENT([FLAG], [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND]) 37 | 38 | AC_DEFUN([CC_CHECK_CFLAGS_SILENT], [ 39 | AC_CACHE_VAL(AS_TR_SH([cc_cv_cflags_$1]), 40 | [ac_save_CFLAGS="$CFLAGS" 41 | CFLAGS="$CFLAGS $1" 42 | AC_LINK_IFELSE([AC_LANG_SOURCE([int main() { return 0; }])], 43 | [eval "AS_TR_SH([cc_cv_cflags_$1])='yes'"], 44 | [eval "AS_TR_SH([cc_cv_cflags_$1])='no'"]) 45 | CFLAGS="$ac_save_CFLAGS" 46 | ]) 47 | 48 | AS_IF([eval test x$]AS_TR_SH([cc_cv_cflags_$1])[ = xyes], 49 | [$2], [$3]) 50 | ]) 51 | 52 | dnl Check if the flag is supported by compiler (cacheable) 53 | dnl CC_CHECK_CFLAGS([FLAG], [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND]) 54 | 55 | AC_DEFUN([CC_CHECK_CFLAGS], [ 56 | AC_CACHE_CHECK([if $CC supports $1 flag], 57 | AS_TR_SH([cc_cv_cflags_$1]), 58 | CC_CHECK_CFLAGS_SILENT([$1]) dnl Don't execute actions here! 59 | ) 60 | 61 | AS_IF([eval test x$]AS_TR_SH([cc_cv_cflags_$1])[ = xyes], 62 | [$2], [$3]) 63 | ]) 64 | 65 | dnl CC_CHECK_CFLAG_APPEND(FLAG, [action-if-found], [action-if-not-found]) 66 | dnl Check for CFLAG and appends them to CFLAGS if supported 67 | AC_DEFUN([CC_CHECK_CFLAG_APPEND], [ 68 | AC_CACHE_CHECK([if $CC supports $1 flag], 69 | AS_TR_SH([cc_cv_cflags_$1]), 70 | CC_CHECK_CFLAGS_SILENT([$1]) dnl Don't execute actions here! 
71 | ) 72 | 73 | AS_IF([eval test x$]AS_TR_SH([cc_cv_cflags_$1])[ = xyes], 74 | [CFLAGS="$CFLAGS $1"; $2], [$3]) 75 | ]) 76 | 77 | dnl CC_CHECK_CFLAGS_APPEND([FLAG1 FLAG2], [action-if-found], [action-if-not]) 78 | AC_DEFUN([CC_CHECK_CFLAGS_APPEND], [ 79 | for flag in $1; do 80 | CC_CHECK_CFLAG_APPEND($flag, [$2], [$3]) 81 | done 82 | ]) 83 | 84 | dnl Check if the flag is supported by linker (cacheable) 85 | dnl CC_CHECK_LDFLAGS([FLAG], [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND]) 86 | 87 | AC_DEFUN([CC_CHECK_LDFLAGS], [ 88 | AC_CACHE_CHECK([if $CC supports $1 flag], 89 | AS_TR_SH([cc_cv_ldflags_$1]), 90 | [ac_save_LDFLAGS="$LDFLAGS" 91 | LDFLAGS="$LDFLAGS $1" 92 | AC_LINK_IFELSE([AC_LANG_SOURCE([int main() { return 1; }])], 93 | [eval "AS_TR_SH([cc_cv_ldflags_$1])='yes'"], 94 | [eval "AS_TR_SH([cc_cv_ldflags_$1])="]) 95 | LDFLAGS="$ac_save_LDFLAGS" 96 | ]) 97 | 98 | AS_IF([eval test x$]AS_TR_SH([cc_cv_ldflags_$1])[ = xyes], 99 | [$2], [$3]) 100 | ]) 101 | 102 | dnl define the LDFLAGS_NOUNDEFINED variable with the correct value for 103 | dnl the current linker to avoid undefined references in a shared object. 104 | AC_DEFUN([CC_NOUNDEFINED], [ 105 | dnl We check $host for which systems to enable this for. 106 | AC_REQUIRE([AC_CANONICAL_HOST]) 107 | 108 | case $host in 109 | dnl FreeBSD (et al.) does not complete linking for shared objects when pthreads 110 | dnl are requested, as different implementations are present; to avoid problems 111 | dnl use -Wl,-z,defs only for those platform not behaving this way. 112 | *-freebsd* | *-openbsd*) ;; 113 | *) 114 | dnl First of all check for the --no-undefined variant of GNU ld. This allows 115 | dnl for a much more readable commandline, so that people can understand what 116 | dnl it does without going to look for what the heck -z defs does. 
117 | for possible_flags in "-Wl,--no-undefined" "-Wl,-z,defs"; do 118 | CC_CHECK_LDFLAGS([$possible_flags], [LDFLAGS_NOUNDEFINED="$possible_flags"]) 119 | break 120 | done 121 | ;; 122 | esac 123 | 124 | AC_SUBST([LDFLAGS_NOUNDEFINED]) 125 | ]) 126 | 127 | dnl Check for a -Werror flag or equivalent. -Werror is the GCC 128 | dnl and ICC flag that tells the compiler to treat all the warnings 129 | dnl as fatal. We usually need this option to make sure that some 130 | dnl constructs (like attributes) are not simply ignored. 131 | dnl 132 | dnl Other compilers don't support -Werror per se, but they support 133 | dnl an equivalent flag: 134 | dnl - Sun Studio compiler supports -errwarn=%all 135 | AC_DEFUN([CC_CHECK_WERROR], [ 136 | AC_CACHE_CHECK( 137 | [for $CC way to treat warnings as errors], 138 | [cc_cv_werror], 139 | [CC_CHECK_CFLAGS_SILENT([-Werror], [cc_cv_werror=-Werror], 140 | [CC_CHECK_CFLAGS_SILENT([-errwarn=%all], [cc_cv_werror=-errwarn=%all])]) 141 | ]) 142 | ]) 143 | 144 | AC_DEFUN([CC_CHECK_ATTRIBUTE], [ 145 | AC_REQUIRE([CC_CHECK_WERROR]) 146 | AC_CACHE_CHECK([if $CC supports __attribute__(( ifelse([$2], , [$1], [$2]) ))], 147 | AS_TR_SH([cc_cv_attribute_$1]), 148 | [ac_save_CFLAGS="$CFLAGS" 149 | CFLAGS="$CFLAGS $cc_cv_werror" 150 | AC_COMPILE_IFELSE([AC_LANG_SOURCE([$3])], 151 | [eval "AS_TR_SH([cc_cv_attribute_$1])='yes'"], 152 | [eval "AS_TR_SH([cc_cv_attribute_$1])='no'"]) 153 | CFLAGS="$ac_save_CFLAGS" 154 | ]) 155 | 156 | AS_IF([eval test x$]AS_TR_SH([cc_cv_attribute_$1])[ = xyes], 157 | [AC_DEFINE( 158 | AS_TR_CPP([SUPPORT_ATTRIBUTE_$1]), 1, 159 | [Define this if the compiler supports __attribute__(( ifelse([$2], , [$1], [$2]) ))] 160 | ) 161 | $4], 162 | [$5]) 163 | ]) 164 | 165 | AC_DEFUN([CC_ATTRIBUTE_CONSTRUCTOR], [ 166 | CC_CHECK_ATTRIBUTE( 167 | [constructor],, 168 | [extern void foo(); 169 | void __attribute__((constructor)) ctor() { foo(); }], 170 | [$1], [$2]) 171 | ]) 172 | 173 | AC_DEFUN([CC_ATTRIBUTE_DESTRUCTOR], [ 174 | 
CC_CHECK_ATTRIBUTE( 175 | [destructor],, 176 | [extern void foo(); 177 | void __attribute__((destructor)) dtor() { foo(); }], 178 | [$1], [$2]) 179 | ]) 180 | 181 | AC_DEFUN([CC_ATTRIBUTE_FORMAT], [ 182 | CC_CHECK_ATTRIBUTE( 183 | [format], [format(printf, n, n)], 184 | [void __attribute__((format(printf, 1, 2))) printflike(const char *fmt, ...) { fmt = (void *)0; }], 185 | [$1], [$2]) 186 | ]) 187 | 188 | AC_DEFUN([CC_ATTRIBUTE_FORMAT_ARG], [ 189 | CC_CHECK_ATTRIBUTE( 190 | [format_arg], [format_arg(printf)], 191 | [char *__attribute__((format_arg(1))) gettextlike(const char *fmt) { fmt = (void *)0; }], 192 | [$1], [$2]) 193 | ]) 194 | 195 | AC_DEFUN([CC_ATTRIBUTE_VISIBILITY], [ 196 | CC_CHECK_ATTRIBUTE( 197 | [visibility_$1], [visibility("$1")], 198 | [void __attribute__((visibility("$1"))) $1_function() { }], 199 | [$2], [$3]) 200 | ]) 201 | 202 | AC_DEFUN([CC_ATTRIBUTE_NONNULL], [ 203 | CC_CHECK_ATTRIBUTE( 204 | [nonnull], [nonnull()], 205 | [void __attribute__((nonnull())) some_function(void *foo, void *bar) { foo = (void*)0; bar = (void*)0; }], 206 | [$1], [$2]) 207 | ]) 208 | 209 | AC_DEFUN([CC_ATTRIBUTE_UNUSED], [ 210 | CC_CHECK_ATTRIBUTE( 211 | [unused], , 212 | [void some_function(void *foo, __attribute__((unused)) void *bar);], 213 | [$1], [$2]) 214 | ]) 215 | 216 | AC_DEFUN([CC_ATTRIBUTE_SENTINEL], [ 217 | CC_CHECK_ATTRIBUTE( 218 | [sentinel], , 219 | [void some_function(void *foo, ...) __attribute__((sentinel));], 220 | [$1], [$2]) 221 | ]) 222 | 223 | AC_DEFUN([CC_ATTRIBUTE_DEPRECATED], [ 224 | CC_CHECK_ATTRIBUTE( 225 | [deprecated], , 226 | [void some_function(void *foo, ...) 
__attribute__((deprecated));], 227 | [$1], [$2]) 228 | ]) 229 | 230 | AC_DEFUN([CC_ATTRIBUTE_ALIAS], [ 231 | CC_CHECK_ATTRIBUTE( 232 | [alias], [weak, alias], 233 | [void other_function(void *foo) { } 234 | void some_function(void *foo) __attribute__((weak, alias("other_function")));], 235 | [$1], [$2]) 236 | ]) 237 | 238 | AC_DEFUN([CC_ATTRIBUTE_MALLOC], [ 239 | CC_CHECK_ATTRIBUTE( 240 | [malloc], , 241 | [void * __attribute__((malloc)) my_alloc(int n);], 242 | [$1], [$2]) 243 | ]) 244 | 245 | AC_DEFUN([CC_ATTRIBUTE_PACKED], [ 246 | CC_CHECK_ATTRIBUTE( 247 | [packed], , 248 | [struct astructure { char a; int b; long c; void *d; } __attribute__((packed)); 249 | char assert@<:@(sizeof(struct astructure) == (sizeof(char)+sizeof(int)+sizeof(long)+sizeof(void*)))-1@:>@;], 250 | [$1], [$2]) 251 | ]) 252 | 253 | AC_DEFUN([CC_ATTRIBUTE_CONST], [ 254 | CC_CHECK_ATTRIBUTE( 255 | [const], , 256 | [int __attribute__((const)) twopow(int n) { return 1 << n; } ], 257 | [$1], [$2]) 258 | ]) 259 | 260 | AC_DEFUN([CC_FLAG_VISIBILITY], [ 261 | AC_REQUIRE([CC_CHECK_WERROR]) 262 | AC_CACHE_CHECK([if $CC supports -fvisibility=hidden], 263 | [cc_cv_flag_visibility], 264 | [cc_flag_visibility_save_CFLAGS="$CFLAGS" 265 | CFLAGS="$CFLAGS $cc_cv_werror" 266 | CC_CHECK_CFLAGS_SILENT([-fvisibility=hidden], 267 | cc_cv_flag_visibility='yes', 268 | cc_cv_flag_visibility='no') 269 | CFLAGS="$cc_flag_visibility_save_CFLAGS"]) 270 | 271 | AS_IF([test "x$cc_cv_flag_visibility" = "xyes"], 272 | [AC_DEFINE([SUPPORT_FLAG_VISIBILITY], 1, 273 | [Define this if the compiler supports the -fvisibility flag]) 274 | $1], 275 | [$2]) 276 | ]) 277 | 278 | AC_DEFUN([CC_FUNC_EXPECT], [ 279 | AC_REQUIRE([CC_CHECK_WERROR]) 280 | AC_CACHE_CHECK([if compiler has __builtin_expect function], 281 | [cc_cv_func_expect], 282 | [ac_save_CFLAGS="$CFLAGS" 283 | CFLAGS="$CFLAGS $cc_cv_werror" 284 | AC_COMPILE_IFELSE([AC_LANG_SOURCE( 285 | [int some_function() { 286 | int a = 3; 287 | return (int)__builtin_expect(a, 3); 288 | 
}])], 289 | [cc_cv_func_expect=yes], 290 | [cc_cv_func_expect=no]) 291 | CFLAGS="$ac_save_CFLAGS" 292 | ]) 293 | 294 | AS_IF([test "x$cc_cv_func_expect" = "xyes"], 295 | [AC_DEFINE([SUPPORT__BUILTIN_EXPECT], 1, 296 | [Define this if the compiler supports __builtin_expect() function]) 297 | $1], 298 | [$2]) 299 | ]) 300 | 301 | AC_DEFUN([CC_ATTRIBUTE_ALIGNED], [ 302 | AC_REQUIRE([CC_CHECK_WERROR]) 303 | AC_CACHE_CHECK([highest __attribute__ ((aligned ())) supported], 304 | [cc_cv_attribute_aligned], 305 | [ac_save_CFLAGS="$CFLAGS" 306 | CFLAGS="$CFLAGS $cc_cv_werror" 307 | for cc_attribute_align_try in 64 32 16 8 4 2; do 308 | AC_COMPILE_IFELSE([AC_LANG_SOURCE([ 309 | int main() { 310 | static char c __attribute__ ((aligned($cc_attribute_align_try))) = 0; 311 | return c; 312 | }])], [cc_cv_attribute_aligned=$cc_attribute_align_try; break]) 313 | done 314 | CFLAGS="$ac_save_CFLAGS" 315 | ]) 316 | 317 | if test "x$cc_cv_attribute_aligned" != "x"; then 318 | AC_DEFINE_UNQUOTED([ATTRIBUTE_ALIGNED_MAX], [$cc_cv_attribute_aligned], 319 | [Define the highest alignment supported]) 320 | fi 321 | ]) 322 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: --------------------------------------------------------------------------------
# Builds the rnnoise library twice from the same sources: as a shared library
# (target `rnnoise`) and as a static library (target `rnnoise_static`).
# Both end up in <build>/lib.
cmake_minimum_required(VERSION 3.5)

set(RN_NOISE_SRC
    denoise.c
    celt_lpc.c
    pitch.c
    rnn.c
    rnn_data.c)

add_library(rnnoise SHARED ${RN_NOISE_SRC})
add_library(rnnoise_static STATIC ${RN_NOISE_SRC})

foreach(lib rnnoise rnnoise_static)
  # PUBLIC: the headers in include/ are needed to build the library and by
  # everything that links against it, so the path travels with the target
  # instead of being a directory-wide, relative include_directories().
  target_include_directories(${lib} PUBLIC
    "${CMAKE_CURRENT_SOURCE_DIR}/../include")

  # The demos link the math library by hand, presumably on behalf of this
  # library's sources (not visible from here -- confirm); attaching it to
  # the targets lets every consumer inherit it. Unix only: elsewhere the
  # math functions live in the C runtime.
  if(UNIX)
    target_link_libraries(${lib} PUBLIC m)
  endif()

  # Same location the deprecated LIBRARY_OUTPUT_PATH variable selected,
  # expressed as per-target properties.
  set_target_properties(${lib} PROPERTIES
    ARCHIVE_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib"
    LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib"
    RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib")
endforeach()

# Give the static archive the same base name as the shared library
# (librnnoise.a next to librnnoise.so).
# NOTE(review): with MSVC a DLL import library would also be called
# rnnoise.lib and collide with the static archive -- confirm before
# supporting that toolchain.
set_target_properties(rnnoise_static PROPERTIES OUTPUT_NAME "rnnoise")
-------------------------------------------------------------------------------- /src/arch.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2003-2008 Jean-Marc Valin 2 | Copyright (c) 2007-2008 CSIRO 3 | Copyright (c) 2007-2009
Xiph.Org Foundation 4 | Written by Jean-Marc Valin */ 5 | /** 6 | @file arch.h 7 | @brief Various architecture definitions for CELT 8 | */ 9 | /* 10 | Redistribution and use in source and binary forms, with or without 11 | modification, are permitted provided that the following conditions 12 | are met: 13 | 14 | - Redistributions of source code must retain the above copyright 15 | notice, this list of conditions and the following disclaimer. 16 | 17 | - Redistributions in binary form must reproduce the above copyright 18 | notice, this list of conditions and the following disclaimer in the 19 | documentation and/or other materials provided with the distribution. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 25 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | */ 33 | 34 | #ifndef ARCH_H 35 | #define ARCH_H 36 | 37 | #include "opus_types.h" 38 | #include "common.h" 39 | 40 | # if !defined(__GNUC_PREREQ) 41 | # if defined(__GNUC__)&&defined(__GNUC_MINOR__) 42 | # define __GNUC_PREREQ(_maj,_min) \ 43 | ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min)) 44 | # else 45 | # define __GNUC_PREREQ(_maj,_min) 0 46 | # endif 47 | # endif 48 | 49 | #define CELT_SIG_SCALE 32768.f 50 | 51 | #define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__); 52 | #ifdef ENABLE_ASSERTIONS 53 | #include 54 | #include 55 | #ifdef __GNUC__ 56 | __attribute__((noreturn)) 57 | #endif 58 | static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line) 59 | { 60 | fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str); 61 | abort(); 62 | } 63 | #define celt_assert(cond) {if (!(cond)) {celt_fatal("assertion failed: " #cond);}} 64 | #define celt_assert2(cond, message) {if (!(cond)) {celt_fatal("assertion failed: " #cond "\n" message);}} 65 | #else 66 | #define celt_assert(cond) 67 | #define celt_assert2(cond, message) 68 | #endif 69 | 70 | #define IMUL32(a,b) ((a)*(b)) 71 | 72 | #define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */ 73 | #define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ 74 | #define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */ 75 | #define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */ 76 | #define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */ 77 | #define IMAX(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum int value. */ 78 | #define UADD32(a,b) ((a)+(b)) 79 | #define USUB32(a,b) ((a)-(b)) 80 | 81 | /* Set this if opus_int64 is a native type of the CPU. */ 82 | /* Assume that all LP64 architectures have fast 64-bit types; also x86_64 83 | (which can be ILP32 for x32) and Win64 (which is LLP64). 
*/ 84 | #if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64) 85 | #define OPUS_FAST_INT64 1 86 | #else 87 | #define OPUS_FAST_INT64 0 88 | #endif 89 | 90 | #define PRINT_MIPS(file) 91 | 92 | #ifdef FIXED_POINT 93 | 94 | typedef opus_int16 opus_val16; 95 | typedef opus_int32 opus_val32; 96 | typedef opus_int64 opus_val64; 97 | 98 | typedef opus_val32 celt_sig; 99 | typedef opus_val16 celt_norm; 100 | typedef opus_val32 celt_ener; 101 | 102 | #define Q15ONE 32767 103 | 104 | #define SIG_SHIFT 12 105 | /* Safe saturation value for 32-bit signals. Should be less than 106 | 2^31*(1-0.85) to avoid blowing up on DC at deemphasis.*/ 107 | #define SIG_SAT (300000000) 108 | 109 | #define NORM_SCALING 16384 110 | 111 | #define DB_SHIFT 10 112 | 113 | #define EPSILON 1 114 | #define VERY_SMALL 0 115 | #define VERY_LARGE16 ((opus_val16)32767) 116 | #define Q15_ONE ((opus_val16)32767) 117 | 118 | #define SCALEIN(a) (a) 119 | #define SCALEOUT(a) (a) 120 | 121 | #define ABS16(x) ((x) < 0 ? (-(x)) : (x)) 122 | #define ABS32(x) ((x) < 0 ? (-(x)) : (x)) 123 | 124 | static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { 125 | return x > 32767 ? 32767 : x < -32768 ? 
-32768 : (opus_int16)x; 126 | } 127 | 128 | #ifdef FIXED_DEBUG 129 | #include "fixed_debug.h" 130 | #else 131 | 132 | #include "fixed_generic.h" 133 | 134 | #ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR 135 | #include "arm/fixed_arm64.h" 136 | #elif OPUS_ARM_INLINE_EDSP 137 | #include "arm/fixed_armv5e.h" 138 | #elif defined (OPUS_ARM_INLINE_ASM) 139 | #include "arm/fixed_armv4.h" 140 | #elif defined (BFIN_ASM) 141 | #include "fixed_bfin.h" 142 | #elif defined (TI_C5X_ASM) 143 | #include "fixed_c5x.h" 144 | #elif defined (TI_C6X_ASM) 145 | #include "fixed_c6x.h" 146 | #endif 147 | 148 | #endif 149 | 150 | #else /* FIXED_POINT */ 151 | 152 | typedef float opus_val16; 153 | typedef float opus_val32; 154 | typedef float opus_val64; 155 | 156 | typedef float celt_sig; 157 | typedef float celt_norm; 158 | typedef float celt_ener; 159 | 160 | #ifdef FLOAT_APPROX 161 | /* This code should reliably detect NaN/inf even when -ffast-math is used. 162 | Assumes IEEE 754 format. */ 163 | static OPUS_INLINE int celt_isnan(float x) 164 | { 165 | union {float f; opus_uint32 i;} in; 166 | in.f = x; 167 | return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0; 168 | } 169 | #else 170 | #ifdef __FAST_MATH__ 171 | #error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. NaN) input 172 | #endif 173 | #define celt_isnan(x) ((x)!=(x)) 174 | #endif 175 | 176 | #define Q15ONE 1.0f 177 | 178 | #define NORM_SCALING 1.f 179 | 180 | #define EPSILON 1e-15f 181 | #define VERY_SMALL 1e-30f 182 | #define VERY_LARGE16 1e15f 183 | #define Q15_ONE ((opus_val16)1.f) 184 | 185 | /* This appears to be the same speed as C99's fabsf() but it's more portable. 
*/ 186 | #define ABS16(x) ((float)fabs(x)) 187 | #define ABS32(x) ((float)fabs(x)) 188 | 189 | #define QCONST16(x,bits) (x) 190 | #define QCONST32(x,bits) (x) 191 | 192 | #define NEG16(x) (-(x)) 193 | #define NEG32(x) (-(x)) 194 | #define NEG32_ovflw(x) (-(x)) 195 | #define EXTRACT16(x) (x) 196 | #define EXTEND32(x) (x) 197 | #define SHR16(a,shift) (a) 198 | #define SHL16(a,shift) (a) 199 | #define SHR32(a,shift) (a) 200 | #define SHL32(a,shift) (a) 201 | #define PSHR32(a,shift) (a) 202 | #define VSHR32(a,shift) (a) 203 | 204 | #define PSHR(a,shift) (a) 205 | #define SHR(a,shift) (a) 206 | #define SHL(a,shift) (a) 207 | #define SATURATE(x,a) (x) 208 | #define SATURATE16(x) (x) 209 | 210 | #define ROUND16(a,shift) (a) 211 | #define SROUND16(a,shift) (a) 212 | #define HALF16(x) (.5f*(x)) 213 | #define HALF32(x) (.5f*(x)) 214 | 215 | #define ADD16(a,b) ((a)+(b)) 216 | #define SUB16(a,b) ((a)-(b)) 217 | #define ADD32(a,b) ((a)+(b)) 218 | #define SUB32(a,b) ((a)-(b)) 219 | #define ADD32_ovflw(a,b) ((a)+(b)) 220 | #define SUB32_ovflw(a,b) ((a)-(b)) 221 | #define MULT16_16_16(a,b) ((a)*(b)) 222 | #define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b)) 223 | #define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b)) 224 | 225 | #define MULT16_32_Q15(a,b) ((a)*(b)) 226 | #define MULT16_32_Q16(a,b) ((a)*(b)) 227 | 228 | #define MULT32_32_Q31(a,b) ((a)*(b)) 229 | 230 | #define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) 231 | #define MAC16_32_Q16(c,a,b) ((c)+(a)*(b)) 232 | 233 | #define MULT16_16_Q11_32(a,b) ((a)*(b)) 234 | #define MULT16_16_Q11(a,b) ((a)*(b)) 235 | #define MULT16_16_Q13(a,b) ((a)*(b)) 236 | #define MULT16_16_Q14(a,b) ((a)*(b)) 237 | #define MULT16_16_Q15(a,b) ((a)*(b)) 238 | #define MULT16_16_P15(a,b) ((a)*(b)) 239 | #define MULT16_16_P13(a,b) ((a)*(b)) 240 | #define MULT16_16_P14(a,b) ((a)*(b)) 241 | #define MULT16_32_P16(a,b) ((a)*(b)) 242 | 243 | #define DIV32_16(a,b) (((opus_val32)(a))/(opus_val16)(b)) 244 | #define DIV32(a,b) 
(((opus_val32)(a))/(opus_val32)(b)) 245 | 246 | #define SCALEIN(a) ((a)*CELT_SIG_SCALE) 247 | #define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE)) 248 | 249 | #define SIG2WORD16(x) (x) 250 | 251 | #endif /* !FIXED_POINT */ 252 | 253 | #ifndef GLOBAL_STACK_SIZE 254 | #ifdef FIXED_POINT 255 | #define GLOBAL_STACK_SIZE 120000 256 | #else 257 | #define GLOBAL_STACK_SIZE 120000 258 | #endif 259 | #endif 260 | 261 | #endif /* ARCH_H */ 262 | -------------------------------------------------------------------------------- /src/celt_lpc.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2009-2010 Xiph.Org Foundation 2 | Written by Jean-Marc Valin */ 3 | /* 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | - Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | - Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER 19 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 23 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | 28 | #ifdef HAVE_CONFIG_H 29 | #include "config.h" 30 | #endif 31 | 32 | #include "celt_lpc.h" 33 | #include "arch.h" 34 | #include "common.h" 35 | #include "pitch.h" 36 | 37 | void _celt_lpc( 38 | opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */ 39 | const opus_val32 *ac, /* in: [0...p] autocorrelation values */ 40 | int p 41 | ) { 42 | int i, j; 43 | opus_val32 r; 44 | opus_val32 error = ac[0]; 45 | #ifdef FIXED_POINT 46 | opus_val32 lpc[LPC_ORDER]; 47 | #else 48 | float *lpc = _lpc; 49 | #endif 50 | memset(lpc, 0, p * sizeof(*lpc)); 51 | if (ac[0] != 0) { 52 | for (i = 0; i < p; i++) { 53 | /* Sum up this iteration's reflection coefficient */ 54 | opus_val32 rr = 0; 55 | for (j = 0; j < i; j++) 56 | rr += MULT32_32_Q31(lpc[j], ac[i - j]); 57 | rr += SHR32(ac[i + 1], 3); 58 | r = -SHL32(rr, 3) / error; 59 | /* Update LPC coefficients and total error */ 60 | lpc[i] = SHR32(r, 3); 61 | for (j = 0; j < (i + 1) >> 1; j++) { 62 | opus_val32 tmp1, tmp2; 63 | tmp1 = lpc[j]; 64 | tmp2 = lpc[i - 1 - j]; 65 | lpc[j] = tmp1 + MULT32_32_Q31(r, tmp2); 66 | lpc[i - 1 - j] = tmp2 + MULT32_32_Q31(r, tmp1); 67 | } 68 | 69 | error = error - MULT32_32_Q31(MULT32_32_Q31(r, r), error); 70 | /* Bail out once we get 30 dB gain */ 71 | #ifdef FIXED_POINT 72 | if (error < SHR32(ac[0], 10)) 73 | break; 74 | #else 75 | if (error < .001f * ac[0]) 76 | break; 77 | #endif 78 | } 79 | } 80 | #ifdef FIXED_POINT 81 | for (i = 0; i < p; 
i++) 82 | _lpc[i] = ROUND16(lpc[i], 16); 83 | #endif 84 | } 85 | 86 | 87 | void celt_fir( 88 | const opus_val16 *x, 89 | const opus_val16 *num, 90 | opus_val16 *y, 91 | int N, 92 | int ord) { 93 | int i, j; 94 | opus_val16 *rnum = rnnoise_alloc(ord * sizeof(opus_val16)); 95 | if (rnum == NULL) { 96 | printf("[%s %d] malloc failed\n", __FUNCTION__, __LINE__); 97 | return; 98 | } 99 | 100 | for (i = 0; i < ord; i++) 101 | rnum[i] = num[ord - i - 1]; 102 | for (i = 0; i < N - 3; i += 4) { 103 | opus_val32 sum[4]; 104 | sum[0] = SHL32(EXTEND32(x[i]), SIG_SHIFT); 105 | sum[1] = SHL32(EXTEND32(x[i + 1]), SIG_SHIFT), 106 | sum[2] = SHL32(EXTEND32(x[i + 2]), SIG_SHIFT); 107 | sum[3] = SHL32(EXTEND32(x[i + 3]), SIG_SHIFT); 108 | xcorr_kernel(rnum, x + i - ord, sum, ord); 109 | y[i] = ROUND16(sum[0], SIG_SHIFT); 110 | y[i + 1] = ROUND16(sum[1], SIG_SHIFT); 111 | y[i + 2] = ROUND16(sum[2], SIG_SHIFT); 112 | y[i + 3] = ROUND16(sum[3], SIG_SHIFT); 113 | } 114 | for (; i < N; i++) { 115 | opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT); 116 | for (j = 0; j < ord; j++) 117 | sum = MAC16_16(sum, rnum[j], x[i + j - ord]); 118 | y[i] = ROUND16(sum, SIG_SHIFT); 119 | } 120 | rnnoise_free(rnum); 121 | } 122 | 123 | void celt_iir(const opus_val32 *_x, 124 | const opus_val16 *den, 125 | opus_val32 *_y, 126 | int N, 127 | int ord, 128 | opus_val16 *mem) { 129 | #ifdef SMALL_FOOTPRINT 130 | int i, j; 131 | for (i = 0; i < N; i++) 132 | { 133 | opus_val32 sum = _x[i]; 134 | for (j = 0; j < ord; j++) 135 | { 136 | sum -= MULT16_16(den[j], mem[j]); 137 | } 138 | for (j = ord - 1; j >= 1; j--) 139 | { 140 | mem[j] = mem[j - 1]; 141 | } 142 | mem[0] = SROUND16(sum, SIG_SHIFT); 143 | _y[i] = sum; 144 | } 145 | #else 146 | int i, j; 147 | celt_assert((ord & 3) == 0); 148 | opus_val16 *rden = rnnoise_alloc(ord * sizeof(opus_val16)); 149 | opus_val16 *y = rnnoise_alloc((N + ord) * sizeof(opus_val16)); 150 | if (rden == NULL || y == NULL) { 151 | rnnoise_free(rden); 152 | rnnoise_free(y); 153 | 
154 | printf("[%s %d] malloc failed\n", __FUNCTION__, __LINE__); 155 | return; 156 | } 157 | for (i = 0; i < ord; i++) 158 | rden[i] = den[ord - i - 1]; 159 | for (i = 0; i < ord; i++) 160 | y[i] = -mem[ord - i - 1]; 161 | for (; i < N + ord; i++) 162 | y[i] = 0; 163 | for (i = 0; i < N - 3; i += 4) { 164 | /* Unroll by 4 as if it were an FIR filter */ 165 | opus_val32 sum[4]; 166 | sum[0] = _x[i]; 167 | sum[1] = _x[i + 1]; 168 | sum[2] = _x[i + 2]; 169 | sum[3] = _x[i + 3]; 170 | xcorr_kernel(rden, y + i, sum, ord); 171 | 172 | /* Patch up the result to compensate for the fact that this is an IIR */ 173 | y[i + ord] = -SROUND16(sum[0], SIG_SHIFT); 174 | _y[i] = sum[0]; 175 | sum[1] = MAC16_16(sum[1], y[i + ord], den[0]); 176 | y[i + ord + 1] = -SROUND16(sum[1], SIG_SHIFT); 177 | _y[i + 1] = sum[1]; 178 | sum[2] = MAC16_16(sum[2], y[i + ord + 1], den[0]); 179 | sum[2] = MAC16_16(sum[2], y[i + ord], den[1]); 180 | y[i + ord + 2] = -SROUND16(sum[2], SIG_SHIFT); 181 | _y[i + 2] = sum[2]; 182 | 183 | sum[3] = MAC16_16(sum[3], y[i + ord + 2], den[0]); 184 | sum[3] = MAC16_16(sum[3], y[i + ord + 1], den[1]); 185 | sum[3] = MAC16_16(sum[3], y[i + ord], den[2]); 186 | y[i + ord + 3] = -SROUND16(sum[3], SIG_SHIFT); 187 | _y[i + 3] = sum[3]; 188 | } 189 | for (; i < N; i++) { 190 | opus_val32 sum = _x[i]; 191 | for (j = 0; j < ord; j++) 192 | sum -= MULT16_16(rden[j], y[i + j]); 193 | y[i + ord] = SROUND16(sum, SIG_SHIFT); 194 | _y[i] = sum; 195 | } 196 | for (i = 0; i < ord; i++) 197 | mem[i] = _y[N - i - 1]; 198 | #endif 199 | rnnoise_free(rden); 200 | rnnoise_free(y); 201 | } 202 | 203 | int _celt_autocorr( 204 | const opus_val16 *x, /* in: [0...n-1] samples x */ 205 | opus_val32 *ac, /* out: [0...lag-1] ac values */ 206 | const opus_val16 *window, 207 | int overlap, 208 | int lag, 209 | int n) { 210 | opus_val32 d; 211 | int i, k; 212 | int fastN = n - lag; 213 | int shift; 214 | const opus_val16 *xptr; 215 | opus_val16 *xx = rnnoise_alloc(n * sizeof(opus_val16)); 216 | 
if (xx == NULL) { 217 | printf("[%s %d] malloc failed\n", __FUNCTION__, __LINE__); 218 | return 0; 219 | } 220 | celt_assert(n > 0); 221 | celt_assert(overlap >= 0); 222 | if (overlap == 0) { 223 | xptr = x; 224 | } else { 225 | memcpy(xx, x, sizeof(opus_val16) * n); 226 | for (i = 0; i < overlap; i++) { 227 | xx[i] = MULT16_16_Q15(x[i], window[i]); 228 | xx[n - i - 1] = MULT16_16_Q15(x[n - i - 1], window[i]); 229 | } 230 | xptr = xx; 231 | } 232 | shift = 0; 233 | #ifdef FIXED_POINT 234 | { 235 | opus_val32 ac0; 236 | ac0 = 1 + (n << 7); 237 | if (n & 1) ac0 += SHR32(MULT16_16(xptr[0], xptr[0]), 9); 238 | for (i = (n & 1); i < n; i += 2) 239 | { 240 | ac0 += SHR32(MULT16_16(xptr[i], xptr[i]), 9); 241 | ac0 += SHR32(MULT16_16(xptr[i + 1], xptr[i + 1]), 9); 242 | } 243 | 244 | shift = celt_ilog2(ac0) - 30 + 10; 245 | shift = (shift) / 2; 246 | if (shift > 0) 247 | { 248 | for (i = 0; i < n; i++) 249 | xx[i] = PSHR32(xptr[i], shift); 250 | xptr = xx; 251 | } 252 | else 253 | shift = 0; 254 | } 255 | #endif 256 | celt_pitch_xcorr(xptr, xptr, ac, fastN, lag + 1); 257 | for (k = 0; k <= lag; k++) { 258 | for (i = k + fastN, d = 0; i < n; i++) 259 | d = MAC16_16(d, xptr[i], xptr[i - k]); 260 | ac[k] += d; 261 | } 262 | #ifdef FIXED_POINT 263 | shift = 2 * shift; 264 | if (shift <= 0) 265 | ac[0] += SHL32((opus_int32)1, -shift); 266 | if (ac[0] < 268435456) 267 | { 268 | int shift2 = 29 - EC_ILOG(ac[0]); 269 | for (i = 0; i <= lag; i++) 270 | ac[i] = SHL32(ac[i], shift2); 271 | shift -= shift2; 272 | } 273 | else if (ac[0] >= 536870912) 274 | { 275 | int shift2 = 1; 276 | if (ac[0] >= 1073741824) 277 | shift2++; 278 | for (i = 0; i <= lag; i++) 279 | ac[i] = SHR32(ac[i], shift2); 280 | shift += shift2; 281 | } 282 | #endif 283 | rnnoise_free(xx); 284 | return shift; 285 | } 286 | -------------------------------------------------------------------------------- /src/celt_lpc.h: -------------------------------------------------------------------------------- 1 | /* Copyright 
(c) 2009-2010 Xiph.Org Foundation 2 | Written by Jean-Marc Valin */ 3 | /* 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | - Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | - Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 19 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 23 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | */ 27 | 28 | #ifndef PLC_H 29 | #define PLC_H 30 | 31 | #include "arch.h" 32 | #include "common.h" 33 | 34 | #if defined(OPUS_X86_MAY_HAVE_SSE4_1) 35 | #include "x86/celt_lpc_sse.h" 36 | #endif 37 | 38 | #define LPC_ORDER 24 39 | 40 | void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p); 41 | 42 | void celt_fir( 43 | const opus_val16 *x, 44 | const opus_val16 *num, 45 | opus_val16 *y, 46 | int N, 47 | int ord); 48 | 49 | void celt_iir(const opus_val32 *x, 50 | const opus_val16 *den, 51 | opus_val32 *y, 52 | int N, 53 | int ord, 54 | opus_val16 *mem); 55 | 56 | int _celt_autocorr(const opus_val16 *x, opus_val32 *ac, 57 | const opus_val16 *window, int overlap, int lag, int n); 58 | 59 | #endif /* PLC_H */ 60 | -------------------------------------------------------------------------------- /src/common.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef COMMON_H 4 | #define COMMON_H 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #define RNN_INLINE inline 11 | #define OPUS_INLINE inline 12 | #ifndef M_PI 13 | #define M_PI 3.14159265358979323846 14 | #endif 15 | /** RNNoise wrapper for malloc(). To do your own dynamic allocation, all you need t 16 | o do is replace this function and rnnoise_free */ 17 | #ifndef OVERRIDE_RNNOISE_ALLOC 18 | 19 | static RNN_INLINE void *rnnoise_alloc(size_t size) { 20 | return malloc(size); 21 | } 22 | 23 | #endif 24 | 25 | /** RNNoise wrapper for free(). 
To do your own dynamic allocation, all you need to do is replace this function and rnnoise_alloc */ 26 | #ifndef OVERRIDE_RNNOISE_FREE 27 | 28 | static RNN_INLINE void rnnoise_free(void *ptr) { 29 | if (ptr) 30 | free(ptr); 31 | } 32 | 33 | #endif 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /src/denoise.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2017 Mozilla */ 2 | /* 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | 7 | - Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | - Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 15 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 16 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 17 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 22 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #ifdef HAVE_CONFIG_H 28 | #include "config.h" 29 | #endif 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include "common.h" 37 | #include 38 | #include "rnnoise.h" 39 | #include "pitch.h" 40 | #include "arch.h" 41 | #include "rnn.h" 42 | #include "rnn_data.h" 43 | 44 | #define STB_FFT_IMPLEMENTAION 45 | 46 | #include "stb_fft.h" 47 | 48 | #define FRAME_SIZE_SHIFT 2 49 | #define FRAME_SIZE (120<analysis_mem, sizeof(float) * FRAME_SIZE); 337 | memcpy(x + FRAME_SIZE, in, sizeof(float) * FRAME_SIZE); 338 | memcpy(st->analysis_mem, in, sizeof(float) * FRAME_SIZE); 339 | forward_transform(X, x); 340 | #if TRAINING 341 | for (int i = lowpass; i < FREQ_SIZE; i++) 342 | X[i].real = X[i].imag = 0; 343 | #endif 344 | compute_band_energy(Ex, X); 345 | rnnoise_free(x); 346 | } 347 | 348 | static int compute_frame_features(DenoiseState *st, cmplx *X, cmplx *P, 349 | float *Ex, float *Ep, float *Exp, float *features, const float *in) { 350 | int i; 351 | float E = 0; 352 | float *ceps_0, *ceps_1, *ceps_2; 353 | float spec_variability = 0; 354 | float Ly[NB_BANDS]; 355 | float *p = rnnoise_alloc(WINDOW_SIZE * sizeof(float)); 356 | if (p == NULL) { 357 | printf("[%s %d] malloc failed\n", __FUNCTION__, __LINE__); 358 | return -1; 359 | } 360 | float pitch_buf[PITCH_BUF_SIZE >> 1]; 361 | int pitch_index; 362 | float gain; 363 | float *(pre[1]); 364 | float tmp[NB_BANDS]; 365 | float follow, logMax; 366 | frame_analysis(st, X, Ex, in); 367 | memmove(st->pitch_buf, st->pitch_buf + FRAME_SIZE, (PITCH_BUF_SIZE - FRAME_SIZE) * sizeof(*st->pitch_buf)); 368 | memcpy(&st->pitch_buf[PITCH_BUF_SIZE - FRAME_SIZE], in, FRAME_SIZE * sizeof(*st->pitch_buf)); 369 | pre[0] = &st->pitch_buf[0]; 370 | pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1); 371 | pitch_search(pitch_buf + (PITCH_MAX_PERIOD >> 1), pitch_buf, PITCH_FRAME_SIZE, 372 | PITCH_MAX_PERIOD - 3 * PITCH_MIN_PERIOD, &pitch_index); 373 | pitch_index = PITCH_MAX_PERIOD - pitch_index; 
374 | 375 | gain = remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD, 376 | PITCH_FRAME_SIZE, &pitch_index, st->last_period, st->last_gain); 377 | st->last_period = pitch_index; 378 | st->last_gain = gain; 379 | memcpy(p, st->pitch_buf + PITCH_BUF_SIZE - WINDOW_SIZE - pitch_index, sizeof(float) * WINDOW_SIZE); 380 | forward_transform(P, p); 381 | compute_band_energy(Ep, P); 382 | compute_band_corr(Exp, X, P); 383 | for (i = 0; i < NB_BANDS; i++) 384 | Exp[i] = Exp[i] * (1.0f / sqrtf(.001f + Ex[i] * Ep[i])); 385 | dct(tmp, Exp); 386 | memcpy(features + NB_BANDS + 2 * NB_DELTA_CEPS, tmp, sizeof(float) * NB_DELTA_CEPS); 387 | features[NB_BANDS + 2 * NB_DELTA_CEPS] -= 1.3f; 388 | features[NB_BANDS + 2 * NB_DELTA_CEPS + 1] -= 0.9f; 389 | features[NB_BANDS + 3 * NB_DELTA_CEPS] = .01f * (pitch_index - 300); 390 | logMax = -2; 391 | follow = -2; 392 | for (i = 0; i < NB_BANDS; i++) { 393 | Ly[i] = log10f(1e-2f + Ex[i]); 394 | Ly[i] = MAX16(logMax - 7, MAX16(follow - 1.5f, Ly[i])); 395 | logMax = MAX16(logMax, Ly[i]); 396 | follow = MAX16(follow - 1.5f, Ly[i]); 397 | E += Ex[i]; 398 | } 399 | if (!TRAINING && E < 0.04f) { 400 | /* If there's no audio, avoid messing up the state. */ 401 | memset(features, 0, (NB_FEATURES) * sizeof(*features)); 402 | rnnoise_free(p); 403 | return 1; 404 | } 405 | dct(features, Ly); 406 | features[0] -= 12; 407 | features[1] -= 4; 408 | ceps_0 = st->cepstral_mem[st->memid]; 409 | ceps_1 = (st->memid < 1) ? st->cepstral_mem[CEPS_MEM + st->memid - 1] : st->cepstral_mem[st->memid - 1]; 410 | ceps_2 = (st->memid < 2) ? 
st->cepstral_mem[CEPS_MEM + st->memid - 2] : st->cepstral_mem[st->memid - 2]; 411 | memcpy(ceps_0, features, sizeof(float) * NB_BANDS); 412 | st->memid++; 413 | for (i = 0; i < NB_DELTA_CEPS; i++) { 414 | features[i] = ceps_0[i] + ceps_2[i]; 415 | features[NB_BANDS + i] = ceps_0[i] - ceps_2[i]; 416 | features[NB_BANDS + NB_DELTA_CEPS + i] = features[i] - 2 * ceps_1[i]; 417 | features[i] += ceps_1[i]; 418 | } 419 | /* Spectral variability features. */ 420 | if (st->memid == CEPS_MEM) st->memid = 0; 421 | for (i = 0; i < CEPS_MEM; i++) { 422 | int j; 423 | float mindist = 1e15f; 424 | for (j = 0; j < CEPS_MEM; j++) { 425 | int k; 426 | float dist = 0; 427 | for (k = 0; k < NB_BANDS; k++) { 428 | float tmp; 429 | tmp = st->cepstral_mem[i][k] - st->cepstral_mem[j][k]; 430 | dist += tmp * tmp; 431 | } 432 | if (j != i) 433 | mindist = MIN32(mindist, dist); 434 | } 435 | spec_variability += mindist; 436 | } 437 | features[NB_BANDS + 3 * NB_DELTA_CEPS + 1] = spec_variability / CEPS_MEM - 2.1f; 438 | rnnoise_free(p); 439 | return TRAINING && E < 0.1f; 440 | } 441 | 442 | static void frame_synthesis(DenoiseState *st, float *out, const cmplx *input) { 443 | float *x = rnnoise_alloc(WINDOW_SIZE * sizeof(float)); 444 | if (x == NULL) { 445 | printf("[%s %d] malloc failed\n", __FUNCTION__, __LINE__); 446 | return; 447 | } 448 | int i; 449 | inverse_transform(x, input); 450 | for (i = 0; i < FRAME_SIZE; i++) 451 | out[i] = x[i] + st->synthesis_mem[i]; 452 | memcpy(st->synthesis_mem, x + FRAME_SIZE, FRAME_SIZE * sizeof(float)); 453 | rnnoise_free(x); 454 | } 455 | 456 | static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) { 457 | int i; 458 | for (i = 0; i < N; i++) { 459 | float xi, yi; 460 | xi = x[i]; 461 | yi = x[i] + mem[0]; 462 | mem[0] = mem[1] + (b[0] * xi - a[0] * yi); 463 | mem[1] = (b[1] * xi - a[1] * yi); 464 | y[i] = yi; 465 | } 466 | } 467 | 468 | void pitch_filter(cmplx *X, const cmplx *P, const float *Ex, const float 
*Ep, 469 | const float *Exp, const float *g) { 470 | int i; 471 | float r[NB_BANDS]; 472 | float *rf = rnnoise_alloc(FREQ_SIZE * sizeof(float)); 473 | if (rf == NULL) { 474 | printf("[%s %d] malloc failed\n", __FUNCTION__, __LINE__); 475 | return; 476 | } 477 | memset(rf, 0, FREQ_SIZE * sizeof(float)); 478 | 479 | for (i = 0; i < NB_BANDS; i++) { 480 | #if 0 481 | if (Exp[i]>g[i]) r[i] = 1; 482 | else r[i] = Exp[i]*(1-g[i])/(.001 + g[i]*(1-Exp[i])); 483 | r[i] = MIN16(1, MAX16(0, r[i])); 484 | #else 485 | if (Exp[i] > g[i]) 486 | r[i] = 1; 487 | else 488 | r[i] = SQUARE(Exp[i]) * (1 - SQUARE(g[i])) / (.001f + SQUARE(g[i]) * (1 - SQUARE(Exp[i]))); 489 | r[i] = sqrtf(MIN16(1, MAX16(0, r[i]))); 490 | #endif 491 | r[i] *= sqrtf(Ex[i] / (1e-8f + Ep[i])); 492 | } 493 | interp_band_gain(rf, r); 494 | for (i = 0; i < FREQ_SIZE; i++) { 495 | X[i].real += rf[i] * P[i].real; 496 | X[i].imag += rf[i] * P[i].imag; 497 | } 498 | float newE[NB_BANDS]; 499 | compute_band_energy(newE, X); 500 | float norm[NB_BANDS]; 501 | float *normf = rnnoise_alloc(FREQ_SIZE * sizeof(float)); 502 | if (normf == NULL) { 503 | printf("[%s %d] malloc failed\n", __FUNCTION__, __LINE__); 504 | rnnoise_free(rf); 505 | return; 506 | } 507 | for (i = 0; i < NB_BANDS; i++) { 508 | norm[i] = sqrtf(Ex[i] / (1e-8 + newE[i])); 509 | } 510 | interp_band_gain(normf, norm); 511 | for (i = 0; i < FREQ_SIZE; i++) { 512 | X[i].real *= normf[i]; 513 | X[i].imag *= normf[i]; 514 | } 515 | rnnoise_free(rf); 516 | rnnoise_free(normf); 517 | } 518 | 519 | float rnnoise_process_frame(DenoiseState *st, float *out, const float *in) { 520 | int i; 521 | cmplx *input = rnnoise_alloc(FREQ_SIZE * sizeof(cmplx)); 522 | cmplx *P = rnnoise_alloc(WINDOW_SIZE * sizeof(cmplx)); 523 | float *x = rnnoise_alloc(FRAME_SIZE * sizeof(float)); 524 | if ((input == NULL) || (P == NULL) || (x == NULL)) { 525 | printf("[%s %d] malloc failed\n", __FUNCTION__, __LINE__); 526 | rnnoise_free(P); 527 | rnnoise_free(x); 528 | rnnoise_free(input); 
529 | return 0; 530 | } 531 | float Ex[NB_BANDS], Ep[NB_BANDS]; 532 | float Exp[NB_BANDS]; 533 | float features[NB_FEATURES]; 534 | float g[NB_BANDS]; 535 | float gf[FREQ_SIZE] = {1}; 536 | float vad_prob = 0; 537 | int silence; 538 | static const float a_hp[2] = {-1.99599, 0.99600}; 539 | static const float b_hp[2] = {-2, 1}; 540 | biquad(x, st->mem_hp_x, in, b_hp, a_hp, FRAME_SIZE); 541 | silence = compute_frame_features(st, input, P, Ex, Ep, Exp, features, x); 542 | 543 | if (!silence) { 544 | compute_rnn(&st->rnn, g, &vad_prob, features); 545 | pitch_filter(input, P, Ex, Ep, Exp, g); 546 | for (i = 0; i < NB_BANDS; i++) { 547 | float alpha = .6f; 548 | g[i] = MAX16(g[i], alpha * st->lastg[i]); 549 | st->lastg[i] = g[i]; 550 | } 551 | interp_band_gain(gf, g); 552 | #if 1 553 | for (i = 0; i < FREQ_SIZE; i++) { 554 | input[i].real *= gf[i]; 555 | input[i].imag *= gf[i]; 556 | } 557 | #endif 558 | } 559 | 560 | frame_synthesis(st, out, input); 561 | rnnoise_free(input); 562 | rnnoise_free(P); 563 | rnnoise_free(x); 564 | return vad_prob; 565 | } 566 | 567 | #if TRAINING 568 | 569 | static float uni_rand() { 570 | return rand()/(double)RAND_MAX-.5; 571 | } 572 | 573 | static void rand_resp(float *a, float *b) { 574 | a[0] = .75*uni_rand(); 575 | a[1] = .75*uni_rand(); 576 | b[0] = .75*uni_rand(); 577 | b[1] = .75*uni_rand(); 578 | } 579 | 580 | int main(int argc, char **argv) { 581 | int i; 582 | int count=0; 583 | static const float a_hp[2] = {-1.99599, 0.99600}; 584 | static const float b_hp[2] = {-2, 1}; 585 | float a_noise[2] = {0}; 586 | float b_noise[2] = {0}; 587 | float a_sig[2] = {0}; 588 | float b_sig[2] = {0}; 589 | float mem_hp_x[2]={0}; 590 | float mem_hp_n[2]={0}; 591 | float mem_resp_x[2]={0}; 592 | float mem_resp_n[2]={0}; 593 | float x[FRAME_SIZE]; 594 | float n[FRAME_SIZE]; 595 | float xn[FRAME_SIZE]; 596 | int vad_cnt=0; 597 | int gain_change_count=0; 598 | float speech_gain = 1, noise_gain = 1; 599 | FILE *f1, *f2, *fout; 600 | DenoiseState *st; 
601 | DenoiseState *noise_state; 602 | DenoiseState *noisy; 603 | st = rnnoise_create(); 604 | noise_state = rnnoise_create(); 605 | noisy = rnnoise_create(); 606 | if (argc!=5) { 607 | fprintf(stderr, "usage: %s \n", argv[0]); 608 | return 1; 609 | } 610 | f1 = fopen(argv[1], "r"); 611 | f2 = fopen(argv[2], "r"); 612 | fout = fopen(argv[4], "w"); 613 | for(i=0;i<150;i++) { 614 | short tmp[FRAME_SIZE]; 615 | fread(tmp, sizeof(short), FRAME_SIZE, f2); 616 | } 617 | while (1) { 618 | cmplx X[FREQ_SIZE], Y[FREQ_SIZE], N[FREQ_SIZE], P[WINDOW_SIZE]; 619 | float Ex[NB_BANDS], Ey[NB_BANDS], En[NB_BANDS], Ep[NB_BANDS]; 620 | float Exp[NB_BANDS]; 621 | float Ln[NB_BANDS]; 622 | float features[NB_FEATURES]; 623 | float g[NB_BANDS]; 624 | float gf[FREQ_SIZE]={1}; 625 | short tmp[FRAME_SIZE]; 626 | float vad=0; 627 | float vad_prob; 628 | float E=0; 629 | if (count == atoi(argv[3])) break; 630 | if (++gain_change_count > 2821) { 631 | speech_gain = pow(10., (-40+(rand()%60))/20.); 632 | noise_gain = pow(10., (-30+(rand()%50))/20.); 633 | if (rand()%10==0) noise_gain = 0; 634 | noise_gain *= speech_gain; 635 | if (rand()%10==0) speech_gain = 0; 636 | gain_change_count = 0; 637 | rand_resp(a_noise, b_noise); 638 | rand_resp(a_sig, b_sig); 639 | lowpass = FREQ_SIZE * 3000./24000. 
* pow(50., rand()/(double)RAND_MAX); 640 | for (i=0;i lowpass) { 642 | band_lp = i; 643 | break; 644 | } 645 | } 646 | } 647 | if (speech_gain != 0) { 648 | fread(tmp, sizeof(short), FRAME_SIZE, f1); 649 | if (feof(f1)) { 650 | rewind(f1); 651 | fread(tmp, sizeof(short), FRAME_SIZE, f1); 652 | } 653 | for (i=0;i 1e9f) { 675 | vad_cnt=0; 676 | } else if (E > 1e8f) { 677 | vad_cnt -= 5; 678 | } else if (E > 1e7f) { 679 | vad_cnt++; 680 | } else { 681 | vad_cnt+=2; 682 | } 683 | if (vad_cnt < 0) vad_cnt = 0; 684 | if (vad_cnt > 15) vad_cnt = 15; 685 | 686 | if (vad_cnt >= 10) vad = 0; 687 | else if (vad_cnt > 0) vad = 0.5f; 688 | else vad = 1.f; 689 | 690 | frame_analysis(st, Y, Ey, x); 691 | frame_analysis(noise_state, N, En, n); 692 | for (i=0;ilast_gain, noisy->last_period); 696 | for (i=0;i 1) g[i] = 1; 699 | if (silence || i > band_lp) g[i] = -1; 700 | if (Ey[i] < 5e-2 && Ex[i] < 5e-2) g[i] = -1; 701 | if (vad==0 && noise_gain==0) g[i] = -1; 702 | } 703 | count++; 704 | #if 0 705 | for (i=0;irnn, g, &vad_prob, features); 718 | interp_band_gain(gf, g); 719 | #if 1 720 | for (i=0;i= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H)) 38 | #include 39 | 40 | typedef int16_t opus_int16; 41 | typedef uint16_t opus_uint16; 42 | typedef int32_t opus_int32; 43 | typedef uint32_t opus_uint32; 44 | #elif defined(_WIN32) 45 | 46 | # if defined(__CYGWIN__) 47 | # include <_G_config.h> 48 | typedef _G_int32_t opus_int32; 49 | typedef _G_uint32_t opus_uint32; 50 | typedef _G_int16 opus_int16; 51 | typedef _G_uint16 opus_uint16; 52 | # elif defined(__MINGW32__) 53 | typedef short opus_int16; 54 | typedef unsigned short opus_uint16; 55 | typedef int opus_int32; 56 | typedef unsigned int opus_uint32; 57 | # elif defined(__MWERKS__) 58 | typedef int opus_int32; 59 | typedef unsigned int opus_uint32; 60 | typedef short opus_int16; 61 | typedef unsigned short opus_uint16; 62 | # else 63 | /* MSVC/Borland */ 64 | typedef __int32 
opus_int32; 65 | typedef unsigned __int32 opus_uint32; 66 | typedef __int16 opus_int16; 67 | typedef unsigned __int16 opus_uint16; 68 | # endif 69 | 70 | #elif defined(__MACOS__) 71 | 72 | # include 73 | typedef SInt16 opus_int16; 74 | typedef UInt16 opus_uint16; 75 | typedef SInt32 opus_int32; 76 | typedef UInt32 opus_uint32; 77 | 78 | #elif (defined(__APPLE__) && defined(__MACH__)) /* MacOS X Framework build */ 79 | 80 | # include 81 | typedef int16_t opus_int16; 82 | typedef u_int16_t opus_uint16; 83 | typedef int32_t opus_int32; 84 | typedef u_int32_t opus_uint32; 85 | 86 | #elif defined(__BEOS__) 87 | 88 | /* Be */ 89 | # include 90 | typedef int16 opus_int16; 91 | typedef u_int16 opus_uint16; 92 | typedef int32_t opus_int32; 93 | typedef u_int32_t opus_uint32; 94 | 95 | #elif defined (__EMX__) 96 | 97 | /* OS/2 GCC */ 98 | typedef short opus_int16; 99 | typedef unsigned short opus_uint16; 100 | typedef int opus_int32; 101 | typedef unsigned int opus_uint32; 102 | 103 | #elif defined (DJGPP) 104 | 105 | /* DJGPP */ 106 | typedef short opus_int16; 107 | typedef unsigned short opus_uint16; 108 | typedef int opus_int32; 109 | typedef unsigned int opus_uint32; 110 | 111 | #elif defined(R5900) 112 | 113 | /* PS2 EE */ 114 | typedef int opus_int32; 115 | typedef unsigned opus_uint32; 116 | typedef short opus_int16; 117 | typedef unsigned short opus_uint16; 118 | 119 | #elif defined(__SYMBIAN32__) 120 | 121 | /* Symbian GCC */ 122 | typedef signed short opus_int16; 123 | typedef unsigned short opus_uint16; 124 | typedef signed int opus_int32; 125 | typedef unsigned int opus_uint32; 126 | 127 | #elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X) 128 | 129 | typedef short opus_int16; 130 | typedef unsigned short opus_uint16; 131 | typedef long opus_int32; 132 | typedef unsigned long opus_uint32; 133 | 134 | #elif defined(CONFIG_TI_C6X) 135 | 136 | typedef short opus_int16; 137 | typedef unsigned short opus_uint16; 138 | typedef int opus_int32; 139 | typedef 
unsigned int opus_uint32; 140 | 141 | #else 142 | 143 | /* Give up, take a reasonable guess */ 144 | typedef short opus_int16; 145 | typedef unsigned short opus_uint16; 146 | typedef int opus_int32; 147 | typedef unsigned int opus_uint32; 148 | 149 | #endif 150 | 151 | #define opus_int int /* used for counters etc; at least 16 bits */ 152 | #define opus_int64 long long 153 | #define opus_int8 signed char 154 | 155 | #define opus_uint unsigned int /* used for counters etc; at least 16 bits */ 156 | #define opus_uint64 unsigned long long 157 | #define opus_uint8 unsigned char 158 | 159 | #endif /* OPUS_TYPES_H */ 160 | -------------------------------------------------------------------------------- /src/pitch.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2007-2008 CSIRO 2 | Copyright (c) 2007-2009 Xiph.Org Foundation 3 | Written by Jean-Marc Valin */ 4 | /** 5 | @file pitch.c 6 | @brief Pitch analysis 7 | */ 8 | 9 | /* 10 | Redistribution and use in source and binary forms, with or without 11 | modification, are permitted provided that the following conditions 12 | are met: 13 | 14 | - Redistributions of source code must retain the above copyright 15 | notice, this list of conditions and the following disclaimer. 16 | 17 | - Redistributions in binary form must reproduce the above copyright 18 | notice, this list of conditions and the following disclaimer in the 19 | documentation and/or other materials provided with the distribution. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER 25 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #ifdef HAVE_CONFIG_H 35 | #include "config.h" 36 | #endif 37 | 38 | #include "pitch.h" 39 | #include "common.h" 40 | #include "celt_lpc.h" 41 | #include "math.h" 42 | 43 | static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len, 44 | int max_pitch, int *best_pitch 45 | #ifdef FIXED_POINT 46 | , int yshift, opus_val32 maxcorr 47 | #endif 48 | ) 49 | { 50 | int i, j; 51 | opus_val32 Syy=1; 52 | opus_val16 best_num[2]; 53 | opus_val32 best_den[2]; 54 | #ifdef FIXED_POINT 55 | int xshift; 56 | 57 | xshift = celt_ilog2(maxcorr)-14; 58 | #endif 59 | 60 | best_num[0] = -1; 61 | best_num[1] = -1; 62 | best_den[0] = 0; 63 | best_den[1] = 0; 64 | best_pitch[0] = 0; 65 | best_pitch[1] = 1; 66 | for (j=0;j0) 71 | { 72 | opus_val16 num; 73 | opus_val32 xcorr16; 74 | xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift)); 75 | #ifndef FIXED_POINT 76 | /* Considering the range of xcorr16, this should avoid both underflows 77 | and overflows (inf) when squaring xcorr16 */ 78 | xcorr16 *= 1e-12f; 79 | #endif 80 | num = MULT16_16_Q15(xcorr16,xcorr16); 81 | if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy)) 82 | { 83 | if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy)) 84 | { 85 | best_num[1] = best_num[0]; 86 | best_den[1] = best_den[0]; 87 | best_pitch[1] = best_pitch[0]; 88 | best_num[0] = num; 89 | best_den[0] = Syy; 90 | best_pitch[0] = i; 91 | } else { 92 | best_num[1] = num; 93 | 
best_den[1] = Syy; 94 | best_pitch[1] = i; 95 | } 96 | } 97 | } 98 | Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift); 99 | Syy = MAX32(1, Syy); 100 | } 101 | } 102 | 103 | static void celt_fir5(const opus_val16 *x, 104 | const opus_val16 *num, 105 | opus_val16 *y, 106 | int N, 107 | opus_val16 *mem) 108 | { 109 | int i; 110 | opus_val16 num0, num1, num2, num3, num4; 111 | opus_val32 mem0, mem1, mem2, mem3, mem4; 112 | num0=num[0]; 113 | num1=num[1]; 114 | num2=num[2]; 115 | num3=num[3]; 116 | num4=num[4]; 117 | mem0=mem[0]; 118 | mem1=mem[1]; 119 | mem2=mem[2]; 120 | mem3=mem[3]; 121 | mem4=mem[4]; 122 | for (i=0;i>1;i++) 171 | x_lp[i] = SHR32(HALF32(HALF32(x[0][(2*i-1)]+x[0][(2*i+1)])+x[0][2*i]), shift); 172 | x_lp[0] = SHR32(HALF32(HALF32(x[0][1])+x[0][0]), shift); 173 | if (C==2) 174 | { 175 | for (i=1;i>1;i++) 176 | x_lp[i] += SHR32(HALF32(HALF32(x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*i]), shift); 177 | x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift); 178 | } 179 | 180 | _celt_autocorr(x_lp, ac, NULL, 0, 181 | 4, len>>1); 182 | 183 | /* Noise floor -40 dB */ 184 | #ifdef FIXED_POINT 185 | ac[0] += SHR32(ac[0],13); 186 | #else 187 | ac[0] *= 1.0001f; 188 | #endif 189 | /* Lag windowing */ 190 | for (i=1;i<=4;i++) 191 | { 192 | /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/ 193 | #ifdef FIXED_POINT 194 | ac[i] -= MULT16_32_Q15(2*i*i, ac[i]); 195 | #else 196 | ac[i] -= ac[i]*(.008f*i)*(.008f*i); 197 | #endif 198 | } 199 | 200 | _celt_lpc(lpc, ac, 4); 201 | for (i=0;i<4;i++) 202 | { 203 | tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp); 204 | lpc[i] = MULT16_16_Q15(lpc[i], tmp); 205 | } 206 | /* Add a zero */ 207 | lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT); 208 | lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]); 209 | lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]); 210 | lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]); 211 | lpc2[4] = MULT16_16_Q15(c1,lpc[3]); 212 | celt_fir5(x_lp, lpc2, x_lp, len>>1, mem); 213 | } 214 | 215 | 
void celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, 216 | opus_val32 *xcorr, int len, int max_pitch) 217 | { 218 | 219 | #if 0 /* This is a simple version of the pitch correlation that should work 220 | well on DSPs like Blackfin and TI C5x/C6x */ 221 | int i, j; 222 | #ifdef FIXED_POINT 223 | opus_val32 maxcorr=1; 224 | #endif 225 | for (i=0;i0); 248 | celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); 249 | for (i=0;i 0); 298 | celt_assert(max_pitch > 0); 299 | lag = len + max_pitch; 300 | 301 | opus_val16 *x_lp4 = rnnoise_alloc((len >> 2) * sizeof(opus_val16)); 302 | opus_val16 *y_lp4 = rnnoise_alloc((lag >> 2) * sizeof(opus_val16)); 303 | opus_val32 *xcorr = rnnoise_alloc((max_pitch >> 1) * sizeof(opus_val32)); 304 | if (x_lp4 == NULL || y_lp4 == NULL || xcorr == NULL) { 305 | 306 | rnnoise_free(x_lp4); 307 | rnnoise_free(xcorr); 308 | rnnoise_free(y_lp4); 309 | printf("[%s %d] malloc failed\n", __FUNCTION__, __LINE__); 310 | return; 311 | } 312 | 313 | /* Downsample by 2 again */ 314 | for (j = 0; j < len >> 2; j++) 315 | x_lp4[j] = x_lp[2 * j]; 316 | for (j = 0; j < lag >> 2; j++) 317 | y_lp4[j] = y[2 * j]; 318 | 319 | #ifdef FIXED_POINT 320 | xmax = celt_maxabs16(x_lp4, len>>2); 321 | ymax = celt_maxabs16(y_lp4, lag>>2); 322 | shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11; 323 | if (shift>0) 324 | { 325 | for (j=0;j>2;j++) 326 | x_lp4[j] = SHR16(x_lp4[j], shift); 327 | for (j=0;j>2;j++) 328 | y_lp4[j] = SHR16(y_lp4[j], shift); 329 | /* Use double the shift for a MAC */ 330 | shift *= 2; 331 | } else { 332 | shift = 0; 333 | } 334 | #endif 335 | 336 | /* Coarse search with 4x decimation */ 337 | 338 | #ifdef FIXED_POINT 339 | maxcorr = 340 | #endif 341 | celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len >> 2, max_pitch >> 2); 342 | 343 | find_best_pitch(xcorr, y_lp4, len >> 2, max_pitch >> 2, best_pitch 344 | #ifdef FIXED_POINT 345 | , 0, maxcorr 346 | #endif 347 | ); 348 | 349 | /* Finer search with 2x decimation */ 350 | #ifdef 
FIXED_POINT 351 | maxcorr=1; 352 | #endif 353 | for (i = 0; i < max_pitch >> 1; i++) { 354 | opus_val32 sum; 355 | xcorr[i] = 0; 356 | if (fastabs(i - 2 * best_pitch[0]) > 2 && fastabs(i - 2 * best_pitch[1]) > 2) 357 | continue; 358 | #ifdef FIXED_POINT 359 | sum = 0; 360 | for (j=0;j>1;j++) 361 | sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); 362 | #else 363 | sum = celt_inner_prod(x_lp, y + i, len >> 1); 364 | #endif 365 | xcorr[i] = MAX32(-1, sum); 366 | #ifdef FIXED_POINT 367 | maxcorr = MAX32(maxcorr, sum); 368 | #endif 369 | } 370 | find_best_pitch(xcorr, y, len >> 1, max_pitch >> 1, best_pitch 371 | #ifdef FIXED_POINT 372 | , shift+1, maxcorr 373 | #endif 374 | ); 375 | 376 | /* Refine by pseudo-interpolation */ 377 | if (best_pitch[0] > 0 && best_pitch[0] < (max_pitch >> 1) - 1) { 378 | opus_val32 a, b, c; 379 | a = xcorr[best_pitch[0] - 1]; 380 | b = xcorr[best_pitch[0]]; 381 | c = xcorr[best_pitch[0] + 1]; 382 | if ((c - a) > MULT16_32_Q15(QCONST16(.7f, 15), b - a)) 383 | offset = 1; 384 | else if ((a - c) > MULT16_32_Q15(QCONST16(.7f, 15), b - c)) 385 | offset = -1; 386 | else 387 | offset = 0; 388 | } else { 389 | offset = 0; 390 | } 391 | *pitch = 2 * best_pitch[0] - offset; 392 | rnnoise_free(x_lp4); 393 | rnnoise_free(y_lp4); 394 | rnnoise_free(xcorr); 395 | } 396 | 397 | #ifdef FIXED_POINT 398 | static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy) 399 | { 400 | opus_val32 x2y2; 401 | int sx, sy, shift; 402 | opus_val32 g; 403 | opus_val16 den; 404 | if (xy == 0 || xx == 0 || yy == 0) 405 | return 0; 406 | sx = celt_ilog2(xx)-14; 407 | sy = celt_ilog2(yy)-14; 408 | shift = sx + sy; 409 | x2y2 = SHR32(MULT16_16(VSHR32(xx, sx), VSHR32(yy, sy)), 14); 410 | if (shift & 1) { 411 | if (x2y2 < 32768) 412 | { 413 | x2y2 <<= 1; 414 | shift--; 415 | } else { 416 | x2y2 >>= 1; 417 | shift++; 418 | } 419 | } 420 | den = celt_rsqrt_norm(x2y2); 421 | g = MULT16_32_Q15(den, xy); 422 | g = VSHR32(g, (shift>>1)-1); 423 | return 
EXTRACT16(MIN32(g, Q15ONE)); 424 | } 425 | #else 426 | 427 | 428 | static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy) { 429 | return xy * (1.0f/sqrtf(1 + xx * yy)); 430 | } 431 | 432 | #endif 433 | 434 | static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}; 435 | 436 | opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, 437 | int N, int *T0_, int prev_period, opus_val16 prev_gain) { 438 | int k, i, T, T0; 439 | opus_val16 g, g0; 440 | opus_val16 pg; 441 | opus_val32 xy, xx, yy, xy2; 442 | opus_val32 xcorr[3]; 443 | opus_val32 best_xy, best_yy; 444 | int offset; 445 | int minperiod0; 446 | 447 | minperiod0 = minperiod; 448 | maxperiod /= 2; 449 | minperiod /= 2; 450 | *T0_ /= 2; 451 | prev_period /= 2; 452 | N /= 2; 453 | x += maxperiod; 454 | if (*T0_ >= maxperiod) 455 | *T0_ = maxperiod - 1; 456 | 457 | T = T0 = *T0_; 458 | opus_val32 *yy_lookup = rnnoise_alloc((maxperiod + 1) * sizeof(opus_val32)); 459 | if (yy_lookup == NULL) { 460 | printf("[%s %d] malloc failed\n", __FUNCTION__, __LINE__); 461 | return 0; 462 | } 463 | dual_inner_prod(x, x, x - T0, N, &xx, &xy); 464 | yy_lookup[0] = xx; 465 | yy = xx; 466 | for (i = 1; i <= maxperiod; i++) { 467 | yy = yy + MULT16_16(x[-i], x[-i]) - MULT16_16(x[N - i], x[N - i]); 468 | yy_lookup[i] = MAX32(0, yy); 469 | } 470 | yy = yy_lookup[T0]; 471 | best_xy = xy; 472 | best_yy = yy; 473 | g = g0 = compute_pitch_gain(xy, xx, yy); 474 | /* Look for any pitch at T/k */ 475 | for (k = 2; k <= 15; k++) { 476 | int T1, T1b; 477 | opus_val16 g1; 478 | opus_val16 cont = 0; 479 | opus_val16 thresh; 480 | T1 = (2 * T0 + k) / (2 * k); 481 | if (T1 < minperiod) 482 | break; 483 | /* Look for another strong correlation at T1b */ 484 | if (k == 2) { 485 | if (T1 + T0 > maxperiod) 486 | T1b = T0; 487 | else 488 | T1b = T0 + T1; 489 | } else { 490 | T1b = (2 * second_check[k] * T0 + k) / (2 * k); 491 | } 492 | dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, 
&xy2); 493 | xy = HALF32(xy + xy2); 494 | yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]); 495 | g1 = compute_pitch_gain(xy, xx, yy); 496 | if (fastabs(T1 - prev_period) <= 1) 497 | cont = prev_gain; 498 | else if (fastabs(T1 - prev_period) <= 2 && 5 * k * k < T0) 499 | cont = HALF16(prev_gain); 500 | else 501 | cont = 0; 502 | thresh = MAX16(QCONST16(.3f, 15), MULT16_16_Q15(QCONST16(.7f, 15), g0) - cont); 503 | /* Bias against very high pitch (very short period) to avoid false-positives 504 | due to short-term correlation */ 505 | if (T1 < 3 * minperiod) 506 | thresh = MAX16(QCONST16(.4f, 15), MULT16_16_Q15(QCONST16(.85f, 15), g0) - cont); 507 | else if (T1 < 2 * minperiod) 508 | thresh = MAX16(QCONST16(.5f, 15), MULT16_16_Q15(QCONST16(.9f, 15), g0) - cont); 509 | if (g1 > thresh) { 510 | best_xy = xy; 511 | best_yy = yy; 512 | T = T1; 513 | g = g1; 514 | } 515 | } 516 | best_xy = MAX32(0, best_xy); 517 | if (best_yy <= best_xy) 518 | pg = Q15ONE; 519 | else 520 | pg = best_xy / (best_yy + 1); 521 | 522 | for (k = 0; k < 3; k++) 523 | xcorr[k] = celt_inner_prod(x, x - (T + k - 1), N); 524 | if ((xcorr[2] - xcorr[0]) > MULT16_32_Q15(QCONST16(.7f, 15), xcorr[1] - xcorr[0])) 525 | offset = 1; 526 | else if ((xcorr[0] - xcorr[2]) > MULT16_32_Q15(QCONST16(.7f, 15), xcorr[1] - xcorr[2])) 527 | offset = -1; 528 | else 529 | offset = 0; 530 | if (pg > g) 531 | pg = g; 532 | *T0_ = 2 * T + offset; 533 | 534 | if (*T0_ < minperiod0) 535 | *T0_ = minperiod0; 536 | rnnoise_free(yy_lookup); 537 | return pg; 538 | } 539 | -------------------------------------------------------------------------------- /src/pitch.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2007-2008 CSIRO 2 | Copyright (c) 2007-2009 Xiph.Org Foundation 3 | Written by Jean-Marc Valin */ 4 | /** 5 | @file pitch.h 6 | @brief Pitch analysis 7 | */ 8 | 9 | /* 10 | Redistribution and use in source and binary forms, with or without 11 | modification, are 
permitted provided that the following conditions 12 | are met: 13 | 14 | - Redistributions of source code must retain the above copyright 15 | notice, this list of conditions and the following disclaimer. 16 | 17 | - Redistributions in binary form must reproduce the above copyright 18 | notice, this list of conditions and the following disclaimer in the 19 | documentation and/or other materials provided with the distribution. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 25 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #ifndef PITCH_H 35 | #define PITCH_H 36 | 37 | //#include "modes.h" 38 | //#include "cpu_support.h" 39 | #include "arch.h" 40 | 41 | void pitch_downsample(celt_sig *x[], opus_val16 *x_lp, 42 | int len, int C); 43 | 44 | void pitch_search(const opus_val16 *x_lp, opus_val16 *y, 45 | int len, int max_pitch, int *pitch); 46 | 47 | opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, 48 | int N, int *T0, int prev_period, opus_val16 prev_gain); 49 | 50 | 51 | /* OPT: This is the kernel you really want to optimize. It gets used a lot 52 | by the prefilter and by the PLC. 
*/ 53 | static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) 54 | { 55 | int j; 56 | opus_val16 y_0, y_1, y_2, y_3; 57 | celt_assert(len>=3); 58 | y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */ 59 | y_0=*y++; 60 | y_1=*y++; 61 | y_2=*y++; 62 | for (j=0;j 33 | #include 34 | #include "opus_types.h" 35 | #include "common.h" 36 | #include "arch.h" 37 | #include "rnn.h" 38 | #include "rnn_data.h" 39 | #include 40 | 41 | static OPUS_INLINE float fastPow2(float p) { 42 | float clipp = (p < -126) ? -126.0f : p; 43 | union { 44 | uint32_t i; 45 | float f; 46 | } v = {(uint32_t) ((1 << 23) * (clipp + 126.94269504f))}; 47 | return v.f; 48 | } 49 | 50 | static OPUS_INLINE float 51 | fastExp(float p) { 52 | return fastPow2(1.442695040f * p); 53 | } 54 | 55 | static OPUS_INLINE float tansig_approx(float x) { 56 | const float z = fastExp((WEIGHTS_SCALE * 2) * x); 57 | return (z - 1) / (z + 1); 58 | } 59 | 60 | static OPUS_INLINE float sigmoid_approx(float x) { 61 | return 1.f / (1.f + fastExp(-WEIGHTS_SCALE * x)); 62 | } 63 | 64 | static OPUS_INLINE float relu(float x) { 65 | return WEIGHTS_SCALE * x * (x > 0); 66 | } 67 | 68 | void compute_dense(const DenseLayer *layer, float *output, const float *input) { 69 | int i, j; 70 | int M = layer->nb_inputs; 71 | int N = layer->nb_neurons; 72 | int stride = N; 73 | int idx = 0; 74 | if (layer->activation == ACTIVATION_SIGMOID) { 75 | for (i = 0; i < N; i++) { /* Compute update gate. */ 76 | float sum = layer->bias[i]; 77 | for (j = 0; j < M; j++) { 78 | idx = j * stride + i; 79 | sum += layer->input_weights[idx] * input[j]; 80 | } 81 | output[i] = sigmoid_approx(sum); 82 | } 83 | } else if (layer->activation == ACTIVATION_TANH) { 84 | for (i = 0; i < N; i++) { /* Compute update gate. 
*/ 85 | float sum = layer->bias[i]; 86 | for (j = 0; j < M; j++) { 87 | idx = j * stride + i; 88 | sum += layer->input_weights[idx] * input[j]; 89 | } 90 | output[i] = tansig_approx(sum); 91 | 92 | } 93 | 94 | } else if (layer->activation == ACTIVATION_RELU) { 95 | for (i = 0; i < N; i++) { /* Compute update gate. */ 96 | float sum = layer->bias[i]; 97 | for (j = 0; j < M; j++) { 98 | idx = j * stride + i; 99 | sum += layer->input_weights[idx] * input[j]; 100 | } 101 | output[i] = relu(sum); 102 | } 103 | 104 | } else { 105 | *(int *) 0 = 0; 106 | } 107 | } 108 | 109 | void compute_gru(float *update, float *reset, const GRULayer *gru, float *state, const float *input) { 110 | int i, j; 111 | if ((reset == NULL) || (update == NULL)) { 112 | printf("[%s %d] malloc failed\n", __FUNCTION__, __LINE__); 113 | rnnoise_free(update); 114 | rnnoise_free(reset); 115 | return; 116 | } 117 | int M = gru->nb_inputs; 118 | int N = gru->nb_neurons; 119 | int stride = 3 * N; 120 | int idx = 0; 121 | if (M > N) { 122 | for (i = 0; i < N; i++) { 123 | /* Compute update gate. */ 124 | float sum_update = gru->bias[i]; 125 | /* Compute reset gate. */ 126 | float sum_reset = gru->bias[N + i]; 127 | for (j = 0; j < N; j++) { 128 | idx = j * stride + i; 129 | sum_update += gru->input_weights[idx] * input[j]; 130 | sum_reset += gru->input_weights[N + idx] * input[j]; 131 | sum_reset += gru->recurrent_weights[N + idx] * state[j]; 132 | sum_update += gru->recurrent_weights[idx] * state[j]; 133 | } 134 | for (j = N; j < M; j++) { 135 | idx = j * stride + i; 136 | sum_update += gru->input_weights[idx] * input[j]; 137 | sum_reset += gru->input_weights[N + idx] * input[j]; 138 | } 139 | update[i] = sigmoid_approx(sum_update); 140 | reset[i] = sigmoid_approx(sum_reset); 141 | } 142 | if (gru->activation == ACTIVATION_SIGMOID) { 143 | for (i = 0; i < N; i++) { 144 | /* Compute output. 
*/ 145 | float sum_output = gru->bias[(N << 1) + i]; 146 | for (j = 0; j < N; j++) { 147 | idx = j * stride + i; 148 | sum_output += gru->recurrent_weights[(N << 1) + idx] * state[j] * reset[j]; 149 | sum_output += gru->input_weights[(N << 1) + idx] * input[j]; 150 | } 151 | for (j = N; j < M; j++) { 152 | idx = j * stride + i; 153 | sum_output += gru->input_weights[(N << 1) + idx] * input[j]; 154 | } 155 | state[i] = update[i] * state[i] + (1 - update[i]) * sigmoid_approx(sum_output); 156 | } 157 | } else if (gru->activation == ACTIVATION_TANH) { 158 | for (i = 0; i < N; i++) { 159 | /* Compute output. */ 160 | float sum_output = gru->bias[(N << 1) + i]; 161 | for (j = 0; j < N; j++) { 162 | idx = j * stride + i; 163 | sum_output += gru->recurrent_weights[(N << 1) + idx] * state[j] * reset[j]; 164 | sum_output += gru->input_weights[(N << 1) + idx] * input[j]; 165 | } 166 | for (j = N; j < M; j++) { 167 | idx = j * stride + i; 168 | sum_output += gru->input_weights[(N << 1) + idx] * input[j]; 169 | } 170 | state[i] = update[i] * state[i] + (1 - update[i]) * tansig_approx(sum_output); 171 | } 172 | } else if (gru->activation == ACTIVATION_RELU) { 173 | for (i = 0; i < N; i++) { 174 | /* Compute output. */ 175 | float sum_output = gru->bias[(N << 1) + i]; 176 | for (j = 0; j < N; j++) { 177 | idx = j * stride + i; 178 | sum_output += gru->recurrent_weights[(N << 1) + idx] * state[j] * reset[j]; 179 | sum_output += gru->input_weights[(N << 1) + idx] * input[j]; 180 | } 181 | for (j = N; j < M; j++) { 182 | idx = j * stride + i; 183 | sum_output += gru->input_weights[(N << 1) + idx] * input[j]; 184 | } 185 | state[i] = update[i] * state[i] + (1 - update[i]) * relu(sum_output); 186 | } 187 | } else { 188 | for (i = 0; i < N; i++) { 189 | /* Compute output. 
*/ 190 | float sum_output = gru->bias[(N << 1) + i]; 191 | for (j = 0; j < N; j++) { 192 | idx = j * stride + i; 193 | sum_output += gru->recurrent_weights[(N << 1) + idx] * state[j] * reset[j]; 194 | sum_output += gru->input_weights[(N << 1) + idx] * input[j]; 195 | } 196 | for (j = N; j < M; j++) { 197 | idx = j * stride + i; 198 | sum_output += gru->input_weights[(N << 1) + idx] * input[j]; 199 | } 200 | state[i] = update[i] * state[i] + (1 - update[i]) * sum_output; 201 | } 202 | } 203 | } else { 204 | for (i = 0; i < N; i++) { 205 | /* Compute update gate. */ 206 | float sum_update = gru->bias[i]; 207 | /* Compute reset gate. */ 208 | float sum_reset = gru->bias[N + i]; 209 | for (j = M; j < N; j++) { 210 | idx = j * stride + i; 211 | sum_update += gru->input_weights[idx] * input[j]; 212 | sum_reset += gru->input_weights[N + idx] * input[j]; 213 | sum_reset += gru->recurrent_weights[N + idx] * state[j]; 214 | sum_update += gru->recurrent_weights[idx] * state[j]; 215 | } 216 | for (j = 0; j < M; j++) { 217 | idx = j * stride + i; 218 | sum_update += gru->input_weights[idx] * input[j]; 219 | sum_reset += gru->input_weights[N + idx] * input[j]; 220 | } 221 | update[i] = sigmoid_approx(sum_update); 222 | reset[i] = sigmoid_approx(sum_reset); 223 | } 224 | if (gru->activation == ACTIVATION_SIGMOID) { 225 | for (i = 0; i < N; i++) { 226 | /* Compute output. */ 227 | float sum_output = gru->bias[(N << 1) + i]; 228 | for (j = M; j < N; j++) { 229 | idx = j * stride + i; 230 | sum_output += gru->recurrent_weights[(N << 1) + idx] * state[j] * reset[j]; 231 | sum_output += gru->input_weights[(N << 1) + idx] * input[j]; 232 | } 233 | for (j = 0; j < M; j++) { 234 | sum_output += gru->input_weights[(N << 1) + idx] * input[j]; 235 | } 236 | state[i] = update[i] * state[i] + (1 - update[i]) * sigmoid_approx(sum_output); 237 | } 238 | } else if (gru->activation == ACTIVATION_TANH) { 239 | for (i = 0; i < N; i++) { 240 | /* Compute output. 
*/ 241 | float sum_output = gru->bias[(N << 1) + i]; 242 | for (j = M; j < N; j++) { 243 | idx = j * stride + i; 244 | sum_output += gru->recurrent_weights[(N << 1) + idx] * state[j] * reset[j]; 245 | sum_output += gru->input_weights[(N << 1) + idx] * input[j]; 246 | } 247 | for (j = 0; j < M; j++) { 248 | idx = j * stride + i; 249 | sum_output += gru->input_weights[(N << 1) + idx] * input[j]; 250 | } 251 | state[i] = update[i] * state[i] + (1 - update[i]) * tansig_approx(sum_output); 252 | } 253 | } else if (gru->activation == ACTIVATION_RELU) { 254 | for (i = 0; i < N; i++) { 255 | /* Compute output. */ 256 | float sum_output = gru->bias[(N << 1) + i]; 257 | for (j = M; j < N; j++) { 258 | idx = j * stride + i; 259 | sum_output += gru->recurrent_weights[(N << 1) + idx] * state[j] * reset[j]; 260 | sum_output += gru->input_weights[(N << 1) + idx] * input[j]; 261 | } 262 | for (j = 0; j < M; j++) { 263 | idx = j * stride + i; 264 | sum_output += gru->input_weights[(N << 1) + idx] * input[j]; 265 | } 266 | state[i] = update[i] * state[i] + (1 - update[i]) * relu(sum_output); 267 | } 268 | } else { 269 | for (i = 0; i < N; i++) { 270 | /* Compute output. 
*/ 271 | float sum_output = gru->bias[(N << 1) + i]; 272 | for (j = M; j < N; j++) { 273 | idx = j * stride + i; 274 | sum_output += gru->recurrent_weights[(N << 1) + idx] * state[j] * reset[j]; 275 | sum_output += gru->input_weights[(N << 1) + idx] * input[j]; 276 | } 277 | for (j = 0; j < M; j++) { 278 | idx = j * stride + i; 279 | sum_output += gru->input_weights[(N << 1) + idx] * input[j]; 280 | } 281 | state[i] = update[i] * state[i] + (1 - update[i]) * sum_output; 282 | } 283 | } 284 | } 285 | } 286 | 287 | #define INPUT_SIZE 42 288 | 289 | void compute_rnn(RNNState *rnn, float *gains, float *vad, const float *input) { 290 | float *cache = rnnoise_alloc(MAX_NEURONS * 5 * sizeof(float)); 291 | if (cache == NULL) { 292 | printf("[%s %d] malloc failed\n", __FUNCTION__, __LINE__); 293 | return; 294 | } 295 | float *update = cache + (MAX_NEURONS * 3); 296 | float *reset = update + MAX_NEURONS; 297 | float *dense_out = cache; 298 | float *noise_input = cache; 299 | float *denoise_input = cache; 300 | compute_dense(&input_dense, dense_out, input); 301 | compute_gru(update, reset, &vad_gru, rnn->vad_gru_state, dense_out); 302 | compute_dense(&vad_output, vad, rnn->vad_gru_state); 303 | memcpy(noise_input + INPUT_DENSE_SIZE, rnn->vad_gru_state, VAD_GRU_SIZE * sizeof(float)); 304 | memcpy(noise_input + INPUT_DENSE_SIZE + VAD_GRU_SIZE, input, INPUT_SIZE * sizeof(float)); 305 | compute_gru(update, reset, &noise_gru, rnn->noise_gru_state, noise_input); 306 | memcpy(denoise_input, rnn->vad_gru_state, VAD_GRU_SIZE * sizeof(float)); 307 | memcpy(denoise_input + VAD_GRU_SIZE, rnn->noise_gru_state, NOISE_GRU_SIZE * sizeof(float)); 308 | memcpy(denoise_input + VAD_GRU_SIZE + NOISE_GRU_SIZE, input, INPUT_SIZE * sizeof(float)); 309 | compute_gru(update, reset, &denoise_gru, rnn->denoise_gru_state, denoise_input); 310 | compute_dense(&denoise_output, gains, rnn->denoise_gru_state); 311 | rnnoise_free(cache); 312 | } 313 | 
-------------------------------------------------------------------------------- /src/rnn.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2017 Jean-Marc Valin */ 2 | /* 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | 7 | - Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | - Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 15 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 16 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 17 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 22 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef RNN_H_
#define RNN_H_

#include "opus_types.h"

/* Factor that maps a stored integer weight back to a float.
   training/dump_rnn.py writes each weight as round(256 * w). */
#define WEIGHTS_SCALE (1.f/256)

/* Unit of scratch-buffer sizing: compute_rnn() allocates MAX_NEURONS * 5 floats. */
#define MAX_NEURONS 128

/* Values stored in the `activation` field of DenseLayer / GRULayer. */
#define ACTIVATION_TANH 0
#define ACTIVATION_SIGMOID 1
#define ACTIVATION_RELU 2

/* Storage type of the quantised weights (8-bit signed). */
typedef signed char rnn_weight;

/* Fully connected layer description. */
typedef struct {
    const rnn_weight *bias;           /* quantised bias values */
    const rnn_weight *input_weights;  /* quantised input weights */
    int nb_inputs;                    /* number of inputs */
    int nb_neurons;                   /* number of outputs */
    int activation;                   /* one of the ACTIVATION_* values */
} DenseLayer;

/* GRU layer description. */
typedef struct {
    const rnn_weight *bias;               /* quantised bias values */
    const rnn_weight *input_weights;      /* quantised input weights */
    const rnn_weight *recurrent_weights;  /* quantised recurrent weights */
    int nb_inputs;                        /* number of inputs */
    int nb_neurons;                       /* number of units (state size) */
    int activation;                       /* one of the ACTIVATION_* values */
} GRULayer;

/* Recurrent state of the whole network; the struct body is in rnn_data.h. */
typedef struct RNNState RNNState;

/* Evaluate a dense layer: reads `input`, writes `output`. */
void compute_dense(const DenseLayer *layer, float *output, const float *input);

/* Advance a GRU layer by one step, updating `state` in place.
   `update` and `reset` are caller-provided scratch buffers
   (compute_rnn() passes MAX_NEURONS floats for each). */
void compute_gru(float *update, float *reset, const GRULayer *gru, float *state, const float *input);

/* Run the full network for one frame: reads `input`, updates `rnn`,
   writes `gains` and `vad`. */
void compute_rnn(RNNState *rnn, float *gains, float *vad, const float *input);

#endif /* RNN_H_ */
denoise_gru_state[DENOISE_GRU_SIZE]; 30 | }; 31 | 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /src/rnn_train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from __future__ import print_function 4 | 5 | from keras.models import Sequential 6 | from keras.models import Model 7 | from keras.layers import Input 8 | from keras.layers import Dense 9 | from keras.layers import LSTM 10 | from keras.layers import GRU 11 | from keras.layers import SimpleRNN 12 | from keras.layers import Dropout 13 | from keras import losses 14 | import h5py 15 | 16 | from keras import backend as K 17 | import numpy as np 18 | 19 | print('Build model...') 20 | main_input = Input(shape=(None, 22), name='main_input') 21 | #x = Dense(44, activation='relu')(main_input) 22 | #x = GRU(44, dropout=0.0, recurrent_dropout=0.0, activation='tanh', recurrent_activation='sigmoid', return_sequences=True)(x) 23 | x=main_input 24 | x = GRU(128, activation='tanh', recurrent_activation='sigmoid', return_sequences=True)(x) 25 | #x = GRU(128, return_sequences=True)(x) 26 | #x = GRU(22, activation='relu', return_sequences=True)(x) 27 | x = Dense(22, activation='sigmoid')(x) 28 | #x = Dense(22, activation='softplus')(x) 29 | model = Model(inputs=main_input, outputs=x) 30 | 31 | batch_size = 32 32 | 33 | print('Loading data...') 34 | with h5py.File('denoise_data.h5', 'r') as hf: 35 | all_data = hf['denoise_data'][:] 36 | print('done.') 37 | 38 | window_size = 500 39 | 40 | nb_sequences = len(all_data)//window_size 41 | print(nb_sequences, ' sequences') 42 | x_train = all_data[:nb_sequences*window_size, :-22] 43 | x_train = np.reshape(x_train, (nb_sequences, window_size, 22)) 44 | 45 | y_train = np.copy(all_data[:nb_sequences*window_size, -22:]) 46 | y_train = np.reshape(y_train, (nb_sequences, window_size, 22)) 47 | 48 | #y_train = -20*np.log10(np.add(y_train, .03)); 49 | 50 | all_data = 0; 
def file_trans(args):
    """Convert a raw float32 feature dump into an HDF5 file.

    Reads ``args.bin_file`` as a flat array of float32 values, reshapes it to
    the ``<height>x<width>`` shape given in ``args.matrix_shape`` and stores
    the matrix as dataset ``'data'`` in ``args.h5_file`` (overwritten if it
    already exists).

    Raises:
        ValueError: if ``args.matrix_shape`` is not two integers separated by
            ``x``, or if the file size does not match the requested shape.
    """
    # Parse the shape first so a malformed argument fails before any file I/O.
    height, width = (int(dim) for dim in args.matrix_shape.split('x'))
    data = np.fromfile(args.bin_file, dtype='float32')
    data = np.reshape(data, (height, width))
    # The context manager closes the HDF5 handle even if create_dataset raises.
    with h5py.File(args.h5_file, 'w') as h5f:
        h5f.create_dataset('data', data=data)
def printVector(f, vector, name):
    """Write ``vector`` to ``f`` as a C array of quantised ``rnn_weight`` values.

    The input is flattened, each value is multiplied by 256, rounded and
    clamped to the range of a signed 8-bit integer, then written as
    ``static const rnn_weight <name>[<len>] = {...};`` with eight values per
    line.

    Args:
        f: writable text file object.
        vector: array-like of weights (any shape).
        name: C identifier to use for the array.
    """
    v = np.reshape(vector, (-1))
    f.write('static const rnn_weight {}[{}] = {{\n '.format(name, len(v)))
    last = len(v) - 1
    for i in range(len(v)):
        # rnn_weight is a signed char: clamp on both sides so a large negative
        # weight cannot overflow the C type (previously only +127 was enforced).
        quantised = max(-128, min(127, int(round(256*v[i]))))
        f.write('{}'.format(quantised))
        if i == last:
            break
        f.write(',')
        # Start a new line after every eighth value.
        f.write("\n " if i % 8 == 7 else " ")
    f.write('\n};\n\n')
def rnn_dump(args):
    """Export a trained Keras model as a C source/header pair.

    Loads the model from ``args.model_file``, writes every layer that has
    weights to ``args.code_file`` / ``args.header_file`` via ``printLayer``,
    and finishes the header with a ``struct RNNState`` holding one state array
    for each layer that has more than two weight tensors (the recurrent
    layers).

    Args:
        args: namespace with ``model_file``, ``code_file`` and ``header_file``
            attributes (paths).
    """
    model = load_model(args.model_file, custom_objects={
        'msse': msse,
        'mean_squared_sqrt_error': mean_squared_sqrt_error,
        'my_crossentropy': my_crossentropy,
        'mycost': mycost,
        'WeightClip': WeightClip,
    })

    # Context managers ensure both outputs are flushed and closed even when
    # a layer fails to serialise part-way through.
    with open(args.code_file, 'w') as f, open(args.header_file, 'w') as hf:
        f.write('/*This file is automatically generated from a Keras model*/\n\n')
        f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "rnn.h"\n\n')

        hf.write('/*This file is automatically generated from a Keras model*/\n\n')
        hf.write('#ifndef RNN_DATA_H\n#define RNN_DATA_H\n\n#include "rnn.h"\n\n')

        layer_list = []
        for layer in model.layers:
            # Fetch the weights once per layer instead of once per test.
            nb_weights = len(layer.get_weights())
            if nb_weights > 0:
                printLayer(f, hf, layer)
            if nb_weights > 2:
                layer_list.append(layer.name)

        hf.write('struct RNNState {\n')
        for name in layer_list:
            hf.write(' float {}_state[{}_SIZE];\n'.format(name, name.upper()))
        hf.write('};\n')
        hf.write('\n\n#endif\n')
main() 129 | -------------------------------------------------------------------------------- /training/rnn_train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | #from __future__ import print_function 4 | 5 | import keras 6 | from keras.models import Sequential, Model 7 | from keras.layers import Input, Dense, LSTM, GRU, SimpleRNN, Dropout, concatenate 8 | from keras import losses, regularizers 9 | from keras.constraints import min_max_norm, Constraint 10 | from keras import backend as K 11 | import numpy as np 12 | import argparse, os 13 | import h5py 14 | 15 | #import tensorflow as tf 16 | #from keras.backend.tensorflow_backend import set_session 17 | #config = tf.ConfigProto() 18 | #config.gpu_options.per_process_gpu_memory_fraction = 0.42 19 | #set_session(tf.Session(config=config)) 20 | 21 | def my_crossentropy(y_true, y_pred): 22 | return K.mean(2*K.abs(y_true-0.5) * K.binary_crossentropy(y_pred, y_true), axis=-1) 23 | 24 | def mymask(y_true): 25 | return K.minimum(y_true+1., 1.) 
def rnn_training(args):
    """Build the three-GRU network, train it and save the model.

    The network takes 42 values per frame and has two outputs: the 22-value
    ``denoise_output`` and the single-value ``vad_output``. Training data is
    read from dataset ``'data'`` of ``args.data_file``; the trained model is
    written to ``args.model_file``.

    Column layout read from the data matrix: 0:42 network input, 42:64 target
    for ``denoise_output``, 64:86 a further 22-column block (sliced but not
    used for training), 86:87 target for ``vad_output``.
    """
    l2_weight = 0.000001
    clip = WeightClip(0.499)

    def dense(units, activation, name):
        # Dense layer with kernel and bias clipped by the shared constraint.
        return Dense(units, activation=activation, name=name,
                     kernel_constraint=clip, bias_constraint=clip)

    def gru(units, activation, name):
        # Sequence-returning GRU with L2 regularisation and clipped weights.
        return GRU(units, activation=activation, recurrent_activation='sigmoid',
                   return_sequences=True, name=name,
                   kernel_regularizer=regularizers.l2(l2_weight),
                   recurrent_regularizer=regularizers.l2(l2_weight),
                   kernel_constraint=clip, recurrent_constraint=clip,
                   bias_constraint=clip)

    print('Build model...')
    main_input = Input(shape=(None, 42), name='main_input')
    dense_features = dense(24, 'tanh', 'input_dense')(main_input)
    vad_gru = gru(24, 'tanh', 'vad_gru')(dense_features)
    vad_output = dense(1, 'sigmoid', 'vad_output')(vad_gru)
    noise_input = keras.layers.concatenate([dense_features, vad_gru, main_input])
    noise_gru = gru(48, 'relu', 'noise_gru')(noise_input)
    denoise_input = keras.layers.concatenate([vad_gru, noise_gru, main_input])
    denoise_gru = gru(96, 'tanh', 'denoise_gru')(denoise_input)
    denoise_output = dense(22, 'sigmoid', 'denoise_output')(denoise_gru)

    model = Model(inputs=main_input, outputs=[denoise_output, vad_output])
    model.compile(loss=[mycost, my_crossentropy],
                  metrics=[msse],
                  optimizer='adam',
                  loss_weights=[10, 0.5])

    batch_size = 32

    print('Loading data...')
    with h5py.File(args.data_file, 'r') as hf:
        all_data = hf['data'][:]
    print('done.')

    window_size = 2000
    nb_sequences = len(all_data)//window_size
    usable_rows = nb_sequences*window_size
    print(nb_sequences, ' sequences')

    def copied_columns(first, last):
        # Copy columns [first, last) and fold the rows into fixed-size windows.
        block = np.copy(all_data[:usable_rows, first:last])
        return np.reshape(block, (nb_sequences, window_size, last - first))

    x_train = np.reshape(all_data[:usable_rows, :42],
                         (nb_sequences, window_size, 42))
    y_train = copied_columns(42, 64)
    noise_train = copied_columns(64, 86)  # sliced as in the data layout; not fed to fit()
    vad_train = copied_columns(86, 87)

    # Drop the reference to the full matrix before training starts.
    del all_data

    print(len(x_train), 'train sequences. x shape =', x_train.shape, 'y shape = ', y_train.shape)

    print('Train...')
    model.fit(x_train, [y_train, vad_train],
              batch_size=batch_size,
              epochs=120,
              validation_split=0.1)
    model.save(args.model_file)