├── AUTHORS ├── sample.wav ├── .gitignore ├── training ├── bin2hdf5.py ├── dump_rnn.py └── rnn_train.py ├── doc └── Doxyfile.in ├── src ├── rnn_data.h ├── common.h ├── rnn_data.c ├── celt_lpc.h ├── rnn_train.py ├── tansig_table.h ├── rnn.h ├── pitch.h ├── opus_types.h ├── rnn_reader.c ├── _kiss_fft_guts.h ├── rnn.c ├── kiss_fft.h ├── arch.h ├── celt_lpc.c ├── pitch.c ├── kiss_fft.c └── denoise.c ├── examples ├── bertool.c └── rnnoise_demo.c ├── COPYING ├── Makefile ├── README.md └── include └── rnnoise.h /AUTHORS: -------------------------------------------------------------------------------- 1 | Jean-Marc Valin 2 | -------------------------------------------------------------------------------- /sample.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sysprog21/rnnoise/HEAD/sample.wav -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.a 3 | *.raw 4 | *.svg 5 | *.prof 6 | *.gperf 7 | examples/bertool 8 | examples/rnnoise_demo 9 | # generated 10 | clean.wav 11 | -------------------------------------------------------------------------------- /training/bin2hdf5.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import h5py 7 | import sys 8 | 9 | data = np.fromfile(sys.argv[1], dtype='float32'); 10 | data = np.reshape(data, (int(sys.argv[2]), int(sys.argv[3]))); 11 | h5f = h5py.File(sys.argv[4], 'w'); 12 | h5f.create_dataset('data', data=data) 13 | h5f.close() 14 | -------------------------------------------------------------------------------- /doc/Doxyfile.in: -------------------------------------------------------------------------------- 1 | # Process with doxygen to generate API documentation 2 | 3 | PROJECT_NAME = 
@PACKAGE_NAME@ 4 | PROJECT_NUMBER = @PACKAGE_VERSION@ 5 | PROJECT_BRIEF = "RNN-based noise suppressor." 6 | INPUT = @top_srcdir@/include/rnnoise.h 7 | OPTIMIZE_OUTPUT_FOR_C = YES 8 | 9 | QUIET = YES 10 | WARNINGS = YES 11 | WARN_IF_UNDOCUMENTED = YES 12 | WARN_IF_DOC_ERROR = YES 13 | WARN_NO_PARAMDOC = YES 14 | 15 | JAVADOC_AUTOBRIEF = YES 16 | SORT_MEMBER_DOCS = NO 17 | 18 | HAVE_DOT = @HAVE_DOT@ 19 | -------------------------------------------------------------------------------- /src/rnn_data.h: -------------------------------------------------------------------------------- 1 | #ifndef RNN_DATA_H 2 | #define RNN_DATA_H 3 | 4 | #include "rnn.h" 5 | 6 | struct RNNModel { 7 | int input_dense_size; 8 | const DenseLayer *input_dense; 9 | 10 | int vad_gru_size; 11 | const GRULayer *vad_gru; 12 | 13 | int noise_gru_size; 14 | const GRULayer *noise_gru; 15 | 16 | int denoise_gru_size; 17 | const GRULayer *denoise_gru; 18 | 19 | int denoise_output_size; 20 | const DenseLayer *denoise_output; 21 | 22 | int vad_output_size; 23 | const DenseLayer *vad_output; 24 | }; 25 | 26 | struct RNNState { 27 | const RNNModel *model; 28 | float *vad_gru_state; 29 | float *noise_gru_state; 30 | float *denoise_gru_state; 31 | }; 32 | 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /examples/bertool.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #define FRAME_SIZE 480 7 | 8 | int main(int argc, char **argv) { 9 | int i, bt = 0, bd = 0; 10 | double sqsum = 0; 11 | FILE *f1, *f2; 12 | if (argc!=3) { 13 | fprintf(stderr, "usage: %s \n", argv[0]); 14 | return 1; 15 | } 16 | f1 = fopen(argv[1], "rb"); 17 | f2 = fopen(argv[2], "rb"); 18 | while (!feof(f1)) { 19 | int16_t a[FRAME_SIZE]; 20 | int16_t b[FRAME_SIZE]; 21 | int br1 = fread(a, sizeof(int16_t), FRAME_SIZE, f1); 22 | int br2 = fread(b, sizeof(int16_t), FRAME_SIZE, f2); 23 | if (br1 != 
br2 || br1 < FRAME_SIZE) break; 24 | for (i=0;i callgrind.gperf 41 | gprof2dot --format=callgrind callgrind.gperf -z main | dot -T svg > gperf.svg 42 | ffmpeg -i clean.wav -ac 1 -ar 48000 -f s16le -acodec pcm_s16le clean.raw 43 | examples/bertool clean.raw target.raw 44 | 45 | valgrind: 46 | valgrind examples/rnnoise_demo sample_short.wav /dev/null 47 | 48 | check: sample.wav examples/rnnoise_demo 49 | examples/rnnoise_demo $< clean.wav 50 | 51 | .PHONY: clean 52 | 53 | clean: 54 | $(RM) $(OBJS) examples/*.o *.a src/denoise_training examples/rnnoise_demo 55 | $(RM) clean.raw clean.wav 56 | $(RM) gperf.svg 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RNNoise 2 | A noise suppression library based on a recurrent neural network. 3 | 4 | ## Build 5 | 6 | Prerequisites 7 | * macOS: `brew install libsndfile libsoxr sox` 8 | * Debian/Ubuntu: `sudo apt install libsndfile1-dev libsoxr-dev libsox-dev` 9 | 10 | To compile, just type: 11 | ```shell 12 | make 13 | ``` 14 | 15 | A sample noisy file, `sample.wav`, is included; run `make check` to generate the processed output, `clean.wav`. 16 | 17 | ## Test 18 | 19 | While it is meant to be used as a library, a simple command-line tool is 20 | provided as an example. It can be used as: 21 | ```shell 22 | examples/rnnoise_demo sample.wav output.wav 23 | ``` 24 | 25 | ## Training 26 | 27 | ### Audio feature extraction 28 | 29 | Build the audio feature extraction tool: 30 | ```shell 31 | make src/denoise_training 32 | ``` 33 | 34 | Use the `denoise_training` tool to extract the audio feature array from speech and noise audio clips: 35 | ```shell 36 | src/denoise_training signal.raw noise.raw count > training.f32 37 | ``` 38 | (Note the resulting matrix size and substitute it for `500000 87` below.) 39 | 40 | ### RNN model training 41 | 42 | Convert the feature array to HDF5 inside the `training` directory and go through the training process. 
43 | ```shell 44 | cd training ; ./bin2hdf5.py ../src/training.f32 500000 87 training.h5 45 | ./rnn_train.py 46 | ./dump_rnn.py weights.hdf5 ../src/rnn_data.c ../src/rnn_data.h 47 | ``` 48 | 49 | The training process generates the RNN model weight source file (default is `rnn_data.c`) and the layer definition header file (default is `rnn_data.h`). 50 | They can be used to refresh `src/rnn_data.c` and `src/rnn_data.h`, after which the rnnoise library and/or examples can be rebuilt. Note: `training/dump_rnn.py` as shipped reads four command-line arguments (Keras weights file, C output file, text model output file, and the suffix used in the generated `rnnoise_model_<suffix>` symbol), so adjust the `dump_rnn.py` command above to match. 51 | 52 | ## License 53 | 54 | `rnnoise` is freely redistributable under the revised BSD license. 55 | Use of this source code is governed by a BSD-style license that can be found in the `COPYING` file. 56 | -------------------------------------------------------------------------------- /src/celt_lpc.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2009-2010 Xiph.Org Foundation 2 | Written by Jean-Marc Valin */ 3 | /* 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | - Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | - Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER 19 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 23 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | 28 | #ifndef PLC_H 29 | #define PLC_H 30 | 31 | #include "arch.h" 32 | #include "common.h" 33 | 34 | #if defined(OPUS_X86_MAY_HAVE_SSE4_1) 35 | #include "x86/celt_lpc_sse.h" 36 | #endif 37 | 38 | #define LPC_ORDER 24 39 | 40 | void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p); 41 | 42 | void celt_fir( 43 | const opus_val16 *x, 44 | const opus_val16 *num, 45 | opus_val16 *y, 46 | int N, 47 | int ord); 48 | 49 | void celt_iir(const opus_val32 *x, 50 | const opus_val16 *den, 51 | opus_val32 *y, 52 | int N, 53 | int ord, 54 | opus_val16 *mem); 55 | 56 | int _celt_autocorr(const opus_val16 *x, opus_val32 *ac, 57 | const opus_val16 *window, int overlap, int lag, int n); 58 | 59 | #endif /* PLC_H */ 60 | -------------------------------------------------------------------------------- /src/rnn_train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from __future__ import print_function 4 | 5 | from keras.models import Sequential 6 | from keras.models import Model 7 | from keras.layers import Input 8 | from keras.layers import Dense 9 | from keras.layers import LSTM 10 | from keras.layers import GRU 11 | from keras.layers import SimpleRNN 12 | from keras.layers import Dropout 13 | from keras import losses 14 | import h5py 15 | 16 | from keras import backend as K 17 | import numpy as np 18 | 19 | print('Build model...') 20 | main_input = 
Input(shape=(None, 22), name='main_input') 21 | #x = Dense(44, activation='relu')(main_input) 22 | #x = GRU(44, dropout=0.0, recurrent_dropout=0.0, activation='tanh', recurrent_activation='sigmoid', return_sequences=True)(x) 23 | x=main_input 24 | x = GRU(128, activation='tanh', recurrent_activation='sigmoid', return_sequences=True)(x) 25 | #x = GRU(128, return_sequences=True)(x) 26 | #x = GRU(22, activation='relu', return_sequences=True)(x) 27 | x = Dense(22, activation='sigmoid')(x) 28 | #x = Dense(22, activation='softplus')(x) 29 | model = Model(inputs=main_input, outputs=x) 30 | 31 | batch_size = 32 32 | 33 | print('Loading data...') 34 | with h5py.File('denoise_data.h5', 'r') as hf: 35 | all_data = hf['denoise_data'][:] 36 | print('done.') 37 | 38 | window_size = 500 39 | 40 | nb_sequences = len(all_data)//window_size 41 | print(nb_sequences, ' sequences') 42 | x_train = all_data[:nb_sequences*window_size, :-22] 43 | x_train = np.reshape(x_train, (nb_sequences, window_size, 22)) 44 | 45 | y_train = np.copy(all_data[:nb_sequences*window_size, -22:]) 46 | y_train = np.reshape(y_train, (nb_sequences, window_size, 22)) 47 | 48 | #y_train = -20*np.log10(np.add(y_train, .03)); 49 | 50 | all_data = 0; 51 | x_train = x_train.astype('float32') 52 | y_train = y_train.astype('float32') 53 | 54 | print(len(x_train), 'train sequences. 
x shape =', x_train.shape, 'y shape = ', y_train.shape) 55 | 56 | # try using different optimizers and different optimizer configs 57 | model.compile(loss='mean_squared_error', 58 | optimizer='adam', 59 | metrics=['binary_accuracy']) 60 | 61 | print('Train...') 62 | model.fit(x_train, y_train, 63 | batch_size=batch_size, 64 | epochs=200, 65 | validation_data=(x_train, y_train)) 66 | model.save("newweights.hdf5") 67 | -------------------------------------------------------------------------------- /src/tansig_table.h: -------------------------------------------------------------------------------- 1 | /* This file is auto-generated by gen_tables */ 2 | 3 | static const float tansig_table[201] = { 4 | 0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f, 5 | 0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f, 6 | 0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f, 7 | 0.537050f, 0.564900f, 0.591519f, 0.616909f, 0.641077f, 8 | 0.664037f, 0.685809f, 0.706419f, 0.725897f, 0.744277f, 9 | 0.761594f, 0.777888f, 0.793199f, 0.807569f, 0.821040f, 10 | 0.833655f, 0.845456f, 0.856485f, 0.866784f, 0.876393f, 11 | 0.885352f, 0.893698f, 0.901468f, 0.908698f, 0.915420f, 12 | 0.921669f, 0.927473f, 0.932862f, 0.937863f, 0.942503f, 13 | 0.946806f, 0.950795f, 0.954492f, 0.957917f, 0.961090f, 14 | 0.964028f, 0.966747f, 0.969265f, 0.971594f, 0.973749f, 15 | 0.975743f, 0.977587f, 0.979293f, 0.980869f, 0.982327f, 16 | 0.983675f, 0.984921f, 0.986072f, 0.987136f, 0.988119f, 17 | 0.989027f, 0.989867f, 0.990642f, 0.991359f, 0.992020f, 18 | 0.992631f, 0.993196f, 0.993718f, 0.994199f, 0.994644f, 19 | 0.995055f, 0.995434f, 0.995784f, 0.996108f, 0.996407f, 20 | 0.996682f, 0.996937f, 0.997172f, 0.997389f, 0.997590f, 21 | 0.997775f, 0.997946f, 0.998104f, 0.998249f, 0.998384f, 22 | 0.998508f, 0.998623f, 0.998728f, 0.998826f, 0.998916f, 23 | 0.999000f, 0.999076f, 0.999147f, 0.999213f, 0.999273f, 24 | 0.999329f, 0.999381f, 0.999428f, 0.999472f, 0.999513f, 25 | 0.999550f, 0.999585f, 0.999617f, 
0.999646f, 0.999673f, 26 | 0.999699f, 0.999722f, 0.999743f, 0.999763f, 0.999781f, 27 | 0.999798f, 0.999813f, 0.999828f, 0.999841f, 0.999853f, 28 | 0.999865f, 0.999875f, 0.999885f, 0.999893f, 0.999902f, 29 | 0.999909f, 0.999916f, 0.999923f, 0.999929f, 0.999934f, 30 | 0.999939f, 0.999944f, 0.999948f, 0.999952f, 0.999956f, 31 | 0.999959f, 0.999962f, 0.999965f, 0.999968f, 0.999970f, 32 | 0.999973f, 0.999975f, 0.999977f, 0.999978f, 0.999980f, 33 | 0.999982f, 0.999983f, 0.999984f, 0.999986f, 0.999987f, 34 | 0.999988f, 0.999989f, 0.999990f, 0.999990f, 0.999991f, 35 | 0.999992f, 0.999992f, 0.999993f, 0.999994f, 0.999994f, 36 | 0.999994f, 0.999995f, 0.999995f, 0.999996f, 0.999996f, 37 | 0.999996f, 0.999997f, 0.999997f, 0.999997f, 0.999997f, 38 | 0.999997f, 0.999998f, 0.999998f, 0.999998f, 0.999998f, 39 | 0.999998f, 0.999998f, 0.999999f, 0.999999f, 0.999999f, 40 | 0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f, 41 | 0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f, 42 | 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 43 | 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 44 | 1.000000f, 45 | }; 46 | -------------------------------------------------------------------------------- /src/rnn.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2017 Jean-Marc Valin */ 2 | /* 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | 7 | - Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | - Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 
13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 15 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 16 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 17 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 22 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | */ 26 | 27 | #ifndef RNN_H_ 28 | #define RNN_H_ 29 | 30 | #include "rnnoise.h" 31 | 32 | #include "opus_types.h" 33 | 34 | #define WEIGHTS_SCALE (1.f/256) 35 | 36 | #define MAX_NEURONS 128 37 | 38 | #define ACTIVATION_TANH 0 39 | #define ACTIVATION_SIGMOID 1 40 | #define ACTIVATION_RELU 2 41 | 42 | typedef signed char rnn_weight; 43 | 44 | typedef struct { 45 | const rnn_weight *bias; 46 | const rnn_weight *input_weights; 47 | int nb_inputs; 48 | int nb_neurons; 49 | int activation; 50 | } DenseLayer; 51 | 52 | typedef struct { 53 | const rnn_weight *bias; 54 | const rnn_weight *input_weights; 55 | const rnn_weight *recurrent_weights; 56 | int nb_inputs; 57 | int nb_neurons; 58 | int activation; 59 | } GRULayer; 60 | 61 | typedef struct RNNState RNNState; 62 | 63 | void compute_dense(const DenseLayer *layer, float *output, const float *input); 64 | 65 | void compute_gru(const GRULayer *gru, float *state, const float *input); 66 | 67 | void compute_rnn(RNNState *rnn, float *gains, float *vad, const float *input); 68 | 69 | #endif /* _RNN_H_ */ 70 | -------------------------------------------------------------------------------- /include/rnnoise.h: 
-------------------------------------------------------------------------------- 1 | /* Copyright (c) 2018 Gregor Richards 2 | * Copyright (c) 2017 Mozilla */ 3 | /* 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | - Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | - Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR 19 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 23 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | */ 27 | 28 | #ifndef RNNOISE_H 29 | #define RNNOISE_H 1 30 | 31 | #ifdef __cplusplus 32 | extern "C" { 33 | #endif 34 | #include 35 | 36 | 37 | #ifndef RNNOISE_EXPORT 38 | # if defined(WIN32) 39 | # if defined(RNNOISE_BUILD) && defined(DLL_EXPORT) 40 | # define RNNOISE_EXPORT __declspec(dllexport) 41 | # else 42 | # define RNNOISE_EXPORT 43 | # endif 44 | # elif defined(__GNUC__) && defined(RNNOISE_BUILD) 45 | # define RNNOISE_EXPORT __attribute__ ((visibility ("default"))) 46 | # else 47 | # define RNNOISE_EXPORT 48 | # endif 49 | #endif 50 | 51 | typedef struct DenoiseState DenoiseState; 52 | typedef struct RNNModel RNNModel; 53 | 54 | RNNOISE_EXPORT int rnnoise_get_size(); 55 | 56 | RNNOISE_EXPORT int rnnoise_init(DenoiseState *st, RNNModel *model); 57 | 58 | RNNOISE_EXPORT DenoiseState *rnnoise_create(RNNModel *model); 59 | 60 | RNNOISE_EXPORT void rnnoise_destroy(DenoiseState *st); 61 | 62 | RNNOISE_EXPORT float rnnoise_process_frame(DenoiseState *st, float *out, const float *in); 63 | 64 | RNNOISE_EXPORT RNNModel *rnnoise_model_from_file(FILE *f); 65 | 66 | RNNOISE_EXPORT void rnnoise_model_free(RNNModel *model); 67 | 68 | #ifdef __cplusplus 69 | } 70 | #endif 71 | #endif 72 | -------------------------------------------------------------------------------- /training/dump_rnn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from __future__ import print_function 4 | 5 | from keras.models import Sequential 6 | from keras.layers import Dense 7 | from keras.layers import LSTM 8 | from keras.layers import GRU 9 | from keras.models import load_model 10 | from keras import backend as K 11 | import sys 12 | import re 13 | import numpy as np 14 | 15 | def printVector(f, ft, vector, name): 16 | v = np.reshape(vector, (-1)); 17 | #print('static const float ', name, '[', len(v), '] = \n', file=f) 18 | f.write('static const rnn_weight {}[{}] = {{\n '.format(name, len(v))) 19 | for i in range(0, len(v)): 
20 | f.write('{}'.format(min(127, int(round(256*v[i]))))) 21 | ft.write('{}'.format(min(127, int(round(256*v[i]))))) 22 | if (i!=len(v)-1): 23 | f.write(',') 24 | else: 25 | break; 26 | ft.write(" ") 27 | if (i%8==7): 28 | f.write("\n ") 29 | else: 30 | f.write(" ") 31 | #print(v, file=f) 32 | f.write('\n};\n\n') 33 | ft.write("\n") 34 | return; 35 | 36 | def printLayer(f, ft, layer): 37 | weights = layer.get_weights() 38 | activation = re.search('function (.*) at', str(layer.activation)).group(1).upper() 39 | if len(weights) > 2: 40 | ft.write('{} {} '.format(weights[0].shape[0], weights[0].shape[1]/3)) 41 | else: 42 | ft.write('{} {} '.format(weights[0].shape[0], weights[0].shape[1])) 43 | if activation == 'SIGMOID': 44 | ft.write('1\n') 45 | elif activation == 'RELU': 46 | ft.write('2\n') 47 | else: 48 | ft.write('0\n') 49 | printVector(f, ft, weights[0], layer.name + '_weights') 50 | if len(weights) > 2: 51 | printVector(f, ft, weights[1], layer.name + '_recurrent_weights') 52 | printVector(f, ft, weights[-1], layer.name + '_bias') 53 | name = layer.name 54 | if len(weights) > 2: 55 | f.write('static const GRULayer {} = {{\n {}_bias,\n {}_weights,\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n' 56 | .format(name, name, name, name, weights[0].shape[0], weights[0].shape[1]/3, activation)) 57 | else: 58 | f.write('static const DenseLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n' 59 | .format(name, name, name, weights[0].shape[0], weights[0].shape[1], activation)) 60 | 61 | def structLayer(f, layer): 62 | weights = layer.get_weights() 63 | name = layer.name 64 | if len(weights) > 2: 65 | f.write(' {},\n'.format(weights[0].shape[1]/3)) 66 | else: 67 | f.write(' {},\n'.format(weights[0].shape[1])) 68 | f.write(' &{},\n'.format(name)) 69 | 70 | 71 | def foo(c, name): 72 | return None 73 | 74 | def mean_squared_sqrt_error(y_true, y_pred): 75 | return K.mean(K.square(K.sqrt(y_pred) - K.sqrt(y_true)), axis=-1) 76 | 77 | 78 | model = 
load_model(sys.argv[1], custom_objects={'msse': mean_squared_sqrt_error, 'mean_squared_sqrt_error': mean_squared_sqrt_error, 'my_crossentropy': mean_squared_sqrt_error, 'mycost': mean_squared_sqrt_error, 'WeightClip': foo}) 79 | 80 | weights = model.get_weights() 81 | 82 | f = open(sys.argv[2], 'w') 83 | ft = open(sys.argv[3], 'w') 84 | 85 | f.write('/*This file is automatically generated from a Keras model*/\n\n') 86 | f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "rnn.h"\n#include "rnn_data.h"\n\n') 87 | ft.write('rnnoise-nu model file version 1\n') 88 | 89 | layer_list = [] 90 | for i, layer in enumerate(model.layers): 91 | if len(layer.get_weights()) > 0: 92 | printLayer(f, ft, layer) 93 | if len(layer.get_weights()) > 2: 94 | layer_list.append(layer.name) 95 | 96 | f.write('const struct RNNModel rnnoise_model_{} = {{\n'.format(sys.argv[4])) 97 | for i, layer in enumerate(model.layers): 98 | if len(layer.get_weights()) > 0: 99 | structLayer(f, layer) 100 | f.write('};\n') 101 | 102 | #hf.write('struct RNNState {\n') 103 | #for i, name in enumerate(layer_list): 104 | # hf.write(' float {}_state[{}_SIZE];\n'.format(name, name.upper())) 105 | #hf.write('};\n') 106 | 107 | f.close() 108 | -------------------------------------------------------------------------------- /training/rnn_train.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | from __future__ import print_function 4 | 5 | import keras 6 | from keras.models import Sequential 7 | from keras.models import Model 8 | from keras.layers import Input 9 | from keras.layers import Dense 10 | from keras.layers import LSTM 11 | from keras.layers import GRU 12 | from keras.layers import SimpleRNN 13 | from keras.layers import Dropout 14 | from keras.layers import concatenate 15 | from keras import losses 16 | from keras import regularizers 17 | from keras.constraints import min_max_norm 18 | import h5py 19 | 20 | from 
keras.constraints import Constraint 21 | from keras import backend as K 22 | import numpy as np 23 | 24 | #import tensorflow as tf 25 | #from keras.backend.tensorflow_backend import set_session 26 | #config = tf.ConfigProto() 27 | #config.gpu_options.per_process_gpu_memory_fraction = 0.42 28 | #set_session(tf.Session(config=config)) 29 | 30 | 31 | def my_crossentropy(y_true, y_pred): 32 | return K.mean(2*K.abs(y_true-0.5) * K.binary_crossentropy(y_pred, y_true), axis=-1) 33 | 34 | def mymask(y_true): 35 | return K.minimum(y_true+1., 1.) 36 | 37 | def msse(y_true, y_pred): 38 | return K.mean(mymask(y_true) * K.square(K.sqrt(y_pred) - K.sqrt(y_true)), axis=-1) 39 | 40 | def mycost(y_true, y_pred): 41 | return K.mean(mymask(y_true) * (10*K.square(K.square(K.sqrt(y_pred) - K.sqrt(y_true))) + K.square(K.sqrt(y_pred) - K.sqrt(y_true)) + 0.01*K.binary_crossentropy(y_pred, y_true)), axis=-1) 42 | 43 | def my_accuracy(y_true, y_pred): 44 | return K.mean(2*K.abs(y_true-0.5) * K.equal(y_true, K.round(y_pred)), axis=-1) 45 | 46 | class WeightClip(Constraint): 47 | '''Clips the weights incident to each hidden unit to be inside a range 48 | ''' 49 | def __init__(self, c=2): 50 | self.c = c 51 | 52 | def __call__(self, p): 53 | return K.clip(p, -self.c, self.c) 54 | 55 | def get_config(self): 56 | return {'name': self.__class__.__name__, 57 | 'c': self.c} 58 | 59 | reg = 0.000001 60 | constraint = WeightClip(0.499) 61 | 62 | print('Build model...') 63 | main_input = Input(shape=(None, 42), name='main_input') 64 | tmp = Dense(24, activation='tanh', name='input_dense', kernel_constraint=constraint, bias_constraint=constraint)(main_input) 65 | vad_gru = GRU(24, activation='tanh', recurrent_activation='sigmoid', return_sequences=True, name='vad_gru', kernel_regularizer=regularizers.l2(reg), recurrent_regularizer=regularizers.l2(reg), kernel_constraint=constraint, recurrent_constraint=constraint, bias_constraint=constraint)(tmp) 66 | vad_output = Dense(1, activation='sigmoid', 
name='vad_output', kernel_constraint=constraint, bias_constraint=constraint)(vad_gru) 67 | noise_input = keras.layers.concatenate([tmp, vad_gru, main_input]) 68 | noise_gru = GRU(48, activation='relu', recurrent_activation='sigmoid', return_sequences=True, name='noise_gru', kernel_regularizer=regularizers.l2(reg), recurrent_regularizer=regularizers.l2(reg), kernel_constraint=constraint, recurrent_constraint=constraint, bias_constraint=constraint)(noise_input) 69 | denoise_input = keras.layers.concatenate([vad_gru, noise_gru, main_input]) 70 | 71 | denoise_gru = GRU(96, activation='tanh', recurrent_activation='sigmoid', return_sequences=True, name='denoise_gru', kernel_regularizer=regularizers.l2(reg), recurrent_regularizer=regularizers.l2(reg), kernel_constraint=constraint, recurrent_constraint=constraint, bias_constraint=constraint)(denoise_input) 72 | 73 | denoise_output = Dense(22, activation='sigmoid', name='denoise_output', kernel_constraint=constraint, bias_constraint=constraint)(denoise_gru) 74 | 75 | model = Model(inputs=main_input, outputs=[denoise_output, vad_output]) 76 | 77 | model.compile(loss=[mycost, my_crossentropy], 78 | metrics=[msse], 79 | optimizer='adam', loss_weights=[10, 0.5]) 80 | 81 | 82 | batch_size = 32 83 | 84 | print('Loading data...') 85 | with h5py.File('training.h5', 'r') as hf: 86 | all_data = hf['data'][:] 87 | print('done.') 88 | 89 | window_size = 2000 90 | 91 | nb_sequences = len(all_data)//window_size 92 | print(nb_sequences, ' sequences') 93 | x_train = all_data[:nb_sequences*window_size, :42] 94 | x_train = np.reshape(x_train, (nb_sequences, window_size, 42)) 95 | 96 | y_train = np.copy(all_data[:nb_sequences*window_size, 42:64]) 97 | y_train = np.reshape(y_train, (nb_sequences, window_size, 22)) 98 | 99 | noise_train = np.copy(all_data[:nb_sequences*window_size, 64:86]) 100 | noise_train = np.reshape(noise_train, (nb_sequences, window_size, 22)) 101 | 102 | vad_train = np.copy(all_data[:nb_sequences*window_size, 86:87]) 103 
| vad_train = np.reshape(vad_train, (nb_sequences, window_size, 1)) 104 | 105 | all_data = 0; 106 | #x_train = x_train.astype('float32') 107 | #y_train = y_train.astype('float32') 108 | 109 | print(len(x_train), 'train sequences. x shape =', x_train.shape, 'y shape = ', y_train.shape) 110 | 111 | print('Train...') 112 | model.fit(x_train, [y_train, vad_train], 113 | batch_size=batch_size, 114 | epochs=120, 115 | validation_split=0.1) 116 | model.save("weights.hdf5") 117 | -------------------------------------------------------------------------------- /src/pitch.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2007-2008 CSIRO 2 | Copyright (c) 2007-2009 Xiph.Org Foundation 3 | Written by Jean-Marc Valin */ 4 | /** 5 | @file pitch.h 6 | @brief Pitch analysis 7 | */ 8 | 9 | /* 10 | Redistribution and use in source and binary forms, with or without 11 | modification, are permitted provided that the following conditions 12 | are met: 13 | 14 | - Redistributions of source code must retain the above copyright 15 | notice, this list of conditions and the following disclaimer. 16 | 17 | - Redistributions in binary form must reproduce the above copyright 18 | notice, this list of conditions and the following disclaimer in the 19 | documentation and/or other materials provided with the distribution. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER 25 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #ifndef PITCH_H 35 | #define PITCH_H 36 | 37 | //#include "modes.h" 38 | //#include "cpu_support.h" 39 | #include "arch.h" 40 | 41 | void pitch_downsample(celt_sig *x[], opus_val16 *x_lp, 42 | int len, int C); 43 | 44 | void pitch_search(const opus_val16 *x_lp, opus_val16 *y, 45 | int len, int max_pitch, int *pitch); 46 | 47 | opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, 48 | int N, int *T0, int prev_period, opus_val16 prev_gain); 49 | 50 | 51 | /* OPT: This is the kernel you really want to optimize. It gets used a lot 52 | by the prefilter and by the PLC. 
*/ 53 | static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) 54 | { 55 | int j; 56 | opus_val16 y_0, y_1, y_2, y_3; 57 | celt_assert(len>=3); 58 | y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */ 59 | y_0=*y++; 60 | y_1=*y++; 61 | y_2=*y++; 62 | for (j=0;j= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H)) 38 | #include 39 | 40 | typedef int16_t opus_int16; 41 | typedef uint16_t opus_uint16; 42 | typedef int32_t opus_int32; 43 | typedef uint32_t opus_uint32; 44 | #elif defined(_WIN32) 45 | 46 | # if defined(__CYGWIN__) 47 | # include <_G_config.h> 48 | typedef _G_int32_t opus_int32; 49 | typedef _G_uint32_t opus_uint32; 50 | typedef _G_int16 opus_int16; 51 | typedef _G_uint16 opus_uint16; 52 | # elif defined(__MINGW32__) 53 | typedef short opus_int16; 54 | typedef unsigned short opus_uint16; 55 | typedef int opus_int32; 56 | typedef unsigned int opus_uint32; 57 | # elif defined(__MWERKS__) 58 | typedef int opus_int32; 59 | typedef unsigned int opus_uint32; 60 | typedef short opus_int16; 61 | typedef unsigned short opus_uint16; 62 | # else 63 | /* MSVC/Borland */ 64 | typedef __int32 opus_int32; 65 | typedef unsigned __int32 opus_uint32; 66 | typedef __int16 opus_int16; 67 | typedef unsigned __int16 opus_uint16; 68 | # endif 69 | 70 | #elif defined(__MACOS__) 71 | 72 | # include 73 | typedef SInt16 opus_int16; 74 | typedef UInt16 opus_uint16; 75 | typedef SInt32 opus_int32; 76 | typedef UInt32 opus_uint32; 77 | 78 | #elif (defined(__APPLE__) && defined(__MACH__)) /* MacOS X Framework build */ 79 | 80 | # include 81 | typedef int16_t opus_int16; 82 | typedef u_int16_t opus_uint16; 83 | typedef int32_t opus_int32; 84 | typedef u_int32_t opus_uint32; 85 | 86 | #elif defined(__BEOS__) 87 | 88 | /* Be */ 89 | # include 90 | typedef int16 opus_int16; 91 | typedef u_int16 opus_uint16; 92 | typedef int32_t opus_int32; 93 | typedef u_int32_t 
opus_uint32; 94 | 95 | #elif defined (__EMX__) 96 | 97 | /* OS/2 GCC */ 98 | typedef short opus_int16; 99 | typedef unsigned short opus_uint16; 100 | typedef int opus_int32; 101 | typedef unsigned int opus_uint32; 102 | 103 | #elif defined (DJGPP) 104 | 105 | /* DJGPP */ 106 | typedef short opus_int16; 107 | typedef unsigned short opus_uint16; 108 | typedef int opus_int32; 109 | typedef unsigned int opus_uint32; 110 | 111 | #elif defined(R5900) 112 | 113 | /* PS2 EE */ 114 | typedef int opus_int32; 115 | typedef unsigned opus_uint32; 116 | typedef short opus_int16; 117 | typedef unsigned short opus_uint16; 118 | 119 | #elif defined(__SYMBIAN32__) 120 | 121 | /* Symbian GCC */ 122 | typedef signed short opus_int16; 123 | typedef unsigned short opus_uint16; 124 | typedef signed int opus_int32; 125 | typedef unsigned int opus_uint32; 126 | 127 | #elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X) 128 | 129 | typedef short opus_int16; 130 | typedef unsigned short opus_uint16; 131 | typedef long opus_int32; 132 | typedef unsigned long opus_uint32; 133 | 134 | #elif defined(CONFIG_TI_C6X) 135 | 136 | typedef short opus_int16; 137 | typedef unsigned short opus_uint16; 138 | typedef int opus_int32; 139 | typedef unsigned int opus_uint32; 140 | 141 | #else 142 | 143 | /* Give up, take a reasonable guess */ 144 | typedef short opus_int16; 145 | typedef unsigned short opus_uint16; 146 | typedef int opus_int32; 147 | typedef unsigned int opus_uint32; 148 | 149 | #endif 150 | 151 | #define opus_int int /* used for counters etc; at least 16 bits */ 152 | #define opus_int64 long long 153 | #define opus_int8 signed char 154 | 155 | #define opus_uint unsigned int /* used for counters etc; at least 16 bits */ 156 | #define opus_uint64 unsigned long long 157 | #define opus_uint8 unsigned char 158 | 159 | #endif /* OPUS_TYPES_H */ 160 | -------------------------------------------------------------------------------- /examples/rnnoise_demo.c: 
-------------------------------------------------------------------------------- 1 | /* Copyright (c) 2020 National Cheng Kung University, Taiwan. 2 | * Copyright (c) 2018 Gregor Richards. 3 | * Copyright (c) 2017 Mozilla 4 | */ 5 | /* 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions 8 | are met: 9 | 10 | - Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | 13 | - Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR 21 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 22 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 23 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 24 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 25 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 26 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | */ 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | 41 | #include "rnnoise.h" 42 | 43 | #define DENOISE_SAMPLE_RATE 48000 44 | #define DENOISE_FRAMES (DENOISE_SAMPLE_RATE / 1000 * 10) 45 | 46 | size_t resample(soxr_t soxr, const int16_t *in_buf, const int in_frames, 47 | int16_t *out_buf, const int out_frames) { 48 | size_t odone; 49 | const soxr_error_t error = 50 | soxr_process(soxr, in_buf, in_frames, NULL, out_buf, out_frames, &odone); 51 | return error ? -1 : odone; 52 | } 53 | 54 | void denoise(soxr_t soxr_in, soxr_t soxr_out, int16_t *snd_buf, 55 | const int in_frames, int16_t *resample_buf, float *denoise_buf, 56 | DenoiseState *st) { 57 | resample(soxr_in, snd_buf, in_frames, resample_buf, DENOISE_FRAMES); 58 | 59 | for (int i = 0; i < DENOISE_FRAMES; i++) 60 | denoise_buf[i] = (float)resample_buf[i]; 61 | 62 | rnnoise_process_frame(st, denoise_buf, denoise_buf); 63 | 64 | for (int i = 0; i < DENOISE_FRAMES; i++) 65 | resample_buf[i] = denoise_buf[i]; 66 | 67 | resample(soxr_out, resample_buf, DENOISE_FRAMES, snd_buf, in_frames); 68 | } 69 | 70 | uint64_t usecs() { 71 | struct timeval tv; 72 | gettimeofday(&tv, NULL); 73 | return (tv.tv_sec * (uint64_t)1000000 + tv.tv_usec); 74 | } 75 | 76 | int main(int argc, char **argv) { 77 | DenoiseState *st; 78 | SNDFILE *in_sf, *out_sf; 79 | SF_INFO in_info = {0}, out_info = {0}; 80 | soxr_error_t error; 81 | 82 | if (argc != 3) { 83 | fprintf(stderr, "usage: %s \n", argv[0]); 84 | return 1; 85 | } 86 | 87 | if (access(argv[1], F_OK) == -1) { 88 | fprintf(stderr, "FATAL: can not access file %s.\n", argv[1]); 89 | return -1; 90 | } 91 | 92 | soxr_quality_spec_t const q_spec = soxr_quality_spec(SOXR_HQ, 0); 93 | soxr_io_spec_t const io_spec = soxr_io_spec(SOXR_INT16_I, SOXR_INT16_I); 94 | soxr_runtime_spec_t const runtime_spec = soxr_runtime_spec(sox_false); 95 | 96 | st = rnnoise_create(NULL); 97 | in_sf = 
sf_open(argv[1], SFM_READ, &in_info); 98 | out_info = (SF_INFO){.samplerate = in_info.samplerate, 99 | .channels = 1, 100 | .format = in_info.format}; 101 | out_sf = sf_open(argv[2], SFM_WRITE, &out_info); 102 | 103 | sf_command(in_sf, SFC_SET_NORM_FLOAT, NULL, SF_FALSE); 104 | sf_command(out_sf, SFC_SET_NORM_FLOAT, NULL, SF_FALSE); 105 | 106 | const int in_frames = in_info.samplerate / 1000 * 10; 107 | int16_t *snd_buf = malloc(in_frames * 2); 108 | int16_t resample_buf[DENOISE_FRAMES]; 109 | float denoise_buf[DENOISE_FRAMES]; 110 | 111 | soxr_t soxr_in = soxr_create(in_info.samplerate, DENOISE_SAMPLE_RATE, 1, 112 | &error, &io_spec, &q_spec, &runtime_spec); 113 | soxr_t soxr_out = soxr_create(DENOISE_SAMPLE_RATE, out_info.samplerate, 1, 114 | &error, &io_spec, &q_spec, &runtime_spec); 115 | 116 | const sf_count_t remain_frames = in_info.frames % in_frames; 117 | uint64_t t0 = usecs(); 118 | double runtime = 0.0; 119 | while (1) { 120 | const sf_count_t read_samples = sf_readf_short(in_sf, snd_buf, in_frames); 121 | if (read_samples < in_frames) 122 | break; 123 | 124 | denoise(soxr_in, soxr_out, snd_buf, in_frames, resample_buf, denoise_buf, 125 | st); 126 | sf_writef_short(out_sf, snd_buf, in_frames); 127 | runtime += 0.01; // 480 samples at 48Khz mono -> 5ms 128 | } 129 | if (remain_frames > 0) { 130 | memset(snd_buf + (in_frames - remain_frames), 0, remain_frames); 131 | denoise(soxr_in, soxr_out, snd_buf, in_frames, resample_buf, denoise_buf, 132 | st); 133 | sf_writef_short(out_sf, snd_buf, remain_frames); 134 | runtime += 0.01; // 480 samples at 48Khz mono -> 5ms 135 | } 136 | double elapsed = (usecs() - t0) / 1000000.0; 137 | fprintf(stdout, 138 | "processed %3.3f seconds in %3.3f seconds (%3.2fx realtime) \n", 139 | runtime, elapsed, runtime / elapsed); 140 | 141 | free(snd_buf); 142 | soxr_delete(soxr_in); 143 | soxr_delete(soxr_out); 144 | rnnoise_destroy(st); 145 | 146 | sf_close(in_sf); 147 | sf_close(out_sf); 148 | return 0; 149 | } 150 | 
-------------------------------------------------------------------------------- /src/rnn_reader.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2018 Gregor Richards */ 2 | /* 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | 7 | - Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | - Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 15 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 16 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 17 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR 18 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 22 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 23 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | */ 26 | 27 | #ifdef HAVE_CONFIG_H 28 | #include "config.h" 29 | #endif 30 | 31 | #include 32 | #include 33 | #include 34 | 35 | #include "rnn.h" 36 | #include "rnn_data.h" 37 | #include "rnnoise.h" 38 | 39 | /* Although these values are the same as in rnn.h, we make them separate to 40 | * avoid accidentally burning internal values into a file format */ 41 | #define F_ACTIVATION_TANH 0 42 | #define F_ACTIVATION_SIGMOID 1 43 | #define F_ACTIVATION_RELU 2 44 | 45 | RNNModel *rnnoise_model_from_file(FILE *f) 46 | { 47 | int i, in; 48 | 49 | if (fscanf(f, "rnnoise-nu model file version %d\n", &in) != 1 || in != 1) 50 | return NULL; 51 | 52 | RNNModel *ret = calloc(1, sizeof(RNNModel)); 53 | if (!ret) 54 | return NULL; 55 | 56 | #define ALLOC_LAYER(type, name) \ 57 | type *name; \ 58 | name = calloc(1, sizeof(type)); \ 59 | if (!name) { \ 60 | rnnoise_model_free(ret); \ 61 | return NULL; \ 62 | } \ 63 | ret->name = name 64 | 65 | ALLOC_LAYER(DenseLayer, input_dense); 66 | ALLOC_LAYER(GRULayer, vad_gru); 67 | ALLOC_LAYER(GRULayer, noise_gru); 68 | ALLOC_LAYER(GRULayer, denoise_gru); 69 | ALLOC_LAYER(DenseLayer, denoise_output); 70 | ALLOC_LAYER(DenseLayer, vad_output); 71 | 72 | #define INPUT_VAL(name) do { \ 73 | if (fscanf(f, "%d", &in) != 1 || in < 0 || in > 128) { \ 74 | rnnoise_model_free(ret); \ 75 | return NULL; \ 76 | } \ 77 | name = in; \ 78 | } while (0) 79 | 80 | #define INPUT_ACTIVATION(name) do { \ 81 | int activation; \ 82 | INPUT_VAL(activation); \ 83 | switch (activation) { \ 84 | case F_ACTIVATION_SIGMOID: \ 85 | name = ACTIVATION_SIGMOID; \ 86 | break; \ 87 | case F_ACTIVATION_RELU: \ 88 | name = ACTIVATION_RELU; \ 89 | break; \ 90 | default: \ 91 | name = ACTIVATION_TANH; \ 92 | } \ 93 | } while (0) 94 | 95 | #define INPUT_ARRAY(name, len) do { \ 96 | rnn_weight *values = malloc((len) * sizeof(rnn_weight)); \ 97 | if (!values) { \ 98 | rnnoise_model_free(ret); \ 99 | return NULL; \ 100 | } \ 101 | name = values; \ 102 | for (i = 0; i < (len); i++) 
{ \ 103 | if (fscanf(f, "%d", &in) != 1) { \ 104 | rnnoise_model_free(ret); \ 105 | return NULL; \ 106 | } \ 107 | values[i] = in; \ 108 | } \ 109 | } while (0) 110 | 111 | #define INPUT_DENSE(name) do { \ 112 | INPUT_VAL(name->nb_inputs); \ 113 | INPUT_VAL(name->nb_neurons); \ 114 | ret->name ## _size = name->nb_neurons; \ 115 | INPUT_ACTIVATION(name->activation); \ 116 | INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons); \ 117 | INPUT_ARRAY(name->bias, name->nb_neurons); \ 118 | } while (0) 119 | 120 | #define INPUT_GRU(name) do { \ 121 | INPUT_VAL(name->nb_inputs); \ 122 | INPUT_VAL(name->nb_neurons); \ 123 | ret->name ## _size = name->nb_neurons; \ 124 | INPUT_ACTIVATION(name->activation); \ 125 | INPUT_ARRAY(name->input_weights, name->nb_inputs * name->nb_neurons * 3); \ 126 | INPUT_ARRAY(name->recurrent_weights, name->nb_neurons * name->nb_neurons * 3); \ 127 | INPUT_ARRAY(name->bias, name->nb_neurons * 3); \ 128 | } while (0) 129 | 130 | INPUT_DENSE(input_dense); 131 | INPUT_GRU(vad_gru); 132 | INPUT_GRU(noise_gru); 133 | INPUT_GRU(denoise_gru); 134 | INPUT_DENSE(denoise_output); 135 | INPUT_DENSE(vad_output); 136 | 137 | return ret; 138 | } 139 | 140 | void rnnoise_model_free(RNNModel *model) 141 | { 142 | #define FREE_MAYBE(ptr) do { if (ptr) free(ptr); } while (0) 143 | #define FREE_DENSE(name) do { \ 144 | if (model->name) { \ 145 | free((void *) model->name->input_weights); \ 146 | free((void *) model->name->bias); \ 147 | free((void *) model->name); \ 148 | } \ 149 | } while (0) 150 | #define FREE_GRU(name) do { \ 151 | if (model->name) { \ 152 | free((void *) model->name->input_weights); \ 153 | free((void *) model->name->recurrent_weights); \ 154 | free((void *) model->name->bias); \ 155 | free((void *) model->name); \ 156 | } \ 157 | } while (0) 158 | 159 | if (!model) 160 | return; 161 | FREE_DENSE(input_dense); 162 | FREE_GRU(vad_gru); 163 | FREE_GRU(noise_gru); 164 | FREE_GRU(denoise_gru); 165 | FREE_DENSE(denoise_output); 166 
| FREE_DENSE(vad_output); 167 | free(model); 168 | } 169 | -------------------------------------------------------------------------------- /src/_kiss_fft_guts.h: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2003-2004, Mark Borgerding 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 18 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 20 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 21 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 22 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 23 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 | POSSIBILITY OF SUCH DAMAGE.*/ 25 | 26 | #ifndef KISS_FFT_GUTS_H 27 | #define KISS_FFT_GUTS_H 28 | 29 | #define MIN(a,b) ((a)<(b) ? (a):(b)) 30 | #define MAX(a,b) ((a)>(b) ? 
(a):(b)) 31 | 32 | /* kiss_fft.h 33 | defines kiss_fft_scalar as either short or a float type 34 | and defines 35 | typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */ 36 | #include "kiss_fft.h" 37 | 38 | /* 39 | Explanation of macros dealing with complex math: 40 | 41 | C_MUL(m,a,b) : m = a*b 42 | C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise 43 | C_SUB( res, a,b) : res = a - b 44 | C_SUBFROM( res , a) : res -= a 45 | C_ADDTO( res , a) : res += a 46 | * */ 47 | #ifdef FIXED_POINT 48 | #include "arch.h" 49 | 50 | 51 | #define SAMP_MAX 2147483647 52 | #define TWID_MAX 32767 53 | #define TRIG_UPSCALE 1 54 | 55 | #define SAMP_MIN -SAMP_MAX 56 | 57 | 58 | # define S_MUL(a,b) MULT16_32_Q15(b, a) 59 | 60 | # define C_MUL(m,a,b) \ 61 | do{ (m).r = SUB32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ 62 | (m).i = ADD32_ovflw(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0) 63 | 64 | # define C_MULC(m,a,b) \ 65 | do{ (m).r = ADD32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ 66 | (m).i = SUB32_ovflw(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0) 67 | 68 | # define C_MULBYSCALAR( c, s ) \ 69 | do{ (c).r = S_MUL( (c).r , s ) ;\ 70 | (c).i = S_MUL( (c).i , s ) ; }while(0) 71 | 72 | # define DIVSCALAR(x,k) \ 73 | (x) = S_MUL( x, (TWID_MAX-((k)>>1))/(k)+1 ) 74 | 75 | # define C_FIXDIV(c,div) \ 76 | do { DIVSCALAR( (c).r , div); \ 77 | DIVSCALAR( (c).i , div); }while (0) 78 | 79 | #define C_ADD( res, a,b)\ 80 | do {(res).r=ADD32_ovflw((a).r,(b).r); (res).i=ADD32_ovflw((a).i,(b).i); \ 81 | }while(0) 82 | #define C_SUB( res, a,b)\ 83 | do {(res).r=SUB32_ovflw((a).r,(b).r); (res).i=SUB32_ovflw((a).i,(b).i); \ 84 | }while(0) 85 | #define C_ADDTO( res , a)\ 86 | do {(res).r = ADD32_ovflw((res).r, (a).r); (res).i = ADD32_ovflw((res).i,(a).i);\ 87 | }while(0) 88 | 89 | #define C_SUBFROM( res , a)\ 90 | do {(res).r = ADD32_ovflw((res).r,(a).r); (res).i = SUB32_ovflw((res).i,(a).i); \ 91 | }while(0) 92 | 93 | #if 
defined(OPUS_ARM_INLINE_ASM) 94 | #include "arm/kiss_fft_armv4.h" 95 | #endif 96 | 97 | #if defined(OPUS_ARM_INLINE_EDSP) 98 | #include "arm/kiss_fft_armv5e.h" 99 | #endif 100 | #if defined(MIPSr1_ASM) 101 | #include "mips/kiss_fft_mipsr1.h" 102 | #endif 103 | 104 | #else /* not FIXED_POINT*/ 105 | 106 | # define S_MUL(a,b) ( (a)*(b) ) 107 | #define C_MUL(m,a,b) \ 108 | do{ (m).r = (a).r*(b).r - (a).i*(b).i;\ 109 | (m).i = (a).r*(b).i + (a).i*(b).r; }while(0) 110 | #define C_MULC(m,a,b) \ 111 | do{ (m).r = (a).r*(b).r + (a).i*(b).i;\ 112 | (m).i = (a).i*(b).r - (a).r*(b).i; }while(0) 113 | 114 | #define C_MUL4(m,a,b) C_MUL(m,a,b) 115 | 116 | # define C_FIXDIV(c,div) /* NOOP */ 117 | # define C_MULBYSCALAR( c, s ) \ 118 | do{ (c).r *= (s);\ 119 | (c).i *= (s); }while(0) 120 | #endif 121 | 122 | #ifndef CHECK_OVERFLOW_OP 123 | # define CHECK_OVERFLOW_OP(a,op,b) /* noop */ 124 | #endif 125 | 126 | #ifndef C_ADD 127 | #define C_ADD( res, a,b)\ 128 | do { \ 129 | CHECK_OVERFLOW_OP((a).r,+,(b).r)\ 130 | CHECK_OVERFLOW_OP((a).i,+,(b).i)\ 131 | (res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \ 132 | }while(0) 133 | #define C_SUB( res, a,b)\ 134 | do { \ 135 | CHECK_OVERFLOW_OP((a).r,-,(b).r)\ 136 | CHECK_OVERFLOW_OP((a).i,-,(b).i)\ 137 | (res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \ 138 | }while(0) 139 | #define C_ADDTO( res , a)\ 140 | do { \ 141 | CHECK_OVERFLOW_OP((res).r,+,(a).r)\ 142 | CHECK_OVERFLOW_OP((res).i,+,(a).i)\ 143 | (res).r += (a).r; (res).i += (a).i;\ 144 | }while(0) 145 | 146 | #define C_SUBFROM( res , a)\ 147 | do {\ 148 | CHECK_OVERFLOW_OP((res).r,-,(a).r)\ 149 | CHECK_OVERFLOW_OP((res).i,-,(a).i)\ 150 | (res).r -= (a).r; (res).i -= (a).i; \ 151 | }while(0) 152 | #endif /* C_ADD defined */ 153 | 154 | #ifdef FIXED_POINT 155 | /*# define KISS_FFT_COS(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase)))) 156 | # define KISS_FFT_SIN(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))*/ 157 | # define KISS_FFT_COS(phase) 
floor(.5+TWID_MAX*cos (phase)) 158 | # define KISS_FFT_SIN(phase) floor(.5+TWID_MAX*sin (phase)) 159 | # define HALF_OF(x) ((x)>>1) 160 | #elif defined(USE_SIMD) 161 | # define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) ) 162 | # define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) ) 163 | # define HALF_OF(x) ((x)*_mm_set1_ps(.5f)) 164 | #else 165 | # define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase) 166 | # define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase) 167 | # define HALF_OF(x) ((x)*.5f) 168 | #endif 169 | 170 | #define kf_cexp(x,phase) \ 171 | do{ \ 172 | (x)->r = KISS_FFT_COS(phase);\ 173 | (x)->i = KISS_FFT_SIN(phase);\ 174 | }while(0) 175 | 176 | #define kf_cexp2(x,phase) \ 177 | do{ \ 178 | (x)->r = TRIG_UPSCALE*celt_cos_norm((phase));\ 179 | (x)->i = TRIG_UPSCALE*celt_cos_norm((phase)-32768);\ 180 | }while(0) 181 | 182 | #endif /* KISS_FFT_GUTS_H */ 183 | -------------------------------------------------------------------------------- /src/rnn.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2008-2011 Octasic Inc. 2 | 2012-2017 Jean-Marc Valin */ 3 | /* 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | - Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | - Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR 19 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 23 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | 28 | #ifdef HAVE_CONFIG_H 29 | #include "config.h" 30 | #endif 31 | 32 | #include 33 | #include "opus_types.h" 34 | #include "common.h" 35 | #include "arch.h" 36 | #include "tansig_table.h" 37 | #include "rnn.h" 38 | #include "rnn_data.h" 39 | #include 40 | 41 | static OPUS_INLINE float tansig_approx(float x) 42 | { 43 | int i; 44 | float y, dy; 45 | float sign=1; 46 | /* Tests are reversed to catch NaNs */ 47 | if (!(x<8)) 48 | return 1; 49 | if (!(x>-8)) 50 | return -1; 51 | #ifndef FIXED_POINT 52 | /* Another check in case of -ffast-math */ 53 | if (celt_isnan(x)) 54 | return 0; 55 | #endif 56 | if (x<0) 57 | { 58 | x=-x; 59 | sign=-1; 60 | } 61 | i = (int)floor(.5f+25*x); 62 | x -= .04f*i; 63 | y = tansig_table[i]; 64 | dy = 1-y*y; 65 | y = y + x*dy*(1 - y*x); 66 | return sign*y; 67 | } 68 | 69 | static OPUS_INLINE float sigmoid_approx(float x) 70 | { 71 | return .5 + .5*tansig_approx(.5*x); 72 | } 73 | 74 | static OPUS_INLINE float relu(float x) 75 | { 76 | return x < 0 ? 
0 : x; 77 | } 78 | 79 | void faxpy(float *restrict a, const rnn_weight *restrict b, int k, float u) 80 | { 81 | if (u == 0.0) return; 82 | for (int idx = 0; idx < k; idx++) 83 | a[idx] += b[idx] * u; 84 | } 85 | 86 | void compute_dense(const DenseLayer *layer, float *output, const float *input) 87 | { 88 | int i, j; 89 | int N, M; 90 | M = layer->nb_inputs; 91 | N = layer->nb_neurons; 92 | const rnn_weight *ip = layer->input_weights; 93 | /* Compute update gate. */ 94 | for(i = 0; i < N; i++) 95 | output[i] = layer->bias[i]; 96 | for (j=0;jactivation) { 99 | case ACTIVATION_SIGMOID: 100 | for (i=0;inb_inputs; 124 | N = gru->nb_neurons; 125 | stride = 3*N; 126 | const rnn_weight *ip = gru->input_weights; 127 | const rnn_weight *rp = gru->recurrent_weights; 128 | /* Compute update gate. */ 129 | for(i = 0; i < N; i++) 130 | z[i] = gru->bias[i]; 131 | for (j=0;jbias[N+i]; 140 | ip = gru->input_weights + N; 141 | rp = gru->recurrent_weights + N; 142 | for (j=0;jbias[2*N+i]; 152 | ip = gru->input_weights + 2*N; 153 | rp = gru->recurrent_weights + 2*N; 154 | for (j=0;jactivation) { 160 | case ACTIVATION_SIGMOID: h[i] = sigmoid_approx(WEIGHTS_SCALE*h[i]);break; 161 | case ACTIVATION_TANH: h[i] = tansig_approx(WEIGHTS_SCALE*h[i]); break; 162 | default: 163 | case ACTIVATION_RELU: h[i] = relu(WEIGHTS_SCALE*h[i]); break; 164 | } 165 | h[i] = z[i]*state[i] + (1-z[i])*h[i]; 166 | } 167 | for (i=0;imodel->input_dense, dense_out, input); 181 | compute_gru(rnn->model->vad_gru, rnn->vad_gru_state, dense_out); 182 | compute_dense(rnn->model->vad_output, vad, rnn->vad_gru_state); 183 | for (i=0;imodel->input_dense_size;i++) noise_input[i] = dense_out[i]; 184 | for (i=0;imodel->vad_gru_size;i++) noise_input[i+rnn->model->input_dense_size] = rnn->vad_gru_state[i]; 185 | for (i=0;imodel->input_dense_size+rnn->model->vad_gru_size] = input[i]; 186 | compute_gru(rnn->model->noise_gru, rnn->noise_gru_state, noise_input); 187 | 188 | for (i=0;imodel->vad_gru_size;i++) denoise_input[i] = 
rnn->vad_gru_state[i]; 189 | for (i=0;imodel->noise_gru_size;i++) denoise_input[i+rnn->model->vad_gru_size] = rnn->noise_gru_state[i]; 190 | for (i=0;imodel->vad_gru_size+rnn->model->noise_gru_size] = input[i]; 191 | compute_gru(rnn->model->denoise_gru, rnn->denoise_gru_state, denoise_input); 192 | compute_dense(rnn->model->denoise_output, gains, rnn->denoise_gru_state); 193 | } 194 | -------------------------------------------------------------------------------- /src/kiss_fft.h: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2003-2004, Mark Borgerding 2 | Lots of modifications by Jean-Marc Valin 3 | Copyright (c) 2005-2007, Xiph.Org Foundation 4 | Copyright (c) 2008, Xiph.Org Foundation, CSIRO 5 | 6 | All rights reserved. 7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions are met: 10 | 11 | * Redistributions of source code must retain the above copyright notice, 12 | this list of conditions and the following disclaimer. 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | POSSIBILITY OF SUCH DAMAGE.*/ 28 | 29 | #ifndef KISS_FFT_H 30 | #define KISS_FFT_H 31 | 32 | #include 33 | #include 34 | #include "arch.h" 35 | 36 | #include 37 | #define opus_alloc(x) malloc(x) 38 | #define opus_free(x) free(x) 39 | 40 | #ifdef __cplusplus 41 | extern "C" { 42 | #endif 43 | 44 | #ifdef USE_SIMD 45 | # include 46 | # define kiss_fft_scalar __m128 47 | #define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes) 48 | #else 49 | #define KISS_FFT_MALLOC opus_alloc 50 | #endif 51 | 52 | #ifdef FIXED_POINT 53 | #include "arch.h" 54 | 55 | # define kiss_fft_scalar opus_int32 56 | # define kiss_twiddle_scalar opus_int16 57 | 58 | 59 | #else 60 | # ifndef kiss_fft_scalar 61 | /* default is float */ 62 | # define kiss_fft_scalar float 63 | # define kiss_twiddle_scalar float 64 | # define KF_SUFFIX _celt_single 65 | # endif 66 | #endif 67 | 68 | typedef struct { 69 | kiss_fft_scalar r; 70 | kiss_fft_scalar i; 71 | }kiss_fft_cpx; 72 | 73 | typedef struct { 74 | kiss_twiddle_scalar r; 75 | kiss_twiddle_scalar i; 76 | }kiss_twiddle_cpx; 77 | 78 | #define MAXFACTORS 8 79 | /* e.g. 
an fft of length 128 has 4 factors 80 | as far as kissfft is concerned 81 | 4*4*4*2 82 | */ 83 | 84 | typedef struct arch_fft_state{ 85 | int is_supported; 86 | void *priv; 87 | } arch_fft_state; 88 | 89 | typedef struct kiss_fft_state{ 90 | int nfft; 91 | opus_val16 scale; 92 | #ifdef FIXED_POINT 93 | int scale_shift; 94 | #endif 95 | int shift; 96 | opus_int16 factors[2*MAXFACTORS]; 97 | const opus_int16 *bitrev; 98 | const kiss_twiddle_cpx *twiddles; 99 | arch_fft_state *arch_fft; 100 | } kiss_fft_state; 101 | 102 | #if defined(HAVE_ARM_NE10) 103 | #include "arm/fft_arm.h" 104 | #endif 105 | 106 | /*typedef struct kiss_fft_state* kiss_fft_cfg;*/ 107 | 108 | /** 109 | * opus_fft_alloc 110 | * 111 | * Initialize a FFT (or IFFT) algorithm's cfg/state buffer. 112 | * 113 | * typical usage: kiss_fft_cfg mycfg=opus_fft_alloc(1024,0,NULL,NULL); 114 | * 115 | * The return value from fft_alloc is a cfg buffer used internally 116 | * by the fft routine or NULL. 117 | * 118 | * If lenmem is NULL, then opus_fft_alloc will allocate a cfg buffer using malloc. 119 | * The returned value should be free()d when done to avoid memory leaks. 120 | * 121 | * The state can be placed in a user supplied buffer 'mem': 122 | * If lenmem is not NULL and mem is not NULL and *lenmem is large enough, 123 | * then the function places the cfg in mem and the size used in *lenmem 124 | * and returns mem. 125 | * 126 | * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough), 127 | * then the function returns NULL and places the minimum cfg 128 | * buffer size in *lenmem. 129 | * */ 130 | 131 | kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch); 132 | 133 | kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch); 134 | 135 | /** 136 | * opus_fft(cfg,in_out_buf) 137 | * 138 | * Perform an FFT on a complex input buffer. 139 | * for a forward FFT, 140 | * fin should be f[0] , f[1] , ... 
,f[nfft-1] 141 | * fout will be F[0] , F[1] , ... ,F[nfft-1] 142 | * Note that each element is complex and can be accessed like 143 | f[k].r and f[k].i 144 | * */ 145 | void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); 146 | void opus_ifft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); 147 | 148 | void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); 149 | void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); 150 | 151 | void opus_fft_free(const kiss_fft_state *cfg, int arch); 152 | 153 | 154 | void opus_fft_free_arch_c(kiss_fft_state *st); 155 | int opus_fft_alloc_arch_c(kiss_fft_state *st); 156 | 157 | #if !defined(OVERRIDE_OPUS_FFT) 158 | /* Is run-time CPU detection enabled on this platform? */ 159 | #if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) 160 | 161 | extern int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])( 162 | kiss_fft_state *st); 163 | 164 | #define opus_fft_alloc_arch(_st, arch) \ 165 | ((*OPUS_FFT_ALLOC_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st)) 166 | 167 | extern void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])( 168 | kiss_fft_state *st); 169 | #define opus_fft_free_arch(_st, arch) \ 170 | ((*OPUS_FFT_FREE_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st)) 171 | 172 | extern void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, 173 | const kiss_fft_cpx *fin, kiss_fft_cpx *fout); 174 | #define opus_fft(_cfg, _fin, _fout, arch) \ 175 | ((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout)) 176 | 177 | extern void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, 178 | const kiss_fft_cpx *fin, kiss_fft_cpx *fout); 179 | #define opus_ifft(_cfg, _fin, _fout, arch) \ 180 | ((*OPUS_IFFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout)) 181 | 182 | #else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ 183 | 184 | #define opus_fft_alloc_arch(_st, arch) \ 185 | ((void)(arch), opus_fft_alloc_arch_c(_st)) 186 | 187 | #define 
opus_fft_free_arch(_st, arch) \ 188 | ((void)(arch), opus_fft_free_arch_c(_st)) 189 | 190 | #define opus_fft(_cfg, _fin, _fout, arch) \ 191 | ((void)(arch), opus_fft_c(_cfg, _fin, _fout)) 192 | 193 | #define opus_ifft(_cfg, _fin, _fout, arch) \ 194 | ((void)(arch), opus_ifft_c(_cfg, _fin, _fout)) 195 | 196 | #endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ 197 | #endif /* end if !defined(OVERRIDE_OPUS_FFT) */ 198 | 199 | #ifdef __cplusplus 200 | } 201 | #endif 202 | 203 | #endif 204 | -------------------------------------------------------------------------------- /src/arch.h: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2003-2008 Jean-Marc Valin 2 | Copyright (c) 2007-2008 CSIRO 3 | Copyright (c) 2007-2009 Xiph.Org Foundation 4 | Written by Jean-Marc Valin */ 5 | /** 6 | @file arch.h 7 | @brief Various architecture definitions for CELT 8 | */ 9 | /* 10 | Redistribution and use in source and binary forms, with or without 11 | modification, are permitted provided that the following conditions 12 | are met: 13 | 14 | - Redistributions of source code must retain the above copyright 15 | notice, this list of conditions and the following disclaimer. 16 | 17 | - Redistributions in binary form must reproduce the above copyright 18 | notice, this list of conditions and the following disclaimer in the 19 | documentation and/or other materials provided with the distribution. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER 25 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #ifndef ARCH_H 35 | #define ARCH_H 36 | 37 | #include "opus_types.h" 38 | #include "common.h" 39 | 40 | # if !defined(__GNUC_PREREQ) 41 | # if defined(__GNUC__)&&defined(__GNUC_MINOR__) 42 | # define __GNUC_PREREQ(_maj,_min) \ 43 | ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min)) 44 | # else 45 | # define __GNUC_PREREQ(_maj,_min) 0 46 | # endif 47 | # endif 48 | 49 | #define CELT_SIG_SCALE 32768.f 50 | 51 | #define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__); 52 | #ifdef ENABLE_ASSERTIONS 53 | #include 54 | #include 55 | #ifdef __GNUC__ 56 | __attribute__((noreturn)) 57 | #endif 58 | static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line) 59 | { 60 | fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str); 61 | abort(); 62 | } 63 | #define celt_assert(cond) {if (!(cond)) {celt_fatal("assertion failed: " #cond);}} 64 | #define celt_assert2(cond, message) {if (!(cond)) {celt_fatal("assertion failed: " #cond "\n" message);}} 65 | #else 66 | #define celt_assert(cond) 67 | #define celt_assert2(cond, message) 68 | #endif 69 | 70 | #define IMUL32(a,b) ((a)*(b)) 71 | 72 | #define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */ 73 | #define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ 74 | #define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */ 75 | #define MAX32(a,b) ((a) > (b) ? 
(a) : (b)) /**< Maximum 32-bit value. */ 76 | #define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */ 77 | #define IMAX(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum int value. */ 78 | #define UADD32(a,b) ((a)+(b)) 79 | #define USUB32(a,b) ((a)-(b)) 80 | 81 | /* Set this if opus_int64 is a native type of the CPU. */ 82 | /* Assume that all LP64 architectures have fast 64-bit types; also x86_64 83 | (which can be ILP32 for x32) and Win64 (which is LLP64). */ 84 | #if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64) 85 | #define OPUS_FAST_INT64 1 86 | #else 87 | #define OPUS_FAST_INT64 0 88 | #endif 89 | 90 | #define PRINT_MIPS(file) 91 | 92 | #ifdef FIXED_POINT 93 | 94 | typedef opus_int16 opus_val16; 95 | typedef opus_int32 opus_val32; 96 | typedef opus_int64 opus_val64; 97 | 98 | typedef opus_val32 celt_sig; 99 | typedef opus_val16 celt_norm; 100 | typedef opus_val32 celt_ener; 101 | 102 | #define Q15ONE 32767 103 | 104 | #define SIG_SHIFT 12 105 | /* Safe saturation value for 32-bit signals. Should be less than 106 | 2^31*(1-0.85) to avoid blowing up on DC at deemphasis.*/ 107 | #define SIG_SAT (300000000) 108 | 109 | #define NORM_SCALING 16384 110 | 111 | #define DB_SHIFT 10 112 | 113 | #define EPSILON 1 114 | #define VERY_SMALL 0 115 | #define VERY_LARGE16 ((opus_val16)32767) 116 | #define Q15_ONE ((opus_val16)32767) 117 | 118 | #define SCALEIN(a) (a) 119 | #define SCALEOUT(a) (a) 120 | 121 | #define ABS16(x) ((x) < 0 ? (-(x)) : (x)) 122 | #define ABS32(x) ((x) < 0 ? (-(x)) : (x)) 123 | 124 | static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { 125 | return x > 32767 ? 32767 : x < -32768 ? 
-32768 : (opus_int16)x; 126 | } 127 | 128 | #ifdef FIXED_DEBUG 129 | #include "fixed_debug.h" 130 | #else 131 | 132 | #include "fixed_generic.h" 133 | 134 | #ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR 135 | #include "arm/fixed_arm64.h" 136 | #elif OPUS_ARM_INLINE_EDSP 137 | #include "arm/fixed_armv5e.h" 138 | #elif defined (OPUS_ARM_INLINE_ASM) 139 | #include "arm/fixed_armv4.h" 140 | #elif defined (BFIN_ASM) 141 | #include "fixed_bfin.h" 142 | #elif defined (TI_C5X_ASM) 143 | #include "fixed_c5x.h" 144 | #elif defined (TI_C6X_ASM) 145 | #include "fixed_c6x.h" 146 | #endif 147 | 148 | #endif 149 | 150 | #else /* FIXED_POINT */ 151 | 152 | typedef float opus_val16; 153 | typedef float opus_val32; 154 | typedef float opus_val64; 155 | 156 | typedef float celt_sig; 157 | typedef float celt_norm; 158 | typedef float celt_ener; 159 | 160 | #ifdef FLOAT_APPROX 161 | /* This code should reliably detect NaN/inf even when -ffast-math is used. 162 | Assumes IEEE 754 format. */ 163 | static OPUS_INLINE int celt_isnan(float x) 164 | { 165 | union {float f; opus_uint32 i;} in; 166 | in.f = x; 167 | return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0; 168 | } 169 | #else 170 | #ifdef __FAST_MATH__ 171 | #error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. NaN) input 172 | #endif 173 | #define celt_isnan(x) ((x)!=(x)) 174 | #endif 175 | 176 | #define Q15ONE 1.0f 177 | 178 | #define NORM_SCALING 1.f 179 | 180 | #define EPSILON 1e-15f 181 | #define VERY_SMALL 1e-30f 182 | #define VERY_LARGE16 1e15f 183 | #define Q15_ONE ((opus_val16)1.f) 184 | 185 | /* This appears to be the same speed as C99's fabsf() but it's more portable. 
*/ 186 | #define ABS16(x) ((float)fabs(x)) 187 | #define ABS32(x) ((float)fabs(x)) 188 | 189 | #define QCONST16(x,bits) (x) 190 | #define QCONST32(x,bits) (x) 191 | 192 | #define NEG16(x) (-(x)) 193 | #define NEG32(x) (-(x)) 194 | #define NEG32_ovflw(x) (-(x)) 195 | #define EXTRACT16(x) (x) 196 | #define EXTEND32(x) (x) 197 | #define SHR16(a,shift) (a) 198 | #define SHL16(a,shift) (a) 199 | #define SHR32(a,shift) (a) 200 | #define SHL32(a,shift) (a) 201 | #define PSHR32(a,shift) (a) 202 | #define VSHR32(a,shift) (a) 203 | 204 | #define PSHR(a,shift) (a) 205 | #define SHR(a,shift) (a) 206 | #define SHL(a,shift) (a) 207 | #define SATURATE(x,a) (x) 208 | #define SATURATE16(x) (x) 209 | 210 | #define ROUND16(a,shift) (a) 211 | #define SROUND16(a,shift) (a) 212 | #define HALF16(x) (.5f*(x)) 213 | #define HALF32(x) (.5f*(x)) 214 | 215 | #define ADD16(a,b) ((a)+(b)) 216 | #define SUB16(a,b) ((a)-(b)) 217 | #define ADD32(a,b) ((a)+(b)) 218 | #define SUB32(a,b) ((a)-(b)) 219 | #define ADD32_ovflw(a,b) ((a)+(b)) 220 | #define SUB32_ovflw(a,b) ((a)-(b)) 221 | #define MULT16_16_16(a,b) ((a)*(b)) 222 | #define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b)) 223 | #define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b)) 224 | 225 | #define MULT16_32_Q15(a,b) ((a)*(b)) 226 | #define MULT16_32_Q16(a,b) ((a)*(b)) 227 | 228 | #define MULT32_32_Q31(a,b) ((a)*(b)) 229 | 230 | #define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) 231 | #define MAC16_32_Q16(c,a,b) ((c)+(a)*(b)) 232 | 233 | #define MULT16_16_Q11_32(a,b) ((a)*(b)) 234 | #define MULT16_16_Q11(a,b) ((a)*(b)) 235 | #define MULT16_16_Q13(a,b) ((a)*(b)) 236 | #define MULT16_16_Q14(a,b) ((a)*(b)) 237 | #define MULT16_16_Q15(a,b) ((a)*(b)) 238 | #define MULT16_16_P15(a,b) ((a)*(b)) 239 | #define MULT16_16_P13(a,b) ((a)*(b)) 240 | #define MULT16_16_P14(a,b) ((a)*(b)) 241 | #define MULT16_32_P16(a,b) ((a)*(b)) 242 | 243 | #define DIV32_16(a,b) (((opus_val32)(a))/(opus_val16)(b)) 244 | #define DIV32(a,b) 
(((opus_val32)(a))/(opus_val32)(b)) 245 | 246 | #define SCALEIN(a) ((a)*CELT_SIG_SCALE) 247 | #define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE)) 248 | 249 | #define SIG2WORD16(x) (x) 250 | 251 | #endif /* !FIXED_POINT */ 252 | 253 | #ifndef GLOBAL_STACK_SIZE 254 | #ifdef FIXED_POINT 255 | #define GLOBAL_STACK_SIZE 120000 256 | #else 257 | #define GLOBAL_STACK_SIZE 120000 258 | #endif 259 | #endif 260 | 261 | #endif /* ARCH_H */ 262 | -------------------------------------------------------------------------------- /src/celt_lpc.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2009-2010 Xiph.Org Foundation 2 | Written by Jean-Marc Valin */ 3 | /* 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | - Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | - Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER 19 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 23 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | */ 27 | 28 | #ifdef HAVE_CONFIG_H 29 | #include "config.h" 30 | #endif 31 | 32 | #include "celt_lpc.h" 33 | #include "arch.h" 34 | #include "common.h" 35 | #include "pitch.h" 36 | 37 | void _celt_lpc( 38 | opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */ 39 | const opus_val32 *ac, /* in: [0...p] autocorrelation values */ 40 | int p 41 | ) 42 | { 43 | int i, j; 44 | opus_val32 r; 45 | opus_val32 error = ac[0]; 46 | #ifdef FIXED_POINT 47 | opus_val32 lpc[LPC_ORDER]; 48 | #else 49 | float *lpc = _lpc; 50 | #endif 51 | 52 | RNN_CLEAR(lpc, p); 53 | if (ac[0] != 0) 54 | { 55 | for (i = 0; i < p; i++) { 56 | /* Sum up this iteration's reflection coefficient */ 57 | opus_val32 rr = 0; 58 | for (j = 0; j < i; j++) 59 | rr += MULT32_32_Q31(lpc[j],ac[i - j]); 60 | rr += SHR32(ac[i + 1],3); 61 | r = -SHL32(rr,3)/error; 62 | /* Update LPC coefficients and total error */ 63 | lpc[i] = SHR32(r,3); 64 | for (j = 0; j < (i+1)>>1; j++) 65 | { 66 | opus_val32 tmp1, tmp2; 67 | tmp1 = lpc[j]; 68 | tmp2 = lpc[i-1-j]; 69 | lpc[j] = tmp1 + MULT32_32_Q31(r,tmp2); 70 | lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1); 71 | } 72 | 73 | error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error); 74 | /* Bail out once we get 30 dB gain */ 75 | #ifdef FIXED_POINT 76 | if (error=1;j--) 142 | { 143 | mem[j]=mem[j-1]; 144 | } 145 | mem[0] = SROUND16(sum, SIG_SHIFT); 146 | _y[i] = sum; 147 | } 148 | #else 149 | int i,j; 150 | celt_assert((ord&3)==0); 151 | 
opus_val16 *rden = calloc(sizeof(opus_val16), ord); 152 | opus_val16 *y = calloc(sizeof(opus_val16), N+ord); 153 | 154 | for(i=0;i0); 218 | celt_assert(overlap>=0); 219 | if (overlap == 0) 220 | { 221 | xptr = x; 222 | } else { 223 | for (i=0;i0) 247 | { 248 | for(i=0;i= 536870912) 273 | { 274 | int shift2=1; 275 | if (ac[0] >= 1073741824) 276 | shift2++; 277 | for (i=0;i<=lag;i++) 278 | ac[i] = SHR32(ac[i], shift2); 279 | shift += shift2; 280 | } 281 | #endif 282 | free(xx); 283 | return shift; 284 | } 285 | -------------------------------------------------------------------------------- /src/pitch.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2007-2008 CSIRO 2 | Copyright (c) 2007-2009 Xiph.Org Foundation 3 | Written by Jean-Marc Valin */ 4 | /** 5 | @file pitch.c 6 | @brief Pitch analysis 7 | */ 8 | 9 | /* 10 | Redistribution and use in source and binary forms, with or without 11 | modification, are permitted provided that the following conditions 12 | are met: 13 | 14 | - Redistributions of source code must retain the above copyright 15 | notice, this list of conditions and the following disclaimer. 16 | 17 | - Redistributions in binary form must reproduce the above copyright 18 | notice, this list of conditions and the following disclaimer in the 19 | documentation and/or other materials provided with the distribution. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER 25 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #ifdef HAVE_CONFIG_H 35 | #include "config.h" 36 | #endif 37 | 38 | #include "pitch.h" 39 | #include "common.h" 40 | //#include "modes.h" 41 | //#include "stack_alloc.h" 42 | //#include "mathops.h" 43 | #include "celt_lpc.h" 44 | #include "math.h" 45 | 46 | static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len, 47 | int max_pitch, int *best_pitch 48 | #ifdef FIXED_POINT 49 | , int yshift, opus_val32 maxcorr 50 | #endif 51 | ) 52 | { 53 | int i, j; 54 | opus_val32 Syy=1; 55 | opus_val16 best_num[2]; 56 | opus_val32 best_den[2]; 57 | #ifdef FIXED_POINT 58 | int xshift; 59 | 60 | xshift = celt_ilog2(maxcorr)-14; 61 | #endif 62 | 63 | best_num[0] = -1; 64 | best_num[1] = -1; 65 | best_den[0] = 0; 66 | best_den[1] = 0; 67 | best_pitch[0] = 0; 68 | best_pitch[1] = 1; 69 | for (j=0;j0) 74 | { 75 | opus_val16 num; 76 | opus_val32 xcorr16; 77 | xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift)); 78 | #ifndef FIXED_POINT 79 | /* Considering the range of xcorr16, this should avoid both underflows 80 | and overflows (inf) when squaring xcorr16 */ 81 | xcorr16 *= 1e-12f; 82 | #endif 83 | num = MULT16_16_Q15(xcorr16,xcorr16); 84 | if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy)) 85 | { 86 | if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy)) 87 | { 88 | best_num[1] = best_num[0]; 89 | best_den[1] = best_den[0]; 90 | best_pitch[1] = best_pitch[0]; 91 | best_num[0] = num; 92 | 
best_den[0] = Syy; 93 | best_pitch[0] = i; 94 | } else { 95 | best_num[1] = num; 96 | best_den[1] = Syy; 97 | best_pitch[1] = i; 98 | } 99 | } 100 | } 101 | Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift); 102 | Syy = MAX32(1, Syy); 103 | } 104 | } 105 | 106 | static void celt_fir5(const opus_val16 *x, 107 | const opus_val16 *num, 108 | opus_val16 *y, 109 | int N, 110 | opus_val16 *mem) 111 | { 112 | int i; 113 | opus_val16 num0, num1, num2, num3, num4; 114 | opus_val32 mem0, mem1, mem2, mem3, mem4; 115 | num0=num[0]; 116 | num1=num[1]; 117 | num2=num[2]; 118 | num3=num[3]; 119 | num4=num[4]; 120 | mem0=mem[0]; 121 | mem1=mem[1]; 122 | mem2=mem[2]; 123 | mem3=mem[3]; 124 | mem4=mem[4]; 125 | for (i=0;i>1;i++) 174 | x_lp[i] = SHR32(HALF32(HALF32(x[0][(2*i-1)]+x[0][(2*i+1)])+x[0][2*i]), shift); 175 | x_lp[0] = SHR32(HALF32(HALF32(x[0][1])+x[0][0]), shift); 176 | if (C==2) 177 | { 178 | for (i=1;i>1;i++) 179 | x_lp[i] += SHR32(HALF32(HALF32(x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*i]), shift); 180 | x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift); 181 | } 182 | 183 | _celt_autocorr(x_lp, ac, NULL, 0, 184 | 4, len>>1); 185 | 186 | /* Noise floor -40 dB */ 187 | #ifdef FIXED_POINT 188 | ac[0] += SHR32(ac[0],13); 189 | #else 190 | ac[0] *= 1.0001f; 191 | #endif 192 | /* Lag windowing */ 193 | for (i=1;i<=4;i++) 194 | { 195 | /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/ 196 | #ifdef FIXED_POINT 197 | ac[i] -= MULT16_32_Q15(2*i*i, ac[i]); 198 | #else 199 | ac[i] -= ac[i]*(.008f*i)*(.008f*i); 200 | #endif 201 | } 202 | 203 | _celt_lpc(lpc, ac, 4); 204 | for (i=0;i<4;i++) 205 | { 206 | tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp); 207 | lpc[i] = MULT16_16_Q15(lpc[i], tmp); 208 | } 209 | /* Add a zero */ 210 | lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT); 211 | lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]); 212 | lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]); 213 | lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]); 214 | lpc2[4] = 
MULT16_16_Q15(c1,lpc[3]); 215 | celt_fir5(x_lp, lpc2, x_lp, len>>1, mem); 216 | } 217 | 218 | void celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, 219 | opus_val32 *xcorr, int len, int max_pitch) 220 | { 221 | 222 | #if 0 /* This is a simple version of the pitch correlation that should work 223 | well on DSPs like Blackfin and TI C5x/C6x */ 224 | int i, j; 225 | #ifdef FIXED_POINT 226 | opus_val32 maxcorr=1; 227 | #endif 228 | for (i=0;i0); 251 | celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); 252 | for (i=0;i0); 297 | celt_assert(max_pitch>0); 298 | lag = len+max_pitch; 299 | 300 | opus_val16 *x_lp4 = calloc(sizeof(opus_val16), len>>2); 301 | opus_val16 *y_lp4 = calloc(sizeof(opus_val16), lag>>2); 302 | opus_val32 *xcorr = calloc(sizeof(opus_val32), max_pitch>>1); 303 | 304 | /* Downsample by 2 again */ 305 | for (j=0;j>2;j++) 306 | x_lp4[j] = x_lp[2*j]; 307 | for (j=0;j>2;j++) 308 | y_lp4[j] = y[2*j]; 309 | 310 | #ifdef FIXED_POINT 311 | xmax = celt_maxabs16(x_lp4, len>>2); 312 | ymax = celt_maxabs16(y_lp4, lag>>2); 313 | shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11; 314 | if (shift>0) 315 | { 316 | for (j=0;j>2;j++) 317 | x_lp4[j] = SHR16(x_lp4[j], shift); 318 | for (j=0;j>2;j++) 319 | y_lp4[j] = SHR16(y_lp4[j], shift); 320 | /* Use double the shift for a MAC */ 321 | shift *= 2; 322 | } else { 323 | shift = 0; 324 | } 325 | #endif 326 | 327 | /* Coarse search with 4x decimation */ 328 | 329 | #ifdef FIXED_POINT 330 | maxcorr = 331 | #endif 332 | celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2); 333 | 334 | find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch 335 | #ifdef FIXED_POINT 336 | , 0, maxcorr 337 | #endif 338 | ); 339 | 340 | /* Finer search with 2x decimation */ 341 | #ifdef FIXED_POINT 342 | maxcorr=1; 343 | #endif 344 | for (i=0;i>1;i++) 345 | { 346 | opus_val32 sum; 347 | xcorr[i] = 0; 348 | if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2) 349 | continue; 350 | #ifdef FIXED_POINT 351 
| sum = 0; 352 | for (j=0;j>1;j++) 353 | sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); 354 | #else 355 | sum = celt_inner_prod(x_lp, y+i, len>>1); 356 | #endif 357 | xcorr[i] = MAX32(-1, sum); 358 | #ifdef FIXED_POINT 359 | maxcorr = MAX32(maxcorr, sum); 360 | #endif 361 | } 362 | find_best_pitch(xcorr, y, len>>1, max_pitch>>1, best_pitch 363 | #ifdef FIXED_POINT 364 | , shift+1, maxcorr 365 | #endif 366 | ); 367 | 368 | /* Refine by pseudo-interpolation */ 369 | if (best_pitch[0]>0 && best_pitch[0]<(max_pitch>>1)-1) 370 | { 371 | opus_val32 a, b, c; 372 | a = xcorr[best_pitch[0]-1]; 373 | b = xcorr[best_pitch[0]]; 374 | c = xcorr[best_pitch[0]+1]; 375 | if ((c-a) > MULT16_32_Q15(QCONST16(.7f,15),b-a)) 376 | offset = 1; 377 | else if ((a-c) > MULT16_32_Q15(QCONST16(.7f,15),b-c)) 378 | offset = -1; 379 | else 380 | offset = 0; 381 | } else { 382 | offset = 0; 383 | } 384 | *pitch = 2*best_pitch[0]-offset; 385 | free(xcorr); 386 | free(x_lp4); 387 | free(y_lp4); 388 | } 389 | 390 | #ifdef FIXED_POINT 391 | static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy) 392 | { 393 | opus_val32 x2y2; 394 | int sx, sy, shift; 395 | opus_val32 g; 396 | opus_val16 den; 397 | if (xy == 0 || xx == 0 || yy == 0) 398 | return 0; 399 | sx = celt_ilog2(xx)-14; 400 | sy = celt_ilog2(yy)-14; 401 | shift = sx + sy; 402 | x2y2 = SHR32(MULT16_16(VSHR32(xx, sx), VSHR32(yy, sy)), 14); 403 | if (shift & 1) { 404 | if (x2y2 < 32768) 405 | { 406 | x2y2 <<= 1; 407 | shift--; 408 | } else { 409 | x2y2 >>= 1; 410 | shift++; 411 | } 412 | } 413 | den = celt_rsqrt_norm(x2y2); 414 | g = MULT16_32_Q15(den, xy); 415 | g = VSHR32(g, (shift>>1)-1); 416 | return EXTRACT16(MIN32(g, Q15ONE)); 417 | } 418 | #else 419 | static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy) 420 | { 421 | return xy/sqrt(1+xx*yy); 422 | } 423 | #endif 424 | 425 | static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}; 426 | opus_val16 
remove_doubling(opus_val16 *x, int maxperiod, int minperiod, 427 | int N, int *T0_, int prev_period, opus_val16 prev_gain) 428 | { 429 | int k, i, T, T0; 430 | opus_val16 g, g0; 431 | opus_val16 pg; 432 | opus_val32 xy,xx,yy,xy2; 433 | opus_val32 xcorr[3]; 434 | opus_val32 best_xy, best_yy; 435 | int offset; 436 | int minperiod0; 437 | 438 | minperiod0 = minperiod; 439 | maxperiod /= 2; 440 | minperiod /= 2; 441 | *T0_ /= 2; 442 | prev_period /= 2; 443 | N /= 2; 444 | x += maxperiod; 445 | if (*T0_>=maxperiod) 446 | *T0_=maxperiod-1; 447 | 448 | T = T0 = *T0_; 449 | opus_val32 *yy_lookup = calloc(sizeof(opus_val32), maxperiod+1); 450 | dual_inner_prod(x, x, x-T0, N, &xx, &xy); 451 | yy_lookup[0] = xx; 452 | yy=xx; 453 | for (i=1;i<=maxperiod;i++) 454 | { 455 | yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]); 456 | yy_lookup[i] = MAX32(0, yy); 457 | } 458 | yy = yy_lookup[T0]; 459 | best_xy = xy; 460 | best_yy = yy; 461 | g = g0 = compute_pitch_gain(xy, xx, yy); 462 | /* Look for any pitch at T/k */ 463 | for (k=2;k<=15;k++) 464 | { 465 | int T1, T1b; 466 | opus_val16 g1; 467 | opus_val16 cont=0; 468 | opus_val16 thresh; 469 | T1 = (2*T0+k)/(2*k); 470 | if (T1 < minperiod) 471 | break; 472 | /* Look for another strong correlation at T1b */ 473 | if (k==2) 474 | { 475 | if (T1+T0>maxperiod) 476 | T1b = T0; 477 | else 478 | T1b = T0+T1; 479 | } else 480 | { 481 | T1b = (2*second_check[k]*T0+k)/(2*k); 482 | } 483 | dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2); 484 | xy = HALF32(xy + xy2); 485 | yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]); 486 | g1 = compute_pitch_gain(xy, xx, yy); 487 | if (abs(T1-prev_period)<=1) 488 | cont = prev_gain; 489 | else if (abs(T1-prev_period)<=2 && 5*k*k < T0) 490 | cont = HALF16(prev_gain); 491 | else 492 | cont = 0; 493 | thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont); 494 | /* Bias against very high pitch (very short period) to avoid false-positives 495 | due to short-term correlation */ 496 | 
if (T1<3*minperiod) 497 | thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont); 498 | else if (T1<2*minperiod) 499 | thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont); 500 | if (g1 > thresh) 501 | { 502 | best_xy = xy; 503 | best_yy = yy; 504 | T = T1; 505 | g = g1; 506 | } 507 | } 508 | free(yy_lookup); 509 | best_xy = MAX32(0, best_xy); 510 | if (best_yy <= best_xy) 511 | pg = Q15ONE; 512 | else 513 | pg = best_xy/(best_yy+1); 514 | 515 | for (k=0;k<3;k++) 516 | xcorr[k] = celt_inner_prod(x, x-(T+k-1), N); 517 | if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0])) 518 | offset = 1; 519 | else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2])) 520 | offset = -1; 521 | else 522 | offset = 0; 523 | if (pg > g) 524 | pg = g; 525 | *T0_ = 2*T+offset; 526 | 527 | if (*T0_twiddles; 143 | /* m is guaranteed to be a multiple of 4. */ 144 | for (j=0;jtwiddles[fstride*m]; 195 | #endif 196 | for (i=0;itwiddles; 200 | /* For non-custom modes, m is guaranteed to be a multiple of 4. 
*/ 201 | k=m; 202 | do { 203 | 204 | C_MUL(scratch[1],Fout[m] , *tw1); 205 | C_MUL(scratch[2],Fout[m2] , *tw2); 206 | 207 | C_ADD(scratch[3],scratch[1],scratch[2]); 208 | C_SUB(scratch[0],scratch[1],scratch[2]); 209 | tw1 += fstride; 210 | tw2 += fstride*2; 211 | 212 | Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r)); 213 | Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i)); 214 | 215 | C_MULBYSCALAR( scratch[0] , epi3.i ); 216 | 217 | C_ADDTO(*Fout,scratch[3]); 218 | 219 | Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i); 220 | Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r); 221 | 222 | Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i); 223 | Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r); 224 | 225 | ++Fout; 226 | } while(--k); 227 | } 228 | } 229 | 230 | 231 | #ifndef OVERRIDE_kf_bfly5 232 | static void kf_bfly5( 233 | kiss_fft_cpx * Fout, 234 | const size_t fstride, 235 | const kiss_fft_state *st, 236 | int m, 237 | int N, 238 | int mm 239 | ) 240 | { 241 | kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; 242 | int i, u; 243 | kiss_fft_cpx scratch[13]; 244 | const kiss_twiddle_cpx *tw; 245 | kiss_twiddle_cpx ya,yb; 246 | kiss_fft_cpx * Fout_beg = Fout; 247 | 248 | #ifdef FIXED_POINT 249 | ya.r = 10126; 250 | ya.i = -31164; 251 | yb.r = -26510; 252 | yb.i = -19261; 253 | #else 254 | ya = st->twiddles[fstride*m]; 255 | yb = st->twiddles[fstride*2*m]; 256 | #endif 257 | tw=st->twiddles; 258 | 259 | for (i=0;ir = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r)); 283 | Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i)); 284 | 285 | scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r))); 286 | scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r))); 287 | 288 | scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i)); 289 | scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), 
S_MUL(scratch[9].r,yb.i))); 290 | 291 | C_SUB(*Fout1,scratch[5],scratch[6]); 292 | C_ADD(*Fout4,scratch[5],scratch[6]); 293 | 294 | scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r))); 295 | scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r))); 296 | scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i)); 297 | scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i)); 298 | 299 | C_ADD(*Fout2,scratch[11],scratch[12]); 300 | C_SUB(*Fout3,scratch[11],scratch[12]); 301 | 302 | ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; 303 | } 304 | } 305 | } 306 | #endif /* OVERRIDE_kf_bfly5 */ 307 | 308 | 309 | #endif 310 | 311 | 312 | #ifdef CUSTOM_MODES 313 | 314 | static 315 | void compute_bitrev_table( 316 | int Fout, 317 | opus_int16 *f, 318 | const size_t fstride, 319 | int in_stride, 320 | opus_int16 * factors, 321 | const kiss_fft_state *st 322 | ) 323 | { 324 | const int p=*factors++; /* the radix */ 325 | const int m=*factors++; /* stage's fft length/p */ 326 | 327 | /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/ 328 | if (m==1) 329 | { 330 | int j; 331 | for (j=0;j32000 || (opus_int32)p*(opus_int32)p > n) 368 | p = n; /* no more factors, skip to end */ 369 | } 370 | n /= p; 371 | #ifdef RADIX_TWO_ONLY 372 | if (p!=2 && p != 4) 373 | #else 374 | if (p>5) 375 | #endif 376 | { 377 | return 0; 378 | } 379 | facbuf[2*stages] = p; 380 | if (p==2 && stages > 1) 381 | { 382 | facbuf[2*stages] = 4; 383 | facbuf[2] = 2; 384 | } 385 | stages++; 386 | } while (n > 1); 387 | n = nbak; 388 | /* Reverse the order to get the radix 4 at the end, so we can use the 389 | fast degenerate case. It turns out that reversing the order also 390 | improves the noise behaviour. 
*/ 391 | for (i=0;i= memneeded) 444 | st = (kiss_fft_state*)mem; 445 | *lenmem = memneeded; 446 | } 447 | if (st) { 448 | opus_int16 *bitrev; 449 | kiss_twiddle_cpx *twiddles; 450 | memset(st, 0, sizeof(kiss_fft_state)); 451 | st->nfft=nfft; 452 | #ifdef FIXED_POINT 453 | st->scale_shift = celt_ilog2(st->nfft); 454 | if (st->nfft == 1<scale_shift) 455 | st->scale = Q15ONE; 456 | else 457 | st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); 458 | #else 459 | st->scale = 1.f/nfft; 460 | #endif 461 | if (base != NULL) 462 | { 463 | st->twiddles = base->twiddles; 464 | st->shift = 0; 465 | while (st->shift < 32 && nfft<shift != base->nfft) 466 | st->shift++; 467 | if (st->shift>=32) 468 | goto fail; 469 | } else { 470 | st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft); 471 | compute_twiddles(twiddles, nfft); 472 | st->shift = -1; 473 | } 474 | if (!kf_factor(nfft,st->factors)) 475 | { 476 | goto fail; 477 | } 478 | 479 | /* bitrev */ 480 | st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft); 481 | if (st->bitrev==NULL) 482 | goto fail; 483 | compute_bitrev_table(0, bitrev, 1,1, st->factors,st); 484 | 485 | /* Initialize architecture specific fft parameters */ 486 | if (opus_fft_alloc_arch(st, arch)) 487 | goto fail; 488 | } 489 | return st; 490 | fail: 491 | opus_fft_free(st, arch); 492 | return NULL; 493 | } 494 | 495 | kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch) 496 | { 497 | return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch); 498 | } 499 | 500 | void opus_fft_free_arch_c(kiss_fft_state *st) { 501 | (void)st; 502 | } 503 | 504 | void opus_fft_free(const kiss_fft_state *cfg, int arch) 505 | { 506 | if (cfg) 507 | { 508 | opus_fft_free_arch((kiss_fft_state *)cfg, arch); 509 | opus_free((opus_int16*)cfg->bitrev); 510 | if (cfg->shift < 0) 511 | opus_free((kiss_twiddle_cpx*)cfg->twiddles); 512 | opus_free((kiss_fft_state*)cfg); 513 | } 514 | } 
515 | 516 | #endif /* CUSTOM_MODES */ 517 | 518 | void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) 519 | { 520 | int m2, m; 521 | int p; 522 | int L; 523 | int fstride[MAXFACTORS]; 524 | int i; 525 | int shift; 526 | 527 | /* st->shift can be -1 */ 528 | shift = st->shift>0 ? st->shift : 0; 529 | 530 | fstride[0] = 1; 531 | L=0; 532 | do { 533 | p = st->factors[2*L]; 534 | m = st->factors[2*L+1]; 535 | fstride[L+1] = fstride[L]*p; 536 | L++; 537 | } while(m!=1); 538 | m = st->factors[2*L-1]; 539 | for (i=L-1;i>=0;i--) 540 | { 541 | if (i!=0) 542 | m2 = st->factors[2*i-1]; 543 | else 544 | m2 = 1; 545 | switch (st->factors[2*i]) 546 | { 547 | case 2: 548 | kf_bfly2(fout, m, fstride[i]); 549 | break; 550 | case 4: 551 | kf_bfly4(fout,fstride[i]<scale_shift-1; 574 | #endif 575 | scale = st->scale; 576 | 577 | celt_assert2 (fin != fout, "In-place FFT not supported"); 578 | /* Bit-reverse the input */ 579 | for (i=0;infft;i++) 580 | { 581 | kiss_fft_cpx x = fin[i]; 582 | fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift); 583 | fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift); 584 | } 585 | opus_fft_impl(st, fout); 586 | } 587 | 588 | 589 | void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) 590 | { 591 | int i; 592 | celt_assert2 (fin != fout, "In-place FFT not supported"); 593 | /* Bit-reverse the input */ 594 | for (i=0;infft;i++) 595 | fout[st->bitrev[i]] = fin[i]; 596 | for (i=0;infft;i++) 597 | fout[i].i = -fout[i].i; 598 | opus_fft_impl(st, fout); 599 | for (i=0;infft;i++) 600 | fout[i].i = -fout[i].i; 601 | } 602 | -------------------------------------------------------------------------------- /src/denoise.c: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2018 Gregor Richards 2 | * Copyright (c) 2017 Mozilla */ 3 | /* 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted 
provided that the following conditions 6 | are met: 7 | 8 | - Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | - Redistributions in binary form must reproduce the above copyright 12 | notice, this list of conditions and the following disclaimer in the 13 | documentation and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR 19 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 23 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | */ 27 | 28 | #ifdef HAVE_CONFIG_H 29 | #include "config.h" 30 | #endif 31 | 32 | #ifndef M_PI 33 | #define M_PI (3.14159265358979323846) 34 | #endif 35 | 36 | #include 37 | #include 38 | #include 39 | #include "kiss_fft.h" 40 | #include "common.h" 41 | #include 42 | #include "rnnoise.h" 43 | #include "pitch.h" 44 | #include "arch.h" 45 | #include "rnn.h" 46 | #include "rnn_data.h" 47 | 48 | #define FRAME_SIZE_SHIFT 2 49 | /* 480 samples means 10 ms at a 48 kHz sample rate */ 50 | #define FRAME_SIZE (120<rnn.model = model; 269 | else 270 | st->rnn.model = &rnnoise_model_orig; 271 | st->rnn.vad_gru_state = calloc(sizeof(float), st->rnn.model->vad_gru_size); 272 | st->rnn.noise_gru_state = calloc(sizeof(float), st->rnn.model->noise_gru_size); 273 | st->rnn.denoise_gru_state = calloc(sizeof(float), st->rnn.model->denoise_gru_size); 274 | return 0; 275 | } 276 | 277 | DenoiseState *rnnoise_create(RNNModel *model) { 278 | DenoiseState *st; 279 | st = malloc(rnnoise_get_size()); 280 | rnnoise_init(st, model); 281 | return st; 282 | } 283 | 284 | void rnnoise_destroy(DenoiseState *st) { 285 | if (common.init) opus_fft_free(common.kfft, 0); 286 | free(st->rnn.vad_gru_state); 287 | free(st->rnn.noise_gru_state); 288 | free(st->rnn.denoise_gru_state); 289 | free(st); 290 | } 291 | 292 | #if TRAINING 293 | int lowpass = FREQ_SIZE; 294 | int band_lp = NB_BANDS; 295 | #endif 296 | 297 | static void frame_analysis(DenoiseState *st, kiss_fft_cpx *X, float *Ex, const float *in) { 298 | int i; 299 | float x[WINDOW_SIZE]; 300 | RNN_COPY(x, st->analysis_mem, FRAME_SIZE); 301 | for (i=0;ianalysis_mem, in, FRAME_SIZE); 303 | apply_window(x); 304 | forward_transform(X, x); 305 | #if TRAINING 306 | for (i=lowpass;i>1]; 321 | int pitch_index; 322 | float gain; 323 | float *(pre[1]); 324 | float tmp[NB_BANDS]; 325 | float follow, logMax; 326 | frame_analysis(st, X, Ex, in); 327 | RNN_MOVE(st->pitch_buf, &st->pitch_buf[FRAME_SIZE], PITCH_BUF_SIZE-FRAME_SIZE); 328 | 
RNN_COPY(&st->pitch_buf[PITCH_BUF_SIZE-FRAME_SIZE], in, FRAME_SIZE); 329 | pre[0] = &st->pitch_buf[0]; 330 | pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1); 331 | pitch_search(pitch_buf+(PITCH_MAX_PERIOD>>1), pitch_buf, PITCH_FRAME_SIZE, 332 | PITCH_MAX_PERIOD-3*PITCH_MIN_PERIOD, &pitch_index); 333 | pitch_index = PITCH_MAX_PERIOD-pitch_index; 334 | 335 | gain = remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD, 336 | PITCH_FRAME_SIZE, &pitch_index, st->last_period, st->last_gain); 337 | st->last_period = pitch_index; 338 | st->last_gain = gain; 339 | for (i=0;ipitch_buf[PITCH_BUF_SIZE-WINDOW_SIZE-pitch_index+i]; 341 | apply_window(p); 342 | forward_transform(P, p); 343 | compute_band_energy(Ep, P); 344 | compute_band_corr(Exp, X, P); 345 | for (i=0;icepstral_mem[st->memid]; 369 | ceps_1 = (st->memid < 1) ? st->cepstral_mem[CEPS_MEM+st->memid-1] : st->cepstral_mem[st->memid-1]; 370 | ceps_2 = (st->memid < 2) ? st->cepstral_mem[CEPS_MEM+st->memid-2] : st->cepstral_mem[st->memid-2]; 371 | for (i=0;imemid++; 373 | for (i=0;imemid == CEPS_MEM) st->memid = 0; 380 | for (i=0;icepstral_mem[i][k] - st->cepstral_mem[j][k]; 392 | dist += tmp*tmp; 393 | } 394 | if (j!=i) 395 | mindist = MIN32(mindist, dist); 396 | } 397 | spec_variability += mindist; 398 | } 399 | features[NB_BANDS+3*NB_DELTA_CEPS+1] = spec_variability/CEPS_MEM-2.1; 400 | return TRAINING && E < 0.1; 401 | } 402 | 403 | static void frame_synthesis(DenoiseState *st, float *out, const kiss_fft_cpx *y) { 404 | float x[WINDOW_SIZE]; 405 | int i; 406 | inverse_transform(x, y); 407 | apply_window(x); 408 | for (i=0;isynthesis_mem[i]; 409 | RNN_COPY(st->synthesis_mem, &x[FRAME_SIZE], FRAME_SIZE); 410 | } 411 | 412 | static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) { 413 | int i; 414 | for (i=0;ig[i]) r[i] = 1; 432 | else r[i] = Exp[i]*(1-g[i])/(.001 + g[i]*(1-Exp[i])); 433 | r[i] = MIN16(1, MAX16(0, r[i])); 434 | #else 435 | if (Exp[i]>g[i]) r[i] = 1; 
436 | else r[i] = SQUARE(Exp[i])*(1-SQUARE(g[i]))/(.001 + SQUARE(g[i])*(1-SQUARE(Exp[i]))); 437 | r[i] = sqrt(MIN16(1, MAX16(0, r[i]))); 438 | #endif 439 | r[i] *= sqrt(Ex[i]/(1e-8+Ep[i])); 440 | } 441 | interp_band_gain(rf, r); 442 | for (i=0;imem_hp_x, in, b_hp, a_hp, FRAME_SIZE); 475 | silence = compute_frame_features(st, X, P, Ex, Ep, Exp, features, x); 476 | 477 | if (!silence) { 478 | compute_rnn(&st->rnn, g, &vad_prob, features); 479 | pitch_filter(X, P, Ex, Ep, Exp, g); 480 | for (i=0;ilastg[i]); 483 | st->lastg[i] = g[i]; 484 | } 485 | interp_band_gain(gf, g); 486 | #if 1 487 | for (i=0;i feature */ 537 | st = rnnoise_create(NULL); 538 | noise_state = rnnoise_create(NULL); 539 | noisy = rnnoise_create(NULL); 540 | if (argc!=4) { 541 | fprintf(stderr, "usage: %s \n", argv[0]); 542 | return 1; 543 | } 544 | f1 = fopen(argv[1], "r"); 545 | f2 = fopen(argv[2], "r"); 546 | maxCount = atoi(argv[3]); 547 | 548 | /* pre skip in noise file */ 549 | for(i=0;i<150;i++) { 550 | short tmp[FRAME_SIZE]; 551 | fread(tmp, sizeof(short), FRAME_SIZE, f2); 552 | } 553 | while (1) { 554 | kiss_fft_cpx X[FREQ_SIZE], Y[FREQ_SIZE], N[FREQ_SIZE], P[WINDOW_SIZE]; 555 | float Ex[NB_BANDS], Ey[NB_BANDS], En[NB_BANDS], Ep[NB_BANDS]; 556 | float Exp[NB_BANDS]; 557 | float Ln[NB_BANDS]; 558 | float features[NB_FEATURES]; 559 | float g[NB_BANDS]; 560 | short tmp[FRAME_SIZE]; 561 | float vad=0; 562 | float E=0; 563 | if (count==maxCount) break; 564 | if ((count%1000)==0) fprintf(stderr, "%d\r", count); 565 | /* random gain/filter/freq_range for speech and noise */ 566 | if (++gain_change_count > 2821) { 567 | speech_gain = pow(10., (-40+(rand()%60))/20.); 568 | noise_gain = pow(10., (-30+(rand()%50))/20.); 569 | if (rand()%10==0) noise_gain = 0; 570 | noise_gain *= speech_gain; 571 | if (rand()%10==0) speech_gain = 0; 572 | gain_change_count = 0; 573 | rand_resp(a_noise, b_noise); 574 | rand_resp(a_sig, b_sig); 575 | lowpass = FREQ_SIZE * 3000./24000. 
* pow(50., rand()/(double)RAND_MAX); 576 | for (i=0;i lowpass) { 578 | band_lp = i; 579 | break; 580 | } 581 | } 582 | } 583 | if (speech_gain != 0) { 584 | fread(tmp, sizeof(short), FRAME_SIZE, f1); 585 | if (feof(f1)) { 586 | rewind(f1); 587 | fread(tmp, sizeof(short), FRAME_SIZE, f1); 588 | } 589 | for (i=0;i 1e9f) { 615 | vad_cnt=0; 616 | } else if (E > 1e8f) { 617 | vad_cnt -= 5; 618 | } else if (E > 1e7f) { 619 | vad_cnt++; 620 | } else { 621 | vad_cnt+=2; 622 | } 623 | if (vad_cnt < 0) vad_cnt = 0; 624 | if (vad_cnt > 15) vad_cnt = 15; 625 | 626 | if (vad_cnt >= 10) vad = 0; 627 | else if (vad_cnt > 0) vad = 0.5f; 628 | else vad = 1.f; 629 | 630 | frame_analysis(st, Y, Ey, x); 631 | frame_analysis(noise_state, N, En, n); 632 | for (i=0;ilast_gain, noisy->last_period); 636 | for (i=0;i 1) g[i] = 1; 639 | if (silence || i > band_lp) g[i] = -1; 640 | if (Ey[i] < 5e-2 && Ex[i] < 5e-2) g[i] = -1; 641 | if (vad==0 && noise_gain==0) g[i] = -1; 642 | } 643 | count++; 644 | #if 1 645 | fwrite(features, sizeof(float), NB_FEATURES, stdout); 646 | fwrite(g, sizeof(float), NB_BANDS, stdout); 647 | fwrite(Ln, sizeof(float), NB_BANDS, stdout); 648 | fwrite(&vad, sizeof(float), 1, stdout); 649 | #endif 650 | } 651 | fprintf(stderr, "matrix size: %d x %d\n", count, NB_FEATURES + 2*NB_BANDS + 1); 652 | fclose(f1); 653 | fclose(f2); 654 | return 0; 655 | } 656 | 657 | #endif 658 | --------------------------------------------------------------------------------