├── .hgtags ├── .hgignore ├── test ├── timings.cpp └── TestFFT.cpp ├── .build.yml ├── COPYING ├── Makefile ├── README.md ├── bqfft └── FFT.h └── src └── FFT.cpp /.hgtags: -------------------------------------------------------------------------------- 1 | a766fe47501b185bc46cffc210735304e28f2189 v1.0.0 2 | -------------------------------------------------------------------------------- /.hgignore: -------------------------------------------------------------------------------- 1 | syntax: glob 2 | *~ 3 | *.o 4 | *.so 5 | *.dylib 6 | *.dll 7 | *.a 8 | *.lib 9 | *.bak 10 | test-fft 11 | timings 12 | moc_* 13 | test/Makefile 14 | 15 | -------------------------------------------------------------------------------- /test/timings.cpp: -------------------------------------------------------------------------------- 1 | 2 | #define FFT_MEASUREMENT 1 3 | 4 | #include "../src/FFT.cpp" 5 | 6 | #include 7 | 8 | int main(int, char **) 9 | { 10 | breakfastquay::FFT::tune(); 11 | } 12 | -------------------------------------------------------------------------------- /.build.yml: -------------------------------------------------------------------------------- 1 | image: ubuntu/20.04 2 | packages: 3 | - libboost-test-dev 4 | - valgrind 5 | - libfftw3-dev 6 | - libsleef-dev 7 | - curl 8 | sources: 9 | - hg+https://hg.sr.ht/~breakfastquay/bqfft 10 | tasks: 11 | - setup: | 12 | hg clone https://hg.sr.ht/~breakfastquay/bqvec 13 | curl -OL 'https://downloads.sourceforge.net/project/kissfft/kissfft/v1_3_0/kiss_fft130.tar.gz' && tar xvzf kiss_fft130.tar.gz 14 | - build: | 15 | cd bqfft 16 | build/run-platform-tests.sh linux 17 | triggers: 18 | - action: email 19 | condition: always 20 | to: chris.cannam@breakfastquay.com 21 | 22 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | 2 | Copyright 2007-2021 Particular Programs Ltd. 
3 | 4 | Permission is hereby granted, free of charge, to any person 5 | obtaining a copy of this software and associated documentation 6 | files (the "Software"), to deal in the Software without 7 | restriction, including without limitation the rights to use, copy, 8 | modify, merge, publish, distribute, sublicense, and/or sell copies 9 | of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 19 | ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 20 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | Except as contained in this notice, the names of Chris Cannam and 24 | Particular Programs Ltd shall not be used in advertising or 25 | otherwise to promote the sale, use or other dealings in this 26 | Software without prior written authorization. 27 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | # Add to FFT_DEFINES the relevant options for your desired 3 | # implementation and/or third-party library support. 
4 | # 5 | # Available library options are 6 | # 7 | # -DHAVE_IPP Intel's Integrated Performance Primitives are available 8 | # -DHAVE_VDSP Apple's Accelerate framework is available 9 | # -DHAVE_FFTW3 The FFTW library is available 10 | # -DHAVE_SLEEF The SLEEF library is available 11 | # -DHAVE_KISSFFT The KissFFT library is available 12 | # -DUSE_BUILTIN_FFT Compile the built-in FFT code (which is not bad) 13 | # 14 | # You may define more than one of these. If you do so, the decision 15 | # about which implementation to use when an FFT object is constructed 16 | # will depend on the FFT length (some libraries only support certain 17 | # lengths) and on some hardcoded expectations about performance. If no 18 | # flags are supplied, the code will refuse to compile. 19 | # 20 | FFT_DEFINES := -DUSE_BUILTIN_FFT 21 | 22 | 23 | # Add to VECTOR_DEFINES and ALLOCATOR_DEFINES any options desired for 24 | # the bqvec library (that are not already defined in FFT_DEFINES). 25 | # See the bqvec build documentation for more details. 26 | # 27 | VECTOR_DEFINES := 28 | ALLOCATOR_DEFINES := 29 | 30 | 31 | # Add any related includes and libraries here 32 | # 33 | THIRD_PARTY_INCLUDES := 34 | THIRD_PARTY_LIBS := 35 | 36 | 37 | # If you are including a set of bq libraries into a project, you can 38 | # override variables for all of them (including all of the above) in 39 | # the following file, which all bq* Makefiles will include if found 40 | 41 | -include ../Makefile.inc-bq 42 | 43 | 44 | # This project-local Makefile describes the source files and contains 45 | # no routinely user-modifiable parts 46 | 47 | include build/Makefile.inc 48 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | bqfft 3 | ===== 4 | 5 | A small library wrapping various FFT implementations for some common 6 | audio processing use cases. 
Contains a built-in implementation and 7 | wrappers for FFTW3, SLEEF, KissFFT, Intel IPP, and Apple vDSP. 8 | Suitable for Windows, Mac, Linux, and mobile platforms. 9 | 10 | Note this is not a general FFT interface, as it handles only real 11 | signals on the time-domain side. 12 | 13 | Transforms of any length are supported, but if you request a length 14 | that bqfft does not know how to calculate using any of the libraries 15 | that have been compiled in, a simple slow DFT will be used instead. A 16 | warning will be printed to stderr if this happens. 17 | 18 | Of the available libraries, vDSP, IPP, SLEEF, and the built-in 19 | implementation support power-of-two FFT lengths only, KissFFT supports 20 | any multiple of two, and FFTW supports any length. You can compile in 21 | more than one library, so for example if you compile in Accelerate and 22 | KissFFT, the former will be used for powers of two and the latter for 23 | other even lengths. 24 | 25 | Here are some other pros and cons of the supported libraries: 26 | 27 | * Intel IPP - The fastest on actual Intel hardware. Of uncertain 28 | benefit with other manufacturers. Not available beyond x86/amd64, 29 | not open source. 30 | 31 | * Apple vDSP - Generally the fastest on all Apple hardware, and 32 | provided with the OS. There is seldom any good reason not to use 33 | this on Apple platforms. 34 | 35 | * SLEEF - Typically very fast, unencumbered, portable, open source 36 | vector library; complex and (at the time of writing) rather new. 37 | 38 | * FFTW3 - Fast, open source, and portable, but its bulk and GPL 39 | licence may be an issue. 40 | 41 | * KissFFT - As used here it is single-precision throughout, so it may 42 | be a good choice for platforms on which double-precision arithmetic 43 | is slow. Disadvantage is that it does not preserve the full float 44 | range of precision (i.e. forward-inverse transform pairs on float 45 | data do not produce identical results to the input). 
Not especially 46 | fast on desktop or modern mobile hardware. 47 | 48 | * Built-in implementation - Double precision, so more precise than 49 | KissFFT, and faster on typical 64-bit desktop and modern mobile 50 | hardware. Slower than IPP, vDSP, SLEEF, and FFTW3. 51 | 52 | Requires the bqvec library. 53 | 54 | This code originated as part of the Rubber Band Library written by the 55 | same authors (see https://hg.sr.ht/~breakfastquay/rubberband/). 56 | It has been pulled out into a separate library and relicensed under a 57 | more permissive licence. 58 | 59 | C++ standard required: C++98 (does not use C++11 or newer features) 60 | 61 | * To compile on Linux: Edit Makefile to select implementation, then make. 62 | Do read the notes in the Makefile 63 | 64 | * To compile on macOS: make -f build/Makefile.osx 65 | 66 | * To build and run tests: as above, but add the "test" target - 67 | requires Boost test headers installed 68 | 69 | * Depends on: [bqvec](https://hg.sr.ht/~breakfastquay/bqvec) 70 | 71 | * See also: [bqresample](https://hg.sr.ht/~breakfastquay/bqresample) [bqaudioio](https://hg.sr.ht/~breakfastquay/bqaudioio) [bqthingfactory](https://hg.sr.ht/~breakfastquay/bqthingfactory) [bqaudiostream](https://hg.sr.ht/~breakfastquay/bqaudiostream) 72 | 73 | [![Build status](https://builds.sr.ht/~breakfastquay/bqfft.svg)](https://builds.sr.ht/~breakfastquay/bqfft?) 74 | 75 | Copyright 2007-2022 Particular Programs Ltd. See the file COPYING for 76 | (BSD/MIT-style) licence terms. 77 | -------------------------------------------------------------------------------- /bqfft/FFT.h: -------------------------------------------------------------------------------- 1 | /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ 2 | 3 | /* 4 | bqfft 5 | 6 | A small library wrapping various FFT implementations for some 7 | common audio processing use cases. 8 | 9 | Copyright 2007-2021 Particular Programs Ltd. 
10 | 11 | Permission is hereby granted, free of charge, to any person 12 | obtaining a copy of this software and associated documentation 13 | files (the "Software"), to deal in the Software without 14 | restriction, including without limitation the rights to use, copy, 15 | modify, merge, publish, distribute, sublicense, and/or sell copies 16 | of the Software, and to permit persons to whom the Software is 17 | furnished to do so, subject to the following conditions: 18 | 19 | The above copyright notice and this permission notice shall be 20 | included in all copies or substantial portions of the Software. 21 | 22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 23 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 24 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 25 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 26 | ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 27 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 28 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 29 | 30 | Except as contained in this notice, the names of Chris Cannam and 31 | Particular Programs Ltd shall not be used in advertising or 32 | otherwise to promote the sale, use or other dealings in this 33 | Software without prior written authorization. 34 | */ 35 | 36 | #ifndef BQFFT_FFT_H 37 | #define BQFFT_FFT_H 38 | 39 | #include 40 | 41 | #include 42 | #include 43 | 44 | namespace breakfastquay { 45 | 46 | class FFTImpl; 47 | 48 | /** 49 | * Provide basic FFT computations using one of a set of candidate FFT 50 | * implementations (depending on compile flags). 51 | * 52 | * Implements real->complex FFTs. Any transform length is supported; a 53 | * slow DFT is used for lengths that no compiled-in implementation can 54 | * handle. Only the first half of the output signal is returned (the 55 | * complex-conjugate half is omitted), so the "complex" arrays need room for size/2+1 elements. 
56 | * 57 | * The "interleaved" functions use the format sometimes called CCS -- 58 | * size/2+1 real+imaginary pairs. So, the array elements at indices 1 59 | * and size+1 will always be zero (since the signal is real). 60 | * 61 | * All pointer arguments must point to valid data. A NullArgument 62 | * exception is thrown if any argument is NULL. 63 | * 64 | * Neither forward nor inverse transform is scaled. 65 | * 66 | * This class is reentrant but not thread safe: use a separate 67 | * instance per thread, or use a mutex. 68 | */ 69 | class FFT 70 | { 71 | public: 72 | enum Exception { 73 | NullArgument, InvalidSize, InvalidImplementation, InternalError 74 | }; 75 | 76 | FFT(int size, int debugLevel = 0); // may throw InvalidSize 77 | ~FFT(); 78 | 79 | int getSize() const; 80 | 81 | void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut); 82 | void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut); 83 | void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut); 84 | void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut); 85 | 86 | void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut); 87 | void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut); 88 | void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut); 89 | void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut); 90 | 91 | void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut); 92 | void inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut); 93 | void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut); 94 | void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut); 95 | 96 | void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut); 97 | 
void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut); 98 | void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut); 99 | void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut); 100 | 101 | // Calling one or both of these is optional -- if neither is 102 | // called, the first call to a forward or inverse method will do 103 | // the initialisation itself. You only need to call these if you 104 | // don't want to risk expensive allocations etc happening in forward or inverse. 105 | void initFloat(); 106 | void initDouble(); 107 | 108 | enum Precision { 109 | SinglePrecision = 0x1, 110 | DoublePrecision = 0x2 111 | }; 112 | typedef int Precisions; 113 | 114 | /** 115 | * Return the OR of all precisions supported by this 116 | * implementation. All of the functions (float and double) are 117 | * available regardless of the supported precisions, but they 118 | * will be calculated at the proper precision only if it is 119 | * available. (So float functions will be calculated using doubles 120 | * and then truncated if single-precision is unavailable, and 121 | * double functions will use single-precision arithmetic if double 122 | * is unavailable.) 
123 | */ 124 | Precisions getSupportedPrecisions() const; 125 | 126 | static std::set getImplementations(); 127 | static std::string getDefaultImplementation(); 128 | static void setDefaultImplementation(std::string); 129 | 130 | #ifdef FFT_MEASUREMENT 131 | static 132 | #ifdef FFT_MEASUREMENT_RETURN_RESULT_TEXT 133 | std::string 134 | #else 135 | void 136 | #endif 137 | tune(); 138 | #endif 139 | 140 | protected: 141 | FFTImpl *d; 142 | 143 | private: 144 | FFT(const FFT &); // not provided 145 | FFT &operator=(const FFT &); // not provided 146 | }; 147 | 148 | } 149 | 150 | #endif 151 | 152 | -------------------------------------------------------------------------------- /test/TestFFT.cpp: -------------------------------------------------------------------------------- 1 | /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ 2 | 3 | /* 4 | bqfft 5 | 6 | A small library wrapping various FFT implementations for some 7 | common audio processing use cases. 8 | 9 | Copyright 2007-2021 Particular Programs Ltd. 10 | 11 | Permission is hereby granted, free of charge, to any person 12 | obtaining a copy of this software and associated documentation 13 | files (the "Software"), to deal in the Software without 14 | restriction, including without limitation the rights to use, copy, 15 | modify, merge, publish, distribute, sublicense, and/or sell copies 16 | of the Software, and to permit persons to whom the Software is 17 | furnished to do so, subject to the following conditions: 18 | 19 | The above copyright notice and this permission notice shall be 20 | included in all copies or substantial portions of the Software. 21 | 22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 23 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 24 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 25 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 26 | ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 27 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 28 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 29 | 30 | Except as contained in this notice, the names of Chris Cannam and 31 | Particular Programs Ltd shall not be used in advertising or 32 | otherwise to promote the sale, use or other dealings in this 33 | Software without prior written authorization. 34 | */ 35 | 36 | #include "bqfft/FFT.h" 37 | 38 | #define BOOST_TEST_DYN_LINK 39 | #define BOOST_TEST_MAIN 40 | 41 | #include 42 | 43 | #include 44 | 45 | #include 46 | #include 47 | 48 | using namespace breakfastquay; 49 | 50 | BOOST_AUTO_TEST_SUITE(TestFFT) 51 | 52 | #define DEFINE_EPS(fft) \ 53 | float epsf = 1e-6f; \ 54 | double eps; \ 55 | if (fft.getSupportedPrecisions() & FFT::DoublePrecision) { \ 56 | eps = 1e-14; \ 57 | } else { \ 58 | eps = epsf; \ 59 | } \ 60 | (void)epsf; (void)eps; 61 | 62 | #define USING_FFT(n) \ 63 | FFT fft(n); \ 64 | DEFINE_EPS(fft); 65 | 66 | #define COMPARE(a, b) BOOST_CHECK_SMALL(a-b, eps) 67 | #define COMPARE_F(a, b) BOOST_CHECK_SMALL(a-b, epsf) 68 | 69 | #define COMPARE_ZERO(a) BOOST_CHECK_SMALL(a, eps) 70 | #define COMPARE_ZERO_F(a) BOOST_CHECK_SMALL(a, epsf) 71 | 72 | #define COMPARE_ALL(a, x) \ 73 | for (int cmp_i = 0; cmp_i < (int)(sizeof(a)/sizeof(a[0])); ++cmp_i) { \ 74 | BOOST_CHECK_SMALL(a[cmp_i] - x, eps); \ 75 | } 76 | #define COMPARE_ALL_F(a, x) \ 77 | for (int cmp_i = 0; cmp_i < (int)(sizeof(a)/sizeof(a[0])); ++cmp_i) { \ 78 | BOOST_CHECK_SMALL(a[cmp_i] - x, epsf); \ 79 | } 80 | #define COMPARE_ARR(a, b, n) \ 81 | for (int cmp_i = 0; cmp_i < n; ++cmp_i) { \ 82 | BOOST_CHECK_SMALL(a[cmp_i] - b[cmp_i], eps); \ 83 | } 84 | #define COMPARE_SCALED(a, b, s) \ 85 | for (int cmp_i = 0; cmp_i < (int)(sizeof(a)/sizeof(a[0])); ++cmp_i) { \ 86 | BOOST_CHECK_SMALL(a[cmp_i]/s - b[cmp_i], eps); \ 87 | } 88 | #define 
COMPARE_SCALED_N(a, b, n, s) \ 89 | for (int cmp_i = 0; cmp_i < n; ++cmp_i) { \ 90 | BOOST_CHECK_SMALL(a[cmp_i]/s - b[cmp_i], eps); \ 91 | } 92 | #define COMPARE_SCALED_F(a, b, s) \ 93 | for (int cmp_i = 0; cmp_i < (int)(sizeof(a)/sizeof(a[0])); ++cmp_i) { \ 94 | BOOST_CHECK_SMALL(a[cmp_i]/s - b[cmp_i], epsf); \ 95 | } 96 | 97 | #define ONE_IMPL_AUTO_TEST_CASE(name, impl) \ 98 | BOOST_AUTO_TEST_CASE(name##_##impl) \ 99 | { \ 100 | std::set impls = FFT::getImplementations(); \ 101 | if (impls.find(#impl) == impls.end()) return; \ 102 | FFT::setDefaultImplementation(#impl); \ 103 | performTest_##name(); \ 104 | FFT::setDefaultImplementation(""); \ 105 | } 106 | 107 | // If you add an implementation in FFT.cpp, add it also to 108 | // ALL_IMPL_AUTO_TEST_CASE and all_implementations[] below 109 | 110 | #define ALL_IMPL_AUTO_TEST_CASE(name) \ 111 | void performTest_##name (); \ 112 | ONE_IMPL_AUTO_TEST_CASE(name, ipp); \ 113 | ONE_IMPL_AUTO_TEST_CASE(name, vdsp); \ 114 | ONE_IMPL_AUTO_TEST_CASE(name, fftw); \ 115 | ONE_IMPL_AUTO_TEST_CASE(name, sleef); \ 116 | ONE_IMPL_AUTO_TEST_CASE(name, kissfft); \ 117 | ONE_IMPL_AUTO_TEST_CASE(name, builtin); \ 118 | ONE_IMPL_AUTO_TEST_CASE(name, dft); \ 119 | void performTest_##name () 120 | 121 | std::string all_implementations[] = { 122 | "ipp", "vdsp", "fftw", "sleef", "kissfft", "builtin", "dft" 123 | }; 124 | 125 | BOOST_AUTO_TEST_CASE(showImplementations) 126 | { 127 | std::set impls = FFT::getImplementations(); 128 | std::cerr << "\nThe following implementations are compiled in and will be tested:" << std::endl; 129 | for (int i = 0; i < int(sizeof(all_implementations)/sizeof(all_implementations[0])); ++i) { 130 | if (impls.find(all_implementations[i]) != impls.end()) { 131 | std::cerr << " +" << all_implementations[i]; 132 | } 133 | } 134 | std::cerr << std::endl << std::endl; 135 | std::cerr << "The following implementations are NOT compiled in and will not be tested:" << std::endl; 136 | for (int i = 0; i < 
int(sizeof(all_implementations)/sizeof(all_implementations[0])); ++i) { 137 | if (impls.find(all_implementations[i]) == impls.end()) { 138 | std::cerr << " -" << all_implementations[i]; 139 | } 140 | } 141 | std::cerr << std::endl << std::endl; 142 | } 143 | 144 | 145 | /* 146 | * 1a. Simple synthetic signals, transforms to separate real/imag arrays, 147 | * double-precision 148 | */ 149 | 150 | ALL_IMPL_AUTO_TEST_CASE(dc) 151 | { 152 | // DC-only signal. The DC bin is purely real 153 | double in[] = { 1, 1, 1, 1 }; 154 | double re[3], im[3]; 155 | USING_FFT(4); 156 | fft.forward(in, re, im); 157 | COMPARE(re[0], 4.0); 158 | COMPARE_ZERO(re[1]); 159 | COMPARE_ZERO(re[2]); 160 | COMPARE_ALL(im, 0.0); 161 | double back[4]; 162 | fft.inverse(re, im, back); 163 | COMPARE_SCALED(back, in, 4); 164 | } 165 | 166 | ALL_IMPL_AUTO_TEST_CASE(sine) 167 | { 168 | // Sine. Output is purely imaginary 169 | double in[] = { 0, 1, 0, -1 }; 170 | double re[3], im[3]; 171 | USING_FFT(4); 172 | fft.forward(in, re, im); 173 | COMPARE_ALL(re, 0.0); 174 | COMPARE_ZERO(im[0]); 175 | COMPARE(im[1], -2.0); 176 | COMPARE_ZERO(im[2]); 177 | double back[4]; 178 | fft.inverse(re, im, back); 179 | COMPARE_SCALED(back, in, 4); 180 | } 181 | 182 | ALL_IMPL_AUTO_TEST_CASE(sine_8) 183 | { 184 | // Longer sine. With only 4 elements, the real transform only 185 | // needs to get the DC and Nyquist bins right for its two complex 186 | // sub-transforms. We need a longer test to check the real 187 | // transform is working properly. 
188 | double cospi4 = 0.5 * sqrt(2.0); 189 | double in[] = { 0, cospi4, 1.0, cospi4, 0.0, -cospi4, -1.0, -cospi4 }; 190 | double re[5], im[5]; 191 | USING_FFT(8); 192 | fft.forward(in, re, im); 193 | COMPARE_ALL(re, 0.0); 194 | COMPARE_ZERO(im[0]); 195 | COMPARE(im[1], -4.0); 196 | COMPARE_ZERO(im[2]); 197 | COMPARE_ZERO(im[3]); 198 | COMPARE_ZERO(im[4]); 199 | double back[8]; 200 | fft.inverse(re, im, back); 201 | COMPARE_SCALED(back, in, 8); 202 | } 203 | 204 | ALL_IMPL_AUTO_TEST_CASE(cosine) 205 | { 206 | // Cosine. Output is purely real 207 | double in[] = { 1, 0, -1, 0 }; 208 | double re[3], im[3]; 209 | USING_FFT(4); 210 | fft.forward(in, re, im); 211 | COMPARE_ZERO(re[0]); 212 | COMPARE(re[1], 2.0); 213 | COMPARE_ZERO(re[2]); 214 | COMPARE_ALL(im, 0.0); 215 | double back[4]; 216 | fft.inverse(re, im, back); 217 | COMPARE_SCALED(back, in, 4); 218 | } 219 | 220 | ALL_IMPL_AUTO_TEST_CASE(cosine_8) 221 | { 222 | // Longer cosine. 223 | double cospi4 = 0.5 * sqrt(2.0); 224 | double in[] = { 1.0, cospi4, 0.0, -cospi4, -1.0, -cospi4, 0.0, cospi4 }; 225 | double re[5], im[5]; 226 | USING_FFT(8); 227 | fft.forward(in, re, im); 228 | COMPARE_ALL(im, 0.0); 229 | COMPARE_ZERO(re[0]); 230 | COMPARE(re[1], 4.0); 231 | COMPARE_ZERO(re[2]); 232 | COMPARE_ZERO(re[3]); 233 | COMPARE_ZERO(re[4]); 234 | double back[8]; 235 | fft.inverse(re, im, back); 236 | COMPARE_SCALED(back, in, 8); 237 | } 238 | 239 | ALL_IMPL_AUTO_TEST_CASE(sineCosine) 240 | { 241 | // Sine and cosine mixed 242 | double in[] = { 0.5, 1, -0.5, -1 }; 243 | double re[3], im[3]; 244 | USING_FFT(4); 245 | fft.forward(in, re, im); 246 | COMPARE_ZERO(re[0]); 247 | COMPARE(re[1], 1.0); 248 | COMPARE_ZERO(re[2]); 249 | COMPARE_ZERO(im[0]); 250 | COMPARE(im[1], -2.0); 251 | COMPARE_ZERO(im[2]); 252 | double back[4]; 253 | fft.inverse(re, im, back); 254 | COMPARE_SCALED(back, in, 4); 255 | } 256 | 257 | ALL_IMPL_AUTO_TEST_CASE(nyquist) 258 | { 259 | double in[] = { 1, -1, 1, -1 }; 260 | double re[3], im[3]; 261 | 
USING_FFT(4); 262 | fft.forward(in, re, im); 263 | COMPARE_ZERO(re[0]); 264 | COMPARE_ZERO(re[1]); 265 | COMPARE(re[2], 4.0); 266 | COMPARE_ALL(im, 0.0); 267 | double back[4]; 268 | fft.inverse(re, im, back); 269 | COMPARE_SCALED(back, in, 4); 270 | } 271 | 272 | ALL_IMPL_AUTO_TEST_CASE(dirac) 273 | { 274 | double in[] = { 1, 0, 0, 0 }; 275 | double re[3], im[3]; 276 | USING_FFT(4); 277 | fft.forward(in, re, im); 278 | COMPARE(re[0], 1.0); 279 | COMPARE(re[1], 1.0); 280 | COMPARE(re[2], 1.0); 281 | COMPARE_ALL(im, 0.0); 282 | double back[4]; 283 | fft.inverse(re, im, back); 284 | COMPARE_SCALED(back, in, 4); 285 | } 286 | 287 | 288 | /* 289 | * 1b. Simple synthetic signals, transforms to separate real/imag arrays, 290 | * single-precision (i.e. single-precision version of 1a) 291 | */ 292 | 293 | ALL_IMPL_AUTO_TEST_CASE(dcF) 294 | { 295 | // DC-only signal. The DC bin is purely real 296 | float in[] = { 1, 1, 1, 1 }; 297 | float re[3], im[3]; 298 | USING_FFT(4); 299 | fft.forward(in, re, im); 300 | COMPARE_F(re[0], 4.0f); 301 | COMPARE_ZERO_F(re[1]); 302 | COMPARE_ZERO_F(re[2]); 303 | COMPARE_ALL_F(im, 0.0f); 304 | float back[4]; 305 | fft.inverse(re, im, back); 306 | COMPARE_SCALED_F(back, in, 4); 307 | } 308 | 309 | ALL_IMPL_AUTO_TEST_CASE(sineF) 310 | { 311 | // Sine. Output is purely imaginary 312 | float in[] = { 0, 1, 0, -1 }; 313 | float re[3], im[3]; 314 | USING_FFT(4); 315 | fft.forward(in, re, im); 316 | COMPARE_ALL_F(re, 0.0f); 317 | COMPARE_ZERO_F(im[0]); 318 | COMPARE_F(im[1], -2.0f); 319 | COMPARE_ZERO_F(im[2]); 320 | float back[4]; 321 | fft.inverse(re, im, back); 322 | COMPARE_SCALED_F(back, in, 4); 323 | } 324 | 325 | ALL_IMPL_AUTO_TEST_CASE(cosineF) 326 | { 327 | // Cosine. 
Output is purely real 328 | float in[] = { 1, 0, -1, 0 }; 329 | float re[3], im[3]; 330 | USING_FFT(4); 331 | fft.forward(in, re, im); 332 | COMPARE_ZERO_F(re[0]); 333 | COMPARE_F(re[1], 2.0f); 334 | COMPARE_ZERO_F(re[2]); 335 | COMPARE_ALL_F(im, 0.0f); 336 | float back[4]; 337 | fft.inverse(re, im, back); 338 | COMPARE_SCALED_F(back, in, 4); 339 | } 340 | 341 | ALL_IMPL_AUTO_TEST_CASE(sineCosineF) 342 | { 343 | // Sine and cosine mixed 344 | float in[] = { 0.5, 1, -0.5, -1 }; 345 | float re[3], im[3]; 346 | USING_FFT(4); 347 | fft.forward(in, re, im); 348 | COMPARE_ZERO_F(re[0]); 349 | COMPARE_F(re[1], 1.0f); 350 | COMPARE_ZERO_F(re[2]); 351 | COMPARE_ZERO_F(im[0]); 352 | COMPARE_F(im[1], -2.0f); 353 | COMPARE_ZERO_F(im[2]); 354 | float back[4]; 355 | fft.inverse(re, im, back); 356 | COMPARE_SCALED_F(back, in, 4); 357 | } 358 | 359 | ALL_IMPL_AUTO_TEST_CASE(nyquistF) 360 | { 361 | float in[] = { 1, -1, 1, -1 }; 362 | float re[3], im[3]; 363 | USING_FFT(4); 364 | fft.forward(in, re, im); 365 | COMPARE_ZERO_F(re[0]); 366 | COMPARE_ZERO_F(re[1]); 367 | COMPARE_F(re[2], 4.0f); 368 | COMPARE_ALL_F(im, 0.0f); 369 | float back[4]; 370 | fft.inverse(re, im, back); 371 | COMPARE_SCALED_F(back, in, 4); 372 | } 373 | 374 | ALL_IMPL_AUTO_TEST_CASE(diracF) 375 | { 376 | float in[] = { 1, 0, 0, 0 }; 377 | float re[3], im[3]; 378 | USING_FFT(4); 379 | fft.forward(in, re, im); 380 | COMPARE_F(re[0], 1.0f); 381 | COMPARE_F(re[1], 1.0f); 382 | COMPARE_F(re[2], 1.0f); 383 | COMPARE_ALL_F(im, 0.0f); 384 | float back[4]; 385 | fft.inverse(re, im, back); 386 | COMPARE_SCALED_F(back, in, 4); 387 | } 388 | 389 | 390 | /* 391 | * 2a. 
Subset of synthetic signals, testing different output formats 392 | * (interleaved complex, polar, magnitude-only, and our weird 393 | * cepstral thing), double-precision 394 | */ 395 | 396 | ALL_IMPL_AUTO_TEST_CASE(interleaved) 397 | { 398 | // Sine and cosine mixed, test output format 399 | double in[] = { 0.5, 1, -0.5, -1 }; 400 | double out[6]; 401 | USING_FFT(4); 402 | fft.forwardInterleaved(in, out); 403 | COMPARE_ZERO(out[0]); 404 | COMPARE_ZERO(out[1]); 405 | COMPARE(out[2], 1.0); 406 | COMPARE(out[3], -2.0); 407 | COMPARE_ZERO(out[4]); 408 | COMPARE_ZERO(out[5]); 409 | double back[4]; 410 | fft.inverseInterleaved(out, back); 411 | COMPARE_SCALED(back, in, 4); 412 | } 413 | 414 | ALL_IMPL_AUTO_TEST_CASE(sinePolar) 415 | { 416 | double in[] = { 0, 1, 0, -1 }; 417 | double mag[3], phase[3]; 418 | USING_FFT(4); 419 | fft.forwardPolar(in, mag, phase); 420 | COMPARE_ZERO(mag[0]); 421 | COMPARE(mag[1], 2.0); 422 | COMPARE_ZERO(mag[2]); 423 | // No meaningful tests for phase[i] where mag[i]==0 (phase 424 | // could legitimately be anything) 425 | COMPARE(phase[1], -M_PI/2.0); 426 | double back[4]; 427 | fft.inversePolar(mag, phase, back); 428 | COMPARE_SCALED(back, in, 4); 429 | } 430 | 431 | ALL_IMPL_AUTO_TEST_CASE(cosinePolar) 432 | { 433 | double in[] = { 1, 0, -1, 0 }; 434 | double mag[3], phase[3]; 435 | USING_FFT(4); 436 | fft.forwardPolar(in, mag, phase); 437 | COMPARE_ZERO(mag[0]); 438 | COMPARE(mag[1], 2.0); 439 | COMPARE_ZERO(mag[2]); 440 | // No meaningful tests for phase[i] where mag[i]==0 (phase 441 | // could legitimately be anything) 442 | COMPARE_ZERO(phase[1]); 443 | double back[4]; 444 | fft.inversePolar(mag, phase, back); 445 | COMPARE_SCALED(back, in, 4); 446 | } 447 | 448 | ALL_IMPL_AUTO_TEST_CASE(magnitude) 449 | { 450 | // Sine and cosine mixed 451 | double in[] = { 0.5, 1, -0.5, -1 }; 452 | double out[3]; 453 | USING_FFT(4); 454 | fft.forwardMagnitude(in, out); 455 | COMPARE_ZERO(out[0]); 456 | COMPARE_F(float(out[1]), sqrtf(5.0)); 457 | 
COMPARE_ZERO(out[2]); 458 | } 459 | 460 | ALL_IMPL_AUTO_TEST_CASE(cepstrum) 461 | { 462 | double in[] = { 1, 0, 0, 0, 1, 0, 0, 0 }; 463 | double mag[5]; 464 | USING_FFT(8); 465 | fft.forwardMagnitude(in, mag); 466 | double cep[8]; 467 | fft.inverseCepstral(mag, cep); 468 | BOOST_CHECK_SMALL(cep[1], 1e-9); 469 | BOOST_CHECK_SMALL(cep[2], 1e-9); 470 | BOOST_CHECK_SMALL(cep[3], 1e-9); 471 | BOOST_CHECK_SMALL(cep[5], 1e-9); 472 | BOOST_CHECK_SMALL(cep[6], 1e-9); 473 | BOOST_CHECK_SMALL(cep[7], 1e-9); 474 | BOOST_CHECK_SMALL(-6.561181 - cep[0]/8, 0.000001); 475 | BOOST_CHECK_SMALL( 7.254329 - cep[4]/8, 0.000001); 476 | } 477 | 478 | 479 | /* 480 | * 2b. Subset of synthetic signals, testing different output formats 481 | * (interleaved complex, polar, magnitude-only, and our weird 482 | * cepstral thing), single-precision (i.e. single-precision 483 | * version of 2a) 484 | */ 485 | 486 | ALL_IMPL_AUTO_TEST_CASE(interleavedF) 487 | { 488 | // Sine and cosine mixed, test output format 489 | float in[] = { 0.5, 1, -0.5, -1 }; 490 | float out[6]; 491 | USING_FFT(4); 492 | fft.forwardInterleaved(in, out); 493 | COMPARE_ZERO_F(out[0]); 494 | COMPARE_ZERO_F(out[1]); 495 | COMPARE_F(out[2], 1.0f); 496 | COMPARE_F(out[3], -2.0f); 497 | COMPARE_ZERO_F(out[4]); 498 | COMPARE_ZERO_F(out[5]); 499 | float back[4]; 500 | fft.inverseInterleaved(out, back); 501 | COMPARE_SCALED_F(back, in, 4); 502 | } 503 | 504 | ALL_IMPL_AUTO_TEST_CASE(cosinePolarF) 505 | { 506 | float in[] = { 1, 0, -1, 0 }; 507 | float mag[3], phase[3]; 508 | USING_FFT(4); 509 | fft.forwardPolar(in, mag, phase); 510 | COMPARE_ZERO_F(mag[0]); 511 | COMPARE_F(mag[1], 2.0f); 512 | COMPARE_ZERO_F(mag[2]); 513 | // No meaningful tests for phase[i] where mag[i]==0 (phase 514 | // could legitimately be anything) 515 | COMPARE_ZERO_F(phase[1]); 516 | float back[4]; 517 | fft.inversePolar(mag, phase, back); 518 | COMPARE_SCALED_F(back, in, 4); 519 | } 520 | 521 | ALL_IMPL_AUTO_TEST_CASE(sinePolarF) 522 | { 523 | float in[] = { 
0, 1, 0, -1 }; 524 | float mag[3], phase[3]; 525 | USING_FFT(4); 526 | fft.forwardPolar(in, mag, phase); 527 | COMPARE_ZERO_F(mag[0]); 528 | COMPARE_F(mag[1], 2.0f); 529 | COMPARE_ZERO_F(mag[2]); 530 | // No meaningful tests for phase[i] where mag[i]==0 (phase 531 | // could legitimately be anything) 532 | COMPARE_F(phase[1], -float(M_PI)/2.0f); 533 | float back[4]; 534 | fft.inversePolar(mag, phase, back); 535 | COMPARE_SCALED_F(back, in, 4); 536 | } 537 | 538 | ALL_IMPL_AUTO_TEST_CASE(magnitudeF) 539 | { 540 | // Sine and cosine mixed 541 | float in[] = { 0.5, 1, -0.5, -1 }; 542 | float out[3]; 543 | USING_FFT(4); 544 | fft.forwardMagnitude(in, out); 545 | COMPARE_ZERO_F(out[0]); 546 | COMPARE_F(float(out[1]), sqrtf(5.0f)); 547 | COMPARE_ZERO_F(out[2]); 548 | } 549 | 550 | ALL_IMPL_AUTO_TEST_CASE(cepstrumF) 551 | { 552 | float in[] = { 1, 0, 0, 0, 1, 0, 0, 0 }; 553 | float mag[5]; 554 | USING_FFT(8); 555 | fft.forwardMagnitude(in, mag); 556 | float cep[8]; 557 | fft.inverseCepstral(mag, cep); 558 | COMPARE_ZERO_F(cep[1]); 559 | COMPARE_ZERO_F(cep[2]); 560 | COMPARE_ZERO_F(cep[3]); 561 | COMPARE_ZERO_F(cep[5]); 562 | COMPARE_ZERO_F(cep[6]); 563 | COMPARE_ZERO_F(cep[7]); 564 | BOOST_CHECK_SMALL(-6.561181 - cep[0]/8, 0.000001); 565 | BOOST_CHECK_SMALL( 7.254329 - cep[4]/8, 0.000001); 566 | } 567 | 568 | 569 | /* 570 | * 4. 
Bounds checking, double-precision and single-precision 571 | */ 572 | 573 | ALL_IMPL_AUTO_TEST_CASE(forwardArrayBounds) 574 | { 575 | double in[] = { 1, 1, -1, -1 }; 576 | 577 | // Initialise output bins to something recognisable, so we can 578 | // tell if they haven't been written 579 | double re[] = { 999, 999, 999, 999, 999 }; 580 | double im[] = { 999, 999, 999, 999, 999 }; 581 | 582 | USING_FFT(4); 583 | fft.forward(in, re+1, im+1); 584 | 585 | // Check we haven't overrun the output arrays 586 | COMPARE(re[0], 999.0); 587 | COMPARE(im[0], 999.0); 588 | COMPARE(re[4], 999.0); 589 | COMPARE(im[4], 999.0); 590 | } 591 | 592 | ALL_IMPL_AUTO_TEST_CASE(inverseArrayBounds) 593 | { 594 | // The inverse transform is only supposed to refer to the first 595 | // N/2+1 bins and synthesise the rest rather than read them - so 596 | // initialise the next one to some value that would mess up the 597 | // results if it were used 598 | double re[] = { 0, 1, 0, 456 }; 599 | double im[] = { 0, -2, 0, 456 }; 600 | 601 | // Initialise output bins to something recognisable, so we can 602 | // tell if they haven't been written 603 | double out[] = { 999, 999, 999, 999, 999, 999 }; 604 | 605 | USING_FFT(4); 606 | fft.inverse(re, im, out+1); 607 | 608 | // Check we haven't overrun the output arrays 609 | COMPARE(out[0], 999.0); 610 | COMPARE(out[5], 999.0); 611 | 612 | // And check the results are as we expect, i.e. 
that we haven't 613 | // used the bogus final bin 614 | COMPARE(out[1] / 4, 0.5); 615 | COMPARE(out[2] / 4, 1.0); 616 | COMPARE(out[3] / 4, -0.5); 617 | COMPARE(out[4] / 4, -1.0); 618 | } 619 | 620 | ALL_IMPL_AUTO_TEST_CASE(forwardArrayBoundsF) 621 | { 622 | float in[] = { 1, 1, -1, -1 }; 623 | 624 | // Initialise output bins to something recognisable, so we can 625 | // tell if they haven't been written 626 | float re[] = { 999, 999, 999, 999, 999 }; 627 | float im[] = { 999, 999, 999, 999, 999 }; 628 | 629 | USING_FFT(4); 630 | fft.forward(in, re+1, im+1); 631 | 632 | // Check we haven't overrun the output arrays 633 | COMPARE_F(re[0], 999.0f); 634 | COMPARE_F(im[0], 999.0f); 635 | COMPARE_F(re[4], 999.0f); 636 | COMPARE_F(im[4], 999.0f); 637 | } 638 | 639 | ALL_IMPL_AUTO_TEST_CASE(inverseArrayBoundsF) 640 | { 641 | // The inverse transform is only supposed to refer to the first 642 | // N/2+1 bins and synthesise the rest rather than read them - so 643 | // initialise the next one to some value that would mess up the 644 | // results if it were used 645 | float re[] = { 0, 1, 0, 456 }; 646 | float im[] = { 0, -2, 0, 456 }; 647 | 648 | // Initialise output bins to something recognisable, so we can 649 | // tell if they haven't been written 650 | float out[] = { 999, 999, 999, 999, 999, 999 }; 651 | 652 | USING_FFT(4); 653 | fft.inverse(re, im, out+1); 654 | 655 | // Check we haven't overrun the output arrays 656 | COMPARE_F(out[0], 999.0f); 657 | COMPARE_F(out[5], 999.0f); 658 | 659 | // And check the results are as we expect, i.e. that we haven't 660 | // used the bogus final bin 661 | COMPARE_F(out[1] / 4.0f, 0.5f); 662 | COMPARE_F(out[2] / 4.0f, 1.0f); 663 | COMPARE_F(out[3] / 4.0f, -0.5f); 664 | COMPARE_F(out[4] / 4.0f, -1.0f); 665 | } 666 | 667 | 668 | /* 669 | * 5. Less common transform lengths - we should always fall back on 670 | * some implementation that can handle these, even if the requested 671 | * one doesn't. 
Note that the dirac tests we do first are "easier" 672 | * in that they don't vary with length 673 | */ 674 | 675 | ALL_IMPL_AUTO_TEST_CASE(dirac_1) 676 | { 677 | double in[] = { 1 }; 678 | double re[1], im[1]; 679 | USING_FFT(1); 680 | fft.forward(in, re, im); 681 | COMPARE(re[0], 1.0); 682 | COMPARE_ALL(im, 0.0); 683 | double back[1]; 684 | fft.inverse(re, im, back); 685 | COMPARE_SCALED(back, in, 1); 686 | } 687 | 688 | ALL_IMPL_AUTO_TEST_CASE(dirac_6) 689 | { 690 | double in[] = { 1, 0, 0, 0, 0, 0 }; 691 | double re[4], im[4]; 692 | USING_FFT(6); 693 | fft.forward(in, re, im); 694 | COMPARE(re[0], 1.0); 695 | COMPARE(re[1], 1.0); 696 | COMPARE(re[2], 1.0); 697 | COMPARE(re[3], 1.0); 698 | COMPARE_ALL(im, 0.0); 699 | double back[6]; 700 | fft.inverse(re, im, back); 701 | COMPARE_SCALED(back, in, 6); 702 | } 703 | 704 | ALL_IMPL_AUTO_TEST_CASE(dirac_7) 705 | { 706 | double in[] = { 1, 0, 0, 0, 0, 0, 0 }; 707 | double re[4], im[4]; 708 | USING_FFT(7); 709 | fft.forward(in, re, im); 710 | COMPARE(re[0], 1.0); 711 | COMPARE(re[1], 1.0); 712 | COMPARE(re[2], 1.0); 713 | COMPARE(re[3], 1.0); 714 | COMPARE_ALL(im, 0.0); 715 | double back[7]; 716 | fft.inverse(re, im, back); 717 | COMPARE_SCALED(back, in, 7); 718 | } 719 | 720 | ALL_IMPL_AUTO_TEST_CASE(sineCosine_6) 721 | { 722 | // Sine and cosine mixed, i.e. f(x) = 0.5 * cos(2*x*pi/6) + sin(2*x*pi/6) 723 | double r = sqrt(3.0)/2.0; 724 | double in[] = { 0.5, r + 0.25, r - 0.25, -0.5, -r - 0.25, -r + 0.25 }; 725 | double re[4], im[4]; 726 | USING_FFT(6); 727 | fft.forward(in, re, im); 728 | COMPARE(re[0], 0.0); 729 | COMPARE(re[1], 1.5); 730 | COMPARE(re[2], 0.0); 731 | COMPARE(re[3], 0.0); 732 | COMPARE(im[0], 0.0); 733 | COMPARE(im[1], -3.0); 734 | COMPARE(im[2], 0.0); 735 | COMPARE(im[3], 0.0); 736 | double back[6]; 737 | fft.inverse(re, im, back); 738 | COMPARE_SCALED(back, in, 6); 739 | } 740 | 741 | ALL_IMPL_AUTO_TEST_CASE(sineCosine_7) 742 | { 743 | // Sine and cosine mixed, i.e. 
f(x) = 0.5 * cos(2*x*pi/6) + sin(2*x*pi/6) 744 | double in[] = { 745 | 0.5, 746 | 1.0935763833973966, 747 | 0.8636674452036665, 748 | -0.016600694833651286, 749 | -0.8843681730687676, 750 | -1.086188379159981, 751 | -0.47008658153866323 752 | }; 753 | double re[4], im[4]; 754 | USING_FFT(7); 755 | fft.forward(in, re, im); 756 | COMPARE(re[0], 0.0); 757 | COMPARE(re[1], 1.75); 758 | COMPARE(re[2], 0.0); 759 | COMPARE(re[3], 0.0); 760 | COMPARE(im[0], 0.0); 761 | COMPARE(im[1], -3.5); 762 | COMPARE(im[2], 0.0); 763 | COMPARE(im[3], 0.0); 764 | double back[7]; 765 | fft.inverse(re, im, back); 766 | COMPARE_SCALED(back, in, 7); 767 | } 768 | 769 | 770 | /* 771 | * 6. Slightly longer transforms of pseudorandom data. 772 | */ 773 | 774 | ALL_IMPL_AUTO_TEST_CASE(random_precalc_16) 775 | { 776 | double in[] = { 777 | -0.24392125308057722, 0.03443898163344272, 0.3448145656738877, 778 | -0.9625837464603908, 3.366568317669671, 0.9947191221586653, 779 | -1.5038984435999945, 1.3859898682581235, -1.1230576306688778, 780 | -1.6757487116512024, -1.5874436867863229, -2.0794018781307155, 781 | -0.5450152775818973, 0.7530907176983748, 1.0743170685904255, 782 | 3.1787609811018775 783 | }; 784 | double expected_re[] = { 785 | 1.41162899482, 7.63975551593, -1.20622641052, -1.77829578443, 786 | 3.12678465246, -2.84220463109, -7.17083743716, 0.497290409945, 787 | -1.84690167439, 788 | }; 789 | double expected_im[] = { 790 | 0.0, -4.67826048083, 8.58829211964, 4.96449646815, 791 | 1.41626511493, -3.77219223978, 6.96219662744, 2.23138519225, 792 | 0.0, 793 | }; 794 | double re[9], im[9]; 795 | USING_FFT(16); 796 | if (eps < 1e-11) { 797 | eps = 1e-11; 798 | } 799 | fft.forward(in, re, im); 800 | COMPARE_ARR(re, expected_re, 9); 801 | COMPARE_ARR(im, expected_im, 9); 802 | double back[16]; 803 | fft.inverse(re, im, back); 804 | COMPARE_SCALED(back, in, 16); 805 | } 806 | 807 | /* This one has data from a PRNG, with a fixed seed. 
Must pass two 808 | * tests: (i) same as DFT; (ii) inverse produces original input (after 809 | * scaling) */ 810 | ALL_IMPL_AUTO_TEST_CASE(random) 811 | { 812 | const int n = 64; 813 | double *in = new double[n]; 814 | double *re = new double[n/2 + 1]; 815 | double *im = new double[n/2 + 1]; 816 | double *re_compare = new double[n/2 + 1]; 817 | double *im_compare = new double[n/2 + 1]; 818 | double *back = new double[n]; 819 | srand48(0); 820 | for (int i = 0; i < n; ++i) { 821 | in[i] = drand48() * 4.0 - 2.0; 822 | } 823 | USING_FFT(n); 824 | if (eps < 1e-11) { 825 | eps = 1e-11; 826 | } 827 | fft.forward(in, re, im); 828 | fft.inverse(re, im, back); 829 | FFT::setDefaultImplementation("dft"); 830 | fft.forward(in, re_compare, im_compare); 831 | COMPARE_ARR(re, re_compare, n/2 + 1); 832 | COMPARE_ARR(im, im_compare, n/2 + 1); 833 | COMPARE_SCALED_N(back, in, n, n); 834 | delete[] back; 835 | delete[] im_compare; 836 | delete[] re_compare; 837 | delete[] im; 838 | delete[] re; 839 | delete[] in; 840 | } 841 | 842 | BOOST_AUTO_TEST_SUITE_END() 843 | -------------------------------------------------------------------------------- /src/FFT.cpp: -------------------------------------------------------------------------------- 1 | /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ 2 | 3 | /* 4 | bqfft 5 | 6 | A small library wrapping various FFT implementations for some 7 | common audio processing use cases. 8 | 9 | Copyright 2007-2021 Particular Programs Ltd. 
10 | 11 | Permission is hereby granted, free of charge, to any person 12 | obtaining a copy of this software and associated documentation 13 | files (the "Software"), to deal in the Software without 14 | restriction, including without limitation the rights to use, copy, 15 | modify, merge, publish, distribute, sublicense, and/or sell copies 16 | of the Software, and to permit persons to whom the Software is 17 | furnished to do so, subject to the following conditions: 18 | 19 | The above copyright notice and this permission notice shall be 20 | included in all copies or substantial portions of the Software. 21 | 22 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 23 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 24 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 25 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 26 | ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 27 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 28 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 29 | 30 | Except as contained in this notice, the names of Chris Cannam and 31 | Particular Programs Ltd shall not be used in advertising or 32 | otherwise to promote the sale, use or other dealings in this 33 | Software without prior written authorization. 34 | */ 35 | 36 | #include "bqfft/FFT.h" 37 | 38 | #include 39 | #include 40 | #include 41 | 42 | // Define USE_FFTW_WISDOM if you are defining HAVE_FFTW3 and you want 43 | // to use FFTW_MEASURE mode with persistent wisdom files. This will 44 | // make things much slower on first use if no suitable wisdom has been 45 | // saved, but may be faster during subsequent use. 46 | //#define USE_FFTW_WISDOM 1 47 | 48 | // Define FFT_MEASUREMENT to include timing measurement code callable 49 | // via the static method FFT::tune(). Must be defined when the header 50 | // is included as well. 
51 | //#define FFT_MEASUREMENT 1 52 | 53 | #ifdef FFT_MEASUREMENT 54 | #include 55 | #endif 56 | 57 | #ifdef HAVE_IPP 58 | #include 59 | #include 60 | #endif 61 | 62 | #ifdef HAVE_FFTW3 63 | #include 64 | #endif 65 | 66 | #ifdef HAVE_SLEEF 67 | extern "C" { 68 | #include 69 | #include 70 | } 71 | #endif 72 | 73 | #ifdef HAVE_VDSP 74 | #include 75 | #endif 76 | 77 | #ifdef HAVE_KISSFFT 78 | #include "kiss_fftr.h" 79 | #endif 80 | 81 | #ifndef HAVE_IPP 82 | #ifndef HAVE_FFTW3 83 | #ifndef HAVE_SLEEF 84 | #ifndef HAVE_KISSFFT 85 | #ifndef USE_BUILTIN_FFT 86 | #ifndef HAVE_VDSP 87 | #error No FFT implementation selected! 88 | #endif 89 | #endif 90 | #endif 91 | #endif 92 | #endif 93 | #endif 94 | 95 | #include 96 | #include 97 | #include 98 | #include 99 | #include 100 | #include 101 | 102 | #ifdef FFT_MEASUREMENT 103 | #ifndef _WIN32 104 | #include 105 | #endif 106 | #endif 107 | 108 | #ifdef _WIN32 109 | #include 110 | #endif 111 | 112 | namespace breakfastquay { 113 | 114 | class FFTImpl 115 | { 116 | public: 117 | virtual ~FFTImpl() { } 118 | 119 | virtual FFT::Precisions getSupportedPrecisions() const = 0; 120 | 121 | virtual int getSize() const = 0; 122 | 123 | virtual void initFloat() = 0; 124 | virtual void initDouble() = 0; 125 | 126 | virtual void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) = 0; 127 | virtual void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut) = 0; 128 | virtual void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) = 0; 129 | virtual void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) = 0; 130 | 131 | virtual void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) = 0; 132 | virtual void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) = 0; 133 | virtual void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) = 0; 134 | 
virtual void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut) = 0; 135 | 136 | virtual void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) = 0; 137 | virtual void inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) = 0; 138 | virtual void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) = 0; 139 | virtual void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) = 0; 140 | 141 | virtual void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut) = 0; 142 | virtual void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) = 0; 143 | virtual void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) = 0; 144 | virtual void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) = 0; 145 | }; 146 | 147 | namespace FFTs { 148 | 149 | #ifdef HAVE_IPP 150 | 151 | class D_IPP : public FFTImpl 152 | { 153 | public: 154 | D_IPP(int size) : 155 | m_size(size), m_fspec(0), m_dspec(0) 156 | { 157 | for (int i = 0; ; ++i) { 158 | if (m_size & (1 << i)) { 159 | m_order = i; 160 | break; 161 | } 162 | } 163 | } 164 | 165 | ~D_IPP() { 166 | if (m_fspec) { 167 | #if (IPP_VERSION_MAJOR >= 9) 168 | ippsFree(m_fspecbuf); 169 | #else 170 | ippsFFTFree_R_32f(m_fspec); 171 | #endif 172 | ippsFree(m_fbuf); 173 | ippsFree(m_fpacked); 174 | ippsFree(m_fspare); 175 | } 176 | if (m_dspec) { 177 | #if (IPP_VERSION_MAJOR >= 9) 178 | ippsFree(m_dspecbuf); 179 | #else 180 | ippsFFTFree_R_64f(m_dspec); 181 | #endif 182 | ippsFree(m_dbuf); 183 | ippsFree(m_dpacked); 184 | ippsFree(m_dspare); 185 | } 186 | } 187 | 188 | int getSize() const { 189 | return m_size; 190 | } 191 | 192 | FFT::Precisions 193 | getSupportedPrecisions() const { 194 | return FFT::SinglePrecision | FFT::DoublePrecision; 195 | } 196 | 197 | //!!! 
rv check 198 | 199 | void initFloat() { 200 | if (m_fspec) return; 201 | #if (IPP_VERSION_MAJOR >= 9) 202 | int specSize, specBufferSize, bufferSize; 203 | ippsFFTGetSize_R_32f(m_order, IPP_FFT_NODIV_BY_ANY, ippAlgHintFast, 204 | &specSize, &specBufferSize, &bufferSize); 205 | m_fspecbuf = ippsMalloc_8u(specSize); 206 | Ipp8u *tmp = ippsMalloc_8u(specBufferSize); 207 | m_fbuf = ippsMalloc_8u(bufferSize); 208 | m_fpacked = ippsMalloc_32f(m_size + 2); 209 | m_fspare = ippsMalloc_32f(m_size / 2 + 1); 210 | ippsFFTInit_R_32f(&m_fspec, 211 | m_order, IPP_FFT_NODIV_BY_ANY, ippAlgHintFast, 212 | m_fspecbuf, tmp); 213 | ippsFree(tmp); 214 | #else 215 | int specSize, specBufferSize, bufferSize; 216 | ippsFFTGetSize_R_32f(m_order, IPP_FFT_NODIV_BY_ANY, ippAlgHintFast, 217 | &specSize, &specBufferSize, &bufferSize); 218 | m_fbuf = ippsMalloc_8u(bufferSize); 219 | m_fpacked = ippsMalloc_32f(m_size + 2); 220 | m_fspare = ippsMalloc_32f(m_size / 2 + 1); 221 | ippsFFTInitAlloc_R_32f(&m_fspec, m_order, IPP_FFT_NODIV_BY_ANY, 222 | ippAlgHintFast); 223 | #endif 224 | } 225 | 226 | void initDouble() { 227 | if (m_dspec) return; 228 | #if (IPP_VERSION_MAJOR >= 9) 229 | int specSize, specBufferSize, bufferSize; 230 | ippsFFTGetSize_R_64f(m_order, IPP_FFT_NODIV_BY_ANY, ippAlgHintFast, 231 | &specSize, &specBufferSize, &bufferSize); 232 | m_dspecbuf = ippsMalloc_8u(specSize); 233 | Ipp8u *tmp = ippsMalloc_8u(specBufferSize); 234 | m_dbuf = ippsMalloc_8u(bufferSize); 235 | m_dpacked = ippsMalloc_64f(m_size + 2); 236 | m_dspare = ippsMalloc_64f(m_size / 2 + 1); 237 | ippsFFTInit_R_64f(&m_dspec, 238 | m_order, IPP_FFT_NODIV_BY_ANY, ippAlgHintFast, 239 | m_dspecbuf, tmp); 240 | ippsFree(tmp); 241 | #else 242 | int specSize, specBufferSize, bufferSize; 243 | ippsFFTGetSize_R_64f(m_order, IPP_FFT_NODIV_BY_ANY, ippAlgHintFast, 244 | &specSize, &specBufferSize, &bufferSize); 245 | m_dbuf = ippsMalloc_8u(bufferSize); 246 | m_dpacked = ippsMalloc_64f(m_size + 2); 247 | m_dspare = 
ippsMalloc_64f(m_size / 2 + 1); 248 | ippsFFTInitAlloc_R_64f(&m_dspec, m_order, IPP_FFT_NODIV_BY_ANY, 249 | ippAlgHintFast); 250 | #endif 251 | } 252 | 253 | void packFloat(const float *BQ_R__ re, const float *BQ_R__ im) { 254 | int index = 0; 255 | const int hs = m_size/2; 256 | for (int i = 0; i <= hs; ++i) { 257 | m_fpacked[index++] = re[i]; 258 | index++; 259 | } 260 | index = 0; 261 | if (im) { 262 | for (int i = 0; i <= hs; ++i) { 263 | index++; 264 | m_fpacked[index++] = im[i]; 265 | } 266 | } else { 267 | for (int i = 0; i <= hs; ++i) { 268 | index++; 269 | m_fpacked[index++] = 0.f; 270 | } 271 | } 272 | } 273 | 274 | void packDouble(const double *BQ_R__ re, const double *BQ_R__ im) { 275 | int index = 0; 276 | const int hs = m_size/2; 277 | for (int i = 0; i <= hs; ++i) { 278 | m_dpacked[index++] = re[i]; 279 | index++; 280 | } 281 | index = 0; 282 | if (im) { 283 | for (int i = 0; i <= hs; ++i) { 284 | index++; 285 | m_dpacked[index++] = im[i]; 286 | } 287 | } else { 288 | for (int i = 0; i <= hs; ++i) { 289 | index++; 290 | m_dpacked[index++] = 0.0; 291 | } 292 | } 293 | } 294 | 295 | void unpackFloat(float *re, float *BQ_R__ im) { // re may be equal to m_fpacked 296 | int index = 0; 297 | const int hs = m_size/2; 298 | if (im) { 299 | for (int i = 0; i <= hs; ++i) { 300 | index++; 301 | im[i] = m_fpacked[index++]; 302 | } 303 | } 304 | index = 0; 305 | for (int i = 0; i <= hs; ++i) { 306 | re[i] = m_fpacked[index++]; 307 | index++; 308 | } 309 | } 310 | 311 | void unpackDouble(double *re, double *BQ_R__ im) { // re may be equal to m_dpacked 312 | int index = 0; 313 | const int hs = m_size/2; 314 | if (im) { 315 | for (int i = 0; i <= hs; ++i) { 316 | index++; 317 | im[i] = m_dpacked[index++]; 318 | } 319 | } 320 | index = 0; 321 | for (int i = 0; i <= hs; ++i) { 322 | re[i] = m_dpacked[index++]; 323 | index++; 324 | } 325 | } 326 | 327 | void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) { 328 | if (!m_dspec) 
initDouble(); 329 | ippsFFTFwd_RToCCS_64f(realIn, m_dpacked, m_dspec, m_dbuf); 330 | unpackDouble(realOut, imagOut); 331 | } 332 | 333 | void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut) { 334 | if (!m_dspec) initDouble(); 335 | ippsFFTFwd_RToCCS_64f(realIn, complexOut, m_dspec, m_dbuf); 336 | } 337 | 338 | void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) { 339 | if (!m_dspec) initDouble(); 340 | ippsFFTFwd_RToCCS_64f(realIn, m_dpacked, m_dspec, m_dbuf); 341 | unpackDouble(m_dpacked, m_dspare); 342 | ippsCartToPolar_64f(m_dpacked, m_dspare, magOut, phaseOut, m_size/2+1); 343 | } 344 | 345 | void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) { 346 | if (!m_dspec) initDouble(); 347 | ippsFFTFwd_RToCCS_64f(realIn, m_dpacked, m_dspec, m_dbuf); 348 | unpackDouble(m_dpacked, m_dspare); 349 | ippsMagnitude_64f(m_dpacked, m_dspare, magOut, m_size/2+1); 350 | } 351 | 352 | void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) { 353 | if (!m_fspec) initFloat(); 354 | ippsFFTFwd_RToCCS_32f(realIn, m_fpacked, m_fspec, m_fbuf); 355 | unpackFloat(realOut, imagOut); 356 | } 357 | 358 | void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) { 359 | if (!m_fspec) initFloat(); 360 | ippsFFTFwd_RToCCS_32f(realIn, complexOut, m_fspec, m_fbuf); 361 | } 362 | 363 | void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) { 364 | if (!m_fspec) initFloat(); 365 | ippsFFTFwd_RToCCS_32f(realIn, m_fpacked, m_fspec, m_fbuf); 366 | unpackFloat(m_fpacked, m_fspare); 367 | ippsCartToPolar_32f(m_fpacked, m_fspare, magOut, phaseOut, m_size/2+1); 368 | } 369 | 370 | void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut) { 371 | if (!m_fspec) initFloat(); 372 | ippsFFTFwd_RToCCS_32f(realIn, m_fpacked, m_fspec, m_fbuf); 373 | unpackFloat(m_fpacked, m_fspare); 374 | ippsMagnitude_32f(m_fpacked, 
m_fspare, magOut, m_size/2+1); 375 | } 376 | 377 | void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) { 378 | if (!m_dspec) initDouble(); 379 | packDouble(realIn, imagIn); 380 | ippsFFTInv_CCSToR_64f(m_dpacked, realOut, m_dspec, m_dbuf); 381 | } 382 | 383 | void inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) { 384 | if (!m_dspec) initDouble(); 385 | ippsFFTInv_CCSToR_64f(complexIn, realOut, m_dspec, m_dbuf); 386 | } 387 | 388 | void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) { 389 | if (!m_dspec) initDouble(); 390 | ippsPolarToCart_64f(magIn, phaseIn, realOut, m_dspare, m_size/2+1); 391 | packDouble(realOut, m_dspare); // to m_dpacked 392 | ippsFFTInv_CCSToR_64f(m_dpacked, realOut, m_dspec, m_dbuf); 393 | } 394 | 395 | void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) { 396 | if (!m_dspec) initDouble(); 397 | const int hs1 = m_size/2 + 1; 398 | ippsCopy_64f(magIn, m_dspare, hs1); 399 | ippsAddC_64f_I(0.000001, m_dspare, hs1); 400 | ippsLn_64f_I(m_dspare, hs1); 401 | packDouble(m_dspare, 0); 402 | ippsFFTInv_CCSToR_64f(m_dpacked, cepOut, m_dspec, m_dbuf); 403 | } 404 | 405 | void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut) { 406 | if (!m_fspec) initFloat(); 407 | packFloat(realIn, imagIn); 408 | ippsFFTInv_CCSToR_32f(m_fpacked, realOut, m_fspec, m_fbuf); 409 | } 410 | 411 | void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) { 412 | if (!m_fspec) initFloat(); 413 | ippsFFTInv_CCSToR_32f(complexIn, realOut, m_fspec, m_fbuf); 414 | } 415 | 416 | void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) { 417 | if (!m_fspec) initFloat(); 418 | ippsPolarToCart_32f(magIn, phaseIn, realOut, m_fspare, m_size/2+1); 419 | packFloat(realOut, m_fspare); // to m_fpacked 420 | ippsFFTInv_CCSToR_32f(m_fpacked, realOut, m_fspec, 
m_fbuf); 421 | } 422 | 423 | void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) { 424 | if (!m_fspec) initFloat(); 425 | const int hs1 = m_size/2 + 1; 426 | ippsCopy_32f(magIn, m_fspare, hs1); 427 | ippsAddC_32f_I(0.000001f, m_fspare, hs1); 428 | ippsLn_32f_I(m_fspare, hs1); 429 | packFloat(m_fspare, 0); 430 | ippsFFTInv_CCSToR_32f(m_fpacked, cepOut, m_fspec, m_fbuf); 431 | } 432 | 433 | private: 434 | const int m_size; 435 | int m_order; 436 | IppsFFTSpec_R_32f *m_fspec; 437 | IppsFFTSpec_R_64f *m_dspec; 438 | Ipp8u *m_fspecbuf; 439 | Ipp8u *m_dspecbuf; 440 | Ipp8u *m_fbuf; 441 | Ipp8u *m_dbuf; 442 | float *m_fpacked; 443 | float *m_fspare; 444 | double *m_dpacked; 445 | double *m_dspare; 446 | }; 447 | 448 | #endif /* HAVE_IPP */ 449 | 450 | #ifdef HAVE_VDSP 451 | 452 | class D_VDSP : public FFTImpl 453 | { 454 | public: 455 | D_VDSP(int size) : 456 | m_size(size), m_fspec(0), m_dspec(0), 457 | m_fpacked(0), m_fspare(0), 458 | m_dpacked(0), m_dspare(0) 459 | { 460 | for (int i = 0; ; ++i) { 461 | if (m_size & (1 << i)) { 462 | m_order = i; 463 | break; 464 | } 465 | } 466 | } 467 | 468 | ~D_VDSP() { 469 | if (m_fspec) { 470 | vDSP_destroy_fftsetup(m_fspec); 471 | deallocate(m_fspare); 472 | deallocate(m_fspare2); 473 | deallocate(m_fbuf->realp); 474 | deallocate(m_fbuf->imagp); 475 | delete m_fbuf; 476 | deallocate(m_fpacked->realp); 477 | deallocate(m_fpacked->imagp); 478 | delete m_fpacked; 479 | } 480 | if (m_dspec) { 481 | vDSP_destroy_fftsetupD(m_dspec); 482 | deallocate(m_dspare); 483 | deallocate(m_dspare2); 484 | deallocate(m_dbuf->realp); 485 | deallocate(m_dbuf->imagp); 486 | delete m_dbuf; 487 | deallocate(m_dpacked->realp); 488 | deallocate(m_dpacked->imagp); 489 | delete m_dpacked; 490 | } 491 | } 492 | 493 | int getSize() const { 494 | return m_size; 495 | } 496 | 497 | FFT::Precisions 498 | getSupportedPrecisions() const { 499 | return FFT::SinglePrecision | FFT::DoublePrecision; 500 | } 501 | 502 | //!!! 
rv check 503 | 504 | void initFloat() { 505 | if (m_fspec) return; 506 | m_fspec = vDSP_create_fftsetup(m_order, FFT_RADIX2); 507 | m_fbuf = new DSPSplitComplex; 508 | //!!! "If possible, tempBuffer->realp and tempBuffer->imagp should be 32-byte aligned for best performance." 509 | m_fbuf->realp = allocate(m_size); 510 | m_fbuf->imagp = allocate(m_size); 511 | m_fpacked = new DSPSplitComplex; 512 | m_fpacked->realp = allocate(m_size / 2 + 1); 513 | m_fpacked->imagp = allocate(m_size / 2 + 1); 514 | m_fspare = allocate(m_size + 2); 515 | m_fspare2 = allocate(m_size + 2); 516 | } 517 | 518 | void initDouble() { 519 | if (m_dspec) return; 520 | m_dspec = vDSP_create_fftsetupD(m_order, FFT_RADIX2); 521 | m_dbuf = new DSPDoubleSplitComplex; 522 | //!!! "If possible, tempBuffer->realp and tempBuffer->imagp should be 32-byte aligned for best performance." 523 | m_dbuf->realp = allocate(m_size); 524 | m_dbuf->imagp = allocate(m_size); 525 | m_dpacked = new DSPDoubleSplitComplex; 526 | m_dpacked->realp = allocate(m_size / 2 + 1); 527 | m_dpacked->imagp = allocate(m_size / 2 + 1); 528 | m_dspare = allocate(m_size + 2); 529 | m_dspare2 = allocate(m_size + 2); 530 | } 531 | 532 | void packReal(const float *BQ_R__ const re) { 533 | // Pack input for forward transform 534 | vDSP_ctoz((DSPComplex *)re, 2, m_fpacked, 1, m_size/2); 535 | } 536 | void packComplex(const float *BQ_R__ const re, const float *BQ_R__ const im) { 537 | // Pack input for inverse transform 538 | if (re) v_copy(m_fpacked->realp, re, m_size/2 + 1); 539 | else v_zero(m_fpacked->realp, m_size/2 + 1); 540 | if (im) v_copy(m_fpacked->imagp, im, m_size/2 + 1); 541 | else v_zero(m_fpacked->imagp, m_size/2 + 1); 542 | fnyq(); 543 | } 544 | 545 | void unpackReal(float *BQ_R__ const re) { 546 | // Unpack output for inverse transform 547 | vDSP_ztoc(m_fpacked, 1, (DSPComplex *)re, 2, m_size/2); 548 | } 549 | void unpackComplex(float *BQ_R__ const re, float *BQ_R__ const im) { 550 | // Unpack output for forward 
transform 551 | // vDSP forward FFTs are scaled 2x (for some reason) 552 | float two = 2.f; 553 | vDSP_vsdiv(m_fpacked->realp, 1, &two, re, 1, m_size/2 + 1); 554 | vDSP_vsdiv(m_fpacked->imagp, 1, &two, im, 1, m_size/2 + 1); 555 | } 556 | void unpackComplex(float *BQ_R__ const cplx) { 557 | // Unpack output for forward transform 558 | // vDSP forward FFTs are scaled 2x (for some reason) 559 | const int hs1 = m_size/2 + 1; 560 | for (int i = 0; i < hs1; ++i) { 561 | cplx[i*2] = m_fpacked->realp[i] * 0.5f; 562 | cplx[i*2+1] = m_fpacked->imagp[i] * 0.5f; 563 | } 564 | } 565 | 566 | void packReal(const double *BQ_R__ const re) { 567 | // Pack input for forward transform 568 | vDSP_ctozD((DSPDoubleComplex *)re, 2, m_dpacked, 1, m_size/2); 569 | } 570 | void packComplex(const double *BQ_R__ const re, const double *BQ_R__ const im) { 571 | // Pack input for inverse transform 572 | if (re) v_copy(m_dpacked->realp, re, m_size/2 + 1); 573 | else v_zero(m_dpacked->realp, m_size/2 + 1); 574 | if (im) v_copy(m_dpacked->imagp, im, m_size/2 + 1); 575 | else v_zero(m_dpacked->imagp, m_size/2 + 1); 576 | dnyq(); 577 | } 578 | 579 | void unpackReal(double *BQ_R__ const re) { 580 | // Unpack output for inverse transform 581 | vDSP_ztocD(m_dpacked, 1, (DSPDoubleComplex *)re, 2, m_size/2); 582 | } 583 | void unpackComplex(double *BQ_R__ const re, double *BQ_R__ const im) { 584 | // Unpack output for forward transform 585 | // vDSP forward FFTs are scaled 2x (for some reason) 586 | double two = 2.0; 587 | vDSP_vsdivD(m_dpacked->realp, 1, &two, re, 1, m_size/2 + 1); 588 | vDSP_vsdivD(m_dpacked->imagp, 1, &two, im, 1, m_size/2 + 1); 589 | } 590 | void unpackComplex(double *BQ_R__ const cplx) { 591 | // Unpack output for forward transform 592 | // vDSP forward FFTs are scaled 2x (for some reason) 593 | const int hs1 = m_size/2 + 1; 594 | for (int i = 0; i < hs1; ++i) { 595 | cplx[i*2] = m_dpacked->realp[i] * 0.5; 596 | cplx[i*2+1] = m_dpacked->imagp[i] * 0.5; 597 | } 598 | } 599 | 600 | 
void fdenyq() { 601 | // for fft result in packed form, unpack the DC and Nyquist bins 602 | const int hs = m_size/2; 603 | m_fpacked->realp[hs] = m_fpacked->imagp[0]; 604 | m_fpacked->imagp[hs] = 0.f; 605 | m_fpacked->imagp[0] = 0.f; 606 | } 607 | void ddenyq() { 608 | // for fft result in packed form, unpack the DC and Nyquist bins 609 | const int hs = m_size/2; 610 | m_dpacked->realp[hs] = m_dpacked->imagp[0]; 611 | m_dpacked->imagp[hs] = 0.; 612 | m_dpacked->imagp[0] = 0.; 613 | } 614 | 615 | void fnyq() { 616 | // for ifft input in packed form, pack the DC and Nyquist bins 617 | const int hs = m_size/2; 618 | m_fpacked->imagp[0] = m_fpacked->realp[hs]; 619 | m_fpacked->realp[hs] = 0.f; 620 | m_fpacked->imagp[hs] = 0.f; 621 | } 622 | void dnyq() { 623 | // for ifft input in packed form, pack the DC and Nyquist bins 624 | const int hs = m_size/2; 625 | m_dpacked->imagp[0] = m_dpacked->realp[hs]; 626 | m_dpacked->realp[hs] = 0.; 627 | m_dpacked->imagp[hs] = 0.; 628 | } 629 | 630 | void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) { 631 | if (!m_dspec) initDouble(); 632 | packReal(realIn); 633 | vDSP_fft_zriptD(m_dspec, m_dpacked, 1, m_dbuf, m_order, FFT_FORWARD); 634 | ddenyq(); 635 | unpackComplex(realOut, imagOut); 636 | } 637 | 638 | void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut) { 639 | if (!m_dspec) initDouble(); 640 | packReal(realIn); 641 | vDSP_fft_zriptD(m_dspec, m_dpacked, 1, m_dbuf, m_order, FFT_FORWARD); 642 | ddenyq(); 643 | unpackComplex(complexOut); 644 | } 645 | 646 | void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) { 647 | if (!m_dspec) initDouble(); 648 | const int hs1 = m_size/2+1; 649 | packReal(realIn); 650 | vDSP_fft_zriptD(m_dspec, m_dpacked, 1, m_dbuf, m_order, FFT_FORWARD); 651 | ddenyq(); 652 | // vDSP forward FFTs are scaled 2x (for some reason) 653 | for (int i = 0; i < hs1; ++i) m_dpacked->realp[i] *= 0.5; 654 | for 
(int i = 0; i < hs1; ++i) m_dpacked->imagp[i] *= 0.5; 655 | v_cartesian_to_polar(magOut, phaseOut, 656 | m_dpacked->realp, m_dpacked->imagp, hs1); 657 | } 658 | 659 | void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) { 660 | if (!m_dspec) initDouble(); 661 | packReal(realIn); 662 | vDSP_fft_zriptD(m_dspec, m_dpacked, 1, m_dbuf, m_order, FFT_FORWARD); 663 | ddenyq(); 664 | const int hs1 = m_size/2+1; 665 | vDSP_zvmagsD(m_dpacked, 1, m_dspare, 1, hs1); 666 | vvsqrt(m_dspare2, m_dspare, &hs1); 667 | // vDSP forward FFTs are scaled 2x (for some reason) 668 | double two = 2.0; 669 | vDSP_vsdivD(m_dspare2, 1, &two, magOut, 1, hs1); 670 | } 671 | 672 | void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) { 673 | if (!m_fspec) initFloat(); 674 | packReal(realIn); 675 | vDSP_fft_zript(m_fspec, m_fpacked, 1, m_fbuf, m_order, FFT_FORWARD); 676 | fdenyq(); 677 | unpackComplex(realOut, imagOut); 678 | } 679 | 680 | void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) { 681 | if (!m_fspec) initFloat(); 682 | packReal(realIn); 683 | vDSP_fft_zript(m_fspec, m_fpacked, 1, m_fbuf, m_order, FFT_FORWARD); 684 | fdenyq(); 685 | unpackComplex(complexOut); 686 | } 687 | 688 | void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) { 689 | if (!m_fspec) initFloat(); 690 | const int hs1 = m_size/2+1; 691 | packReal(realIn); 692 | vDSP_fft_zript(m_fspec, m_fpacked, 1, m_fbuf, m_order, FFT_FORWARD); 693 | fdenyq(); 694 | // vDSP forward FFTs are scaled 2x (for some reason) 695 | for (int i = 0; i < hs1; ++i) m_fpacked->realp[i] *= 0.5f; 696 | for (int i = 0; i < hs1; ++i) m_fpacked->imagp[i] *= 0.5f; 697 | v_cartesian_to_polar(magOut, phaseOut, 698 | m_fpacked->realp, m_fpacked->imagp, hs1); 699 | } 700 | 701 | void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut) { 702 | if (!m_fspec) initFloat(); 703 | packReal(realIn); 704 | vDSP_fft_zript(m_fspec, 
m_fpacked, 1, m_fbuf, m_order, FFT_FORWARD); 705 | fdenyq(); 706 | const int hs1 = m_size/2 + 1; 707 | vDSP_zvmags(m_fpacked, 1, m_fspare, 1, hs1); 708 | vvsqrtf(m_fspare2, m_fspare, &hs1); 709 | // vDSP forward FFTs are scaled 2x (for some reason) 710 | float two = 2.f; 711 | vDSP_vsdiv(m_fspare2, 1, &two, magOut, 1, hs1); 712 | } 713 | 714 | void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) { 715 | if (!m_dspec) initDouble(); 716 | packComplex(realIn, imagIn); 717 | vDSP_fft_zriptD(m_dspec, m_dpacked, 1, m_dbuf, m_order, FFT_INVERSE); 718 | unpackReal(realOut); 719 | } 720 | 721 | void inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) { 722 | if (!m_dspec) initDouble(); 723 | double *d[2] = { m_dpacked->realp, m_dpacked->imagp }; 724 | v_deinterleave(d, complexIn, 2, m_size/2 + 1); 725 | dnyq(); 726 | vDSP_fft_zriptD(m_dspec, m_dpacked, 1, m_dbuf, m_order, FFT_INVERSE); 727 | unpackReal(realOut); 728 | } 729 | 730 | void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) { 731 | if (!m_dspec) initDouble(); 732 | const int hs1 = m_size/2+1; 733 | vvsincos(m_dpacked->imagp, m_dpacked->realp, phaseIn, &hs1); 734 | double *const rp = m_dpacked->realp; 735 | double *const ip = m_dpacked->imagp; 736 | for (int i = 0; i < hs1; ++i) rp[i] *= magIn[i]; 737 | for (int i = 0; i < hs1; ++i) ip[i] *= magIn[i]; 738 | dnyq(); 739 | vDSP_fft_zriptD(m_dspec, m_dpacked, 1, m_dbuf, m_order, FFT_INVERSE); 740 | unpackReal(realOut); 741 | } 742 | 743 | void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) { 744 | if (!m_dspec) initDouble(); 745 | const int hs1 = m_size/2 + 1; 746 | v_copy(m_dspare, magIn, hs1); 747 | for (int i = 0; i < hs1; ++i) m_dspare[i] += 0.000001; 748 | vvlog(m_dspare2, m_dspare, &hs1); 749 | inverse(m_dspare2, 0, cepOut); 750 | } 751 | 752 | void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ 
realOut) { 753 | if (!m_fspec) initFloat(); 754 | packComplex(realIn, imagIn); 755 | vDSP_fft_zript(m_fspec, m_fpacked, 1, m_fbuf, m_order, FFT_INVERSE); 756 | unpackReal(realOut); 757 | } 758 | 759 | void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) { 760 | if (!m_fspec) initFloat(); 761 | float *f[2] = { m_fpacked->realp, m_fpacked->imagp }; 762 | v_deinterleave(f, complexIn, 2, m_size/2 + 1); 763 | fnyq(); 764 | vDSP_fft_zript(m_fspec, m_fpacked, 1, m_fbuf, m_order, FFT_INVERSE); 765 | unpackReal(realOut); 766 | } 767 | 768 | void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) { 769 | if (!m_fspec) initFloat(); 770 | 771 | const int hs1 = m_size/2+1; 772 | vvsincosf(m_fpacked->imagp, m_fpacked->realp, phaseIn, &hs1); 773 | float *const rp = m_fpacked->realp; 774 | float *const ip = m_fpacked->imagp; 775 | for (int i = 0; i < hs1; ++i) rp[i] *= magIn[i]; 776 | for (int i = 0; i < hs1; ++i) ip[i] *= magIn[i]; 777 | fnyq(); 778 | vDSP_fft_zript(m_fspec, m_fpacked, 1, m_fbuf, m_order, FFT_INVERSE); 779 | unpackReal(realOut); 780 | } 781 | 782 | void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) { 783 | if (!m_fspec) initFloat(); 784 | const int hs1 = m_size/2 + 1; 785 | v_copy(m_fspare, magIn, hs1); 786 | for (int i = 0; i < hs1; ++i) m_fspare[i] += 0.000001f; 787 | vvlogf(m_fspare2, m_fspare, &hs1); 788 | inverse(m_fspare2, 0, cepOut); 789 | } 790 | 791 | private: 792 | const int m_size; 793 | int m_order; 794 | FFTSetup m_fspec; 795 | FFTSetupD m_dspec; 796 | DSPSplitComplex *m_fbuf; 797 | DSPDoubleSplitComplex *m_dbuf; 798 | DSPSplitComplex *m_fpacked; 799 | float *m_fspare; 800 | float *m_fspare2; 801 | DSPDoubleSplitComplex *m_dpacked; 802 | double *m_dspare; 803 | double *m_dspare2; 804 | }; 805 | 806 | #endif /* HAVE_VDSP */ 807 | 808 | #ifdef HAVE_FFTW3 809 | 810 | /* 811 | Define FFTW_DOUBLE_ONLY to make all uses of FFTW functions be 812 | double-precision (so 
"float" FFTs are calculated by casting to 813 | doubles and using the double-precision FFTW function). 814 | 815 | Define FFTW_SINGLE_ONLY to make all uses of FFTW functions be 816 | single-precision (so "double" FFTs are calculated by casting to 817 | floats and using the single-precision FFTW function). 818 | 819 | Neither of these flags is desirable for either performance or 820 | precision. The main reason to define either flag is to avoid linking 821 | against both fftw3 and fftw3f libraries. 822 | */ 823 | 824 | //#define FFTW_DOUBLE_ONLY 1 825 | //#define FFTW_SINGLE_ONLY 1 826 | 827 | #if defined(FFTW_DOUBLE_ONLY) && defined(FFTW_SINGLE_ONLY) 828 | // Can't meaningfully define both 829 | #error Can only define one of FFTW_DOUBLE_ONLY and FFTW_SINGLE_ONLY 830 | #endif 831 | 832 | #if defined(FFTW_FLOAT_ONLY) 833 | #warning FFTW_FLOAT_ONLY is deprecated, use FFTW_SINGLE_ONLY instead 834 | #define FFTW_SINGLE_ONLY 1 835 | #endif 836 | 837 | #ifdef FFTW_DOUBLE_ONLY 838 | #define fft_float_type double 839 | #define fftwf_complex fftw_complex 840 | #define fftwf_plan fftw_plan 841 | #define fftwf_plan_dft_r2c_1d fftw_plan_dft_r2c_1d 842 | #define fftwf_plan_dft_c2r_1d fftw_plan_dft_c2r_1d 843 | #define fftwf_destroy_plan fftw_destroy_plan 844 | #define fftwf_malloc fftw_malloc 845 | #define fftwf_free fftw_free 846 | #define fftwf_execute fftw_execute 847 | #define atan2f atan2 848 | #define sqrtf sqrt 849 | #define cosf cos 850 | #define sinf sin 851 | #else 852 | #define fft_float_type float 853 | #endif /* FFTW_DOUBLE_ONLY */ 854 | 855 | #ifdef FFTW_SINGLE_ONLY 856 | #define fft_double_type float 857 | #define fftw_complex fftwf_complex 858 | #define fftw_plan fftwf_plan 859 | #define fftw_plan_dft_r2c_1d fftwf_plan_dft_r2c_1d 860 | #define fftw_plan_dft_c2r_1d fftwf_plan_dft_c2r_1d 861 | #define fftw_destroy_plan fftwf_destroy_plan 862 | #define fftw_malloc fftwf_malloc 863 | #define fftw_free fftwf_free 864 | #define fftw_execute fftwf_execute 865 | #define 
atan2 atan2f 866 | #define sqrt sqrtf 867 | #define cos cosf 868 | #define sin sinf 869 | #else 870 | #define fft_double_type double 871 | #endif /* FFTW_SINGLE_ONLY */ 872 | 873 | class D_FFTW : public FFTImpl 874 | { 875 | public: 876 | D_FFTW(int size) : 877 | m_fplanf(0), m_dplanf(0), m_size(size) 878 | { 879 | } 880 | 881 | ~D_FFTW() { 882 | if (m_fplanf) { 883 | lock(); 884 | bool save = false; 885 | if (m_extantf > 0 && --m_extantf == 0) save = true; 886 | (void)save; // avoid compiler warning 887 | #ifdef USE_FFTW_WISDOM 888 | #ifndef FFTW_DOUBLE_ONLY 889 | if (save) saveWisdom('f'); 890 | #endif 891 | #endif 892 | fftwf_destroy_plan(m_fplanf); 893 | fftwf_destroy_plan(m_fplani); 894 | fftwf_free(m_fbuf); 895 | fftwf_free(m_fpacked); 896 | unlock(); 897 | } 898 | if (m_dplanf) { 899 | lock(); 900 | bool save = false; 901 | if (m_extantd > 0 && --m_extantd == 0) save = true; 902 | (void)save; // avoid compiler warning 903 | #ifdef USE_FFTW_WISDOM 904 | #ifndef FFTW_SINGLE_ONLY 905 | if (save) saveWisdom('d'); 906 | #endif 907 | #endif 908 | fftw_destroy_plan(m_dplanf); 909 | fftw_destroy_plan(m_dplani); 910 | fftw_free(m_dbuf); 911 | fftw_free(m_dpacked); 912 | unlock(); 913 | } 914 | lock(); 915 | if (m_extantf <= 0 && m_extantd <= 0) { 916 | #ifndef FFTW_DOUBLE_ONLY 917 | fftwf_cleanup(); 918 | #endif 919 | #ifndef FFTW_SINGLE_ONLY 920 | fftw_cleanup(); 921 | #endif 922 | } 923 | unlock(); 924 | } 925 | 926 | int getSize() const { 927 | return m_size; 928 | } 929 | 930 | FFT::Precisions 931 | getSupportedPrecisions() const { 932 | #ifdef FFTW_SINGLE_ONLY 933 | return FFT::SinglePrecision; 934 | #else 935 | #ifdef FFTW_DOUBLE_ONLY 936 | return FFT::DoublePrecision; 937 | #else 938 | return FFT::SinglePrecision | FFT::DoublePrecision; 939 | #endif 940 | #endif 941 | } 942 | 943 | void initFloat() { 944 | if (m_fplanf) return; 945 | bool load = false; 946 | lock(); 947 | if (m_extantf++ == 0) load = true; 948 | (void)load; // avoid compiler warning 949 | 
#ifdef USE_FFTW_WISDOM 950 | #ifdef FFTW_DOUBLE_ONLY 951 | if (load) loadWisdom('d'); 952 | #else 953 | if (load) loadWisdom('f'); 954 | #endif 955 | #endif 956 | m_fbuf = (fft_float_type *)fftw_malloc(m_size * sizeof(fft_float_type)); 957 | m_fpacked = (fftwf_complex *)fftw_malloc 958 | ((m_size/2 + 1) * sizeof(fftwf_complex)); 959 | #ifdef USE_FFTW_WISDOM 960 | m_fplanf = fftwf_plan_dft_r2c_1d 961 | (m_size, m_fbuf, m_fpacked, FFTW_MEASURE); 962 | m_fplani = fftwf_plan_dft_c2r_1d 963 | (m_size, m_fpacked, m_fbuf, FFTW_MEASURE); 964 | #else 965 | m_fplanf = fftwf_plan_dft_r2c_1d 966 | (m_size, m_fbuf, m_fpacked, FFTW_ESTIMATE); 967 | m_fplani = fftwf_plan_dft_c2r_1d 968 | (m_size, m_fpacked, m_fbuf, FFTW_ESTIMATE); 969 | #endif 970 | unlock(); 971 | } 972 | 973 | void initDouble() { 974 | if (m_dplanf) return; 975 | bool load = false; 976 | lock(); 977 | if (m_extantd++ == 0) load = true; 978 | (void)load; // avoid compiler warning 979 | #ifdef USE_FFTW_WISDOM 980 | #ifdef FFTW_SINGLE_ONLY 981 | if (load) loadWisdom('f'); 982 | #else 983 | if (load) loadWisdom('d'); 984 | #endif 985 | #endif 986 | m_dbuf = (fft_double_type *)fftw_malloc(m_size * sizeof(fft_double_type)); 987 | m_dpacked = (fftw_complex *)fftw_malloc 988 | ((m_size/2 + 1) * sizeof(fftw_complex)); 989 | #ifdef USE_FFTW_WISDOM 990 | m_dplanf = fftw_plan_dft_r2c_1d 991 | (m_size, m_dbuf, m_dpacked, FFTW_MEASURE); 992 | m_dplani = fftw_plan_dft_c2r_1d 993 | (m_size, m_dpacked, m_dbuf, FFTW_MEASURE); 994 | #else 995 | m_dplanf = fftw_plan_dft_r2c_1d 996 | (m_size, m_dbuf, m_dpacked, FFTW_ESTIMATE); 997 | m_dplani = fftw_plan_dft_c2r_1d 998 | (m_size, m_dpacked, m_dbuf, FFTW_ESTIMATE); 999 | #endif 1000 | unlock(); 1001 | } 1002 | 1003 | void loadWisdom(char type) { wisdom(false, type); } 1004 | void saveWisdom(char type) { wisdom(true, type); } 1005 | 1006 | void wisdom(bool save, char type) { 1007 | #ifdef USE_FFTW_WISDOM 1008 | #ifdef FFTW_DOUBLE_ONLY 1009 | if (type == 'f') return; 1010 | #endif 1011 
| #ifdef FFTW_SINGLE_ONLY 1012 | if (type == 'd') return; 1013 | #endif 1014 | 1015 | const char *home = getenv("HOME"); 1016 | if (!home) return; 1017 | 1018 | char fn[256]; 1019 | snprintf(fn, 256, "%s/%s.%c", home, ".bqfft.wisdom", type); 1020 | 1021 | FILE *f = fopen(fn, save ? "wb" : "rb"); 1022 | if (!f) return; 1023 | 1024 | if (save) { 1025 | switch (type) { 1026 | #ifdef FFTW_DOUBLE_ONLY 1027 | case 'f': break; 1028 | #else 1029 | case 'f': fftwf_export_wisdom_to_file(f); break; 1030 | #endif 1031 | #ifdef FFTW_SINGLE_ONLY 1032 | case 'd': break; 1033 | #else 1034 | case 'd': fftw_export_wisdom_to_file(f); break; 1035 | #endif 1036 | default: break; 1037 | } 1038 | } else { 1039 | switch (type) { 1040 | #ifdef FFTW_DOUBLE_ONLY 1041 | case 'f': break; 1042 | #else 1043 | case 'f': fftwf_import_wisdom_from_file(f); break; 1044 | #endif 1045 | #ifdef FFTW_SINGLE_ONLY 1046 | case 'd': break; 1047 | #else 1048 | case 'd': fftw_import_wisdom_from_file(f); break; 1049 | #endif 1050 | default: break; 1051 | } 1052 | } 1053 | 1054 | fclose(f); 1055 | #else 1056 | (void)save; 1057 | (void)type; 1058 | #endif 1059 | } 1060 | 1061 | void packFloat(const float *BQ_R__ re, const float *BQ_R__ im) { 1062 | const int hs = m_size/2; 1063 | fftwf_complex *const BQ_R__ fpacked = m_fpacked; 1064 | for (int i = 0; i <= hs; ++i) { 1065 | fpacked[i][0] = re[i]; 1066 | } 1067 | if (im) { 1068 | for (int i = 0; i <= hs; ++i) { 1069 | fpacked[i][1] = im[i]; 1070 | } 1071 | } else { 1072 | for (int i = 0; i <= hs; ++i) { 1073 | fpacked[i][1] = 0.f; 1074 | } 1075 | } 1076 | } 1077 | 1078 | void packDouble(const double *BQ_R__ re, const double *BQ_R__ im) { 1079 | const int hs = m_size/2; 1080 | fftw_complex *const BQ_R__ dpacked = m_dpacked; 1081 | for (int i = 0; i <= hs; ++i) { 1082 | dpacked[i][0] = re[i]; 1083 | } 1084 | if (im) { 1085 | for (int i = 0; i <= hs; ++i) { 1086 | dpacked[i][1] = im[i]; 1087 | } 1088 | } else { 1089 | for (int i = 0; i <= hs; ++i) { 1090 | 
dpacked[i][1] = 0.0; 1091 | } 1092 | } 1093 | } 1094 | 1095 | void unpackFloat(float *BQ_R__ re, float *BQ_R__ im) { 1096 | const int hs = m_size/2; 1097 | for (int i = 0; i <= hs; ++i) { 1098 | re[i] = m_fpacked[i][0]; 1099 | } 1100 | if (im) { 1101 | for (int i = 0; i <= hs; ++i) { 1102 | im[i] = m_fpacked[i][1]; 1103 | } 1104 | } 1105 | } 1106 | 1107 | void unpackDouble(double *BQ_R__ re, double *BQ_R__ im) { 1108 | const int hs = m_size/2; 1109 | for (int i = 0; i <= hs; ++i) { 1110 | re[i] = m_dpacked[i][0]; 1111 | } 1112 | if (im) { 1113 | for (int i = 0; i <= hs; ++i) { 1114 | im[i] = m_dpacked[i][1]; 1115 | } 1116 | } 1117 | } 1118 | 1119 | void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) { 1120 | if (!m_dplanf) initDouble(); 1121 | const int sz = m_size; 1122 | fft_double_type *const BQ_R__ dbuf = m_dbuf; 1123 | #ifndef FFTW_SINGLE_ONLY 1124 | if (realIn != dbuf) 1125 | #endif 1126 | for (int i = 0; i < sz; ++i) { 1127 | dbuf[i] = realIn[i]; 1128 | } 1129 | fftw_execute(m_dplanf); 1130 | unpackDouble(realOut, imagOut); 1131 | } 1132 | 1133 | void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut) { 1134 | if (!m_dplanf) initDouble(); 1135 | const int sz = m_size; 1136 | fft_double_type *const BQ_R__ dbuf = m_dbuf; 1137 | #ifndef FFTW_SINGLE_ONLY 1138 | if (realIn != dbuf) 1139 | #endif 1140 | for (int i = 0; i < sz; ++i) { 1141 | dbuf[i] = realIn[i]; 1142 | } 1143 | fftw_execute(m_dplanf); 1144 | v_convert(complexOut, (const fft_double_type *)m_dpacked, sz + 2); 1145 | } 1146 | 1147 | void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) { 1148 | if (!m_dplanf) initDouble(); 1149 | fft_double_type *const BQ_R__ dbuf = m_dbuf; 1150 | const int sz = m_size; 1151 | #ifndef FFTW_SINGLE_ONLY 1152 | if (realIn != dbuf) 1153 | #endif 1154 | for (int i = 0; i < sz; ++i) { 1155 | dbuf[i] = realIn[i]; 1156 | } 1157 | fftw_execute(m_dplanf); 1158 | 
v_cartesian_interleaved_to_polar 1159 | (magOut, phaseOut, (const fft_double_type *)m_dpacked, m_size/2+1); 1160 | } 1161 | 1162 | void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) { 1163 | if (!m_dplanf) initDouble(); 1164 | fft_double_type *const BQ_R__ dbuf = m_dbuf; 1165 | const int sz = m_size; 1166 | #ifndef FFTW_SINGLE_ONLY 1167 | if (realIn != m_dbuf) 1168 | #endif 1169 | for (int i = 0; i < sz; ++i) { 1170 | dbuf[i] = realIn[i]; 1171 | } 1172 | fftw_execute(m_dplanf); 1173 | v_cartesian_interleaved_to_magnitudes 1174 | (magOut, (const fft_double_type *)m_dpacked, m_size/2+1); 1175 | } 1176 | 1177 | void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) { 1178 | if (!m_fplanf) initFloat(); 1179 | fft_float_type *const BQ_R__ fbuf = m_fbuf; 1180 | const int sz = m_size; 1181 | #ifndef FFTW_DOUBLE_ONLY 1182 | if (realIn != fbuf) 1183 | #endif 1184 | for (int i = 0; i < sz; ++i) { 1185 | fbuf[i] = realIn[i]; 1186 | } 1187 | fftwf_execute(m_fplanf); 1188 | unpackFloat(realOut, imagOut); 1189 | } 1190 | 1191 | void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) { 1192 | if (!m_fplanf) initFloat(); 1193 | fft_float_type *const BQ_R__ fbuf = m_fbuf; 1194 | const int sz = m_size; 1195 | #ifndef FFTW_DOUBLE_ONLY 1196 | if (realIn != fbuf) 1197 | #endif 1198 | for (int i = 0; i < sz; ++i) { 1199 | fbuf[i] = realIn[i]; 1200 | } 1201 | fftwf_execute(m_fplanf); 1202 | v_convert(complexOut, (const fft_float_type *)m_fpacked, sz + 2); 1203 | } 1204 | 1205 | void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) { 1206 | if (!m_fplanf) initFloat(); 1207 | fft_float_type *const BQ_R__ fbuf = m_fbuf; 1208 | const int sz = m_size; 1209 | #ifndef FFTW_DOUBLE_ONLY 1210 | if (realIn != fbuf) 1211 | #endif 1212 | for (int i = 0; i < sz; ++i) { 1213 | fbuf[i] = realIn[i]; 1214 | } 1215 | fftwf_execute(m_fplanf); 1216 | v_cartesian_interleaved_to_polar 1217 | 
(magOut, phaseOut, (const fft_float_type *)m_fpacked, m_size/2+1); 1218 | } 1219 | 1220 | void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut) { 1221 | if (!m_fplanf) initFloat(); 1222 | fft_float_type *const BQ_R__ fbuf = m_fbuf; 1223 | const int sz = m_size; 1224 | #ifndef FFTW_DOUBLE_ONLY 1225 | if (realIn != fbuf) 1226 | #endif 1227 | for (int i = 0; i < sz; ++i) { 1228 | fbuf[i] = realIn[i]; 1229 | } 1230 | fftwf_execute(m_fplanf); 1231 | v_cartesian_interleaved_to_magnitudes 1232 | (magOut, (const fft_float_type *)m_fpacked, m_size/2+1); 1233 | } 1234 | 1235 | void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) { 1236 | if (!m_dplanf) initDouble(); 1237 | packDouble(realIn, imagIn); 1238 | fftw_execute(m_dplani); 1239 | const int sz = m_size; 1240 | fft_double_type *const BQ_R__ dbuf = m_dbuf; 1241 | #ifndef FFTW_SINGLE_ONLY 1242 | if (realOut != dbuf) 1243 | #endif 1244 | for (int i = 0; i < sz; ++i) { 1245 | realOut[i] = dbuf[i]; 1246 | } 1247 | } 1248 | 1249 | void inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) { 1250 | if (!m_dplanf) initDouble(); 1251 | v_convert((fft_double_type *)m_dpacked, complexIn, m_size + 2); 1252 | fftw_execute(m_dplani); 1253 | const int sz = m_size; 1254 | fft_double_type *const BQ_R__ dbuf = m_dbuf; 1255 | #ifndef FFTW_SINGLE_ONLY 1256 | if (realOut != dbuf) 1257 | #endif 1258 | for (int i = 0; i < sz; ++i) { 1259 | realOut[i] = dbuf[i]; 1260 | } 1261 | } 1262 | 1263 | void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) { 1264 | if (!m_dplanf) initDouble(); 1265 | v_polar_to_cartesian_interleaved 1266 | ((fft_double_type *)m_dpacked, magIn, phaseIn, m_size/2+1); 1267 | fftw_execute(m_dplani); 1268 | const int sz = m_size; 1269 | fft_double_type *const BQ_R__ dbuf = m_dbuf; 1270 | #ifndef FFTW_SINGLE_ONLY 1271 | if (realOut != dbuf) 1272 | #endif 1273 | for (int i = 0; i < sz; ++i) { 1274 | 
realOut[i] = dbuf[i]; 1275 | } 1276 | } 1277 | 1278 | void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) { 1279 | if (!m_dplanf) initDouble(); 1280 | fft_double_type *const BQ_R__ dbuf = m_dbuf; 1281 | fftw_complex *const BQ_R__ dpacked = m_dpacked; 1282 | const int hs = m_size/2; 1283 | for (int i = 0; i <= hs; ++i) { 1284 | dpacked[i][0] = log(magIn[i] + 0.000001); 1285 | } 1286 | for (int i = 0; i <= hs; ++i) { 1287 | dpacked[i][1] = 0.0; 1288 | } 1289 | fftw_execute(m_dplani); 1290 | const int sz = m_size; 1291 | #ifndef FFTW_SINGLE_ONLY 1292 | if (cepOut != dbuf) 1293 | #endif 1294 | for (int i = 0; i < sz; ++i) { 1295 | cepOut[i] = dbuf[i]; 1296 | } 1297 | } 1298 | 1299 | void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut) { 1300 | if (!m_fplanf) initFloat(); 1301 | packFloat(realIn, imagIn); 1302 | fftwf_execute(m_fplani); 1303 | const int sz = m_size; 1304 | fft_float_type *const BQ_R__ fbuf = m_fbuf; 1305 | #ifndef FFTW_DOUBLE_ONLY 1306 | if (realOut != fbuf) 1307 | #endif 1308 | for (int i = 0; i < sz; ++i) { 1309 | realOut[i] = fbuf[i]; 1310 | } 1311 | } 1312 | 1313 | void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) { 1314 | if (!m_fplanf) initFloat(); 1315 | v_convert((fft_float_type *)m_fpacked, complexIn, m_size + 2); 1316 | fftwf_execute(m_fplani); 1317 | const int sz = m_size; 1318 | fft_float_type *const BQ_R__ fbuf = m_fbuf; 1319 | #ifndef FFTW_DOUBLE_ONLY 1320 | if (realOut != fbuf) 1321 | #endif 1322 | for (int i = 0; i < sz; ++i) { 1323 | realOut[i] = fbuf[i]; 1324 | } 1325 | } 1326 | 1327 | void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) { 1328 | if (!m_fplanf) initFloat(); 1329 | v_polar_to_cartesian_interleaved 1330 | ((fft_float_type *)m_fpacked, magIn, phaseIn, m_size/2+1); 1331 | fftwf_execute(m_fplani); 1332 | const int sz = m_size; 1333 | fft_float_type *const BQ_R__ fbuf = m_fbuf; 1334 | #ifndef 
FFTW_DOUBLE_ONLY 1335 | if (realOut != fbuf) 1336 | #endif 1337 | for (int i = 0; i < sz; ++i) { 1338 | realOut[i] = fbuf[i]; 1339 | } 1340 | } 1341 | 1342 | void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) { 1343 | if (!m_fplanf) initFloat(); 1344 | const int hs = m_size/2; 1345 | fftwf_complex *const BQ_R__ fpacked = m_fpacked; 1346 | for (int i = 0; i <= hs; ++i) { 1347 | fpacked[i][0] = logf(magIn[i] + 0.000001f); 1348 | } 1349 | for (int i = 0; i <= hs; ++i) { 1350 | fpacked[i][1] = 0.f; 1351 | } 1352 | fftwf_execute(m_fplani); 1353 | const int sz = m_size; 1354 | fft_float_type *const BQ_R__ fbuf = m_fbuf; 1355 | #ifndef FFTW_DOUBLE_ONLY 1356 | if (cepOut != fbuf) 1357 | #endif 1358 | for (int i = 0; i < sz; ++i) { 1359 | cepOut[i] = fbuf[i]; 1360 | } 1361 | } 1362 | 1363 | private: 1364 | fftwf_plan m_fplanf; 1365 | fftwf_plan m_fplani; 1366 | #ifdef FFTW_DOUBLE_ONLY 1367 | double *m_fbuf; 1368 | #else 1369 | float *m_fbuf; 1370 | #endif 1371 | fftwf_complex *m_fpacked; 1372 | fftw_plan m_dplanf; 1373 | fftw_plan m_dplani; 1374 | #ifdef FFTW_SINGLE_ONLY 1375 | float *m_dbuf; 1376 | #else 1377 | double *m_dbuf; 1378 | #endif 1379 | fftw_complex *m_dpacked; 1380 | const int m_size; 1381 | static int m_extantf; 1382 | static int m_extantd; 1383 | #ifdef NO_THREADING 1384 | void lock() {} 1385 | void unlock() {} 1386 | #else 1387 | #ifdef _WIN32 1388 | static HANDLE m_commonMutex; 1389 | void lock() { WaitForSingleObject(m_commonMutex, INFINITE); } 1390 | void unlock() { ReleaseMutex(m_commonMutex); } 1391 | #else 1392 | static pthread_mutex_t m_commonMutex; 1393 | static bool m_haveMutex; 1394 | void lock() { pthread_mutex_lock(&m_commonMutex); } 1395 | void unlock() { pthread_mutex_unlock(&m_commonMutex); } 1396 | #endif 1397 | #endif 1398 | }; 1399 | 1400 | int 1401 | D_FFTW::m_extantf = 0; 1402 | 1403 | int 1404 | D_FFTW::m_extantd = 0; 1405 | 1406 | #ifndef NO_THREADING 1407 | #ifdef _WIN32 1408 | HANDLE D_FFTW::m_commonMutex = 
CreateMutex(NULL, FALSE, NULL); 1409 | #else 1410 | pthread_mutex_t D_FFTW::m_commonMutex = PTHREAD_MUTEX_INITIALIZER; 1411 | #endif 1412 | #endif 1413 | 1414 | #undef fft_float_type 1415 | #undef fft_double_type 1416 | 1417 | #ifdef FFTW_DOUBLE_ONLY 1418 | #undef fftwf_complex 1419 | #undef fftwf_plan 1420 | #undef fftwf_plan_dft_r2c_1d 1421 | #undef fftwf_plan_dft_c2r_1d 1422 | #undef fftwf_destroy_plan 1423 | #undef fftwf_malloc 1424 | #undef fftwf_free 1425 | #undef fftwf_execute 1426 | #undef atan2f 1427 | #undef sqrtf 1428 | #undef cosf 1429 | #undef sinf 1430 | #endif /* FFTW_DOUBLE_ONLY */ 1431 | 1432 | #ifdef FFTW_SINGLE_ONLY 1433 | #undef fftw_complex 1434 | #undef fftw_plan 1435 | #undef fftw_plan_dft_r2c_1d 1436 | #undef fftw_plan_dft_c2r_1d 1437 | #undef fftw_destroy_plan 1438 | #undef fftw_malloc 1439 | #undef fftw_free 1440 | #undef fftw_execute 1441 | #undef atan2 1442 | #undef sqrt 1443 | #undef cos 1444 | #undef sin 1445 | #endif /* FFTW_SINGLE_ONLY */ 1446 | 1447 | #endif /* HAVE_FFTW3 */ 1448 | 1449 | #ifdef HAVE_SLEEF 1450 | 1451 | class D_SLEEF : public FFTImpl 1452 | { 1453 | bool isAligned(const void *ptr) { 1454 | return ! 
((uintptr_t)ptr & 63); 1455 | } 1456 | 1457 | public: 1458 | D_SLEEF(int size) : 1459 | m_fplanf(0), m_fplani(0), m_fbuf(0), m_fpacked(0), 1460 | m_dplanf(0), m_dplani(0), m_dbuf(0), m_dpacked(0), 1461 | m_size(size) 1462 | { 1463 | } 1464 | 1465 | ~D_SLEEF() { 1466 | if (m_fplanf) { 1467 | SleefDFT_dispose(m_fplanf); 1468 | SleefDFT_dispose(m_fplani); 1469 | Sleef_free(m_fbuf); 1470 | Sleef_free(m_fpacked); 1471 | } 1472 | if (m_dplanf) { 1473 | SleefDFT_dispose(m_dplanf); 1474 | SleefDFT_dispose(m_dplani); 1475 | Sleef_free(m_dbuf); 1476 | Sleef_free(m_dpacked); 1477 | } 1478 | } 1479 | 1480 | int getSize() const { 1481 | return m_size; 1482 | } 1483 | 1484 | FFT::Precisions 1485 | getSupportedPrecisions() const { 1486 | return FFT::SinglePrecision | FFT::DoublePrecision; 1487 | } 1488 | 1489 | void initFloat() { 1490 | if (m_fplanf) return; 1491 | 1492 | m_fbuf = static_cast 1493 | (Sleef_malloc(m_size * sizeof(float))); 1494 | m_fpacked = static_cast 1495 | (Sleef_malloc((m_size + 2) * sizeof(float))); 1496 | 1497 | m_fplanf = SleefDFT_float_init1d 1498 | (m_size, m_fbuf, m_fpacked, 1499 | SLEEF_MODE_FORWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE); 1500 | 1501 | m_fplani = SleefDFT_float_init1d 1502 | (m_size, m_fpacked, m_fbuf, 1503 | SLEEF_MODE_BACKWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE); 1504 | } 1505 | 1506 | void initDouble() { 1507 | if (m_dplanf) return; 1508 | 1509 | m_dbuf = static_cast 1510 | (Sleef_malloc(m_size * sizeof(double))); 1511 | m_dpacked = static_cast 1512 | (Sleef_malloc((m_size + 2) * sizeof(double))); 1513 | 1514 | m_dplanf = SleefDFT_double_init1d 1515 | (m_size, m_dbuf, m_dpacked, 1516 | SLEEF_MODE_FORWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE); 1517 | 1518 | m_dplani = SleefDFT_double_init1d 1519 | (m_size, m_dpacked, m_dbuf, 1520 | SLEEF_MODE_BACKWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE); 1521 | } 1522 | 1523 | void packFloat(const float *BQ_R__ re, const float *BQ_R__ im) { 1524 | const float *src[2] = { re, im }; 1525 
| v_interleave(m_fpacked, src, 2, m_size/2 + 1); 1526 | } 1527 | 1528 | void packDouble(const double *BQ_R__ re, const double *BQ_R__ im) { 1529 | const double *src[2] = { re, im }; 1530 | v_interleave(m_dpacked, src, 2, m_size/2 + 1); 1531 | } 1532 | 1533 | void unpackFloat(float *BQ_R__ re, float *BQ_R__ im) { 1534 | float *dst[2] = { re, im }; 1535 | v_deinterleave(dst, m_fpacked, 2, m_size/2 + 1); 1536 | } 1537 | 1538 | void unpackDouble(double *BQ_R__ re, double *BQ_R__ im) { 1539 | double *dst[2] = { re, im }; 1540 | v_deinterleave(dst, m_dpacked, 2, m_size/2 + 1); 1541 | } 1542 | 1543 | void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) { 1544 | if (!m_dplanf) initDouble(); 1545 | if (isAligned(realIn)) { 1546 | SleefDFT_double_execute(m_dplanf, realIn, 0); 1547 | } else { 1548 | v_copy(m_dbuf, realIn, m_size); 1549 | SleefDFT_double_execute(m_dplanf, 0, 0); 1550 | } 1551 | unpackDouble(realOut, imagOut); 1552 | } 1553 | 1554 | void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut) { 1555 | if (!m_dplanf) initDouble(); 1556 | if (isAligned(realIn) && isAligned(complexOut)) { 1557 | SleefDFT_double_execute(m_dplanf, realIn, complexOut); 1558 | } else { 1559 | v_copy(m_dbuf, realIn, m_size); 1560 | SleefDFT_double_execute(m_dplanf, 0, 0); 1561 | v_copy(complexOut, m_dpacked, m_size + 2); 1562 | } 1563 | } 1564 | 1565 | void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) { 1566 | if (!m_dplanf) initDouble(); 1567 | if (isAligned(realIn)) { 1568 | SleefDFT_double_execute(m_dplanf, realIn, 0); 1569 | } else { 1570 | v_copy(m_dbuf, realIn, m_size); 1571 | SleefDFT_double_execute(m_dplanf, 0, 0); 1572 | } 1573 | v_cartesian_interleaved_to_polar(magOut, phaseOut, m_dpacked, m_size/2+1); 1574 | } 1575 | 1576 | void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) { 1577 | if (!m_dplanf) initDouble(); 1578 | if (isAligned(realIn)) { 1579 | 
SleefDFT_double_execute(m_dplanf, realIn, 0); 1580 | } else { 1581 | v_copy(m_dbuf, realIn, m_size); 1582 | SleefDFT_double_execute(m_dplanf, 0, 0); 1583 | } 1584 | v_cartesian_interleaved_to_magnitudes(magOut, m_dpacked, m_size/2+1); 1585 | } 1586 | 1587 | void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) { 1588 | if (!m_fplanf) initFloat(); 1589 | if (isAligned(realIn)) { 1590 | SleefDFT_float_execute(m_fplanf, realIn, 0); 1591 | } else { 1592 | v_copy(m_fbuf, realIn, m_size); 1593 | SleefDFT_float_execute(m_fplanf, 0, 0); 1594 | } 1595 | unpackFloat(realOut, imagOut); 1596 | } 1597 | 1598 | void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) { 1599 | if (!m_fplanf) initFloat(); 1600 | if (isAligned(realIn) && isAligned(complexOut)) { 1601 | SleefDFT_float_execute(m_fplanf, realIn, complexOut); 1602 | } else { 1603 | v_copy(m_fbuf, realIn, m_size); 1604 | SleefDFT_float_execute(m_fplanf, 0, 0); 1605 | v_copy(complexOut, m_fpacked, m_size + 2); 1606 | } 1607 | } 1608 | 1609 | void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) { 1610 | if (!m_fplanf) initFloat(); 1611 | if (isAligned(realIn)) { 1612 | SleefDFT_float_execute(m_fplanf, realIn, 0); 1613 | } else { 1614 | v_copy(m_fbuf, realIn, m_size); 1615 | SleefDFT_float_execute(m_fplanf, 0, 0); 1616 | } 1617 | v_cartesian_interleaved_to_polar(magOut, phaseOut, m_fpacked, m_size/2+1); 1618 | } 1619 | 1620 | void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut) { 1621 | if (!m_fplanf) initFloat(); 1622 | if (isAligned(realIn)) { 1623 | SleefDFT_float_execute(m_fplanf, realIn, 0); 1624 | } else { 1625 | v_copy(m_fbuf, realIn, m_size); 1626 | SleefDFT_float_execute(m_fplanf, 0, 0); 1627 | } 1628 | v_cartesian_interleaved_to_magnitudes(magOut, m_fpacked, m_size/2+1); 1629 | } 1630 | 1631 | void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) { 1632 | if 
(!m_dplanf) initDouble(); 1633 | packDouble(realIn, imagIn); 1634 | if (isAligned(realOut)) { 1635 | SleefDFT_double_execute(m_dplani, 0, realOut); 1636 | } else { 1637 | SleefDFT_double_execute(m_dplani, 0, 0); 1638 | v_copy(realOut, m_dbuf, m_size); 1639 | } 1640 | } 1641 | 1642 | void inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) { 1643 | if (!m_dplanf) initDouble(); 1644 | if (isAligned(complexIn) && isAligned(realOut)) { 1645 | SleefDFT_double_execute(m_dplani, complexIn, realOut); 1646 | } else { 1647 | v_copy(m_dpacked, complexIn, m_size + 2); 1648 | SleefDFT_double_execute(m_dplani, 0, 0); 1649 | v_copy(realOut, m_dbuf, m_size); 1650 | } 1651 | } 1652 | 1653 | void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) { 1654 | if (!m_dplanf) initDouble(); 1655 | v_polar_to_cartesian_interleaved(m_dpacked, magIn, phaseIn, m_size/2+1); 1656 | if (isAligned(realOut)) { 1657 | SleefDFT_double_execute(m_dplani, 0, realOut); 1658 | } else { 1659 | SleefDFT_double_execute(m_dplani, 0, 0); 1660 | v_copy(realOut, m_dbuf, m_size); 1661 | } 1662 | } 1663 | 1664 | void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) { 1665 | if (!m_dplanf) initDouble(); 1666 | const int hs = m_size/2; 1667 | for (int i = 0; i <= hs; ++i) { 1668 | m_dpacked[i*2] = log(magIn[i] + 0.000001); 1669 | m_dpacked[i*2+1] = 0.0; 1670 | } 1671 | if (isAligned(cepOut)) { 1672 | SleefDFT_double_execute(m_dplani, 0, cepOut); 1673 | } else { 1674 | SleefDFT_double_execute(m_dplani, 0, 0); 1675 | v_copy(cepOut, m_dbuf, m_size); 1676 | } 1677 | } 1678 | 1679 | void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut) { 1680 | if (!m_fplanf) initFloat(); 1681 | packFloat(realIn, imagIn); 1682 | if (isAligned(realOut)) { 1683 | SleefDFT_float_execute(m_fplani, 0, realOut); // float inverse plan, matching the unaligned branch below 1684 | } else { 1685 | SleefDFT_float_execute(m_fplani, 0, 0); 1686 | v_copy(realOut, m_fbuf, m_size); 1687 | }
1688 | } 1689 | 1690 | void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) { 1691 | if (!m_fplanf) initFloat(); 1692 | if (isAligned(complexIn) && isAligned(realOut)) { 1693 | SleefDFT_float_execute(m_fplani, complexIn, realOut); 1694 | } else { 1695 | v_copy(m_fpacked, complexIn, m_size + 2); 1696 | SleefDFT_float_execute(m_fplani, 0, 0); 1697 | v_copy(realOut, m_fbuf, m_size); 1698 | } 1699 | } 1700 | 1701 | void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) { 1702 | if (!m_fplanf) initFloat(); 1703 | v_polar_to_cartesian_interleaved(m_fpacked, magIn, phaseIn, m_size/2+1); 1704 | if (isAligned(realOut)) { 1705 | SleefDFT_float_execute(m_fplani, 0, realOut); 1706 | } else { 1707 | SleefDFT_float_execute(m_fplani, 0, 0); 1708 | v_copy(realOut, m_fbuf, m_size); 1709 | } 1710 | } 1711 | 1712 | void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) { 1713 | if (!m_fplanf) initFloat(); 1714 | const int hs = m_size/2; 1715 | for (int i = 0; i <= hs; ++i) { 1716 | m_fpacked[i*2] = logf(magIn[i] + 0.000001f); 1717 | m_fpacked[i*2+1] = 0.0; 1718 | } 1719 | if (isAligned(cepOut)) { 1720 | SleefDFT_float_execute(m_fplani, 0, cepOut); 1721 | } else { 1722 | SleefDFT_float_execute(m_fplani, 0, 0); 1723 | v_copy(cepOut, m_fbuf, m_size); 1724 | } 1725 | } 1726 | 1727 | private: 1728 | SleefDFT *m_fplanf; 1729 | SleefDFT *m_fplani; 1730 | 1731 | float *m_fbuf; 1732 | float *m_fpacked; 1733 | 1734 | SleefDFT *m_dplanf; 1735 | SleefDFT *m_dplani; 1736 | 1737 | double *m_dbuf; 1738 | double *m_dpacked; 1739 | 1740 | const int m_size; 1741 | }; 1742 | 1743 | #endif /* HAVE_SLEEF */ 1744 | 1745 | #ifdef HAVE_KISSFFT 1746 | 1747 | class D_KISSFFT : public FFTImpl 1748 | { 1749 | public: 1750 | D_KISSFFT(int size) : 1751 | m_size(size), 1752 | m_fplanf(0), 1753 | m_fplani(0) 1754 | { 1755 | #ifdef FIXED_POINT 1756 | #error KISSFFT is not configured for float values 1757 | #endif 1758 | if 
(sizeof(kiss_fft_scalar) != sizeof(float)) { 1759 | std::cerr << "ERROR: KISSFFT is not configured for float values" 1760 | << std::endl; 1761 | } 1762 | 1763 | m_fbuf = new kiss_fft_scalar[m_size + 2]; 1764 | m_fpacked = new kiss_fft_cpx[m_size + 2]; 1765 | m_fplanf = kiss_fftr_alloc(m_size, 0, NULL, NULL); 1766 | m_fplani = kiss_fftr_alloc(m_size, 1, NULL, NULL); 1767 | } 1768 | 1769 | ~D_KISSFFT() { 1770 | kiss_fftr_free(m_fplanf); 1771 | kiss_fftr_free(m_fplani); 1772 | 1773 | delete[] m_fbuf; 1774 | delete[] m_fpacked; 1775 | } 1776 | 1777 | int getSize() const { 1778 | return m_size; 1779 | } 1780 | 1781 | FFT::Precisions 1782 | getSupportedPrecisions() const { 1783 | return FFT::SinglePrecision; 1784 | } 1785 | 1786 | void initFloat() { } 1787 | void initDouble() { } 1788 | 1789 | void packFloat(const float *BQ_R__ re, const float *BQ_R__ im) { 1790 | const int hs = m_size/2; 1791 | for (int i = 0; i <= hs; ++i) { 1792 | m_fpacked[i].r = re[i]; 1793 | } 1794 | if (im) { 1795 | for (int i = 0; i <= hs; ++i) { 1796 | m_fpacked[i].i = im[i]; 1797 | } 1798 | } else { 1799 | for (int i = 0; i <= hs; ++i) { 1800 | m_fpacked[i].i = 0.f; 1801 | } 1802 | } 1803 | } 1804 | 1805 | void unpackFloat(float *BQ_R__ re, float *BQ_R__ im) { 1806 | const int hs = m_size/2; 1807 | for (int i = 0; i <= hs; ++i) { 1808 | re[i] = m_fpacked[i].r; 1809 | } 1810 | if (im) { 1811 | for (int i = 0; i <= hs; ++i) { 1812 | im[i] = m_fpacked[i].i; 1813 | } 1814 | } 1815 | } 1816 | 1817 | void packDouble(const double *BQ_R__ re, const double *BQ_R__ im) { 1818 | const int hs = m_size/2; 1819 | for (int i = 0; i <= hs; ++i) { 1820 | m_fpacked[i].r = float(re[i]); 1821 | } 1822 | if (im) { 1823 | for (int i = 0; i <= hs; ++i) { 1824 | m_fpacked[i].i = float(im[i]); 1825 | } 1826 | } else { 1827 | for (int i = 0; i <= hs; ++i) { 1828 | m_fpacked[i].i = 0.f; 1829 | } 1830 | } 1831 | } 1832 | 1833 | void unpackDouble(double *BQ_R__ re, double *BQ_R__ im) { 1834 | const int hs = m_size/2; 1835 
| for (int i = 0; i <= hs; ++i) { 1836 | re[i] = double(m_fpacked[i].r); 1837 | } 1838 | if (im) { 1839 | for (int i = 0; i <= hs; ++i) { 1840 | im[i] = double(m_fpacked[i].i); 1841 | } 1842 | } 1843 | } 1844 | 1845 | void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) { 1846 | v_convert(m_fbuf, realIn, m_size); 1847 | kiss_fftr(m_fplanf, m_fbuf, m_fpacked); 1848 | unpackDouble(realOut, imagOut); 1849 | } 1850 | 1851 | void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut) { 1852 | v_convert(m_fbuf, realIn, m_size); 1853 | kiss_fftr(m_fplanf, m_fbuf, m_fpacked); 1854 | v_convert(complexOut, (float *)m_fpacked, m_size + 2); 1855 | } 1856 | 1857 | void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) { 1858 | v_convert(m_fbuf, realIn, m_size); 1859 | kiss_fftr(m_fplanf, m_fbuf, m_fpacked); 1860 | v_cartesian_interleaved_to_polar 1861 | (magOut, phaseOut, (float *)m_fpacked, m_size/2+1); 1862 | } 1863 | 1864 | void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) { 1865 | v_convert(m_fbuf, realIn, m_size); 1866 | kiss_fftr(m_fplanf, m_fbuf, m_fpacked); 1867 | v_cartesian_interleaved_to_magnitudes 1868 | (magOut, (float *)m_fpacked, m_size/2+1); 1869 | } 1870 | 1871 | void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) { 1872 | kiss_fftr(m_fplanf, realIn, m_fpacked); 1873 | unpackFloat(realOut, imagOut); 1874 | } 1875 | 1876 | void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) { 1877 | kiss_fftr(m_fplanf, realIn, (kiss_fft_cpx *)complexOut); 1878 | } 1879 | 1880 | void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) { 1881 | kiss_fftr(m_fplanf, realIn, m_fpacked); 1882 | v_cartesian_interleaved_to_polar 1883 | (magOut, phaseOut, (float *)m_fpacked, m_size/2+1); 1884 | } 1885 | 1886 | void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ 
magOut) { 1887 | kiss_fftr(m_fplanf, realIn, m_fpacked); 1888 | v_cartesian_interleaved_to_magnitudes 1889 | (magOut, (float *)m_fpacked, m_size/2+1); 1890 | } 1891 | 1892 | void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) { 1893 | packDouble(realIn, imagIn); 1894 | kiss_fftri(m_fplani, m_fpacked, m_fbuf); 1895 | v_convert(realOut, m_fbuf, m_size); 1896 | } 1897 | 1898 | void inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) { 1899 | v_convert((float *)m_fpacked, complexIn, m_size + 2); 1900 | kiss_fftri(m_fplani, m_fpacked, m_fbuf); 1901 | v_convert(realOut, m_fbuf, m_size); 1902 | } 1903 | 1904 | void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) { 1905 | v_polar_to_cartesian_interleaved 1906 | ((float *)m_fpacked, magIn, phaseIn, m_size/2+1); 1907 | kiss_fftri(m_fplani, m_fpacked, m_fbuf); 1908 | v_convert(realOut, m_fbuf, m_size); 1909 | } 1910 | 1911 | void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) { 1912 | const int hs = m_size/2; 1913 | for (int i = 0; i <= hs; ++i) { 1914 | m_fpacked[i].r = float(log(magIn[i] + 0.000001)); 1915 | m_fpacked[i].i = 0.0f; 1916 | } 1917 | kiss_fftri(m_fplani, m_fpacked, m_fbuf); 1918 | v_convert(cepOut, m_fbuf, m_size); 1919 | } 1920 | 1921 | void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut) { 1922 | packFloat(realIn, imagIn); 1923 | kiss_fftri(m_fplani, m_fpacked, realOut); 1924 | } 1925 | 1926 | void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) { 1927 | v_copy((float *)m_fpacked, complexIn, m_size + 2); 1928 | kiss_fftri(m_fplani, m_fpacked, realOut); 1929 | } 1930 | 1931 | void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) { 1932 | v_polar_to_cartesian_interleaved 1933 | ((float *)m_fpacked, magIn, phaseIn, m_size/2+1); 1934 | kiss_fftri(m_fplani, m_fpacked, realOut); 
1935 | } 1936 | 1937 | void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) { 1938 | const int hs = m_size/2; 1939 | for (int i = 0; i <= hs; ++i) { 1940 | m_fpacked[i].r = logf(magIn[i] + 0.000001f); 1941 | m_fpacked[i].i = 0.0f; 1942 | } 1943 | kiss_fftri(m_fplani, m_fpacked, cepOut); 1944 | } 1945 | 1946 | private: 1947 | const int m_size; 1948 | kiss_fftr_cfg m_fplanf; 1949 | kiss_fftr_cfg m_fplani; 1950 | kiss_fft_scalar *m_fbuf; 1951 | kiss_fft_cpx *m_fpacked; 1952 | }; 1953 | 1954 | #endif /* HAVE_KISSFFT */ 1955 | 1956 | #ifdef USE_BUILTIN_FFT 1957 | 1958 | class D_Builtin : public FFTImpl 1959 | { 1960 | public: 1961 | D_Builtin(int size) : 1962 | m_size(size), 1963 | m_half(size/2), 1964 | m_blockTableSize(16), 1965 | m_maxTabledBlock(1 << m_blockTableSize) 1966 | { 1967 | m_table = allocate_and_zero(m_half); 1968 | m_sincos = allocate_and_zero(m_blockTableSize * 4); 1969 | m_sincos_r = allocate_and_zero(m_half); 1970 | m_vr = allocate_and_zero(m_half); 1971 | m_vi = allocate_and_zero(m_half); 1972 | m_a = allocate_and_zero(m_half + 1); 1973 | m_b = allocate_and_zero(m_half + 1); 1974 | m_c = allocate_and_zero(m_half + 1); 1975 | m_d = allocate_and_zero(m_half + 1); 1976 | m_a_and_b[0] = m_a; 1977 | m_a_and_b[1] = m_b; 1978 | m_c_and_d[0] = m_c; 1979 | m_c_and_d[1] = m_d; 1980 | makeTables(); 1981 | } 1982 | 1983 | ~D_Builtin() { 1984 | deallocate(m_table); 1985 | deallocate(m_sincos); 1986 | deallocate(m_sincos_r); 1987 | deallocate(m_vr); 1988 | deallocate(m_vi); 1989 | deallocate(m_a); 1990 | deallocate(m_b); 1991 | deallocate(m_c); 1992 | deallocate(m_d); 1993 | } 1994 | 1995 | int getSize() const { 1996 | return m_size; 1997 | } 1998 | 1999 | FFT::Precisions 2000 | getSupportedPrecisions() const { 2001 | return FFT::DoublePrecision; 2002 | } 2003 | 2004 | void initFloat() { } 2005 | void initDouble() { } 2006 | 2007 | void forward(const double *BQ_R__ realIn, 2008 | double *BQ_R__ realOut, double *BQ_R__ imagOut) { 2009 | 
transformF(realIn, realOut, imagOut); 2010 | } 2011 | 2012 | void forwardInterleaved(const double *BQ_R__ realIn, 2013 | double *BQ_R__ complexOut) { 2014 | transformF(realIn, m_c, m_d); 2015 | v_interleave(complexOut, m_c_and_d, 2, m_half + 1); 2016 | } 2017 | 2018 | void forwardPolar(const double *BQ_R__ realIn, 2019 | double *BQ_R__ magOut, double *BQ_R__ phaseOut) { 2020 | transformF(realIn, m_c, m_d); 2021 | v_cartesian_to_polar(magOut, phaseOut, m_c, m_d, m_half + 1); 2022 | } 2023 | 2024 | void forwardMagnitude(const double *BQ_R__ realIn, 2025 | double *BQ_R__ magOut) { 2026 | transformF(realIn, m_c, m_d); 2027 | v_cartesian_to_magnitudes(magOut, m_c, m_d, m_half + 1); 2028 | } 2029 | 2030 | void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, 2031 | float *BQ_R__ imagOut) { 2032 | transformF(realIn, m_c, m_d); 2033 | v_convert(realOut, m_c, m_half + 1); 2034 | v_convert(imagOut, m_d, m_half + 1); 2035 | } 2036 | 2037 | void forwardInterleaved(const float *BQ_R__ realIn, 2038 | float *BQ_R__ complexOut) { 2039 | transformF(realIn, m_c, m_d); 2040 | for (int i = 0; i <= m_half; ++i) complexOut[i*2] = m_c[i]; 2041 | for (int i = 0; i <= m_half; ++i) complexOut[i*2+1] = m_d[i]; 2042 | } 2043 | 2044 | void forwardPolar(const float *BQ_R__ realIn, 2045 | float *BQ_R__ magOut, float *BQ_R__ phaseOut) { 2046 | transformF(realIn, m_c, m_d); 2047 | v_cartesian_to_polar(magOut, phaseOut, m_c, m_d, m_half + 1); 2048 | } 2049 | 2050 | void forwardMagnitude(const float *BQ_R__ realIn, 2051 | float *BQ_R__ magOut) { 2052 | transformF(realIn, m_c, m_d); 2053 | v_cartesian_to_magnitudes(magOut, m_c, m_d, m_half + 1); 2054 | } 2055 | 2056 | void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, 2057 | double *BQ_R__ realOut) { 2058 | transformI(realIn, imagIn, realOut); 2059 | } 2060 | 2061 | void inverseInterleaved(const double *BQ_R__ complexIn, 2062 | double *BQ_R__ realOut) { 2063 | v_deinterleave(m_a_and_b, complexIn, 2, m_half + 1); 2064 | 
transformI(m_a, m_b, realOut); 2065 | } 2066 | 2067 | void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, 2068 | double *BQ_R__ realOut) { 2069 | v_polar_to_cartesian(m_a, m_b, magIn, phaseIn, m_half + 1); 2070 | transformI(m_a, m_b, realOut); 2071 | } 2072 | 2073 | void inverseCepstral(const double *BQ_R__ magIn, 2074 | double *BQ_R__ cepOut) { 2075 | for (int i = 0; i <= m_half; ++i) { 2076 | double real = log(magIn[i] + 0.000001); 2077 | m_a[i] = real; 2078 | m_b[i] = 0.0; 2079 | } 2080 | transformI(m_a, m_b, cepOut); 2081 | } 2082 | 2083 | void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, 2084 | float *BQ_R__ realOut) { 2085 | v_convert(m_a, realIn, m_half + 1); 2086 | v_convert(m_b, imagIn, m_half + 1); 2087 | transformI(m_a, m_b, realOut); 2088 | } 2089 | 2090 | void inverseInterleaved(const float *BQ_R__ complexIn, 2091 | float *BQ_R__ realOut) { 2092 | for (int i = 0; i <= m_half; ++i) m_a[i] = complexIn[i*2]; 2093 | for (int i = 0; i <= m_half; ++i) m_b[i] = complexIn[i*2+1]; 2094 | transformI(m_a, m_b, realOut); 2095 | } 2096 | 2097 | void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, 2098 | float *BQ_R__ realOut) { 2099 | v_polar_to_cartesian(m_a, m_b, magIn, phaseIn, m_half + 1); 2100 | transformI(m_a, m_b, realOut); 2101 | } 2102 | 2103 | void inverseCepstral(const float *BQ_R__ magIn, 2104 | float *BQ_R__ cepOut) { 2105 | for (int i = 0; i <= m_half; ++i) { 2106 | float real = logf(magIn[i] + 0.000001); 2107 | m_a[i] = real; 2108 | m_b[i] = 0.0; 2109 | } 2110 | transformI(m_a, m_b, cepOut); 2111 | } 2112 | 2113 | private: 2114 | const int m_size; 2115 | const int m_half; 2116 | const int m_blockTableSize; 2117 | const int m_maxTabledBlock; 2118 | int *m_table; 2119 | double *m_sincos; 2120 | double *m_sincos_r; 2121 | double *m_vr; 2122 | double *m_vi; 2123 | double *m_a; 2124 | double *m_b; 2125 | double *m_c; 2126 | double *m_d; 2127 | double *m_a_and_b[2]; 2128 | double 
*m_c_and_d[2]; 2129 | 2130 | void makeTables() { 2131 | 2132 | // main table for complex fft - this is of size m_half, 2133 | // because we are at heart a real-complex fft only 2134 | 2135 | int bits; 2136 | int i, j, k, m; 2137 | 2138 | int n = m_half; 2139 | 2140 | for (i = 0; ; ++i) { 2141 | if (n & (1 << i)) { 2142 | bits = i; 2143 | break; 2144 | } 2145 | } 2146 | 2147 | for (i = 0; i < n; ++i) { 2148 | m = i; 2149 | for (j = k = 0; j < bits; ++j) { 2150 | k = (k << 1) | (m & 1); 2151 | m >>= 1; 2152 | } 2153 | m_table[i] = k; 2154 | } 2155 | 2156 | // sin and cos tables for complex fft 2157 | int ix = 0; 2158 | for (i = 2; i <= m_maxTabledBlock; i <<= 1) { 2159 | double phase = 2.0 * M_PI / double(i); 2160 | m_sincos[ix++] = sin(phase); 2161 | m_sincos[ix++] = sin(2.0 * phase); 2162 | m_sincos[ix++] = cos(phase); 2163 | m_sincos[ix++] = cos(2.0 * phase); 2164 | } 2165 | 2166 | // sin and cos tables for real-complex transform 2167 | ix = 0; 2168 | for (i = 0; i < n/2; ++i) { 2169 | double phase = M_PI * (double(i + 1) / double(m_half) + 0.5); 2170 | m_sincos_r[ix++] = sin(phase); 2171 | m_sincos_r[ix++] = cos(phase); 2172 | } 2173 | } 2174 | 2175 | // Uses m_a and m_b internally; does not touch m_c or m_d 2176 | template 2177 | void transformF(const T *BQ_R__ ri, 2178 | double *BQ_R__ ro, double *BQ_R__ io) { 2179 | 2180 | int halfhalf = m_half / 2; 2181 | for (int i = 0; i < m_half; ++i) { 2182 | m_a[i] = ri[i * 2]; 2183 | m_b[i] = ri[i * 2 + 1]; 2184 | } 2185 | transformComplex(m_a, m_b, m_vr, m_vi, false); 2186 | ro[0] = m_vr[0] + m_vi[0]; 2187 | ro[m_half] = m_vr[0] - m_vi[0]; 2188 | io[0] = io[m_half] = 0.0; 2189 | int ix = 0; 2190 | for (int i = 0; i < halfhalf; ++i) { 2191 | double s = -m_sincos_r[ix++]; 2192 | double c = m_sincos_r[ix++]; 2193 | int k = i + 1; 2194 | double r0 = m_vr[k]; 2195 | double i0 = m_vi[k]; 2196 | double r1 = m_vr[m_half - k]; 2197 | double i1 = -m_vi[m_half - k]; 2198 | double tw_r = (r0 - r1) * c - (i0 - i1) * s; 2199 | 
double tw_i = (r0 - r1) * s + (i0 - i1) * c; 2200 | ro[k] = (r0 + r1 + tw_r) * 0.5; 2201 | ro[m_half - k] = (r0 + r1 - tw_r) * 0.5; 2202 | io[k] = (i0 + i1 + tw_i) * 0.5; 2203 | io[m_half - k] = (tw_i - i0 - i1) * 0.5; 2204 | } 2205 | } 2206 | 2207 | // Uses m_c and m_d internally; does not touch m_a or m_b 2208 | template 2209 | void transformI(const double *BQ_R__ ri, const double *BQ_R__ ii, 2210 | T *BQ_R__ ro) { 2211 | 2212 | int halfhalf = m_half / 2; 2213 | m_vr[0] = ri[0] + ri[m_half]; 2214 | m_vi[0] = ri[0] - ri[m_half]; 2215 | int ix = 0; 2216 | for (int i = 0; i < halfhalf; ++i) { 2217 | double s = m_sincos_r[ix++]; 2218 | double c = m_sincos_r[ix++]; 2219 | int k = i + 1; 2220 | double r0 = ri[k]; 2221 | double r1 = ri[m_half - k]; 2222 | double i0 = ii[k]; 2223 | double i1 = -ii[m_half - k]; 2224 | double tw_r = (r0 - r1) * c - (i0 - i1) * s; 2225 | double tw_i = (r0 - r1) * s + (i0 - i1) * c; 2226 | m_vr[k] = (r0 + r1 + tw_r); 2227 | m_vr[m_half - k] = (r0 + r1 - tw_r); 2228 | m_vi[k] = (i0 + i1 + tw_i); 2229 | m_vi[m_half - k] = (tw_i - i0 - i1); 2230 | } 2231 | transformComplex(m_vr, m_vi, m_c, m_d, true); 2232 | for (int i = 0; i < m_half; ++i) { 2233 | ro[i*2] = m_c[i]; 2234 | ro[i*2+1] = m_d[i]; 2235 | } 2236 | } 2237 | 2238 | void transformComplex(const double *BQ_R__ ri, const double *BQ_R__ ii, 2239 | double *BQ_R__ ro, double *BQ_R__ io, 2240 | bool inverse) { 2241 | 2242 | // Following Don Cross's 1998 implementation, described by its 2243 | // author as public domain. 2244 | 2245 | // Because we are at heart a real-complex fft only, and we know that: 2246 | const int n = m_half; 2247 | 2248 | for (int i = 0; i < n; ++i) { 2249 | int j = m_table[i]; 2250 | ro[j] = ri[i]; 2251 | io[j] = ii[i]; 2252 | } 2253 | 2254 | int ix = 0; 2255 | int blockEnd = 1; 2256 | double ifactor = (inverse ? 
-1.0 : 1.0); 2257 | 2258 | for (int blockSize = 2; blockSize <= n; blockSize <<= 1) { 2259 | 2260 | double sm1, sm2, cm1, cm2; 2261 | 2262 | if (blockSize <= m_maxTabledBlock) { 2263 | sm1 = ifactor * m_sincos[ix++]; 2264 | sm2 = ifactor * m_sincos[ix++]; 2265 | cm1 = m_sincos[ix++]; 2266 | cm2 = m_sincos[ix++]; 2267 | } else { 2268 | double phase = 2.0 * M_PI / double(blockSize); 2269 | sm1 = ifactor * sin(phase); 2270 | sm2 = ifactor * sin(2.0 * phase); 2271 | cm1 = cos(phase); 2272 | cm2 = cos(2.0 * phase); 2273 | } 2274 | 2275 | double w = 2 * cm1; 2276 | double ar[3], ai[3]; 2277 | 2278 | for (int i = 0; i < n; i += blockSize) { 2279 | 2280 | ar[2] = cm2; 2281 | ar[1] = cm1; 2282 | 2283 | ai[2] = sm2; 2284 | ai[1] = sm1; 2285 | 2286 | int j = i; 2287 | 2288 | for (int m = 0; m < blockEnd; ++m) { 2289 | 2290 | ar[0] = w * ar[1] - ar[2]; 2291 | ar[2] = ar[1]; 2292 | ar[1] = ar[0]; 2293 | 2294 | ai[0] = w * ai[1] - ai[2]; 2295 | ai[2] = ai[1]; 2296 | ai[1] = ai[0]; 2297 | 2298 | int k = j + blockEnd; 2299 | double tr = ar[0] * ro[k] - ai[0] * io[k]; 2300 | double ti = ar[0] * io[k] + ai[0] * ro[k]; 2301 | 2302 | ro[k] = ro[j] - tr; 2303 | io[k] = io[j] - ti; 2304 | 2305 | ro[j] += tr; 2306 | io[j] += ti; 2307 | 2308 | ++j; 2309 | } 2310 | } 2311 | 2312 | blockEnd = blockSize; 2313 | } 2314 | } 2315 | }; 2316 | 2317 | #endif /* USE_BUILTIN_FFT */ 2318 | 2319 | class D_DFT : public FFTImpl 2320 | { 2321 | private: 2322 | template 2323 | class DFT 2324 | { 2325 | public: 2326 | DFT(int size) : m_size(size), m_bins(size/2 + 1) { 2327 | 2328 | m_sin = allocate_channels(m_size, m_size); 2329 | m_cos = allocate_channels(m_size, m_size); 2330 | 2331 | for (int i = 0; i < m_size; ++i) { 2332 | for (int j = 0; j < m_size; ++j) { 2333 | double arg = (double(i) * double(j) * M_PI * 2.0) / m_size; 2334 | m_sin[i][j] = sin(arg); 2335 | m_cos[i][j] = cos(arg); 2336 | } 2337 | } 2338 | 2339 | m_tmp = allocate_channels(2, m_size); 2340 | } 2341 | 2342 | ~DFT() { 2343 | 
deallocate_channels(m_tmp, 2); 2344 | deallocate_channels(m_sin, m_size); 2345 | deallocate_channels(m_cos, m_size); 2346 | } 2347 | 2348 | void forward(const T *BQ_R__ realIn, T *BQ_R__ realOut, T *BQ_R__ imagOut) { 2349 | for (int i = 0; i < m_bins; ++i) { 2350 | double re = 0.0, im = 0.0; 2351 | for (int j = 0; j < m_size; ++j) re += realIn[j] * m_cos[i][j]; 2352 | for (int j = 0; j < m_size; ++j) im -= realIn[j] * m_sin[i][j]; 2353 | realOut[i] = T(re); 2354 | imagOut[i] = T(im); 2355 | } 2356 | } 2357 | 2358 | void forwardInterleaved(const T *BQ_R__ realIn, T *BQ_R__ complexOut) { 2359 | for (int i = 0; i < m_bins; ++i) { 2360 | double re = 0.0, im = 0.0; 2361 | for (int j = 0; j < m_size; ++j) re += realIn[j] * m_cos[i][j]; 2362 | for (int j = 0; j < m_size; ++j) im -= realIn[j] * m_sin[i][j]; 2363 | complexOut[i*2] = T(re); 2364 | complexOut[i*2 + 1] = T(im); 2365 | } 2366 | } 2367 | 2368 | void forwardPolar(const T *BQ_R__ realIn, T *BQ_R__ magOut, T *BQ_R__ phaseOut) { 2369 | forward(realIn, magOut, phaseOut); // temporarily 2370 | for (int i = 0; i < m_bins; ++i) { 2371 | T re = magOut[i], im = phaseOut[i]; 2372 | c_magphase(magOut + i, phaseOut + i, re, im); 2373 | } 2374 | } 2375 | 2376 | void forwardMagnitude(const T *BQ_R__ realIn, T *BQ_R__ magOut) { 2377 | for (int i = 0; i < m_bins; ++i) { 2378 | double re = 0.0, im = 0.0; 2379 | for (int j = 0; j < m_size; ++j) re += realIn[j] * m_cos[i][j]; 2380 | for (int j = 0; j < m_size; ++j) im -= realIn[j] * m_sin[i][j]; 2381 | magOut[i] = T(sqrt(re * re + im * im)); 2382 | } 2383 | } 2384 | 2385 | void inverse(const T *BQ_R__ realIn, const T *BQ_R__ imagIn, T *BQ_R__ realOut) { 2386 | for (int i = 0; i < m_bins; ++i) { 2387 | m_tmp[0][i] = realIn[i]; 2388 | m_tmp[1][i] = imagIn[i]; 2389 | } 2390 | for (int i = m_bins; i < m_size; ++i) { 2391 | m_tmp[0][i] = realIn[m_size - i]; 2392 | m_tmp[1][i] = -imagIn[m_size - i]; 2393 | } 2394 | for (int i = 0; i < m_size; ++i) { 2395 | double re = 0.0; 2396 | const 
double *const cos = m_cos[i]; 2397 | const double *const sin = m_sin[i]; 2398 | for (int j = 0; j < m_size; ++j) re += m_tmp[0][j] * cos[j]; 2399 | for (int j = 0; j < m_size; ++j) re -= m_tmp[1][j] * sin[j]; 2400 | realOut[i] = T(re); 2401 | } 2402 | } 2403 | 2404 | void inverseInterleaved(const T *BQ_R__ complexIn, T *BQ_R__ realOut) { 2405 | for (int i = 0; i < m_bins; ++i) { 2406 | m_tmp[0][i] = complexIn[i*2]; 2407 | m_tmp[1][i] = complexIn[i*2+1]; 2408 | } 2409 | for (int i = m_bins; i < m_size; ++i) { 2410 | m_tmp[0][i] = complexIn[(m_size - i) * 2]; 2411 | m_tmp[1][i] = -complexIn[(m_size - i) * 2 + 1]; 2412 | } 2413 | for (int i = 0; i < m_size; ++i) { 2414 | double re = 0.0; 2415 | const double *const cos = m_cos[i]; 2416 | const double *const sin = m_sin[i]; 2417 | for (int j = 0; j < m_size; ++j) re += m_tmp[0][j] * cos[j]; 2418 | for (int j = 0; j < m_size; ++j) re -= m_tmp[1][j] * sin[j]; 2419 | realOut[i] = T(re); 2420 | } 2421 | } 2422 | 2423 | void inversePolar(const T *BQ_R__ magIn, const T *BQ_R__ phaseIn, T *BQ_R__ realOut) { 2424 | T *complexIn = allocate(m_bins * 2); 2425 | v_polar_to_cartesian_interleaved(complexIn, magIn, phaseIn, m_bins); 2426 | inverseInterleaved(complexIn, realOut); 2427 | deallocate(complexIn); 2428 | } 2429 | 2430 | void inverseCepstral(const T *BQ_R__ magIn, T *BQ_R__ cepOut) { 2431 | T *complexIn = allocate_and_zero(m_bins * 2); 2432 | for (int i = 0; i < m_bins; ++i) { 2433 | complexIn[i*2] = T(log(magIn[i] + 0.000001)); 2434 | } 2435 | inverseInterleaved(complexIn, cepOut); 2436 | deallocate(complexIn); 2437 | } 2438 | 2439 | private: 2440 | const int m_size; 2441 | const int m_bins; 2442 | double **m_sin; 2443 | double **m_cos; 2444 | double **m_tmp; 2445 | }; 2446 | 2447 | public: 2448 | D_DFT(int size) : m_size(size), m_double(0), m_float(0) { } 2449 | 2450 | ~D_DFT() { 2451 | delete m_double; 2452 | delete m_float; 2453 | } 2454 | 2455 | int getSize() const { 2456 | return m_size; 2457 | } 2458 | 2459 | 
FFT::Precisions 2460 | getSupportedPrecisions() const { 2461 | return FFT::DoublePrecision; 2462 | } 2463 | 2464 | void initFloat() { 2465 | if (!m_float) { 2466 | m_float = new DFT(m_size); 2467 | } 2468 | } 2469 | 2470 | void initDouble() { 2471 | if (!m_double) { 2472 | m_double = new DFT(m_size); 2473 | } 2474 | } 2475 | 2476 | void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) { 2477 | initDouble(); 2478 | m_double->forward(realIn, realOut, imagOut); 2479 | } 2480 | 2481 | void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut) { 2482 | initDouble(); 2483 | m_double->forwardInterleaved(realIn, complexOut); 2484 | } 2485 | 2486 | void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) { 2487 | initDouble(); 2488 | m_double->forwardPolar(realIn, magOut, phaseOut); 2489 | } 2490 | 2491 | void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) { 2492 | initDouble(); 2493 | m_double->forwardMagnitude(realIn, magOut); 2494 | } 2495 | 2496 | void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) { 2497 | initFloat(); 2498 | m_float->forward(realIn, realOut, imagOut); 2499 | } 2500 | 2501 | void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) { 2502 | initFloat(); 2503 | m_float->forwardInterleaved(realIn, complexOut); 2504 | } 2505 | 2506 | void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) { 2507 | initFloat(); 2508 | m_float->forwardPolar(realIn, magOut, phaseOut); 2509 | } 2510 | 2511 | void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut) { 2512 | initFloat(); 2513 | m_float->forwardMagnitude(realIn, magOut); 2514 | } 2515 | 2516 | void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) { 2517 | initDouble(); 2518 | m_double->inverse(realIn, imagIn, realOut); 2519 | } 2520 | 2521 | void 
inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) { 2522 | initDouble(); 2523 | m_double->inverseInterleaved(complexIn, realOut); 2524 | } 2525 | 2526 | void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) { 2527 | initDouble(); 2528 | m_double->inversePolar(magIn, phaseIn, realOut); 2529 | } 2530 | 2531 | void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) { 2532 | initDouble(); 2533 | m_double->inverseCepstral(magIn, cepOut); 2534 | } 2535 | 2536 | void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut) { 2537 | initFloat(); 2538 | m_float->inverse(realIn, imagIn, realOut); 2539 | } 2540 | 2541 | void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) { 2542 | initFloat(); 2543 | m_float->inverseInterleaved(complexIn, realOut); 2544 | } 2545 | 2546 | void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) { 2547 | initFloat(); 2548 | m_float->inversePolar(magIn, phaseIn, realOut); 2549 | } 2550 | 2551 | void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) { 2552 | initFloat(); 2553 | m_float->inverseCepstral(magIn, cepOut); 2554 | } 2555 | 2556 | private: 2557 | int m_size; 2558 | DFT *m_double; 2559 | DFT *m_float; 2560 | }; 2561 | 2562 | } /* end namespace FFTs */ 2563 | 2564 | enum SizeConstraint { 2565 | SizeConstraintNone = 0x0, 2566 | SizeConstraintEven = 0x1, 2567 | SizeConstraintPowerOfTwo = 0x2, 2568 | SizeConstraintEvenPowerOfTwo = 0x3 // i.e. 0x1 | 0x2. 
Excludes size 1 obvs 2569 | }; 2570 | 2571 | typedef std::map ImplMap; 2572 | 2573 | static std::string defaultImplementation; 2574 | 2575 | static ImplMap 2576 | getImplementationDetails() 2577 | { 2578 | ImplMap impls; 2579 | 2580 | #ifdef HAVE_IPP 2581 | impls["ipp"] = SizeConstraintEvenPowerOfTwo; 2582 | #endif 2583 | #ifdef HAVE_FFTW3 2584 | impls["fftw"] = SizeConstraintNone; 2585 | #endif 2586 | #ifdef HAVE_SLEEF 2587 | impls["sleef"] = SizeConstraintEvenPowerOfTwo; 2588 | #endif 2589 | #ifdef HAVE_KISSFFT 2590 | impls["kissfft"] = SizeConstraintEven; 2591 | #endif 2592 | #ifdef HAVE_VDSP 2593 | impls["vdsp"] = SizeConstraintEvenPowerOfTwo; 2594 | #endif 2595 | #ifdef USE_BUILTIN_FFT 2596 | impls["builtin"] = SizeConstraintEvenPowerOfTwo; 2597 | #endif 2598 | 2599 | impls["dft"] = SizeConstraintNone; 2600 | 2601 | return impls; 2602 | } 2603 | 2604 | static std::string 2605 | pickImplementation(int size) 2606 | { 2607 | ImplMap impls = getImplementationDetails(); 2608 | 2609 | bool isPowerOfTwo = !(size & (size-1)); 2610 | bool isEven = !(size & 1); 2611 | 2612 | if (defaultImplementation != "") { 2613 | ImplMap::const_iterator itr = impls.find(defaultImplementation); 2614 | if (itr != impls.end()) { 2615 | if (((itr->second & SizeConstraintPowerOfTwo) && !isPowerOfTwo) || 2616 | ((itr->second & SizeConstraintEven) && !isEven)) { 2617 | // std::cerr << "NOTE: bqfft: Explicitly-set default " 2618 | // << "implementation \"" << defaultImplementation 2619 | // << "\" does not support size " << size 2620 | // << ", trying other compiled-in implementations" 2621 | // << std::endl; 2622 | } else { 2623 | return defaultImplementation; 2624 | } 2625 | } else { 2626 | std::cerr << "WARNING: bqfft: Default implementation \"" 2627 | << defaultImplementation << "\" is not compiled in" 2628 | << std::endl; 2629 | } 2630 | } 2631 | 2632 | std::string preference[] = { 2633 | "ipp", "vdsp", "sleef", "fftw", "builtin", "kissfft" 2634 | }; 2635 | 2636 | for (int i = 0; i < 
int(sizeof(preference)/sizeof(preference[0])); ++i) { 2637 | ImplMap::const_iterator itr = impls.find(preference[i]); 2638 | if (itr != impls.end()) { 2639 | if ((itr->second & SizeConstraintPowerOfTwo) && 2640 | // out of an abundance of caution we don't attempt to 2641 | // use power-of-two implementations with size 2 2642 | // either, as they may involve a half-half 2643 | // complex-complex underneath (which would end up with 2644 | // size 0) 2645 | (!isPowerOfTwo || size < 4)) { 2646 | continue; 2647 | } 2648 | if ((itr->second & SizeConstraintEven) && !isEven) { 2649 | continue; 2650 | } 2651 | return preference[i]; 2652 | } 2653 | } 2654 | 2655 | std::cerr << "WARNING: bqfft: No compiled-in implementation supports size " 2656 | << size << ", falling back to slow DFT" << std::endl; 2657 | 2658 | return "dft"; 2659 | } 2660 | 2661 | std::set 2662 | FFT::getImplementations() 2663 | { 2664 | ImplMap impls = getImplementationDetails(); 2665 | std::set toReturn; 2666 | for (ImplMap::const_iterator i = impls.begin(); i != impls.end(); ++i) { 2667 | toReturn.insert(i->first); 2668 | } 2669 | return toReturn; 2670 | } 2671 | 2672 | std::string 2673 | FFT::getDefaultImplementation() 2674 | { 2675 | return defaultImplementation; 2676 | } 2677 | 2678 | void 2679 | FFT::setDefaultImplementation(std::string i) 2680 | { 2681 | if (i == "") { 2682 | defaultImplementation = i; 2683 | return; 2684 | } 2685 | ImplMap impls = getImplementationDetails(); 2686 | ImplMap::const_iterator itr = impls.find(i); 2687 | if (itr == impls.end()) { 2688 | std::cerr << "WARNING: bqfft: setDefaultImplementation: " 2689 | << "requested implementation \"" << i 2690 | << "\" is not compiled in" << std::endl; 2691 | } else { 2692 | defaultImplementation = i; 2693 | } 2694 | } 2695 | 2696 | FFT::FFT(int size, int debugLevel) : 2697 | d(0) 2698 | { 2699 | std::string impl = pickImplementation(size); 2700 | 2701 | if (debugLevel > 0) { 2702 | std::cerr << "FFT::FFT(" << size << "): using 
implementation: " 2703 | << impl << std::endl; 2704 | } 2705 | 2706 | if (impl == "ipp") { 2707 | #ifdef HAVE_IPP 2708 | d = new FFTs::D_IPP(size); 2709 | #endif 2710 | } else if (impl == "fftw") { 2711 | #ifdef HAVE_FFTW3 2712 | d = new FFTs::D_FFTW(size); 2713 | #endif 2714 | } else if (impl == "sleef") { 2715 | #ifdef HAVE_SLEEF 2716 | d = new FFTs::D_SLEEF(size); 2717 | #endif 2718 | } else if (impl == "kissfft") { 2719 | #ifdef HAVE_KISSFFT 2720 | d = new FFTs::D_KISSFFT(size); 2721 | #endif 2722 | } else if (impl == "vdsp") { 2723 | #ifdef HAVE_VDSP 2724 | d = new FFTs::D_VDSP(size); 2725 | #endif 2726 | } else if (impl == "builtin") { 2727 | #ifdef USE_BUILTIN_FFT 2728 | d = new FFTs::D_Builtin(size); 2729 | #endif 2730 | } else if (impl == "dft") { 2731 | d = new FFTs::D_DFT(size); 2732 | } 2733 | 2734 | if (!d) { 2735 | std::cerr << "FFT::FFT(" << size << "): ERROR: implementation " 2736 | << impl << " is not compiled in" << std::endl; 2737 | #ifndef NO_EXCEPTIONS 2738 | throw InvalidImplementation; 2739 | #else 2740 | abort(); 2741 | #endif 2742 | } 2743 | } 2744 | 2745 | FFT::~FFT() 2746 | { 2747 | delete d; 2748 | } 2749 | 2750 | #ifndef NO_EXCEPTIONS 2751 | #define CHECK_NOT_NULL(x) \ 2752 | if (!(x)) { \ 2753 | std::cerr << "FFT: ERROR: Null argument " #x << std::endl; \ 2754 | throw NullArgument; \ 2755 | } 2756 | #else 2757 | #define CHECK_NOT_NULL(x) \ 2758 | if (!(x)) { \ 2759 | std::cerr << "FFT: ERROR: Null argument " #x << std::endl; \ 2760 | std::cerr << "FFT: Would be throwing NullArgument here, if exceptions were not disabled" << std::endl; \ 2761 | return; \ 2762 | } 2763 | #endif 2764 | 2765 | void 2766 | FFT::forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) 2767 | { 2768 | CHECK_NOT_NULL(realIn); 2769 | CHECK_NOT_NULL(realOut); 2770 | CHECK_NOT_NULL(imagOut); 2771 | d->forward(realIn, realOut, imagOut); 2772 | } 2773 | 2774 | void 2775 | FFT::forwardInterleaved(const double *BQ_R__ realIn, double 
*BQ_R__ complexOut) 2776 | { 2777 | CHECK_NOT_NULL(realIn); 2778 | CHECK_NOT_NULL(complexOut); 2779 | d->forwardInterleaved(realIn, complexOut); 2780 | } 2781 | 2782 | void 2783 | FFT::forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) 2784 | { 2785 | CHECK_NOT_NULL(realIn); 2786 | CHECK_NOT_NULL(magOut); 2787 | CHECK_NOT_NULL(phaseOut); 2788 | d->forwardPolar(realIn, magOut, phaseOut); 2789 | } 2790 | 2791 | void 2792 | FFT::forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) 2793 | { 2794 | CHECK_NOT_NULL(realIn); 2795 | CHECK_NOT_NULL(magOut); 2796 | d->forwardMagnitude(realIn, magOut); 2797 | } 2798 | 2799 | void 2800 | FFT::forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) 2801 | { 2802 | CHECK_NOT_NULL(realIn); 2803 | CHECK_NOT_NULL(realOut); 2804 | CHECK_NOT_NULL(imagOut); 2805 | d->forward(realIn, realOut, imagOut); 2806 | } 2807 | 2808 | void 2809 | FFT::forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) 2810 | { 2811 | CHECK_NOT_NULL(realIn); 2812 | CHECK_NOT_NULL(complexOut); 2813 | d->forwardInterleaved(realIn, complexOut); 2814 | } 2815 | 2816 | void 2817 | FFT::forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) 2818 | { 2819 | CHECK_NOT_NULL(realIn); 2820 | CHECK_NOT_NULL(magOut); 2821 | CHECK_NOT_NULL(phaseOut); 2822 | d->forwardPolar(realIn, magOut, phaseOut); 2823 | } 2824 | 2825 | void 2826 | FFT::forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut) 2827 | { 2828 | CHECK_NOT_NULL(realIn); 2829 | CHECK_NOT_NULL(magOut); 2830 | d->forwardMagnitude(realIn, magOut); 2831 | } 2832 | 2833 | void 2834 | FFT::inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) 2835 | { 2836 | CHECK_NOT_NULL(realIn); 2837 | CHECK_NOT_NULL(imagIn); 2838 | CHECK_NOT_NULL(realOut); 2839 | d->inverse(realIn, imagIn, realOut); 2840 | } 2841 | 2842 | void 2843 | 
FFT::inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) 2844 | { 2845 | CHECK_NOT_NULL(complexIn); 2846 | CHECK_NOT_NULL(realOut); 2847 | d->inverseInterleaved(complexIn, realOut); 2848 | } 2849 | 2850 | void 2851 | FFT::inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) 2852 | { 2853 | CHECK_NOT_NULL(magIn); 2854 | CHECK_NOT_NULL(phaseIn); 2855 | CHECK_NOT_NULL(realOut); 2856 | d->inversePolar(magIn, phaseIn, realOut); 2857 | } 2858 | 2859 | void 2860 | FFT::inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) 2861 | { 2862 | CHECK_NOT_NULL(magIn); 2863 | CHECK_NOT_NULL(cepOut); 2864 | d->inverseCepstral(magIn, cepOut); 2865 | } 2866 | 2867 | void 2868 | FFT::inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut) 2869 | { 2870 | CHECK_NOT_NULL(realIn); 2871 | CHECK_NOT_NULL(imagIn); 2872 | CHECK_NOT_NULL(realOut); 2873 | d->inverse(realIn, imagIn, realOut); 2874 | } 2875 | 2876 | void 2877 | FFT::inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) 2878 | { 2879 | CHECK_NOT_NULL(complexIn); 2880 | CHECK_NOT_NULL(realOut); 2881 | d->inverseInterleaved(complexIn, realOut); 2882 | } 2883 | 2884 | void 2885 | FFT::inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) 2886 | { 2887 | CHECK_NOT_NULL(magIn); 2888 | CHECK_NOT_NULL(phaseIn); 2889 | CHECK_NOT_NULL(realOut); 2890 | d->inversePolar(magIn, phaseIn, realOut); 2891 | } 2892 | 2893 | void 2894 | FFT::inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) 2895 | { 2896 | CHECK_NOT_NULL(magIn); 2897 | CHECK_NOT_NULL(cepOut); 2898 | d->inverseCepstral(magIn, cepOut); 2899 | } 2900 | 2901 | void 2902 | FFT::initFloat() 2903 | { 2904 | d->initFloat(); 2905 | } 2906 | 2907 | void 2908 | FFT::initDouble() 2909 | { 2910 | d->initDouble(); 2911 | } 2912 | 2913 | int 2914 | FFT::getSize() const 2915 | { 2916 | return d->getSize(); 2917 | } 2918 
| 2919 | FFT::Precisions 2920 | FFT::getSupportedPrecisions() const 2921 | { 2922 | return d->getSupportedPrecisions(); 2923 | } 2924 | 2925 | #ifdef FFT_MEASUREMENT 2926 | 2927 | #ifdef FFT_MEASUREMENT_RETURN_RESULT_TEXT 2928 | std::string 2929 | #else 2930 | void 2931 | #endif 2932 | FFT::tune() 2933 | { 2934 | #ifdef FFT_MEASUREMENT_RETURN_RESULT_TEXT 2935 | std::ostringstream os; 2936 | #else 2937 | #define os std::cerr 2938 | #endif 2939 | os << "FFT::tune()..." << std::endl; 2940 | 2941 | std::vector sizes; 2942 | std::map candidates; 2943 | std::map wins; 2944 | 2945 | sizes.push_back(512); 2946 | sizes.push_back(1024); 2947 | sizes.push_back(2048); 2948 | sizes.push_back(4096); 2949 | 2950 | for (unsigned int si = 0; si < sizes.size(); ++si) { 2951 | 2952 | int size = sizes[si]; 2953 | 2954 | while (!candidates.empty()) { 2955 | delete candidates.begin()->second; 2956 | candidates.erase(candidates.begin()); 2957 | } 2958 | 2959 | FFTImpl *d; 2960 | 2961 | #ifdef HAVE_IPP 2962 | os << "Constructing new IPP FFT object for size " << size << "..." << std::endl; 2963 | d = new FFTs::D_IPP(size); 2964 | d->initFloat(); 2965 | d->initDouble(); 2966 | candidates["ipp"] = d; 2967 | #endif 2968 | 2969 | #ifdef HAVE_FFTW3 2970 | os << "Constructing new FFTW3 FFT object for size " << size << "..." << std::endl; 2971 | d = new FFTs::D_FFTW(size); 2972 | d->initFloat(); 2973 | d->initDouble(); 2974 | candidates["fftw"] = d; 2975 | #endif 2976 | 2977 | #ifdef HAVE_SLEEF 2978 | os << "Constructing new SLEEF FFT object for size " << size << "..." << std::endl; 2979 | d = new FFTs::D_SLEEF(size); 2980 | d->initFloat(); 2981 | d->initDouble(); 2982 | candidates["sleef"] = d; 2983 | #endif 2984 | 2985 | #ifdef HAVE_KISSFFT 2986 | os << "Constructing new KISSFFT object for size " << size << "..." 
<< std::endl; 2987 | d = new FFTs::D_KISSFFT(size); 2988 | d->initFloat(); 2989 | d->initDouble(); 2990 | candidates["kissfft"] = d; 2991 | #endif 2992 | 2993 | #ifdef USE_BUILTIN_FFT 2994 | os << "Constructing new Builtin FFT object for size " << size << "..." << std::endl; 2995 | d = new FFTs::D_Builtin(size); 2996 | d->initFloat(); 2997 | d->initDouble(); 2998 | candidates["builtin"] = d; 2999 | #endif 3000 | 3001 | #ifdef HAVE_VDSP 3002 | os << "Constructing new vDSP FFT object for size " << size << "..." << std::endl; 3003 | d = new FFTs::D_VDSP(size); 3004 | d->initFloat(); 3005 | d->initDouble(); 3006 | candidates["vdsp"] = d; 3007 | #endif 3008 | 3009 | os << "Constructing new DFT object for size " << size << "..." << std::endl; 3010 | d = new FFTs::D_DFT(size); 3011 | d->initFloat(); 3012 | d->initDouble(); 3013 | candidates["dft"] = d; 3014 | 3015 | os << "CLOCKS_PER_SEC = " << CLOCKS_PER_SEC << std::endl; 3016 | float divisor = float(CLOCKS_PER_SEC) / 1000.f; 3017 | 3018 | os << "Timing order is: "; 3019 | for (std::map::iterator ci = candidates.begin(); 3020 | ci != candidates.end(); ++ci) { 3021 | os << ci->first << " "; 3022 | } 3023 | os << std::endl; 3024 | 3025 | int iterations = 500; 3026 | os << "Iterations: " << iterations << std::endl; 3027 | 3028 | double *da = allocate_and_zero(size); 3029 | double *db = allocate_and_zero(size); 3030 | double *dc = allocate_and_zero(size); 3031 | double *dd = allocate_and_zero(size); 3032 | 3033 | double *di = allocate_and_zero(size + 2); 3034 | double *dj = allocate_and_zero(size + 2); 3035 | 3036 | float *fa = allocate_and_zero(size); 3037 | float *fb = allocate_and_zero(size); 3038 | float *fc = allocate_and_zero(size); 3039 | float *fd = allocate_and_zero(size); 3040 | 3041 | float *fi = allocate_and_zero(size + 2); 3042 | float *fj = allocate_and_zero(size + 2); 3043 | 3044 | for (int type = 0; type < 16; ++type) { 3045 | 3046 | //!!! 
3047 | if ((type > 3 && type < 8) || 3048 | (type > 11)) { 3049 | continue; 3050 | } 3051 | 3052 | if (type > 7) { 3053 | // inverse transform: bigger inputs, to simulate the 3054 | // fact that the forward transform is unscaled 3055 | for (int i = 0; i < size; ++i) { 3056 | da[i] = drand48() * size; 3057 | fa[i] = da[i]; 3058 | db[i] = drand48() * size; 3059 | fb[i] = db[i]; 3060 | } 3061 | } else { 3062 | for (int i = 0; i < size; ++i) { 3063 | da[i] = drand48(); 3064 | fa[i] = da[i]; 3065 | db[i] = drand48(); 3066 | fb[i] = db[i]; 3067 | } 3068 | } 3069 | 3070 | for (int i = 0; i < size + 2; ++i) { 3071 | di[i] = drand48(); 3072 | fi[i] = di[i]; 3073 | } 3074 | 3075 | std::string low; 3076 | clock_t lowscore = 0; 3077 | 3078 | const char *names[] = { 3079 | 3080 | "Forward Cartesian Double", 3081 | "Forward Interleaved Double", 3082 | "Forward Polar Double", 3083 | "Forward Magnitude Double", 3084 | "Forward Cartesian Float", 3085 | "Forward Interleaved Float", 3086 | "Forward Polar Float", 3087 | "Forward Magnitude Float", 3088 | 3089 | "Inverse Cartesian Double", 3090 | "Inverse Interleaved Double", 3091 | "Inverse Polar Double", 3092 | "Inverse Cepstral Double", 3093 | "Inverse Cartesian Float", 3094 | "Inverse Interleaved Float", 3095 | "Inverse Polar Float", 3096 | "Inverse Cepstral Float" 3097 | }; 3098 | os << names[type] << " :: "; 3099 | 3100 | for (std::map::iterator ci = candidates.begin(); 3101 | ci != candidates.end(); ++ci) { 3102 | 3103 | FFTImpl *d = ci->second; 3104 | 3105 | double mean = 0; 3106 | 3107 | clock_t start = clock(); 3108 | 3109 | for (int i = 0; i < iterations; ++i) { 3110 | 3111 | if (i == 0) { 3112 | for (int j = 0; j < size; ++j) { 3113 | dc[j] = 0; 3114 | dd[j] = 0; 3115 | fc[j] = 0; 3116 | fd[j] = 0; 3117 | fj[j] = 0; 3118 | dj[j] = 0; 3119 | } 3120 | } 3121 | 3122 | switch (type) { 3123 | case 0: d->forward(da, dc, dd); break; 3124 | case 1: d->forwardInterleaved(da, dj); break; 3125 | case 2: d->forwardPolar(da, dc, dd); 
break; 3126 | case 3: d->forwardMagnitude(da, dc); break; 3127 | case 4: d->forward(fa, fc, fd); break; 3128 | case 5: d->forwardInterleaved(fa, fj); break; 3129 | case 6: d->forwardPolar(fa, fc, fd); break; 3130 | case 7: d->forwardMagnitude(fa, fc); break; 3131 | case 8: d->inverse(da, db, dc); break; 3132 | case 9: d->inverseInterleaved(di, dc); break; 3133 | case 10: d->inversePolar(da, db, dc); break; 3134 | case 11: d->inverseCepstral(da, dc); break; 3135 | case 12: d->inverse(fa, fb, fc); break; 3136 | case 13: d->inverseInterleaved(fi, fc); break; 3137 | case 14: d->inversePolar(fa, fb, fc); break; 3138 | case 15: d->inverseCepstral(fa, fc); break; 3139 | } 3140 | 3141 | if (i == 0) { 3142 | mean = 0; 3143 | for (int j = 0; j < size; ++j) { 3144 | mean += dc[j]; 3145 | mean += dd[j]; 3146 | mean += fc[j]; 3147 | mean += fd[j]; 3148 | mean += fj[j]; 3149 | mean += dj[j]; 3150 | } 3151 | mean /= size * 6; 3152 | } 3153 | } 3154 | 3155 | clock_t end = clock(); 3156 | 3157 | os << float(end - start)/divisor << " (" << mean << ") "; 3158 | 3159 | if (low == "" || (end - start) < lowscore) { 3160 | low = ci->first; 3161 | lowscore = end - start; 3162 | } 3163 | } 3164 | 3165 | os << std::endl; 3166 | 3167 | os << " size " << size << ", type " << type << ": fastest is " << low << " (time " << float(lowscore)/divisor << ")" << std::endl; 3168 | 3169 | wins[low]++; 3170 | } 3171 | 3172 | deallocate(da); 3173 | deallocate(db); 3174 | deallocate(dc); 3175 | deallocate(dd); 3176 | 3177 | deallocate(di); 3178 | deallocate(dj); 3179 | 3180 | deallocate(fa); 3181 | deallocate(fb); 3182 | deallocate(fc); 3183 | deallocate(fd); 3184 | 3185 | deallocate(fi); 3186 | deallocate(fj); 3187 | } 3188 | 3189 | while (!candidates.empty()) { 3190 | delete candidates.begin()->second; 3191 | candidates.erase(candidates.begin()); 3192 | } 3193 | 3194 | int bestscore = 0; 3195 | std::string best; 3196 | 3197 | for (std::map::iterator wi = wins.begin(); wi != wins.end(); ++wi) { 3198 | if 
(best == "" || wi->second > bestscore) { 3199 | best = wi->first; 3200 | bestscore = wi->second; 3201 | } 3202 | } 3203 | 3204 | os << "overall winner is " << best << " with " << bestscore << " wins" << std::endl; 3205 | 3206 | #ifdef FFT_MEASUREMENT_RETURN_RESULT_TEXT 3207 | return os.str(); 3208 | #endif 3209 | } 3210 | 3211 | #endif 3212 | 3213 | } 3214 | --------------------------------------------------------------------------------