├── absolute.png ├── relative.png ├── linIntError.png ├── RawResults ├── Results.xlsx ├── Relative.tsv ├── Parsed.tsv ├── laptop_32.txt ├── laptop_64.txt ├── desktop_32.txt ├── desktop_64.txt └── tablet_32.txt ├── ParseResults ├── App.config ├── Properties │ └── AssemblyInfo.cs ├── ParseResults.csproj └── Program.cs ├── .gitignore ├── SinCosPolyPrecision ├── stdafx.h ├── stdafx.cpp ├── targetver.h ├── SinCosPolyPrecision.vcxproj.filters ├── SinCosPolyPrecision.cpp └── SinCosPolyPrecision.vcxproj ├── Test ├── SinCos │ ├── SinCos.h │ ├── SinCosLinInt.hpp │ ├── SinCos.cpp │ ├── GTEngineDEF.h │ └── Mathematics │ │ ├── GteSinEstimate.h │ │ ├── GteCosEstimate.h │ │ └── GteConstants.h ├── stdafx.cpp ├── targetver.h ├── BitCount │ ├── align.cpp │ ├── BitCount.h │ ├── BitCountBuiltin.cpp │ ├── BitCountLookup.cpp │ ├── BitCount.cpp │ ├── BitCountSse.cpp │ └── align.h ├── stdafx.h ├── LookupTables.cpp ├── LookupTables.vcxproj.filters └── LookupTables.vcxproj ├── LookupTables.sln └── readme.md /absolute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Const-me/LookupTables/HEAD/absolute.png -------------------------------------------------------------------------------- /relative.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Const-me/LookupTables/HEAD/relative.png -------------------------------------------------------------------------------- /linIntError.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Const-me/LookupTables/HEAD/linIntError.png -------------------------------------------------------------------------------- /RawResults/Results.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Const-me/LookupTables/HEAD/RawResults/Results.xlsx 
-------------------------------------------------------------------------------- /ParseResults/App.config: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vs/ 2 | LookupTables.VC.db 3 | LookupTables.VC.VC.opendb 4 | Test/Win32/ 5 | Test/x64/ 6 | ParseResults/obj/ 7 | ParseResults/bin/ 8 | ipch/ 9 | *.user 10 | SinCosPolyPrecision/Win32/ 11 | SinCosPolyPrecision/x64/ -------------------------------------------------------------------------------- /SinCosPolyPrecision/stdafx.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "targetver.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #define _USE_MATH_DEFINES 9 | #include 10 | #include 11 | #include -------------------------------------------------------------------------------- /Test/SinCos/SinCos.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace nsSinCos 4 | { 5 | enum struct eAlgo: uint8_t 6 | { 7 | StdLib, 8 | 9 | Lookup, 10 | 11 | PolyGTE, 12 | 13 | #ifdef _WIN32 14 | PolyDX, 15 | PolyDxLow, 16 | #endif 17 | }; 18 | 19 | template 20 | stopwatch::duration testSinCos(); 21 | } -------------------------------------------------------------------------------- /Test/stdafx.cpp: -------------------------------------------------------------------------------- 1 | // stdafx.cpp : source file that includes just the standard includes 2 | // LookupTables.pch will be the pre-compiled header 3 | // stdafx.obj will contain the pre-compiled type information 4 | 5 | #include "stdafx.h" 6 | 7 | // TODO: reference any additional headers you need in STDAFX.H 8 | // and not in this file 9 | 
-------------------------------------------------------------------------------- /Test/targetver.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Including SDKDDKVer.h defines the highest available Windows platform. 4 | 5 | // If you wish to build your application for a previous Windows platform, include WinSDKVer.h and 6 | // set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h. 7 | 8 | #include 9 | -------------------------------------------------------------------------------- /SinCosPolyPrecision/stdafx.cpp: -------------------------------------------------------------------------------- 1 | // stdafx.cpp : source file that includes just the standard includes 2 | // SinCosPolyPrecision.pch will be the pre-compiled header 3 | // stdafx.obj will contain the pre-compiled type information 4 | 5 | #include "stdafx.h" 6 | 7 | // TODO: reference any additional headers you need in STDAFX.H 8 | // and not in this file 9 | -------------------------------------------------------------------------------- /SinCosPolyPrecision/targetver.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // Including SDKDDKVer.h defines the highest available Windows platform. 4 | 5 | // If you wish to build your application for a previous Windows platform, include WinSDKVer.h and 6 | // set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h. 
7 | 8 | #include 9 | -------------------------------------------------------------------------------- /Test/BitCount/align.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "align.h" 3 | 4 | void* detail::allocate_aligned_memory( size_t align, size_t size ) noexcept 5 | { 6 | assert( align >= sizeof( void* ) ); 7 | if( size == 0 ) 8 | return nullptr; 9 | return _aligned_malloc( size, align ); 10 | } 11 | 12 | void detail::deallocate_aligned_memory( void *ptr ) noexcept 13 | { 14 | return _aligned_free( ptr ); 15 | } -------------------------------------------------------------------------------- /Test/BitCount/BitCount.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "align.h" 3 | 4 | namespace nsBitCnt 5 | { 6 | stopwatch::duration test( int nAlgo ); 7 | 8 | uint64_t testLookup( const alignedVector& vec ); 9 | 10 | uint64_t testSse2( const alignedVector& vec ); 11 | uint64_t testSsse3( const alignedVector& vec ); 12 | uint64_t testXop( const alignedVector& vec ); 13 | uint64_t testBuiltin( const alignedVector& vec ); 14 | } -------------------------------------------------------------------------------- /RawResults/Relative.tsv: -------------------------------------------------------------------------------- 1 | SinCos 2 | desktop_32 desktop_64 laptop_32 laptop_64 tablet_32 3 | StdLib 179.9536957 92.98304131 162.8084785 123.2181684 157.78789 4 | Lookup 100 100 100 100 100 5 | GTE 82.28514215 92.95617369 85.07954026 102.7868762 108.8143041 6 | DX 65.3408725 80.64719148 64.31137149 91.00110594 83.03161227 7 | DX_Est 57.72547152 61.91282228 57.50528739 66.57133087 64.1036295 8 | 9 | BitCount 10 | desktop_32 desktop_64 laptop_32 laptop_64 tablet_32 11 | Lookup 100 100 100 100 100 12 | SSE2 14.58106797 45.97619823 20.09557259 54.19812104 32.25625402 13 | SSSE3 19.56030359 59.17546616 24.53645872 74.53063189 49.57154478 14 | POPCNT 
19.48515159 30.89744576 20.69146076 33.29716138 30.94231022 15 | -------------------------------------------------------------------------------- /Test/BitCount/BitCountBuiltin.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "BitCount.h" 3 | 4 | namespace nsBitCnt 5 | { 6 | // Calculate bits using POPCNT instruction. 7 | uint64_t testBuiltin( const alignedVector& vec ) 8 | { 9 | #ifdef _MSC_VER 10 | #define popcnt64 __popcnt64 11 | #define popcnt32 __popcnt 12 | #else 13 | #define popcnt64 __popcntq 14 | #define popcnt32 __popcntd 15 | #endif 16 | 17 | #if defined(_M_X64) || defined(__amd64__) 18 | #define _pc popcnt64 19 | using tElt = uint64_t; 20 | #else 21 | #define _pc popcnt32 22 | using tElt = uint32_t; 23 | #endif 24 | const tElt* p = (const tElt*)vec.data(); 25 | const tElt* pEnd = p + ( vec.size() * sizeof( uint16_t ) / sizeof( tElt ) ); 26 | 27 | uint64_t bits = _pc( *p ); 28 | p++; 29 | while( p < pEnd ) 30 | { 31 | bits += _pc( *p ); 32 | p++; 33 | } 34 | return bits; 35 | } 36 | } -------------------------------------------------------------------------------- /RawResults/Parsed.tsv: -------------------------------------------------------------------------------- 1 | SinCos 2 | desktop_32 desktop_64 laptop_32 laptop_64 tablet_32 3 | StdLib 404.756525666667 183.099744 461.678577666667 345.527862 1522.45437133333 4 | Lookup 224.922597 196.917353333333 283.571581666667 280.419573333333 964.874029 5 | GTE 185.077878666667 183.046837 241.261398 288.234519666667 1049.92096033333 6 | DX 146.966387333333 158.808315 182.368773333333 255.184913 801.150462666667 7 | DX_Est 129.837629666667 121.917091 163.068653 186.679042 618.519272666667 8 | 9 | BitCount 10 | desktop_32 desktop_64 laptop_32 laptop_64 tablet_32 11 | Lookup 240.437594 74.874501 284.062135666667 91.4256473333333 597.494531 12 | SSE2 35.058369 34.424449 57.0839126666667 49.550983 192.729353666667 13 | SSSE3 47.0303233333333 
// Returns a millisecond counter truncated to 32 bits.
// On Windows this is GetTickCount(); elsewhere it is derived from the
// CLOCK_REALTIME wall clock, so only the low 32 bits of the millisecond count survive.
inline uint32_t getTickCount()
{
#ifdef _WIN32
	return GetTickCount();
#else
	struct timespec now;
	clock_gettime( CLOCK_REALTIME, &now );
	// Both parts are reduced modulo 2^32; the sum wraps the same way.
	const unsigned fromNanoseconds = unsigned( now.tv_nsec / 1000000 );
	const unsigned fromSeconds = unsigned( now.tv_sec * 1000 );
	return fromNanoseconds + fromSeconds;
#endif
}

// Re-seeds the C library random number generator from the current tick count.
inline void resetRand()
{
	const uint32_t seed = getTickCount();
	srand( seed );
}
750006700 319.784204 21 | 2 749997104 57.007905 22 | 2 749998576 57.073947 23 | 2 750001899 57.169886 24 | 3 749990732 69.994489 25 | 3 750008066 69.566557 26 | 3 749994005 69.535320 27 | 4 This CPU doesn't support XOP instruction set.0.000000 28 | 4 This CPU doesn't support XOP instruction set.0.000000 29 | 4 This CPU doesn't support XOP instruction set.0.000000 30 | 5 749995957 58.769168 31 | 5 750007034 58.758905 32 | 5 750003094 58.801743 33 | -------------------------------------------------------------------------------- /RawResults/laptop_64.txt: -------------------------------------------------------------------------------- 1 | SinCos 2 | 1 -0.000087 / 0.000315 338.650739 3 | 1 -0.000218 / 0.000161 348.276331 4 | 1 -0.000228 / -0.000103 349.656516 5 | 2 0.037992 / -0.000081 285.009627 6 | 2 0.037861 / -0.000156 280.632127 7 | 2 0.038111 / -0.000174 275.616966 8 | 3 0.000152 / -0.000035 268.059197 9 | 3 -0.000075 / -0.000456 273.469715 10 | 3 -0.000150 / 0.000081 323.174647 11 | 4 0.000159 / -0.000192 248.901720 12 | 4 0.000113 / 0.000009 255.936064 13 | 4 -0.000143 / 0.000206 260.716955 14 | 5 0.000180 / 0.000084 188.482845 15 | 5 0.000038 / -0.000145 191.554236 16 | 5 0.000019 / 0.000190 180.000045 17 | BitCount 18 | 1 749983487 91.259502 19 | 1 749989718 91.397386 20 | 1 750000247 91.620054 21 | 2 749996617 49.734383 22 | 2 749995530 49.489403 23 | 2 749996927 49.429163 24 | 3 750012749 65.660273 25 | 3 750000511 73.860166 26 | 3 749991648 64.899899 27 | 4 This CPU doesn't support XOP instruction set.0.000000 28 | 4 This CPU doesn't support XOP instruction set.0.000000 29 | 4 This CPU doesn't support XOP instruction set.0.000000 30 | 5 749990728 30.728178 31 | 5 749996849 30.154328 32 | 5 749982555 30.443930 33 | -------------------------------------------------------------------------------- /RawResults/desktop_32.txt: -------------------------------------------------------------------------------- 1 | SinCos 2 | 1 0.000183 / 0.000024 404.772739 3 | 1 
0.000014 / 0.000351 404.965059 4 | 1 -0.000211 / -0.000226 404.531779 5 | 2 0.000122 / 0.000149 225.209424 6 | 2 -0.000095 / 0.000403 225.211664 7 | 2 -0.000121 / -0.000078 224.346703 8 | 3 -0.000146 / 0.000479 184.801719 9 | 3 0.000012 / 0.000415 185.113718 10 | 3 0.000102 / -0.000369 185.318199 11 | 4 -0.000041 / -0.000243 147.260254 12 | 4 0.000016 / 0.000089 146.934494 13 | 4 -0.000207 / -0.000020 146.704414 14 | 5 0.000009 / 0.000110 130.457043 15 | 5 0.000217 / -0.000466 130.508883 16 | 5 0.000253 / 0.000013 128.546963 17 | BitCount 18 | 1 749995608 230.947348 19 | 1 750002573 257.653925 20 | 1 749994654 232.711509 21 | 2 750015525 35.056022 22 | 2 749994490 35.063383 23 | 2 750003652 35.055702 24 | 3 750002897 46.917470 25 | 3 749987174 47.343070 26 | 3 749992602 46.830430 27 | 4 This CPU doesn't support XOP instruction set.0.000000 28 | 4 This CPU doesn't support XOP instruction set.0.000000 29 | 4 This CPU doesn't support XOP instruction set.0.000000 30 | 5 749978439 47.672031 31 | 5 750001358 46.298909 32 | 5 750013174 46.577949 33 | -------------------------------------------------------------------------------- /RawResults/desktop_64.txt: -------------------------------------------------------------------------------- 1 | SinCos 2 | 1 -0.000108 / 0.000542 182.713397 3 | 1 -0.000241 / -0.000410 182.296117 4 | 1 -0.000193 / -0.000140 184.289718 5 | 2 0.037735 / -0.000581 197.624127 6 | 2 0.038014 / -0.000103 197.432127 7 | 2 0.037950 / 0.000446 195.695806 8 | 3 0.000164 / -0.000187 182.557557 9 | 3 0.000094 / -0.000511 183.252917 10 | 3 0.000387 / 0.000001 183.330037 11 | 4 -0.000488 / 0.000444 158.287461 12 | 4 0.000104 / -0.000164 160.676903 13 | 4 0.000164 / -0.000182 157.460581 14 | 5 -0.000037 / 0.000055 122.187278 15 | 5 -0.000200 / 0.000086 121.219917 16 | 5 -0.000116 / -0.000263 122.344078 17 | BitCount 18 | 1 750018142 75.096048 19 | 1 749985960 74.529967 20 | 1 749995042 74.997488 21 | 2 750002267 34.381462 22 | 2 750003782 34.414422 23 | 2 
750002314 34.477463 24 | 3 750004016 44.472028 25 | 3 749991643 44.144029 26 | 3 750015061 44.305948 27 | 4 This CPU doesn't support XOP instruction set.0.000000 28 | 4 This CPU doesn't support XOP instruction set.0.000000 29 | 4 This CPU doesn't support XOP instruction set.0.000000 30 | 5 750023961 22.786255 31 | 5 749998210 23.233295 32 | 5 750000563 23.383375 33 | -------------------------------------------------------------------------------- /RawResults/tablet_32.txt: -------------------------------------------------------------------------------- 1 | SinCos 2 | 1 -0.000134 / -0.000098 1567.576618 3 | 1 -0.000204 / -0.000125 1492.351882 4 | 1 0.000310 / -0.000196 1507.434614 5 | 2 -0.000009 / 0.000008 997.259779 6 | 2 0.000061 / 0.000159 899.369089 7 | 2 0.000183 / 0.000105 997.993219 8 | 3 0.000042 / 0.000321 1051.851454 9 | 3 -0.000120 / 0.000155 1035.472339 10 | 3 0.000279 / 0.000181 1062.439088 11 | 4 -0.000158 / -0.000164 797.009412 12 | 4 0.000155 / 0.000139 793.296905 13 | 4 -0.000175 / 0.000232 813.145071 14 | 5 -0.000433 / 0.000279 610.483187 15 | 5 -0.000309 / 0.000145 628.213980 16 | 5 0.000007 / 0.000118 616.860651 17 | BitCount 18 | 1 749995519 605.079546 19 | 1 749997515 580.843800 20 | 1 749992402 606.560247 21 | 2 750009716 163.519279 22 | 2 749978422 173.830434 23 | 2 749996628 240.838348 24 | 3 750016545 338.547023 25 | 3 749997743 284.511380 26 | 3 750001774 265.503404 27 | 4 This CPU doesn't support XOP instruction set.0.000000 28 | 4 This CPU doesn't support XOP instruction set.0.000000 29 | 4 This CPU doesn't support XOP instruction set.0.000000 30 | 5 750000321 184.418068 31 | 5 749995447 186.380305 32 | 5 749993234 183.837461 33 | -------------------------------------------------------------------------------- /SinCosPolyPrecision/SinCosPolyPrecision.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | 
cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Header Files 20 | 21 | 22 | Header Files 23 | 24 | 25 | 26 | 27 | Source Files 28 | 29 | 30 | Source Files 31 | 32 | 33 | -------------------------------------------------------------------------------- /Test/BitCount/BitCountLookup.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "BitCount.h" 3 | 4 | namespace nsBitCnt 5 | { 6 | // http://stackoverflow.com/a/21455308/126995 7 | static uint8_t BYTE_BIT_COUNTS[ 256 ] = 8 | { 9 | 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 10 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 11 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 12 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 13 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 14 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 15 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 16 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 17 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 18 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 19 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 20 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 21 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 22 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 23 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 24 | 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 25 | }; 26 | 27 | uint64_t testLookup( const alignedVector& vec ) 28 | { 29 | const uint8_t* p = (const uint8_t*)vec.data(); 30 | const uint8_t* const pEnd = p + vec.size() * sizeof( uint16_t ); 31 | 32 | uint64_t res = BYTE_BIT_COUNTS[ *p ]; 33 | p++; 34 | 35 | for( ; p < pEnd; p++ ) 36 | res += BYTE_BIT_COUNTS[ *p ]; 37 | return res; 38 | } 39 | } 
-------------------------------------------------------------------------------- /ParseResults/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.CompilerServices; 3 | using System.Runtime.InteropServices; 4 | 5 | // General Information about an assembly is controlled through the following 6 | // set of attributes. Change these attribute values to modify the information 7 | // associated with an assembly. 8 | [assembly: AssemblyTitle( "ParseResults" )] 9 | [assembly: AssemblyDescription( "" )] 10 | [assembly: AssemblyConfiguration( "" )] 11 | [assembly: AssemblyCompany( "" )] 12 | [assembly: AssemblyProduct( "ParseResults" )] 13 | [assembly: AssemblyCopyright( "Copyright © 2016" )] 14 | [assembly: AssemblyTrademark( "" )] 15 | [assembly: AssemblyCulture( "" )] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 
// Accumulates the magnitudes of a series of error samples and reports
// their mean and maximum.
class Stat
{
	double m_sum = 0;	// sum of |v| over all samples
	double m_max = 0;	// largest |v| seen so far
	int m_count = 0;	// number of samples added

public:
	Stat() = default;

	// Adds one sample; only its magnitude is recorded.
	void add( double v )
	{
		// fabs always takes a double. An unqualified abs() can resolve to
		// int abs( int ) and silently truncate sub-unit errors to zero.
		v = fabs( v );
		m_count++;
		m_sum += v;
		m_max = std::max( m_max, v );
	}

	// Number of samples added so far.
	int sampleCount() const { return m_count; }

	// Largest magnitude added so far; 0 when no samples were added.
	double maxError() const { return m_max; }

	// Mean magnitude of the samples; 0 when no samples were added.
	double meanError() const
	{
		return m_count > 0 ? m_sum / m_count : 0.0;
	}

	// Prints "<title>\t<mean>\t<max>\n" to stdout in scientific notation,
	// with DBL_DECIMAL_DIG digits after the decimal point.
	void print( const char* title ) const
	{
		const int digs = DBL_DECIMAL_DIG;
		printf( "%s\t%.*e\t%.*e\n",
			title,
			digs, meanError(),
			digs, maxError() );
	}
};
#define _PI (float)( M_PI )
#define _2PI (float)( M_PI * 2 )
#define _PIDIV2 (float)( M_PI / 2 )
#define _1DIV2PI (float)( 1.0 / ( M_PI * 2 ) )

// Sine/cosine approximation by linear interpolation in a precomputed table.
// Requires M_PI, i.e. _USE_MATH_DEFINES must be defined before the math header on MSVC.
class LinInt
{
	// Number of intervals the range [-PI/2, +PI/2] is divided into.
	static const size_t size = 256;

	// Two tables for sin and cos, from -PI/2 to +PI/2, interleaved for cache friendliness.
	// Entry i holds { sin, cos } at angle PI * ( i - size/2 ) / size, for i in [0, size].
	// One extra padding pair follows entry `size`, because lookup() always reads the
	// entry after the selected one, even when its interpolation weight is zero.
	std::array<float, ( size + 2 ) * 2> lookupTable;

	// Converts an angle in radians to a fractional table index.
	const float indexMul;

	// Interpolates sin and cos for `value`.
	// value should be from -PI/2 to +PI/2; no range check is performed.
	inline void lookup( float value, float& sin, float& cos ) const
	{
		// Calculate index + coefficients for linear interpolation
		value *= indexMul;
		int i1 = int( floor( value ) );
		value -= i1;					// fractional part, weight of the upper entry
		i1 += int( size / 2 );			// shift so that -PI/2 maps to entry 0
		const float b = 1.0f - value;	// weight of the lower entry

		// Interpolate both sin + cos using same coefficients.
		const float* entries = lookupTable.data() + ( i1 << 1 );
		sin = entries[ 0 ] * b + entries[ 2 ] * value;
		cos = entries[ 1 ] * b + entries[ 3 ] * value;
	}

public:
	// Builds the interleaved sin/cos table.
	LinInt() : lookupTable{}, indexMul( float( size / M_PI ) )
	{
		for( size_t i = 0; i <= size; i++ )
		{
			// Do the subtraction in floating point. An int minus a size_t is evaluated
			// as unsigned and wraps for the lower half of the table. With 64-bit size_t
			// the wrapped value is not even representable as a double, so every entry
			// below the midpoint came out wrong.
			const double val = M_PI * ( ( double( i ) - double( size / 2 ) ) / double( size ) );
			lookupTable[ i * 2 ] = float( sin( val ) );
			lookupTable[ i * 2 + 1 ] = float( cos( val ) );
		}

		// Padding pair: repeat the last entry so the read past +PI/2 stays
		// inside the array and yields finite values.
		lookupTable[ ( size + 1 ) * 2 ] = lookupTable[ size * 2 ];
		lookupTable[ ( size + 1 ) * 2 + 1 ] = lookupTable[ size * 2 + 1 ];
	}

	// Computes approximate sine and cosine of Value (radians) into `sin` and `cos`.
	inline void sinCos( float Value, float& sin, float& cos ) const
	{
		// Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
		float quotient = _1DIV2PI * Value;
		if( Value >= 0.0f )
		{
			quotient = (float)( (int)( quotient + 0.5f ) );
		}
		else
		{
			quotient = (float)( (int)( quotient - 0.5f ) );
		}
		float y = Value - _2PI * quotient;

		// Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
		// Reflecting around +-pi/2 keeps the sine and negates the cosine.
		float sign;
		if( y > _PIDIV2 )
		{
			y = _PI - y;
			sign = -1.0f;
		}
		else if( y < -_PIDIV2 )
		{
			y = -_PI - y;
			sign = -1.0f;
		}
		else
		{
			sign = +1.0f;
		}
		// Interpolate both
		float cosNoSign;
		lookup( y, sin, cosNoSign );
		// Restore cos sign
		cos = cosNoSign * sign;
	}
};
_WIN32 20 | struct DX 21 | { 22 | inline void sinCos( float val, float& s, float& c ) const 23 | { 24 | DirectX::XMScalarSinCos( &s, &c, val ); 25 | } 26 | }; 27 | struct DxLow 28 | { 29 | inline void sinCos( float val, float& s, float& c ) const 30 | { 31 | DirectX::XMScalarSinCosEst( &s, &c, val ); 32 | } 33 | }; 34 | #endif 35 | 36 | struct Gte 37 | { 38 | inline void sinCos( float val, float& s, float& c ) const 39 | { 40 | s = gte::SinEstimate::DegreeRR<11>( val ); 41 | c = gte::CosEstimate::DegreeRR<10>( val ); 42 | } 43 | }; 44 | 45 | 46 | template 47 | stopwatch::duration testImpl( const vector& src, const Algo& algo ) 48 | { 49 | float rs = 0, rc = 0; 50 | float s, c; 51 | auto start = stopwatch::now(); 52 | for( float i : src ) 53 | { 54 | algo.sinCos( i, s, c ); 55 | rs += s; 56 | rc += c; 57 | } 58 | auto stop = stopwatch::now(); 59 | auto duration = stop - start; 60 | 61 | const double avgSin = double( rs ) / double( src.size() ); 62 | const double avgCos = double( rc ) / double( src.size() ); 63 | printf( "%f / %f\t", avgSin, avgCos ); 64 | return duration; 65 | } 66 | 67 | template 68 | static stopwatch::duration testImpl() 69 | { 70 | resetRand(); 71 | 72 | // static const size_t testSize = 10 * 1000; 73 | static const size_t testSize = 10 * 1000 * 1000; 74 | 75 | vector src; 76 | src.resize( testSize ); 77 | for( float& f : src ) 78 | f = float( 2.0 * rand() * M_PI / RAND_MAX ); 79 | 80 | // const StdLib algo; 81 | // const DX algo; 82 | const algo algo; 83 | return testImpl( src, algo ); 84 | } 85 | 86 | namespace nsSinCos 87 | { 88 | template<> 89 | stopwatch::duration testSinCos() 90 | { 91 | return testImpl(); 92 | } 93 | 94 | template<> 95 | stopwatch::duration testSinCos() 96 | { 97 | return testImpl(); 98 | } 99 | 100 | template<> 101 | stopwatch::duration testSinCos() 102 | { 103 | return testImpl(); 104 | } 105 | 106 | #ifdef _WIN32 107 | template<> 108 | stopwatch::duration testSinCos() 109 | { 110 | return testImpl(); 111 | } 112 | 113 | 
template<> 114 | stopwatch::duration testSinCos() 115 | { 116 | return testImpl(); 117 | } 118 | #endif 119 | } -------------------------------------------------------------------------------- /ParseResults/ParseResults.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | AnyCPU 7 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0} 8 | Exe 9 | Properties 10 | ParseResults 11 | ParseResults 12 | v4.5.2 13 | 512 14 | true 15 | 16 | 17 | AnyCPU 18 | true 19 | full 20 | false 21 | bin\Debug\ 22 | DEBUG;TRACE 23 | prompt 24 | 4 25 | 26 | 27 | AnyCPU 28 | pdbonly 29 | true 30 | bin\Release\ 31 | TRACE 32 | prompt 33 | 4 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 60 | -------------------------------------------------------------------------------- /Test/BitCount/BitCount.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "BitCount.h" 3 | #include "align.h" 4 | 5 | // static const size_t szArr = 10000; 6 | static const size_t szArr = 100000000; // 100M elements, ~190 MB RAM 7 | 8 | static_assert( 0 == ( szArr * sizeof( uint16_t ) % 16 ), "Invalid array length: must be a multiple of 16 bytes, because SSE" ); 9 | 10 | namespace nsBitCnt 11 | { 12 | using fnTest = uint64_t( * )( const alignedVector& ); 13 | 14 | static inline stopwatch::duration measure( const alignedVector& vec, fnTest test ) 15 | { 16 | const auto start = stopwatch::now(); 17 | const uint64_t bits = test( vec ); 18 | const auto stop = stopwatch::now(); 19 | printf( "%llu\t", bits ); 20 | return stop - start; 21 | } 22 | 23 | static bool hasSsse3() 24 | { 25 | int regs[ 4 ]; 26 | __cpuid( regs, 1 ); 27 | const int ecx = regs[ 2 ]; 28 | return 0 != ( ecx & ( 1 << 9 ) ); 29 | } 30 | 31 | static bool hasXop() 32 | { 33 | int regs[ 4 ]; 34 | __cpuid( regs, 0 ); 35 | 36 | static const int idAmd[ 3 ] = 37 | { 38 | 0x68747541, 
0x444d4163, 0x69746e65 39 | }; 40 | 41 | if( 0 != memcmp( regs + 1, idAmd, 12 ) ) 42 | return false; 43 | 44 | __cpuid( regs, 0x80000001 ); 45 | const int ecx = regs[ 2 ]; 46 | return 0 != ( ecx & ( 1 << 11 ) ); 47 | } 48 | 49 | static bool hasBuiltin() 50 | { 51 | int regs[ 4 ]; 52 | __cpuid( regs, 1 ); 53 | const int ecx = regs[ 2 ]; 54 | return 0 != ( ecx & ( 1 << 23 ) ); 55 | } 56 | 57 | static inline fnTest testFunc( int nAlgo ) 58 | { 59 | switch( nAlgo ) 60 | { 61 | case 1: 62 | return &testLookup; 63 | case 2: 64 | return &testSse2; 65 | case 3: 66 | if( !hasSsse3() ) 67 | { 68 | printf( "This CPU doesn't support SSSE3 instruction set." ); 69 | return nullptr; 70 | } 71 | return &testSsse3; 72 | case 4: 73 | if( !hasXop() ) 74 | { 75 | printf( "This CPU doesn't support XOP instruction set." ); 76 | return nullptr; 77 | } 78 | return &testXop; 79 | case 5: 80 | if( !hasBuiltin() ) 81 | { 82 | printf( "This CPU doesn't support POPCNT instruction." ); 83 | return nullptr; 84 | } 85 | return &testBuiltin; 86 | default: 87 | break; 88 | } 89 | printf( "Unknown algorithm.\nPossible values: 1 = lookup table, 2 = SSE2, 3 = SSSE3, 4 = XOP, 5 = POPCNT\n" ); 90 | return nullptr; 91 | } 92 | 93 | stopwatch::duration test( int nAlgo ) 94 | { 95 | printf( "%i\t", nAlgo ); 96 | 97 | const fnTest fn = testFunc( nAlgo ); 98 | if( nullptr == fn ) 99 | return stopwatch::duration(); 100 | 101 | alignedVector vec; 102 | vec.resize( szArr ); 103 | 104 | resetRand(); 105 | for( uint16_t& i : vec ) 106 | i = rand(); 107 | 108 | return measure( vec, fn ); 109 | } 110 | } -------------------------------------------------------------------------------- /Test/BitCount/BitCountSse.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "BitCount.h" 3 | 4 | namespace nsBitCnt 5 | { 6 | static const __m128i popcount_mask1 = _mm_set1_epi8( 0x77 ); 7 | static const __m128i popcount_mask2 = _mm_set1_epi8( 0x0F ); 8 | static 
inline __m128i popcnt8_sse2( __m128i x ) 9 | { 10 | __m128i n; 11 | // Count bits in each 4-bit field. 12 | n = _mm_srli_epi64( x, 1 ); 13 | n = _mm_and_si128( popcount_mask1, n ); 14 | x = _mm_sub_epi8( x, n ); 15 | n = _mm_srli_epi64( n, 1 ); 16 | n = _mm_and_si128( popcount_mask1, n ); 17 | x = _mm_sub_epi8( x, n ); 18 | n = _mm_srli_epi64( n, 1 ); 19 | n = _mm_and_si128( popcount_mask1, n ); 20 | x = _mm_sub_epi8( x, n ); 21 | x = _mm_add_epi8( x, _mm_srli_epi16( x, 4 ) ); 22 | x = _mm_and_si128( popcount_mask2, x ); 23 | return x; 24 | } 25 | 26 | static inline __m128i popcnt8_ssse3( __m128i n ) 27 | { 28 | static const __m128i popcount_mask = _mm_set1_epi8( 0x0F ); 29 | static const __m128i popcount_table = _mm_setr_epi8( 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 ); 30 | 31 | const __m128i pcnt0 = _mm_shuffle_epi8( popcount_table, _mm_and_si128( n, popcount_mask ) ); 32 | const __m128i pcnt1 = _mm_shuffle_epi8( popcount_table, _mm_and_si128( _mm_srli_epi16( n, 4 ), popcount_mask ) ); 33 | return _mm_add_epi8( pcnt0, pcnt1 ); 34 | } 35 | 36 | static inline __m128i popcount8_xop( __m128i n ) 37 | { 38 | static const __m128i popcount_mask = _mm_set1_epi8( 0x0F ); 39 | static const __m128i popcount_table = _mm_setr_epi8( 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 ); 40 | static const __m128i popcount_shift = _mm_set1_epi8( -4 ); 41 | 42 | const __m128i pcnt0 = _mm_perm_epi8( popcount_table, popcount_table, _mm_and_si128( n, popcount_mask ) ); 43 | const __m128i pcnt1 = _mm_perm_epi8( popcount_table, popcount_table, _mm_shl_epi8( n, popcount_shift ) ); 44 | return _mm_add_epi8( pcnt0, pcnt1 ); 45 | } 46 | 47 | template 48 | static inline uint64_t countBitsImpl( const alignedVector& vec, pcnt8 pc8 ) 49 | { 50 | const __m128i* p = ( const __m128i* )vec.data(); 51 | const __m128i* pEnd = p + ( vec.size() * sizeof( uint16_t ) / sizeof( __m128i ) ); 52 | 53 | const __m128i zero = _mm_setzero_si128(); 54 | __m128i res = zero; 55 | for( ; p < pEnd; p++ ) 56 | { 
57 | const __m128i cnt8 = pc8( *p ); 58 | const __m128i cnt64 = _mm_sad_epu8( cnt8, zero ); 59 | res = _mm_add_epi64( res, cnt64 ); 60 | } 61 | return res.m128i_u64[ 0 ] + res.m128i_u64[ 1 ]; 62 | } 63 | 64 | uint64_t testSse2( const alignedVector& vec ) 65 | { 66 | return countBitsImpl( vec, &popcnt8_sse2 ); 67 | } 68 | uint64_t testSsse3( const alignedVector& vec ) 69 | { 70 | return countBitsImpl( vec, &popcnt8_ssse3 ); 71 | } 72 | uint64_t testXop( const alignedVector& vec ) 73 | { 74 | return countBitsImpl( vec, &popcount8_xop ); 75 | } 76 | } -------------------------------------------------------------------------------- /Test/SinCos/GTEngineDEF.h: -------------------------------------------------------------------------------- 1 | // David Eberly, Geometric Tools, Redmond WA 98052 2 | // Copyright (c) 1998-2016 3 | // Distributed under the Boost Software License, Version 1.0. 4 | // http://www.boost.org/LICENSE_1_0.txt 5 | // http://www.geometrictools.com/License/Boost/LICENSE_1_0.txt 6 | // File Version: 3.0.0 (2016/06/19) 7 | 8 | #pragma once 9 | 10 | //---------------------------------------------------------------------------- 11 | // The platform specification. 12 | // 13 | // __MSWINDOWS__ : Microsoft Windows (WIN32 or WIN64) 14 | // __APPLE__ : Macintosh OS X 15 | // __LINUX__ : Linux or Cygwin 16 | //---------------------------------------------------------------------------- 17 | 18 | #if !defined(__LINUX__) && (defined(WIN32) || defined(_WIN64)) 19 | #define __MSWINDOWS__ 20 | 21 | #if !defined(_MSC_VER) 22 | #error Microsoft Visual Studio 2013 or later is required. 
23 | #endif 24 | 25 | // MSVC 6 is version 12.0 26 | // MSVC 7.0 is version 13.0 (MSVS 2002) 27 | // MSVC 7.1 is version 13.1 (MSVS 2003) 28 | // MSVC 8.0 is version 14.0 (MSVS 2005) 29 | // MSVC 9.0 is version 15.0 (MSVS 2008) 30 | // MSVC 10.0 is version 16.0 (MSVS 2010) 31 | // MSVC 11.0 is version 17.0 (MSVS 2012) 32 | // MSVC 12.0 is version 18.0 (MSVS 2013) 33 | // MSVC 14.0 is version 19.0 (MSVS 2015) 34 | // Currently, projects are provided only for MSVC 12.0 and 14.0. 35 | #if _MSC_VER < 1800 36 | #error Microsoft Visual Studio 2013 or later is required. 37 | #endif 38 | 39 | // Debug build values (choose_your_value is 0, 1, or 2) 40 | // 0: Disables checked iterators and disables iterator debugging. 41 | // 1: Enables checked iterators and disables iterator debugging. 42 | // 2: (default) Enables iterator debugging; checked iterators are not relevant. 43 | // 44 | // Release build values (choose_your_value is 0 or 1) 45 | // 0: (default) Disables checked iterators. 46 | // 1: Enables checked iterators; iterator debugging is not relevant. 47 | // 48 | // #define _ITERATOR_DEBUG_LEVEL choose_your_value 49 | 50 | #endif // WIN32 or _WIN64 51 | 52 | // TODO: Windows DLL configurations have not yet been added to the project, 53 | // but these defines are required to support them (when we do add them). 54 | // 55 | // Add GTE_EXPORT to project preprocessor options for dynamic library 56 | // configurations to export their symbols. 57 | #if defined(GTE_EXPORT) 58 | // For the dynamic library configurations. 59 | #define GTE_IMPEXP __declspec(dllexport) 60 | #else 61 | // For a client of the dynamic library or for the static library 62 | // configurations. 63 | #define GTE_IMPEXP 64 | #endif 65 | 66 | // Expose exactly one of these. 67 | #define GTE_USE_ROW_MAJOR 68 | //#define GTE_USE_COL_MAJOR 69 | 70 | // Expose exactly one of these. 
71 | #define GTE_USE_MAT_VEC 72 | //#define GTE_USE_VEC_MAT 73 | 74 | #if (defined(GTE_USE_ROW_MAJOR) && defined(GTE_USE_COL_MAJOR)) || (!defined(GTE_USE_ROW_MAJOR) && !defined(GTE_USE_COL_MAJOR)) 75 | #error Exactly one storage order must be specified. 76 | #endif 77 | 78 | #if (defined(GTE_USE_MAT_VEC) && defined(GTE_USE_VEC_MAT)) || (!defined(GTE_USE_MAT_VEC) && !defined(GTE_USE_VEC_MAT)) 79 | #error Exactly one multiplication convention must be specified. 80 | #endif 81 | -------------------------------------------------------------------------------- /LookupTables.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 14 4 | VisualStudioVersion = 14.0.25420.1 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LookupTables", "Test\LookupTables.vcxproj", "{476AD73F-6B56-4B38-B499-1FC642BB9AEC}" 7 | EndProject 8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ParseResults", "ParseResults\ParseResults.csproj", "{4B4BF54C-38CD-438E-823D-727F0D77D1D0}" 9 | EndProject 10 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SinCosPolyPrecision", "SinCosPolyPrecision\SinCosPolyPrecision.vcxproj", "{B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}" 11 | EndProject 12 | Global 13 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 14 | Debug|Any CPU = Debug|Any CPU 15 | Debug|x64 = Debug|x64 16 | Debug|x86 = Debug|x86 17 | Release|Any CPU = Release|Any CPU 18 | Release|x64 = Release|x64 19 | Release|x86 = Release|x86 20 | EndGlobalSection 21 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 22 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Debug|Any CPU.ActiveCfg = Debug|Win32 23 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Debug|x64.ActiveCfg = Debug|x64 24 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Debug|x64.Build.0 = Debug|x64 25 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Debug|x86.ActiveCfg = 
Debug|Win32 26 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Debug|x86.Build.0 = Debug|Win32 27 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Release|Any CPU.ActiveCfg = Release|Win32 28 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Release|x64.ActiveCfg = Release|x64 29 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Release|x64.Build.0 = Release|x64 30 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Release|x86.ActiveCfg = Release|Win32 31 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Release|x86.Build.0 = Release|Win32 32 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 33 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Debug|Any CPU.Build.0 = Debug|Any CPU 34 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Debug|x64.ActiveCfg = Debug|Any CPU 35 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Debug|x64.Build.0 = Debug|Any CPU 36 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Debug|x86.ActiveCfg = Debug|Any CPU 37 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Debug|x86.Build.0 = Debug|Any CPU 38 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Release|Any CPU.ActiveCfg = Release|Any CPU 39 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Release|Any CPU.Build.0 = Release|Any CPU 40 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Release|x64.ActiveCfg = Release|Any CPU 41 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Release|x64.Build.0 = Release|Any CPU 42 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Release|x86.ActiveCfg = Release|Any CPU 43 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Release|x86.Build.0 = Release|Any CPU 44 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Debug|Any CPU.ActiveCfg = Debug|Win32 45 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Debug|x64.ActiveCfg = Debug|x64 46 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Debug|x64.Build.0 = Debug|x64 47 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Debug|x86.ActiveCfg = Debug|Win32 48 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Debug|x86.Build.0 = Debug|Win32 49 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Release|Any CPU.ActiveCfg = Release|Win32 50 | 
{B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Release|x64.ActiveCfg = Release|x64 51 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Release|x64.Build.0 = Release|x64 52 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Release|x86.ActiveCfg = Release|Win32 53 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Release|x86.Build.0 = Release|Win32 54 | EndGlobalSection 55 | GlobalSection(SolutionProperties) = preSolution 56 | HideSolutionNode = FALSE 57 | EndGlobalSection 58 | EndGlobal 59 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # A Case Against Lookup Tables In many places on the internet, there’s a piece of advice: “to improve performance of your code, use a pre-calculated lookup table”. My point is that now, in 2016, in 80% of cases, the advice is terribly wrong. I’ve written a couple of benchmarks to demonstrate. **The first test**, SinCos, calculates sine + cosine of random angles. [Inspired by a question on stackoverflow](http://stackoverflow.com/q/31814105/126995). I’ve implemented the following algorithms: 1. Standard C runtime library, i.e. sinf + cosf functions. 2. A lookup table, the length is 256 values, with linear interpolation between the values, and optimized for this test case (the test calculates both sin and cosine, so the table holds the sin and cos values in adjacent memory addresses). 3. Minimax high-degree (11 for sine, 10 for cosine) polynomial approximation, as implemented in [Geometric Tools](https://www.geometrictools.com/Source/Mathematics.html) library 4. Same polynomial approximation (11 for sine, 10 for cosine), as implemented in Microsoft DirectX SDK in [XMScalarSinCos](https://msdn.microsoft.com/en-us/library/windows/desktop/microsoft.directx_sdk.scalar.xmscalarsincos%28v=vs.85%29.aspx). Slightly faster than the previous one because it is optimized for the simultaneous sin+cos case I have in my test. 5.
Lower-degree polynomial approximation (7 for sine, 6 for cosine), as implemented in Microsoft DirectX SDK by [XMScalarSinCosEst](https://msdn.microsoft.com/en-us/library/windows/desktop/microsoft.directx_sdk.scalar.xmscalarsincosest%28v=vs.85%29.aspx) **The second test**, BitCount, counts the set bits in an array of random 16-bit values. [Inspired by a google interview question](http://www.gwan.com/blog/20160405.html), where an interviewer expected the incorrect “lookup table” answer. In the question there were merely 10000 short values. On today's hardware, the data set is too small for a meaningful benchmark. So my array is larger, with 100 million short values, taking about 190 MB RAM. I’ve implemented the following algorithms: 1. 256-byte lookup table. 2. SSE2 manually vectorized. 3. SSSE3 manually vectorized. 4. XOP manually vectorized (untested because I don't have the hardware). 5. POPCNT instruction. Algorithms 2-4 are [from Stack Overflow](http://stackoverflow.com/a/17355341/126995). ## Hardware Currently, I happen to have three PCs on my desk. * A desktop with [i5-4460](http://ark.intel.com/products/80817/Intel-Core-i5-4460-Processor-6M-Cache-up-to-3_40-GHz), 16GB RAM, Windows 10 x64 * A laptop with [i5-2410m](http://ark.intel.com/products/52224/Intel-Core-i5-2410M-Processor-3M-Cache-up-to-2_90-GHz), 8GB RAM, Windows 8.1 x64. * A tablet with Atom [Z3735](http://ark.intel.com/products/80275/Intel-Atom-Processor-Z3735G-2M-Cache-up-to-1_83-GHz), 1GB RAM, Windows 10 x86. ## Test Results Here are the absolute values in milliseconds: ![Screenshot](absolute.png) Here are the values in percent, relative to the lookup table's performance: ![Screenshot](relative.png) You see the pattern here? In 100% of my test cases, using a lookup table is not good for performance. And in some cases the difference is **huge**, like 7 times slower than the other implementations. ## Final Words The people who wrote those classic programming books were using very old computers.
Since those classic books were written, CPU computation speed has improved by orders of magnitude compared to RAM latency. Even the highly-sophisticated multi-level caches only help to some extent. That's why on modern hardware, implementing a lookup table to save CPU time is very often a bad idea. Sure, there are cases when a lookup table indeed improves performance. Like when the content is really hard to compute, [rainbow tables](https://en.wikipedia.org/wiki/Rainbow_table) being the extreme example. But please, stop using lookup tables for trivial things like trigonometry or bit counting. When running on modern hardware, a CPU computes stuff much faster than you think it does, and accesses memory much slower than you think it does. Also, please stop writing and reading those deprecated programming books that teach people to use lookup tables to optimize performance. ## Bonus Chapter: Polynomial Approximation Precision You might be wondering “polynomial approximation sounds scary, how precise is that thing?” It’s very precise. Even the low-degree approximation is more precise than my 256-value lookup table. Here’s the data: ![Screenshot](linIntError.png) The app that calculates those errors is also in this repository, in the SinCosPolyPrecision subfolder. Not only is the lookup table for sine and cosine slower, it’s less precise as well. -------------------------------------------------------------------------------- /Test/SinCos/Mathematics/GteSinEstimate.h: -------------------------------------------------------------------------------- 1 | // David Eberly, Geometric Tools, Redmond WA 98052 2 | // Copyright (c) 1998-2016 3 | // Distributed under the Boost Software License, Version 1.0. 4 | // http://www.boost.org/LICENSE_1_0.txt 5 | // http://www.geometrictools.com/License/Boost/LICENSE_1_0.txt 6 | // File Version: 3.0.0 (2016/06/19) 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | // Minimax polynomial approximations to sin(x).
The polynomial p(x) of 13 | // degree D has only odd-power terms, is required to have linear term x, 14 | // and p(pi/2) = sin(pi/2) = 1. It minimizes the quantity 15 | // maximum{|sin(x) - p(x)| : x in [-pi/2,pi/2]} over all polynomials of 16 | // degree D subject to the constraints mentioned. 17 | 18 | namespace gte 19 | { 20 | 21 | template 22 | class SinEstimate 23 | { 24 | public: 25 | // The input constraint is x in [-pi/2,pi/2]. For example, 26 | // float x; // in [-pi/2,pi/2] 27 | // float result = SinEstimate::Degree<3>(x); 28 | template 29 | inline static Real Degree(Real x); 30 | 31 | // The input x can be any real number. Range reduction is used to 32 | // generate a value y in [-pi/2,pi/2] for which sin(y) = sin(x). 33 | // For example, 34 | // float x; // x any real number 35 | // float result = SinEstimate::DegreeRR<3>(x); 36 | template 37 | inline static Real DegreeRR(Real x); 38 | 39 | private: 40 | // Metaprogramming and private implementation to allow specialization of 41 | // a template member function. 42 | template struct degree {}; 43 | inline static Real Evaluate(degree<3>, Real x); 44 | inline static Real Evaluate(degree<5>, Real x); 45 | inline static Real Evaluate(degree<7>, Real x); 46 | inline static Real Evaluate(degree<9>, Real x); 47 | inline static Real Evaluate(degree<11>, Real x); 48 | 49 | // Support for range reduction. 
50 | inline static Real Reduce(Real x); 51 | }; 52 | 53 | 54 | template 55 | template 56 | inline Real SinEstimate::Degree(Real x) 57 | { 58 | return Evaluate(degree(), x); 59 | } 60 | 61 | template 62 | template 63 | inline Real SinEstimate::DegreeRR(Real x) 64 | { 65 | return Degree(Reduce(x)); 66 | } 67 | 68 | template 69 | inline Real SinEstimate::Evaluate(degree<3>, Real x) 70 | { 71 | Real xsqr = x * x; 72 | Real poly; 73 | poly = (Real)GTE_C_SIN_DEG3_C1; 74 | poly = (Real)GTE_C_SIN_DEG3_C0 + poly * xsqr; 75 | poly = poly * x; 76 | return poly; 77 | } 78 | 79 | template 80 | inline Real SinEstimate::Evaluate(degree<5>, Real x) 81 | { 82 | Real xsqr = x * x; 83 | Real poly; 84 | poly = (Real)GTE_C_SIN_DEG5_C2; 85 | poly = (Real)GTE_C_SIN_DEG5_C1 + poly * xsqr; 86 | poly = (Real)GTE_C_SIN_DEG5_C0 + poly * xsqr; 87 | poly = poly * x; 88 | return poly; 89 | } 90 | 91 | template 92 | inline Real SinEstimate::Evaluate(degree<7>, Real x) 93 | { 94 | Real xsqr = x * x; 95 | Real poly; 96 | poly = (Real)GTE_C_SIN_DEG7_C3; 97 | poly = (Real)GTE_C_SIN_DEG7_C2 + poly * xsqr; 98 | poly = (Real)GTE_C_SIN_DEG7_C1 + poly * xsqr; 99 | poly = (Real)GTE_C_SIN_DEG7_C0 + poly * xsqr; 100 | poly = poly * x; 101 | return poly; 102 | } 103 | 104 | template 105 | inline Real SinEstimate::Evaluate(degree<9>, Real x) 106 | { 107 | Real xsqr = x * x; 108 | Real poly; 109 | poly = (Real)GTE_C_SIN_DEG9_C4; 110 | poly = (Real)GTE_C_SIN_DEG9_C3 + poly * xsqr; 111 | poly = (Real)GTE_C_SIN_DEG9_C2 + poly * xsqr; 112 | poly = (Real)GTE_C_SIN_DEG9_C1 + poly * xsqr; 113 | poly = (Real)GTE_C_SIN_DEG9_C0 + poly * xsqr; 114 | poly = poly * x; 115 | return poly; 116 | } 117 | 118 | template 119 | inline Real SinEstimate::Evaluate(degree<11>, Real x) 120 | { 121 | Real xsqr = x * x; 122 | Real poly; 123 | poly = (Real)GTE_C_SIN_DEG11_C5; 124 | poly = (Real)GTE_C_SIN_DEG11_C4 + poly * xsqr; 125 | poly = (Real)GTE_C_SIN_DEG11_C3 + poly * xsqr; 126 | poly = (Real)GTE_C_SIN_DEG11_C2 + poly * xsqr; 127 | 
poly = (Real)GTE_C_SIN_DEG11_C1 + poly * xsqr; 128 | poly = (Real)GTE_C_SIN_DEG11_C0 + poly * xsqr; 129 | poly = poly * x; 130 | return poly; 131 | } 132 | 133 | template 134 | inline Real SinEstimate::Reduce(Real x) 135 | { 136 | // Map x to y in [-pi,pi], x = 2*pi*quotient + remainder. 137 | Real quotient = (Real)GTE_C_INV_TWO_PI * x; 138 | if (x >= (Real)0) 139 | { 140 | quotient = (Real)((int)(quotient + (Real)0.5)); 141 | } 142 | else 143 | { 144 | quotient = (Real)((int)(quotient - (Real)0.5)); 145 | } 146 | Real y = x - (Real)GTE_C_TWO_PI * quotient; 147 | 148 | // Map y to [-pi/2,pi/2] with sin(y) = sin(x). 149 | if (y > (Real)GTE_C_HALF_PI) 150 | { 151 | y = (Real)GTE_C_PI - y; 152 | } 153 | else if (y < (Real)-GTE_C_HALF_PI) 154 | { 155 | y = (Real)-GTE_C_PI - y; 156 | } 157 | return y; 158 | } 159 | 160 | 161 | } 162 | -------------------------------------------------------------------------------- /Test/SinCos/Mathematics/GteCosEstimate.h: -------------------------------------------------------------------------------- 1 | // David Eberly, Geometric Tools, Redmond WA 98052 2 | // Copyright (c) 1998-2016 3 | // Distributed under the Boost Software License, Version 1.0. 4 | // http://www.boost.org/LICENSE_1_0.txt 5 | // http://www.geometrictools.com/License/Boost/LICENSE_1_0.txt 6 | // File Version: 3.0.0 (2016/06/19) 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | // Minimax polynomial approximations to cos(x). The polynomial p(x) of 13 | // degree D has only even-power terms, is required to have constant term 1, 14 | // and p(pi/2) = cos(pi/2) = 0. It minimizes the quantity 15 | // maximum{|cos(x) - p(x)| : x in [-pi/2,pi/2]} over all polynomials of 16 | // degree D subject to the constraints mentioned. 17 | 18 | namespace gte 19 | { 20 | 21 | template 22 | class CosEstimate 23 | { 24 | public: 25 | // The input constraint is x in [-pi/2,pi/2]. 
For example, 26 | // float x; // in [-pi/2,pi/2] 27 | // float result = CosEstimate::Degree<4>(x); 28 | template 29 | inline static Real Degree(Real x); 30 | 31 | // The input x can be any real number. Range reduction is used to 32 | // generate a value y in [-pi/2,pi/2] and a sign s for which 33 | // cos(y) = s*cos(x). For example, 34 | // float x; // x any real number 35 | // float result = CosEstimate::DegreeRR<3>(x); 36 | template 37 | inline static Real DegreeRR(Real x); 38 | 39 | private: 40 | // Metaprogramming and private implementation to allow specialization of 41 | // a template member function. 42 | template struct degree {}; 43 | inline static Real Evaluate(degree<2>, Real x); 44 | inline static Real Evaluate(degree<4>, Real x); 45 | inline static Real Evaluate(degree<6>, Real x); 46 | inline static Real Evaluate(degree<8>, Real x); 47 | inline static Real Evaluate(degree<10>, Real x); 48 | 49 | // Support for range reduction. 50 | inline static void Reduce(Real x, Real& y, Real& sign); 51 | }; 52 | 53 | 54 | template 55 | template 56 | inline Real CosEstimate::Degree(Real x) 57 | { 58 | return Evaluate(degree(), x); 59 | } 60 | 61 | template 62 | template 63 | inline Real CosEstimate::DegreeRR(Real x) 64 | { 65 | Real y, sign; 66 | Reduce(x, y, sign); 67 | Real poly = sign * Degree(y); 68 | return poly; 69 | } 70 | 71 | template 72 | inline Real CosEstimate::Evaluate(degree<2>, Real x) 73 | { 74 | Real xsqr = x * x; 75 | Real poly; 76 | poly = (Real)GTE_C_COS_DEG2_C1; 77 | poly = (Real)GTE_C_COS_DEG2_C0 + poly * xsqr; 78 | return poly; 79 | } 80 | 81 | template 82 | inline Real CosEstimate::Evaluate(degree<4>, Real x) 83 | { 84 | Real xsqr = x * x; 85 | Real poly; 86 | poly = (Real)GTE_C_COS_DEG4_C2; 87 | poly = (Real)GTE_C_COS_DEG4_C1 + poly * xsqr; 88 | poly = (Real)GTE_C_COS_DEG4_C0 + poly * xsqr; 89 | return poly; 90 | } 91 | 92 | template 93 | inline Real CosEstimate::Evaluate(degree<6>, Real x) 94 | { 95 | Real xsqr = x * x; 96 | Real poly; 97 | 
poly = (Real)GTE_C_COS_DEG6_C3; 98 | poly = (Real)GTE_C_COS_DEG6_C2 + poly * xsqr; 99 | poly = (Real)GTE_C_COS_DEG6_C1 + poly * xsqr; 100 | poly = (Real)GTE_C_COS_DEG6_C0 + poly * xsqr; 101 | return poly; 102 | } 103 | 104 | template 105 | inline Real CosEstimate::Evaluate(degree<8>, Real x) 106 | { 107 | Real xsqr = x * x; 108 | Real poly; 109 | poly = (Real)GTE_C_COS_DEG8_C4; 110 | poly = (Real)GTE_C_COS_DEG8_C3 + poly * xsqr; 111 | poly = (Real)GTE_C_COS_DEG8_C2 + poly * xsqr; 112 | poly = (Real)GTE_C_COS_DEG8_C1 + poly * xsqr; 113 | poly = (Real)GTE_C_COS_DEG8_C0 + poly * xsqr; 114 | return poly; 115 | } 116 | 117 | template 118 | inline Real CosEstimate::Evaluate(degree<10>, Real x) 119 | { 120 | Real xsqr = x * x; 121 | Real poly; 122 | poly = (Real)GTE_C_COS_DEG10_C5; 123 | poly = (Real)GTE_C_COS_DEG10_C4 + poly * xsqr; 124 | poly = (Real)GTE_C_COS_DEG10_C3 + poly * xsqr; 125 | poly = (Real)GTE_C_COS_DEG10_C2 + poly * xsqr; 126 | poly = (Real)GTE_C_COS_DEG10_C1 + poly * xsqr; 127 | poly = (Real)GTE_C_COS_DEG10_C0 + poly * xsqr; 128 | return poly; 129 | } 130 | 131 | template 132 | inline void CosEstimate::Reduce(Real x, Real& y, Real& sign) 133 | { 134 | // Map x to y in [-pi,pi], x = 2*pi*quotient + remainder. 135 | Real quotient = (Real)GTE_C_INV_TWO_PI * x; 136 | if (x >= (Real)0) 137 | { 138 | quotient = (Real)((int)(quotient + (Real)0.5)); 139 | } 140 | else 141 | { 142 | quotient = (Real)((int)(quotient - (Real)0.5)); 143 | } 144 | y = x - (Real)GTE_C_TWO_PI * quotient; 145 | 146 | // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x). 
147 | if (y > (Real)GTE_C_HALF_PI) 148 | { 149 | y = (Real)GTE_C_PI - y; 150 | sign = (Real)-1; 151 | } 152 | else if (y < (Real)-GTE_C_HALF_PI) 153 | { 154 | y = (Real)-GTE_C_PI - y; 155 | sign = (Real)-1; 156 | } 157 | else 158 | { 159 | sign = (Real)1; 160 | } 161 | } 162 | 163 | 164 | } 165 | -------------------------------------------------------------------------------- /Test/BitCount/align.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | // http://stackoverflow.com/a/12942652/126995 5 | 6 | enum class Alignment: size_t 7 | { 8 | Normal = sizeof( void* ), 9 | SSE = 16, 10 | AVX = 32, 11 | }; 12 | 13 | namespace detail 14 | { 15 | void* allocate_aligned_memory( size_t align, size_t size ) noexcept; 16 | void deallocate_aligned_memory( void* ptr ) noexcept; 17 | } 18 | 19 | template 20 | class AlignedAllocator; 21 | 22 | template 23 | class AlignedAllocator 24 | { 25 | public: 26 | typedef void* pointer; 27 | typedef const void* const_pointer; 28 | typedef void value_type; 29 | 30 | template struct rebind { typedef AlignedAllocator other; }; 31 | }; 32 | 33 | template 34 | class AlignedAllocator 35 | { 36 | public: 37 | typedef T value_type; 38 | typedef T* pointer; 39 | typedef const T* const_pointer; 40 | typedef T& reference; 41 | typedef const T& const_reference; 42 | typedef size_t size_type; 43 | typedef ptrdiff_t difference_type; 44 | 45 | typedef std::true_type propagate_on_container_move_assignment; 46 | 47 | template 48 | struct rebind { typedef AlignedAllocator other; }; 49 | 50 | public: 51 | AlignedAllocator() noexcept 52 | {} 53 | 54 | template 55 | AlignedAllocator( const AlignedAllocator& ) noexcept 56 | {} 57 | 58 | size_type max_size() const noexcept 59 | { 60 | return ( size_type( ~0 ) - size_type( Align ) ) / sizeof( T ); 61 | } 62 | 63 | pointer address( reference x ) const noexcept 64 | { 65 | return std::addressof( x ); 66 | } 67 | 68 | const_pointer address( 
const_reference x ) const noexcept 69 | { 70 | return std::addressof( x ); 71 | } 72 | 73 | pointer allocate( size_type n, typename AlignedAllocator::const_pointer = 0 ) 74 | { 75 | const size_type alignment = static_cast( Align ); 76 | void* ptr = detail::allocate_aligned_memory( alignment, n * sizeof( T ) ); 77 | if( ptr == nullptr ) { 78 | throw std::bad_alloc(); 79 | } 80 | 81 | return reinterpret_cast( ptr ); 82 | } 83 | 84 | void deallocate( pointer p, size_type ) noexcept 85 | { 86 | return detail::deallocate_aligned_memory( p ); 87 | } 88 | 89 | template 90 | void construct( U* p, Args&&... args ) 91 | { 92 | ::new( reinterpret_cast( p ) ) U( std::forward( args )... ); 93 | } 94 | 95 | void destroy( pointer p ) 96 | { 97 | p->~T(); 98 | } 99 | }; 100 | 101 | template 102 | class AlignedAllocator 103 | { 104 | public: 105 | typedef T value_type; 106 | typedef const T* pointer; 107 | typedef const T* const_pointer; 108 | typedef const T& reference; 109 | typedef const T& const_reference; 110 | typedef size_t size_type; 111 | typedef ptrdiff_t difference_type; 112 | 113 | typedef std::true_type propagate_on_container_move_assignment; 114 | 115 | template 116 | struct rebind { typedef AlignedAllocator other; }; 117 | 118 | public: 119 | AlignedAllocator() noexcept 120 | {} 121 | 122 | template 123 | AlignedAllocator( const AlignedAllocator& ) noexcept 124 | {} 125 | 126 | size_type max_size() const noexcept 127 | { 128 | return ( size_type( ~0 ) - size_type( Align ) ) / sizeof( T ); 129 | } 130 | 131 | const_pointer address( const_reference x ) const noexcept 132 | { 133 | return std::addressof( x ); 134 | } 135 | 136 | pointer allocate( size_type n, typename AlignedAllocator::const_pointer = 0 ) 137 | { 138 | const size_type alignment = static_cast( Align ); 139 | void* ptr = detail::allocate_aligned_memory( alignment, n * sizeof( T ) ); 140 | if( ptr == nullptr ) { 141 | throw std::bad_alloc(); 142 | } 143 | 144 | return reinterpret_cast( ptr ); 145 | } 146 | 
147 | void deallocate( pointer p, size_type ) noexcept 148 | { 149 | return detail::deallocate_aligned_memory( p ); 150 | } 151 | 152 | template 153 | void construct( U* p, Args&&... args ) 154 | { 155 | ::new( reinterpret_cast( p ) ) U( std::forward( args )... ); 156 | } 157 | 158 | void destroy( pointer p ) 159 | { 160 | p->~T(); 161 | } 162 | }; 163 | 164 | template 165 | inline bool operator== ( const AlignedAllocator&, const AlignedAllocator& ) noexcept 166 | { 167 | return TAlign == UAlign; 168 | } 169 | 170 | template 171 | inline bool operator!= ( const AlignedAllocator&, const AlignedAllocator& ) noexcept 172 | { 173 | return TAlign != UAlign; 174 | } 175 | 176 | template 177 | using alignedVector = std::vector>; 178 | 179 | namespace detail 180 | { 181 | static const Alignment default_alignment = Alignment::SSE; 182 | 183 | // template T* allocate() 184 | template T* allocate( _Types&&... _Args ) 185 | { 186 | std::unique_ptr hold( static_cast( allocate_aligned_memory( default_alignment, sizeof( T ) ) ), deallocate_aligned_memory ); 187 | ::new ( hold.get() ) T( _STD forward<_Types>( _Args )... 
); 188 | return static_cast( hold.release() ); 189 | } 190 | 191 | template 192 | void deallocate( void* p ) 193 | { 194 | static_cast( p )->~T(); 195 | deallocate_aligned_memory( p ); 196 | } 197 | } -------------------------------------------------------------------------------- /ParseResults/Program.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using System.IO; 6 | using System.Threading; 7 | using System.Globalization; 8 | 9 | namespace ParseResults 10 | { 11 | using kvp = KeyValuePair; 12 | using dictI2D = Dictionary; 13 | 14 | static class Program 15 | { 16 | const string strFolder = @"C:\Z\Fun\LookupTables\RawResults"; 17 | const string strOutput = @"C:\Z\Fun\LookupTables\RawResults\Parsed.tsv"; 18 | const string strOutputRel = @"C:\Z\Fun\LookupTables\RawResults\Relative.tsv"; 19 | 20 | ///

Test ID

/// <summary>Identifies the benchmark a measurement belongs to.</summary>
enum eTest : byte
{
    /// <summary>Sine/cosine benchmark.</summary>
    SinCos = 1,

    /// <summary>Bit counting benchmark.</summary>
    BitCount = 2
}

///

Pack test + algorithm into an int

/// <summary>Combines a test ID and an algorithm number into a single integer key.</summary>
/// <param name="et">Test ID; its numeric value is shifted left by 8 bits.</param>
/// <param name="algo">Algorithm number; OR-ed in unchanged (it is not masked to 8 bits).</param>
/// <returns>The packed key.</returns>
static int key( eTest et, int algo )
{
    int testBits = (int)et << 8;
    return testBits | algo;
}

///

Unpack test from integer key

/// <summary>Extracts the test ID from a packed integer key.</summary>
/// <param name="k">Packed key.</param>
/// <returns>The key shifted right by 8 bits, converted to <c>eTest</c>.</returns>
static eTest test( this int k )
{
    int testBits = k >> 8;
    return (eTest)testBits;
}

///

Unpack algorithm from integer key

/// <summary>Extracts the algorithm number from a packed integer key.</summary>
/// <param name="k">Packed key.</param>
/// <returns>The low 8 bits of the key.</returns>
static int algo( this int k )
{
    const int lowByteMask = 0xFF;
    return k & lowByteMask;
}

///

Algorithms names for SinCos test

/// <summary>Display names of the SinCos algorithms, keyed by algorithm number.</summary>
static readonly Dictionary<int, string> s_algoSinCos = new Dictionary<int, string>
{
    { 1, "StdLib" },
    { 2, "Lookup" },
    { 3, "GTE" },
    { 4, "DX" },
    { 5, "DX_Est" },
};

///

Algorithms names for BitCount test

/// <summary>Display names of the BitCount algorithms, keyed by algorithm number.</summary>
static readonly Dictionary<int, string> s_algoCnt = new Dictionary<int, string>
{
    { 1, "Lookup" },
    { 2, "SSE2" },
    { 3, "SSSE3" },
    { 4, "XOP" },
    { 5, "POPCNT" },
};

///

Read all lines from the file

/// <summary>Lazily reads a text file line by line, decoding it as ASCII.</summary>
/// <param name="fileName">Path of the file to read.</param>
/// <returns>The lines of the file, in order. The file stays open while the sequence is being enumerated.</returns>
static IEnumerable<string> getLines( string fileName )
{
    using( StreamReader reader = new StreamReader( fileName, Encoding.ASCII ) )
    {
        // ReadLine() returns null once the end of the stream is reached
        for( string line = reader.ReadLine(); line != null; line = reader.ReadLine() )
            yield return line;
    }
}

///

Case-insensitive string compare

/// <summary>Compares two strings for equality, ignoring case (ordinal comparison).</summary>
/// <param name="s">Left-hand string; must not be null, since an instance method is called on it.</param>
/// <param name="s2">String to compare against.</param>
/// <returns>True when the strings are equal apart from letter case.</returns>
static bool isEqual( this string s, string s2 )
{
    const StringComparison mode = StringComparison.OrdinalIgnoreCase;
    return s.Equals( s2, mode );
}

///

Parse lines in the result file

/// <summary>Parses one result file into a sequence of (packed key, measurement) pairs.</summary>
/// <remarks>
/// A line equal to "SinCos" or "BitCount" (case-insensitive) switches the current test.
/// Every other line is split on tabs: field 0 is the algorithm number and field 2 the measured value.
/// Lines seen before the first test header, lines with fewer than 3 fields, and lines whose
/// fields do not parse are skipped without an error.
/// </remarks>
/// <param name="fileName">Path of the result file.</param>
/// <returns>One pair per successfully parsed measurement line, in file order.</returns>
static IEnumerable<kvp> parse( string fileName )
{
    eTest? current = null;
    foreach( string text in getLines( fileName ) )
    {
        if( text.isEqual( "SinCos" ) )
            current = eTest.SinCos;
        else if( text.isEqual( "BitCount" ) )
            current = eTest.BitCount;
        else if( current.HasValue )
        {
            string[] cells = text.Split( '\t' );
            int algo;
            double measure;
            bool parsed = cells.Length >= 3
                && int.TryParse( cells[ 0 ], out algo )
                && double.TryParse( cells[ 2 ], out measure );
            if( parsed )
                yield return new kvp( key( current.Value, algo ), measure );
        }
    }
}

///

The log file contains 3 tests per algorithm, this function groups them and calculates the average.

/// <summary>Groups measurements by key and replaces each group with its arithmetic mean.</summary>
/// <param name="lines">Parsed (key, measurement) pairs; the same key may occur several times.</param>
/// <returns>One pair per distinct key, holding the average of that key's measurements.</returns>
static IEnumerable<kvp> average( this IEnumerable<kvp> lines )
{
    return lines
        .GroupBy( entry => entry.Key )
        .Select( runs => new kvp( runs.Key, runs.Sum( entry => entry.Value ) / runs.Count() ) );
}

///

Read all files in the directory, parse, calculate tests average

/// <summary>Reads every *.txt file in a directory, parses it and averages the repeated measurements.</summary>
/// <param name="dir">Directory to scan. It is now actually used; previously the constant strFolder was read and this argument was ignored.</param>
/// <returns>Averaged measurements by packed key, keyed by file name without extension.</returns>
static Dictionary<string, dictI2D> readFiles( string dir )
{
    string[] files = Directory.GetFiles( dir, "*.txt" );
    Dictionary<string, dictI2D> res = new Dictionary<string, dictI2D>( files.Length );

    foreach( string f in files )
    {
        // The file name without extension becomes the column title in the output tables
        string name = Path.GetFileNameWithoutExtension( f );
        res[ name ] = parse( f ).average().ToDictionary( k => k.Key, k => k.Value );
    }
    return res;
}

///

Get lines in this dataset.

/// <summary>Collects the distinct packed keys found in any column of the dataset.</summary>
/// <param name="data">Measurements by packed key, for each column.</param>
/// <returns>All keys that occur in at least one column, in ascending order.</returns>
static int[] tableLines( this Dictionary<string, dictI2D> data )
{
    return data.Values
        .SelectMany( column => column.Keys )
        .Distinct()
        .OrderBy( k => k )
        .ToArray();
}

///

/// <summary>Writes one table row: the fields joined with tab characters, followed by a line break.</summary>
/// <param name="tw">Destination writer.</param>
/// <param name="fields">Cell values of the row, in column order.</param>
static void printFields( this TextWriter tw, IEnumerable<string> fields )
{
    string row = string.Join( "\t", fields );
    tw.WriteLine( row );
}

///

/// <summary>Writes one tab-separated table: a title line, a header row, then one row per algorithm.</summary>
/// <param name="tw">Destination writer.</param>
/// <param name="title">Text written on its own line before the table.</param>
/// <param name="data">Measurements by packed key, for each column name.</param>
/// <param name="rows">Packed keys to print, one table row each.</param>
/// <param name="columns">Column names, in output order; each must be a key of <paramref name="data"/>.</param>
/// <param name="algoNames">Row labels by algorithm number; must contain the algorithm of every row.</param>
static void printTable( TextWriter tw, string title, Dictionary<string, dictI2D> data, IEnumerable<int> rows, string[] columns, Dictionary<int, string> algoNames )
{
    tw.WriteLine( title );

    // Header row: an empty corner cell followed by the column names
    tw.printFields( new[] { "" }.Concat( columns ) );

    foreach( int r in rows )
    {
        List<string> cells = new List<string>( columns.Length + 1 );
        cells.Add( algoNames[ r.algo() ] );
        foreach( string c in columns )
        {
            // A column with no measurement for this row gets an empty cell
            double val;
            cells.Add( data[ c ].TryGetValue( r, out val ) ? val.ToString() : string.Empty );
        }
        tw.printFields( cells );
    }
}

///

/// <summary>Writes the SinCos and BitCount tables to a file, separated by an empty line.</summary>
/// <param name="data">Measurements by packed key, for each column name.</param>
/// <param name="dest">Path of the output file.</param>
static void print( Dictionary<string, dictI2D> data, string dest )
{
    int[] lines = data.tableLines();
    string[] columns = data.Keys.OrderBy( k => k ).ToArray();

    // Selects the rows that belong to one test
    Func<eTest, IEnumerable<int>> rowsOf = t => lines.Where( l => l.test() == t );

    using( StreamWriter sw = new StreamWriter( dest ) )
    {
        printTable( sw, "SinCos", data, rowsOf( eTest.SinCos ), columns, s_algoSinCos );
        sw.WriteLine();
        printTable( sw, "BitCount", data, rowsOf( eTest.BitCount ), columns, s_algoCnt );
    }
}

/// <summary>Rescales, in place, every measurement of one test to a percentage of a baseline algorithm.</summary>
/// <param name="column">Measurements of a single column by packed key; modified in place.</param>
/// <param name="test">Test whose entries are rescaled; entries of other tests are left untouched.</param>
/// <param name="algo">Algorithm number of the baseline. Its entry must exist in <paramref name="column"/>; the dictionary lookup fails otherwise.</param>
static void calcRel( dictI2D column, eTest test, int algo )
{
    double baseline = column[ key( test, algo ) ];

    // Snapshot the matching keys first: the dictionary is modified inside the loop
    int[] affected = column.Keys.Where( k => k.test() == test ).ToArray();
    foreach( int k in affected )
        column[ k ] = 100.0 * column[ k ] / baseline;
}

///

216 | static void Main( string[] args ) 217 | { 218 | // Reset culture to en-us, to match the output of C++ code 219 | CultureInfo english = new CultureInfo( "en-US" ); 220 | CultureInfo.DefaultThreadCurrentCulture = english; 221 | CultureInfo.DefaultThreadCurrentUICulture = english; 222 | Thread.CurrentThread.CurrentCulture = english; 223 | Thread.CurrentThread.CurrentUICulture = english; 224 | 225 | // Read, parse, average 226 | Dictionary data = readFiles( strFolder ); 227 | 228 | // Print raw results 229 | print( data, strOutput ); 230 | 231 | // Calculate relative values 232 | foreach( var d in data.Values ) 233 | { 234 | calcRel( d, eTest.BitCount, 1 ); 235 | calcRel( d, eTest.SinCos, 2 ); 236 | } 237 | 238 | // Print relative results 239 | print( data, strOutputRel ); 240 | } 241 | } 242 | } -------------------------------------------------------------------------------- /SinCosPolyPrecision/SinCosPolyPrecision.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58} 23 | Win32Proj 24 | SinCosPolyPrecision 25 | 8.1 26 | 27 | 28 | 29 | Application 30 | true 31 | v140 32 | Unicode 33 | 34 | 35 | Application 36 | false 37 | v140 38 | true 39 | Unicode 40 | 41 | 42 | Application 43 | true 44 | v140 45 | Unicode 46 | 47 | 48 | Application 49 | false 50 | v140 51 | true 52 | Unicode 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | true 74 | $(Platform)\$(Configuration)\ 75 | $(Platform)\$(Configuration)\ 76 | 77 | 78 | true 79 | $(Platform)\$(Configuration)\ 80 | 81 | 82 | false 83 | $(Platform)\$(Configuration)\ 84 | $(Platform)\$(Configuration)\ 85 | 86 | 87 | false 88 | $(Platform)\$(Configuration)\ 89 | 90 | 91 | 92 | Use 93 | Level3 94 | Disabled 95 | 
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 96 | true 97 | 98 | 99 | Console 100 | true 101 | 102 | 103 | 104 | 105 | Use 106 | Level3 107 | Disabled 108 | _DEBUG;_CONSOLE;%(PreprocessorDefinitions) 109 | true 110 | 111 | 112 | Console 113 | true 114 | 115 | 116 | 117 | 118 | Level3 119 | Use 120 | MaxSpeed 121 | true 122 | true 123 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 124 | true 125 | 126 | 127 | Console 128 | true 129 | true 130 | true 131 | 132 | 133 | 134 | 135 | Level3 136 | Use 137 | MaxSpeed 138 | true 139 | true 140 | NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 141 | true 142 | 143 | 144 | Console 145 | true 146 | true 147 | true 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | Create 158 | Create 159 | Create 160 | Create 161 | 162 | 163 | 164 | 165 | 166 | -------------------------------------------------------------------------------- /Test/LookupTables.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC} 23 | Win32Proj 24 | LookupTables 25 | 8.1 26 | 27 | 28 | 29 | Application 30 | true 31 | v140 32 | Unicode 33 | 34 | 35 | Application 36 | false 37 | v140 38 | true 39 | Unicode 40 | 41 | 42 | Application 43 | true 44 | v140 45 | Unicode 46 | 47 | 48 | Application 49 | false 50 | v140 51 | true 52 | Unicode 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | true 74 | $(ProjectDir);$(ProjectDir)SinCos\;$(IncludePath) 75 | $(Platform)\$(Configuration)\ 76 | $(Platform)\$(Configuration)\ 77 | 78 | 79 | true 80 | $(ProjectDir);$(ProjectDir)SinCos\;$(IncludePath) 81 | $(Platform)\$(Configuration)\ 82 | 83 | 84 | false 85 | $(ProjectDir);$(ProjectDir)SinCos\;$(IncludePath) 86 | $(Platform)\$(Configuration)\ 87 | $(Platform)\$(Configuration)\ 88 | 89 | 90 | 
false 91 | $(ProjectDir);$(ProjectDir)SinCos\;$(IncludePath) 92 | $(Platform)\$(Configuration)\ 93 | 94 | 95 | 96 | Use 97 | Level3 98 | Disabled 99 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 100 | true 101 | 102 | 103 | Console 104 | true 105 | 106 | 107 | 108 | 109 | Use 110 | Level3 111 | Disabled 112 | _DEBUG;_CONSOLE;%(PreprocessorDefinitions) 113 | true 114 | 115 | 116 | Console 117 | true 118 | 119 | 120 | 121 | 122 | Level3 123 | Use 124 | MaxSpeed 125 | true 126 | true 127 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 128 | true 129 | MultiThreaded 130 | Speed 131 | StreamingSIMDExtensions2 132 | 133 | 134 | Console 135 | true 136 | true 137 | true 138 | 139 | 140 | 141 | 142 | Level3 143 | Use 144 | MaxSpeed 145 | true 146 | true 147 | NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 148 | true 149 | MultiThreaded 150 | Speed 151 | StreamingSIMDExtensions2 152 | 153 | 154 | Console 155 | true 156 | true 157 | true 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | Create 182 | Create 183 | Create 184 | Create 185 | 186 | 187 | 188 | 189 | 190 | -------------------------------------------------------------------------------- /Test/SinCos/Mathematics/GteConstants.h: -------------------------------------------------------------------------------- 1 | // David Eberly, Geometric Tools, Redmond WA 98052 2 | // Copyright (c) 1998-2016 3 | // Distributed under the Boost Software License, Version 1.0. 4 | // http://www.boost.org/LICENSE_1_0.txt 5 | // http://www.geometrictools.com/License/Boost/LICENSE_1_0.txt 6 | // File Version: 3.0.0 (2016/06/19) 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | // This file is for sharing of constants among the CPU, SSE2, and GPU. The 13 | // hard-coded numbers lead to correctly rounded approximations of the 14 | // constants when using 'float' or 'double'. 15 | 16 | // Constants involving pi. 
17 | #define GTE_C_PI 3.1415926535897931 18 | #define GTE_C_HALF_PI 1.5707963267948966 19 | #define GTE_C_QUARTER_PI 0.7853981633974483 20 | #define GTE_C_TWO_PI 6.2831853071795862 21 | #define GTE_C_INV_PI 0.3183098861837907 22 | #define GTE_C_INV_TWO_PI 0.1591549430918953 23 | #define GTE_C_INV_HALF_PI 0.6366197723675813 24 | 25 | // Conversions between degrees and radians. 26 | #define GTE_C_DEG_TO_RAD 0.0174532925199433 27 | #define GTE_C_RAD_TO_DEG 57.295779513082321 28 | 29 | // Common constants. 30 | #define GTE_C_SQRT_2 1.4142135623730951 31 | #define GTE_C_INV_SQRT_2 0.7071067811865475 32 | #define GTE_C_LN_2 0.6931471805599453 33 | #define GTE_C_INV_LN_2 1.4426950408889634 34 | #define GTE_C_LN_10 2.3025850929940459 35 | #define GTE_C_INV_LN_10 0.43429448190325176 36 | 37 | // Constants for minimax polynomial approximations to sqrt(x). 38 | // The algorithm minimizes the maximum absolute error on [1,2]. 39 | #define GTE_C_SQRT_DEG1_C0 +1.0 40 | #define GTE_C_SQRT_DEG1_C1 +4.1421356237309505e-01 41 | #define GTE_C_SQRT_DEG1_MAX_ERROR 1.7766952966368793e-2 42 | 43 | #define GTE_C_SQRT_DEG2_C0 +1.0 44 | #define GTE_C_SQRT_DEG2_C1 +4.8563183076125260e-01 45 | #define GTE_C_SQRT_DEG2_C2 -7.1418268388157458e-02 46 | #define GTE_C_SQRT_DEG2_MAX_ERROR 1.1795695163108744e-3 47 | 48 | #define GTE_C_SQRT_DEG3_C0 +1.0 49 | #define GTE_C_SQRT_DEG3_C1 +4.9750045320242231e-01 50 | #define GTE_C_SQRT_DEG3_C2 -1.0787308044477850e-01 51 | #define GTE_C_SQRT_DEG3_C3 +2.4586189615451115e-02 52 | #define GTE_C_SQRT_DEG3_MAX_ERROR 1.1309620116468910e-4 53 | 54 | #define GTE_C_SQRT_DEG4_C0 +1.0 55 | #define GTE_C_SQRT_DEG4_C1 +4.9955939832918816e-01 56 | #define GTE_C_SQRT_DEG4_C2 -1.2024066151943025e-01 57 | #define GTE_C_SQRT_DEG4_C3 +4.5461507257698486e-02 58 | #define GTE_C_SQRT_DEG4_C4 -1.0566681694362146e-02 59 | #define GTE_C_SQRT_DEG4_MAX_ERROR 1.2741170151556180e-5 60 | 61 | #define GTE_C_SQRT_DEG5_C0 +1.0 62 | #define GTE_C_SQRT_DEG5_C1 +4.9992197660031912e-01 63 | 
#define GTE_C_SQRT_DEG5_C2 -1.2378506719245053e-01 64 | #define GTE_C_SQRT_DEG5_C3 +5.6122776972699739e-02 65 | #define GTE_C_SQRT_DEG5_C4 -2.3128836281145482e-02 66 | #define GTE_C_SQRT_DEG5_C5 +5.0827122737047148e-03 67 | #define GTE_C_SQRT_DEG5_MAX_ERROR 1.5725568940708201e-6 68 | 69 | #define GTE_C_SQRT_DEG6_C0 +1.0 70 | #define GTE_C_SQRT_DEG6_C1 +4.9998616695784914e-01 71 | #define GTE_C_SQRT_DEG6_C2 -1.2470733323278438e-01 72 | #define GTE_C_SQRT_DEG6_C3 +6.0388587356982271e-02 73 | #define GTE_C_SQRT_DEG6_C4 -3.1692053551807930e-02 74 | #define GTE_C_SQRT_DEG6_C5 +1.2856590305148075e-02 75 | #define GTE_C_SQRT_DEG6_C6 -2.6183954624343642e-03 76 | #define GTE_C_SQRT_DEG6_MAX_ERROR 2.0584155535630089e-7 77 | 78 | #define GTE_C_SQRT_DEG7_C0 +1.0 79 | #define GTE_C_SQRT_DEG7_C1 +4.9999754817809228e-01 80 | #define GTE_C_SQRT_DEG7_C2 -1.2493243476353655e-01 81 | #define GTE_C_SQRT_DEG7_C3 +6.1859954146370910e-02 82 | #define GTE_C_SQRT_DEG7_C4 -3.6091595023208356e-02 83 | #define GTE_C_SQRT_DEG7_C5 +1.9483946523450868e-02 84 | #define GTE_C_SQRT_DEG7_C6 -7.5166134568007692e-03 85 | #define GTE_C_SQRT_DEG7_C7 +1.4127567687864939e-03 86 | #define GTE_C_SQRT_DEG7_MAX_ERROR 2.8072302919734948e-8 87 | 88 | #define GTE_C_SQRT_DEG8_C0 +1.0 89 | #define GTE_C_SQRT_DEG8_C1 +4.9999956583056759e-01 90 | #define GTE_C_SQRT_DEG8_C2 -1.2498490369914350e-01 91 | #define GTE_C_SQRT_DEG8_C3 +6.2318494667579216e-02 92 | #define GTE_C_SQRT_DEG8_C4 -3.7982961896432244e-02 93 | #define GTE_C_SQRT_DEG8_C5 +2.3642612312869460e-02 94 | #define GTE_C_SQRT_DEG8_C6 -1.2529377587270574e-02 95 | #define GTE_C_SQRT_DEG8_C7 +4.5382426960713929e-03 96 | #define GTE_C_SQRT_DEG8_C8 -7.8810995273670414e-04 97 | #define GTE_C_SQRT_DEG8_MAX_ERROR 3.9460605685825989e-9 98 | 99 | // Constants for minimax polynomial approximations to 1/sqrt(x). 100 | // The algorithm minimizes the maximum absolute error on [1,2]. 
101 | #define GTE_C_INVSQRT_DEG1_C0 +1.0 102 | #define GTE_C_INVSQRT_DEG1_C1 -2.9289321881345254e-01 103 | #define GTE_C_INVSQRT_DEG1_MAX_ERROR 3.7814314552701983e-2 104 | 105 | #define GTE_C_INVSQRT_DEG2_C0 +1.0 106 | #define GTE_C_INVSQRT_DEG2_C1 -4.4539812104566801e-01 107 | #define GTE_C_INVSQRT_DEG2_C2 +1.5250490223221547e-01 108 | #define GTE_C_INVSQRT_DEG2_MAX_ERROR 4.1953446330581234e-3 109 | 110 | #define GTE_C_INVSQRT_DEG3_C0 +1.0 111 | #define GTE_C_INVSQRT_DEG3_C1 -4.8703230993068791e-01 112 | #define GTE_C_INVSQRT_DEG3_C2 +2.8163710486669835e-01 113 | #define GTE_C_INVSQRT_DEG3_C3 -8.7498013749463421e-02 114 | #define GTE_C_INVSQRT_DEG3_MAX_ERROR 5.6307702007266786e-4 115 | 116 | #define GTE_C_INVSQRT_DEG4_C0 +1.0 117 | #define GTE_C_INVSQRT_DEG4_C1 -4.9710061558048779e-01 118 | #define GTE_C_INVSQRT_DEG4_C2 +3.4266247597676802e-01 119 | #define GTE_C_INVSQRT_DEG4_C3 -1.9106356536293490e-01 120 | #define GTE_C_INVSQRT_DEG4_C4 +5.2608486153198797e-02 121 | #define GTE_C_INVSQRT_DEG4_MAX_ERROR 8.1513919987605266e-5 122 | 123 | #define GTE_C_INVSQRT_DEG5_C0 +1.0 124 | #define GTE_C_INVSQRT_DEG5_C1 -4.9937760586004143e-01 125 | #define GTE_C_INVSQRT_DEG5_C2 +3.6508741295133973e-01 126 | #define GTE_C_INVSQRT_DEG5_C3 -2.5884890281853501e-01 127 | #define GTE_C_INVSQRT_DEG5_C4 +1.3275782221320753e-01 128 | #define GTE_C_INVSQRT_DEG5_C5 -3.2511945299404488e-02 129 | #define GTE_C_INVSQRT_DEG5_MAX_ERROR 1.2289367475583346e-5 130 | 131 | #define GTE_C_INVSQRT_DEG6_C0 +1.0 132 | #define GTE_C_INVSQRT_DEG6_C1 -4.9987029229547453e-01 133 | #define GTE_C_INVSQRT_DEG6_C2 +3.7220923604495226e-01 134 | #define GTE_C_INVSQRT_DEG6_C3 -2.9193067713256937e-01 135 | #define GTE_C_INVSQRT_DEG6_C4 +1.9937605991094642e-01 136 | #define GTE_C_INVSQRT_DEG6_C5 -9.3135712130901993e-02 137 | #define GTE_C_INVSQRT_DEG6_C6 +2.0458166789566690e-02 138 | #define GTE_C_INVSQRT_DEG6_MAX_ERROR 1.9001451223750465e-6 139 | 140 | #define GTE_C_INVSQRT_DEG7_C0 +1.0 141 | #define 
GTE_C_INVSQRT_DEG7_C1 -4.9997357250704977e-01 142 | #define GTE_C_INVSQRT_DEG7_C2 +3.7426216884998809e-01 143 | #define GTE_C_INVSQRT_DEG7_C3 -3.0539882498248971e-01 144 | #define GTE_C_INVSQRT_DEG7_C4 +2.3976005607005391e-01 145 | #define GTE_C_INVSQRT_DEG7_C5 -1.5410326351684489e-01 146 | #define GTE_C_INVSQRT_DEG7_C6 +6.5598809723041995e-02 147 | #define GTE_C_INVSQRT_DEG7_C7 -1.3038592450470787e-02 148 | #define GTE_C_INVSQRT_DEG7_MAX_ERROR 2.9887724993168940e-7 149 | 150 | #define GTE_C_INVSQRT_DEG8_C0 +1.0 151 | #define GTE_C_INVSQRT_DEG8_C1 -4.9999471066120371e-01 152 | #define GTE_C_INVSQRT_DEG8_C2 +3.7481415745794067e-01 153 | #define GTE_C_INVSQRT_DEG8_C3 -3.1023804387422160e-01 154 | #define GTE_C_INVSQRT_DEG8_C4 +2.5977002682930106e-01 155 | #define GTE_C_INVSQRT_DEG8_C5 -1.9818790717727097e-01 156 | #define GTE_C_INVSQRT_DEG8_C6 +1.1882414252613671e-01 157 | #define GTE_C_INVSQRT_DEG8_C7 -4.6270038088550791e-02 158 | #define GTE_C_INVSQRT_DEG8_C8 +8.3891541755747312e-03 159 | #define GTE_C_INVSQRT_DEG8_MAX_ERROR 4.7596926146947771e-8 160 | 161 | // Constants for minimax polynomial approximations to sin(x). 162 | // The algorithm minimizes the maximum absolute error on [-pi/2,pi/2]. 
163 | #define GTE_C_SIN_DEG3_C0 +1.0 164 | #define GTE_C_SIN_DEG3_C1 -1.4727245910375519e-01 165 | #define GTE_C_SIN_DEG3_MAX_ERROR 1.3481903639145865e-2 166 | 167 | #define GTE_C_SIN_DEG5_C0 +1.0 168 | #define GTE_C_SIN_DEG5_C1 -1.6600599923812209e-01 169 | #define GTE_C_SIN_DEG5_C2 +7.5924178409012000e-03 170 | #define GTE_C_SIN_DEG5_MAX_ERROR 1.4001209384639779e-4 171 | 172 | #define GTE_C_SIN_DEG7_C0 +1.0 173 | #define GTE_C_SIN_DEG7_C1 -1.6665578084732124e-01 174 | #define GTE_C_SIN_DEG7_C2 +8.3109378830028557e-03 175 | #define GTE_C_SIN_DEG7_C3 -1.8447486103462252e-04 176 | #define GTE_C_SIN_DEG7_MAX_ERROR 1.0205878936686563e-6 177 | 178 | #define GTE_C_SIN_DEG9_C0 +1.0 179 | #define GTE_C_SIN_DEG9_C1 -1.6666656235308897e-01 180 | #define GTE_C_SIN_DEG9_C2 +8.3329962509886002e-03 181 | #define GTE_C_SIN_DEG9_C3 -1.9805100675274190e-04 182 | #define GTE_C_SIN_DEG9_C4 +2.5967200279475300e-06 183 | #define GTE_C_SIN_DEG9_MAX_ERROR 5.2010746265374053e-9 184 | 185 | #define GTE_C_SIN_DEG11_C0 +1.0 186 | #define GTE_C_SIN_DEG11_C1 -1.6666666601721269e-01 187 | #define GTE_C_SIN_DEG11_C2 +8.3333303183525942e-03 188 | #define GTE_C_SIN_DEG11_C3 -1.9840782426250314e-04 189 | #define GTE_C_SIN_DEG11_C4 +2.7521557770526783e-06 190 | #define GTE_C_SIN_DEG11_C5 -2.3828544692960918e-08 191 | #define GTE_C_SIN_DEG11_MAX_ERROR 1.9295870457014530e-11 192 | 193 | // Constants for minimax polynomial approximations to cos(x). 194 | // The algorithm minimizes the maximum absolute error on [-pi/2,pi/2]. 
195 | #define GTE_C_COS_DEG2_C0 +1.0 196 | #define GTE_C_COS_DEG2_C1 -4.0528473456935105e-01 197 | #define GTE_C_COS_DEG2_MAX_ERROR 5.4870946878404048e-2 198 | 199 | #define GTE_C_COS_DEG4_C0 +1.0 200 | #define GTE_C_COS_DEG4_C1 -4.9607181958647262e-01 201 | #define GTE_C_COS_DEG4_C2 +3.6794619653489236e-02 202 | #define GTE_C_COS_DEG4_MAX_ERROR 9.1879932449712154e-4 203 | 204 | #define GTE_C_COS_DEG6_C0 +1.0 205 | #define GTE_C_COS_DEG6_C1 -4.9992746217057404e-01 206 | #define GTE_C_COS_DEG6_C2 +4.1493920348353308e-02 207 | #define GTE_C_COS_DEG6_C3 -1.2712435011987822e-03 208 | #define GTE_C_COS_DEG6_MAX_ERROR 9.2028470133065365e-6 209 | 210 | #define GTE_C_COS_DEG8_C0 +1.0 211 | #define GTE_C_COS_DEG8_C1 -4.9999925121358291e-01 212 | #define GTE_C_COS_DEG8_C2 +4.1663780117805693e-02 213 | #define GTE_C_COS_DEG8_C3 -1.3854239405310942e-03 214 | #define GTE_C_COS_DEG8_C4 +2.3154171575501259e-05 215 | #define GTE_C_COS_DEG8_MAX_ERROR 5.9804533020235695e-8 216 | 217 | #define GTE_C_COS_DEG10_C0 +1.0 218 | #define GTE_C_COS_DEG10_C1 -4.9999999508695869e-01 219 | #define GTE_C_COS_DEG10_C2 +4.1666638865338612e-02 220 | #define GTE_C_COS_DEG10_C3 -1.3888377661039897e-03 221 | #define GTE_C_COS_DEG10_C4 +2.4760495088926859e-05 222 | #define GTE_C_COS_DEG10_C5 -2.6051615464872668e-07 223 | #define GTE_C_COS_DEG10_MAX_ERROR 2.7006769043325107e-10 224 | 225 | // Constants for minimax polynomial approximations to tan(x). 226 | // The algorithm minimizes the maximum absolute error on [-pi/4,pi/4]. 
227 | #define GTE_C_TAN_DEG3_C0 1.0 228 | #define GTE_C_TAN_DEG3_C1 4.4295926544736286e-01 229 | #define GTE_C_TAN_DEG3_MAX_ERROR 1.1661892256204731e-2 230 | 231 | #define GTE_C_TAN_DEG5_C0 1.0 232 | #define GTE_C_TAN_DEG5_C1 3.1401320403542421e-01 233 | #define GTE_C_TAN_DEG5_C2 2.0903948109240345e-01 234 | #define GTE_C_TAN_DEG5_MAX_ERROR 5.8431854390143118e-4 235 | 236 | #define GTE_C_TAN_DEG7_C0 1.0 237 | #define GTE_C_TAN_DEG7_C1 3.3607213284422555e-01 238 | #define GTE_C_TAN_DEG7_C2 1.1261037305184907e-01 239 | #define GTE_C_TAN_DEG7_C3 9.8352099470524479e-02 240 | #define GTE_C_TAN_DEG7_MAX_ERROR 3.5418688397723108e-5 241 | 242 | #define GTE_C_TAN_DEG9_C0 1.0 243 | #define GTE_C_TAN_DEG9_C1 3.3299232843941784e-01 244 | #define GTE_C_TAN_DEG9_C2 1.3747843432474838e-01 245 | #define GTE_C_TAN_DEG9_C3 3.7696344813028304e-02 246 | #define GTE_C_TAN_DEG9_C4 4.6097377279281204e-02 247 | #define GTE_C_TAN_DEG9_MAX_ERROR 2.2988173242199927e-6 248 | 249 | #define GTE_C_TAN_DEG11_C0 1.0 250 | #define GTE_C_TAN_DEG11_C1 3.3337224456224224e-01 251 | #define GTE_C_TAN_DEG11_C2 1.3264516053824593e-01 252 | #define GTE_C_TAN_DEG11_C3 5.8145237645931047e-02 253 | #define GTE_C_TAN_DEG11_C4 1.0732193237572574e-02 254 | #define GTE_C_TAN_DEG11_C5 2.1558456793513869e-02 255 | #define GTE_C_TAN_DEG11_MAX_ERROR 1.5426257940140409e-7 256 | 257 | #define GTE_C_TAN_DEG13_C0 1.0 258 | #define GTE_C_TAN_DEG13_C1 3.3332916426394554e-01 259 | #define GTE_C_TAN_DEG13_C2 1.3343404625112498e-01 260 | #define GTE_C_TAN_DEG13_C3 5.3104565343119248e-02 261 | #define GTE_C_TAN_DEG13_C4 2.5355038312682154e-02 262 | #define GTE_C_TAN_DEG13_C5 1.8253255966556026e-03 263 | #define GTE_C_TAN_DEG13_C6 1.0069407176615641e-02 264 | #define GTE_C_TAN_DEG13_MAX_ERROR 1.0550264249037378e-8 265 | 266 | // Constants for minimax polynomial approximations to acos(x), where the 267 | // approximation is of the form acos(x) = sqrt(1 - x)*p(x) with p(x) a 268 | // polynomial. 
// The algorithm minimizes the maximum error
// |acos(x)/sqrt(1-x) - p(x)| on [0,1].  At the same time we get an
// approximation for asin(x) = pi/2 - acos(x).
// For each degree <n> there is one GTE_C_ACOS_DEG<n>_C<i> macro per
// coefficient of p(x) and a GTE_C_ACOS_DEG<n>_MAX_ERROR macro.
#define GTE_C_ACOS_DEG1_C0 +1.5707963267948966
#define GTE_C_ACOS_DEG1_C1 -1.5658276442180141e-01
#define GTE_C_ACOS_DEG1_MAX_ERROR 1.1659002803738105e-2

#define GTE_C_ACOS_DEG2_C0 +1.5707963267948966
#define GTE_C_ACOS_DEG2_C1 -2.0347053865798365e-01
#define GTE_C_ACOS_DEG2_C2 +4.6887774236182234e-02
#define GTE_C_ACOS_DEG2_MAX_ERROR 9.0311602490029258e-4

#define GTE_C_ACOS_DEG3_C0 +1.5707963267948966
#define GTE_C_ACOS_DEG3_C1 -2.1253291899190285e-01
#define GTE_C_ACOS_DEG3_C2 +7.4773789639484223e-02
#define GTE_C_ACOS_DEG3_C3 -1.8823635069382449e-02
#define GTE_C_ACOS_DEG3_MAX_ERROR 9.3066396954288172e-5

#define GTE_C_ACOS_DEG4_C0 +1.5707963267948966
#define GTE_C_ACOS_DEG4_C1 -2.1422258835275865e-01
#define GTE_C_ACOS_DEG4_C2 +8.4936675142844198e-02
#define GTE_C_ACOS_DEG4_C3 -3.5991475120957794e-02
#define GTE_C_ACOS_DEG4_C4 +8.6946239090712751e-03
#define GTE_C_ACOS_DEG4_MAX_ERROR 1.0930595804481413e-5

#define GTE_C_ACOS_DEG5_C0 +1.5707963267948966
#define GTE_C_ACOS_DEG5_C1 -2.1453292139805524e-01
#define GTE_C_ACOS_DEG5_C2 +8.7973089282889383e-02
#define GTE_C_ACOS_DEG5_C3 -4.5130266382166440e-02
#define GTE_C_ACOS_DEG5_C4 +1.9467466687281387e-02
#define GTE_C_ACOS_DEG5_C5 -4.3601326117634898e-03
// The exponent marker 'e' is required here: without it the macro expands to
// the subtraction "1.3861070257241426 - 6".
#define GTE_C_ACOS_DEG5_MAX_ERROR 1.3861070257241426e-6

#define GTE_C_ACOS_DEG6_C0 +1.5707963267948966
#define GTE_C_ACOS_DEG6_C1 -2.1458939285677325e-01
#define GTE_C_ACOS_DEG6_C2 +8.8784960563641491e-02
#define GTE_C_ACOS_DEG6_C3 -4.8887131453156485e-02
#define GTE_C_ACOS_DEG6_C4 +2.7011519960012720e-02
#define GTE_C_ACOS_DEG6_C5 -1.1210537323478320e-02
#define GTE_C_ACOS_DEG6_C6 +2.3078166879102469e-03
#define GTE_C_ACOS_DEG6_MAX_ERROR 1.8491291330427484e-7

#define GTE_C_ACOS_DEG7_C0 +1.5707963267948966
#define GTE_C_ACOS_DEG7_C1 -2.1459960076929829e-01
#define GTE_C_ACOS_DEG7_C2 +8.8986946573346160e-02
#define GTE_C_ACOS_DEG7_C3 -5.0207843052845647e-02
#define GTE_C_ACOS_DEG7_C4 +3.0961594977611639e-02
#define GTE_C_ACOS_DEG7_C5 -1.7162031184398074e-02
#define GTE_C_ACOS_DEG7_C6 +6.7072304676685235e-03
#define GTE_C_ACOS_DEG7_C7 -1.2690614339589956e-03
#define GTE_C_ACOS_DEG7_MAX_ERROR 2.5574620927948377e-8

#define GTE_C_ACOS_DEG8_C0 +1.5707963267948966
#define GTE_C_ACOS_DEG8_C1 -2.1460143648688035e-01
#define GTE_C_ACOS_DEG8_C2 +8.9034700107934128e-02
#define GTE_C_ACOS_DEG8_C3 -5.0625279962389413e-02
#define GTE_C_ACOS_DEG8_C4 +3.2683762943179318e-02
#define GTE_C_ACOS_DEG8_C5 -2.0949278766238422e-02
#define GTE_C_ACOS_DEG8_C6 +1.1272900916992512e-02
#define GTE_C_ACOS_DEG8_C7 -4.1160981058965262e-03
#define GTE_C_ACOS_DEG8_C8 +7.1796493341480527e-04
#define GTE_C_ACOS_DEG8_MAX_ERROR 3.6340015129032732e-9

// Constants for minimax polynomial approximations to atan(x).
// The algorithm minimizes the maximum absolute error on [-1,1].
333 | #define GTE_C_ATAN_DEG3_C0 +1.0 334 | #define GTE_C_ATAN_DEG3_C1 -2.1460183660255172e-01 335 | #define GTE_C_ATAN_DEG3_MAX_ERROR 1.5970326392614240e-2 336 | 337 | #define GTE_C_ATAN_DEG5_C0 +1.0 338 | #define GTE_C_ATAN_DEG5_C1 -3.0189478312144946e-01 339 | #define GTE_C_ATAN_DEG5_C2 +8.7292946518897740e-02 340 | #define GTE_C_ATAN_DEG5_MAX_ERROR 1.3509832247372636e-3 341 | 342 | #define GTE_C_ATAN_DEG7_C0 +1.0 343 | #define GTE_C_ATAN_DEG7_C1 -3.2570157599356531e-01 344 | #define GTE_C_ATAN_DEG7_C2 +1.5342994884206673e-01 345 | #define GTE_C_ATAN_DEG7_C3 -4.2330209451053591e-02 346 | #define GTE_C_ATAN_DEG7_MAX_ERROR 1.5051227215514412e-4 347 | 348 | #define GTE_C_ATAN_DEG9_C0 +1.0 349 | #define GTE_C_ATAN_DEG9_C1 -3.3157878236439586e-01 350 | #define GTE_C_ATAN_DEG9_C2 +1.8383034738018011e-01 351 | #define GTE_C_ATAN_DEG9_C3 -8.9253037587244677e-02 352 | #define GTE_C_ATAN_DEG9_C4 +2.2399635968909593e-02 353 | #define GTE_C_ATAN_DEG9_MAX_ERROR 1.8921598624582064e-5 354 | 355 | #define GTE_C_ATAN_DEG11_C0 +1.0 356 | #define GTE_C_ATAN_DEG11_C1 -3.3294527685374087e-01 357 | #define GTE_C_ATAN_DEG11_C2 +1.9498657165383548e-01 358 | #define GTE_C_ATAN_DEG11_C3 -1.1921576270475498e-01 359 | #define GTE_C_ATAN_DEG11_C4 +5.5063351366968050e-02 360 | #define GTE_C_ATAN_DEG11_C5 -1.2490720064867844e-02 361 | #define GTE_C_ATAN_DEG11_MAX_ERROR 2.5477724974187765e-6 362 | 363 | #define GTE_C_ATAN_DEG13_C0 +1.0 364 | #define GTE_C_ATAN_DEG13_C1 -3.3324998579202170e-01 365 | #define GTE_C_ATAN_DEG13_C2 +1.9856563505717162e-01 366 | #define GTE_C_ATAN_DEG13_C3 -1.3374657325451267e-01 367 | #define GTE_C_ATAN_DEG13_C4 +8.1675882859940430e-02 368 | #define GTE_C_ATAN_DEG13_C5 -3.5059680836411644e-02 369 | #define GTE_C_ATAN_DEG13_C6 +7.2128853633444123e-03 370 | #define GTE_C_ATAN_DEG13_MAX_ERROR 3.5859104691865484e-7 371 | 372 | // Constants for minimax polynomial approximations to exp2(x) = 2^x. 373 | // The algorithm minimizes the maximum absolute error on [0,1]. 
// exp2 tables: GTE_C_EXP2_DEG<n>_C<i> for i = 0..n, then the group's
// MAX_ERROR. Every group has C0 = 1.0, the exact value of 2^0.
// NOTE(review): C<i> is presumably the coefficient of x^i -- confirm against
// the estimator that uses these macros.

// exp2, degree 1
#define GTE_C_EXP2_DEG1_C0         1.0
#define GTE_C_EXP2_DEG1_C1         1.0
#define GTE_C_EXP2_DEG1_MAX_ERROR  8.6071332055934313e-2

// exp2, degree 2
#define GTE_C_EXP2_DEG2_C0         1.0
#define GTE_C_EXP2_DEG2_C1         6.5571332605741528e-01
#define GTE_C_EXP2_DEG2_C2         3.4428667394258472e-01
#define GTE_C_EXP2_DEG2_MAX_ERROR  3.8132476831060358e-3

// exp2, degree 3
#define GTE_C_EXP2_DEG3_C0         1.0
#define GTE_C_EXP2_DEG3_C1         6.9589012084456225e-01
#define GTE_C_EXP2_DEG3_C2         2.2486494900110188e-01
#define GTE_C_EXP2_DEG3_C3         7.9244930154334980e-02
#define GTE_C_EXP2_DEG3_MAX_ERROR  1.4694877755186408e-4

// exp2, degree 4
#define GTE_C_EXP2_DEG4_C0         1.0
#define GTE_C_EXP2_DEG4_C1         6.9300392358459195e-01
#define GTE_C_EXP2_DEG4_C2         2.4154981722455560e-01
#define GTE_C_EXP2_DEG4_C3         5.1744260331489045e-02
#define GTE_C_EXP2_DEG4_C4         1.3701998859367848e-02
#define GTE_C_EXP2_DEG4_MAX_ERROR  4.7617792624521371e-6

// exp2, degree 5
#define GTE_C_EXP2_DEG5_C0         1.0
#define GTE_C_EXP2_DEG5_C1         6.9315298010274962e-01
#define GTE_C_EXP2_DEG5_C2         2.4014712313022102e-01
#define GTE_C_EXP2_DEG5_C3         5.5855296413199085e-02
#define GTE_C_EXP2_DEG5_C4         8.9477503096873079e-03
#define GTE_C_EXP2_DEG5_C5         1.8968500441332026e-03
#define GTE_C_EXP2_DEG5_MAX_ERROR  1.3162098333463490e-7

// exp2, degree 6
#define GTE_C_EXP2_DEG6_C0         1.0
#define GTE_C_EXP2_DEG6_C1         6.9314698914837525e-01
#define GTE_C_EXP2_DEG6_C2         2.4023013440952923e-01
#define GTE_C_EXP2_DEG6_C3         5.5481276898206033e-02
#define GTE_C_EXP2_DEG6_C4         9.6838443037086108e-03
#define GTE_C_EXP2_DEG6_C5         1.2388324048515642e-03
#define GTE_C_EXP2_DEG6_C6         2.1892283501756538e-04
#define GTE_C_EXP2_DEG6_MAX_ERROR  3.1589168225654163e-9

// exp2, degree 7
#define GTE_C_EXP2_DEG7_C0         1.0
#define GTE_C_EXP2_DEG7_C1         6.9314718588750690e-01
#define GTE_C_EXP2_DEG7_C2         2.4022637363165700e-01
#define GTE_C_EXP2_DEG7_C3         5.5505235570535660e-02
#define GTE_C_EXP2_DEG7_C4         9.6136265387940512e-03
#define GTE_C_EXP2_DEG7_C5         1.3429234504656051e-03
#define GTE_C_EXP2_DEG7_C6         1.4299202757683815e-04
#define GTE_C_EXP2_DEG7_C7         2.1662892777385423e-05
#define GTE_C_EXP2_DEG7_MAX_ERROR  6.6864513925679603e-11

// Coefficients of the minimax polynomials that approximate log2(x).
// The fit minimizes the maximum absolute error over the interval [1,2].
// Every polynomial has a zero constant term, so the groups start at C1.
// NOTE(review): within each group the coefficients sum to 1 (up to rounding),
// which suggests the polynomial variable is t = x - 1 with t in [0,1] --
// confirm against the estimator that uses these macros.

// log2, degree 1
#define GTE_C_LOG2_DEG1_C1         +1.0
#define GTE_C_LOG2_DEG1_MAX_ERROR   8.6071332055934202e-2

// log2, degree 2
#define GTE_C_LOG2_DEG2_C1         +1.3465553856377803
#define GTE_C_LOG2_DEG2_C2         -3.4655538563778032e-01
#define GTE_C_LOG2_DEG2_MAX_ERROR   7.6362868906658110e-3

// log2, degree 3
#define GTE_C_LOG2_DEG3_C1         +1.4228653756681227
#define GTE_C_LOG2_DEG3_C2         -5.8208556916449616e-01
#define GTE_C_LOG2_DEG3_C3         +1.5922019349637218e-01
#define GTE_C_LOG2_DEG3_MAX_ERROR   8.7902902652883808e-4

// log2, degree 4
#define GTE_C_LOG2_DEG4_C1         +1.4387257478171547
#define GTE_C_LOG2_DEG4_C2         -6.7778401359918661e-01
#define GTE_C_LOG2_DEG4_C3         +3.2118898377713379e-01
#define GTE_C_LOG2_DEG4_C4         -8.2130717995088531e-02
#define GTE_C_LOG2_DEG4_MAX_ERROR   1.1318551355360418e-4

// log2, degree 5
#define GTE_C_LOG2_DEG5_C1         +1.4419170408633741
#define GTE_C_LOG2_DEG5_C2         -7.0909645927612530e-01
#define GTE_C_LOG2_DEG5_C3         +4.1560609399164150e-01
#define GTE_C_LOG2_DEG5_C4         -1.9357573729558908e-01
#define GTE_C_LOG2_DEG5_C5         +4.5149061716699634e-02
#define GTE_C_LOG2_DEG5_MAX_ERROR   1.5521274478735858e-5

// log2, degree 6
#define GTE_C_LOG2_DEG6_C1         +1.4425449435950917
#define GTE_C_LOG2_DEG6_C2         -7.1814525675038965e-01
#define GTE_C_LOG2_DEG6_C3         +4.5754919692564044e-01
#define GTE_C_LOG2_DEG6_C4         -2.7790534462849337e-01
#define GTE_C_LOG2_DEG6_C5         +1.2179791068763279e-01
#define GTE_C_LOG2_DEG6_C6         -2.5841449829670182e-02
#define GTE_C_LOG2_DEG6_MAX_ERROR   2.2162051216689793e-6

// log2, degree 7
#define GTE_C_LOG2_DEG7_C1         +1.4426664401536078
#define GTE_C_LOG2_DEG7_C2         -7.2055423726162360e-01
#define GTE_C_LOG2_DEG7_C3         +4.7332419162501083e-01
#define GTE_C_LOG2_DEG7_C4         -3.2514018752954144e-01
#define GTE_C_LOG2_DEG7_C5         +1.9302965529095673e-01
#define GTE_C_LOG2_DEG7_C6         -7.8534970641157997e-02
#define GTE_C_LOG2_DEG7_C7         +1.5209108363023915e-02
#define GTE_C_LOG2_DEG7_MAX_ERROR   3.2546531700261561e-7

// log2, degree 8
#define GTE_C_LOG2_DEG8_C1         +1.4426896453621882
#define GTE_C_LOG2_DEG8_C2         -7.2115893912535967e-01
#define GTE_C_LOG2_DEG8_C3         +4.7861716616785088e-01
#define GTE_C_LOG2_DEG8_C4         -3.4699935395019565e-01
#define GTE_C_LOG2_DEG8_C5         +2.4114048765477492e-01
#define GTE_C_LOG2_DEG8_C6         -1.3657398692885181e-01
#define GTE_C_LOG2_DEG8_C7         +5.1421382871922106e-02
#define GTE_C_LOG2_DEG8_C8         -9.1364020499895560e-03
#define GTE_C_LOG2_DEG8_MAX_ERROR   4.8796219218050219e-8

// --------------------------------------------------------------------------------