├── absolute.png
├── relative.png
├── linIntError.png
├── RawResults
├── Results.xlsx
├── Relative.tsv
├── Parsed.tsv
├── laptop_32.txt
├── laptop_64.txt
├── desktop_32.txt
├── desktop_64.txt
└── tablet_32.txt
├── ParseResults
├── App.config
├── Properties
│ └── AssemblyInfo.cs
├── ParseResults.csproj
└── Program.cs
├── .gitignore
├── SinCosPolyPrecision
├── stdafx.h
├── stdafx.cpp
├── targetver.h
├── SinCosPolyPrecision.vcxproj.filters
├── SinCosPolyPrecision.cpp
└── SinCosPolyPrecision.vcxproj
├── Test
├── SinCos
│ ├── SinCos.h
│ ├── SinCosLinInt.hpp
│ ├── SinCos.cpp
│ ├── GTEngineDEF.h
│ └── Mathematics
│ │ ├── GteSinEstimate.h
│ │ ├── GteCosEstimate.h
│ │ └── GteConstants.h
├── stdafx.cpp
├── targetver.h
├── BitCount
│ ├── align.cpp
│ ├── BitCount.h
│ ├── BitCountBuiltin.cpp
│ ├── BitCountLookup.cpp
│ ├── BitCount.cpp
│ ├── BitCountSse.cpp
│ └── align.h
├── stdafx.h
├── LookupTables.cpp
├── LookupTables.vcxproj.filters
└── LookupTables.vcxproj
├── LookupTables.sln
└── readme.md
/absolute.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Const-me/LookupTables/HEAD/absolute.png
--------------------------------------------------------------------------------
/relative.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Const-me/LookupTables/HEAD/relative.png
--------------------------------------------------------------------------------
/linIntError.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Const-me/LookupTables/HEAD/linIntError.png
--------------------------------------------------------------------------------
/RawResults/Results.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Const-me/LookupTables/HEAD/RawResults/Results.xlsx
--------------------------------------------------------------------------------
/ParseResults/App.config:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .vs/
2 | LookupTables.VC.db
3 | LookupTables.VC.VC.opendb
4 | Test/Win32/
5 | Test/x64/
6 | ParseResults/obj/
7 | ParseResults/bin/
8 | ipch/
9 | *.user
10 | SinCosPolyPrecision/Win32/
11 | SinCosPolyPrecision/x64/
--------------------------------------------------------------------------------
/SinCosPolyPrecision/stdafx.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "targetver.h"
4 |
5 | #include
6 | #include
7 | #include
8 | #define _USE_MATH_DEFINES
9 | #include
10 | #include
11 | #include
--------------------------------------------------------------------------------
/Test/SinCos/SinCos.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
// Declarations for the sine/cosine benchmark.
namespace nsSinCos
{
// Names the sine/cosine implementations that can be benchmarked; the underlying type is uint8_t.
enum struct eAlgo: uint8_t
{
StdLib,

Lookup,

PolyGTE,

#ifdef _WIN32
// The next two enumerators exist only when _WIN32 is defined.
PolyDX,
PolyDxLow,
#endif
};

// Benchmark entry point; returns a stopwatch::duration.
// NOTE(review): the template parameter list is not visible on the next line; presumably it takes an eAlgo value - confirm.
template
stopwatch::duration testSinCos();
}
--------------------------------------------------------------------------------
/Test/stdafx.cpp:
--------------------------------------------------------------------------------
1 | // stdafx.cpp : source file that includes just the standard includes
2 | // LookupTables.pch will be the pre-compiled header
3 | // stdafx.obj will contain the pre-compiled type information
4 |
5 | #include "stdafx.h"
6 |
7 | // TODO: reference any additional headers you need in STDAFX.H
8 | // and not in this file
9 |
--------------------------------------------------------------------------------
/Test/targetver.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | // Including SDKDDKVer.h defines the highest available Windows platform.
4 |
5 | // If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
6 | // set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
7 |
8 | #include
9 |
--------------------------------------------------------------------------------
/SinCosPolyPrecision/stdafx.cpp:
--------------------------------------------------------------------------------
1 | // stdafx.cpp : source file that includes just the standard includes
2 | // SinCosPolyPrecision.pch will be the pre-compiled header
3 | // stdafx.obj will contain the pre-compiled type information
4 |
5 | #include "stdafx.h"
6 |
7 | // TODO: reference any additional headers you need in STDAFX.H
8 | // and not in this file
9 |
--------------------------------------------------------------------------------
/SinCosPolyPrecision/targetver.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | // Including SDKDDKVer.h defines the highest available Windows platform.
4 |
5 | // If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
6 | // set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
7 |
8 | #include
9 |
--------------------------------------------------------------------------------
/Test/BitCount/align.cpp:
--------------------------------------------------------------------------------
1 | #include "stdafx.h"
2 | #include "align.h"
3 |
4 | void* detail::allocate_aligned_memory( size_t align, size_t size ) noexcept
5 | {
6 | assert( align >= sizeof( void* ) );
7 | if( size == 0 )
8 | return nullptr;
9 | return _aligned_malloc( size, align );
10 | }
11 |
12 | void detail::deallocate_aligned_memory( void *ptr ) noexcept
13 | {
14 | return _aligned_free( ptr );
15 | }
--------------------------------------------------------------------------------
/Test/BitCount/BitCount.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "align.h"
3 |
// Declarations for the bit counting benchmark.
namespace nsBitCnt
{
// Benchmark driver: nAlgo selects the implementation to run; returns a stopwatch::duration.
stopwatch::duration test( int nAlgo );

// Each function below takes the vector to scan and returns a 64-bit count.
uint64_t testLookup( const alignedVector& vec );

uint64_t testSse2( const alignedVector& vec );
uint64_t testSsse3( const alignedVector& vec );
uint64_t testXop( const alignedVector& vec );
uint64_t testBuiltin( const alignedVector& vec );
}
--------------------------------------------------------------------------------
/RawResults/Relative.tsv:
--------------------------------------------------------------------------------
1 | SinCos
2 | desktop_32 desktop_64 laptop_32 laptop_64 tablet_32
3 | StdLib 179.9536957 92.98304131 162.8084785 123.2181684 157.78789
4 | Lookup 100 100 100 100 100
5 | GTE 82.28514215 92.95617369 85.07954026 102.7868762 108.8143041
6 | DX 65.3408725 80.64719148 64.31137149 91.00110594 83.03161227
7 | DX_Est 57.72547152 61.91282228 57.50528739 66.57133087 64.1036295
8 |
9 | BitCount
10 | desktop_32 desktop_64 laptop_32 laptop_64 tablet_32
11 | Lookup 100 100 100 100 100
12 | SSE2 14.58106797 45.97619823 20.09557259 54.19812104 32.25625402
13 | SSSE3 19.56030359 59.17546616 24.53645872 74.53063189 49.57154478
14 | POPCNT 19.48515159 30.89744576 20.69146076 33.29716138 30.94231022
15 |
--------------------------------------------------------------------------------
/Test/BitCount/BitCountBuiltin.cpp:
--------------------------------------------------------------------------------
1 | #include "stdafx.h"
2 | #include "BitCount.h"
3 |
4 | namespace nsBitCnt
5 | {
6 | // Calculate bits using POPCNT instruction.
7 | uint64_t testBuiltin( const alignedVector& vec )
8 | {
9 | #ifdef _MSC_VER
10 | #define popcnt64 __popcnt64
11 | #define popcnt32 __popcnt
12 | #else
13 | #define popcnt64 __popcntq
14 | #define popcnt32 __popcntd
15 | #endif
16 |
17 | #if defined(_M_X64) || defined(__amd64__)
18 | #define _pc popcnt64
19 | using tElt = uint64_t;
20 | #else
21 | #define _pc popcnt32
22 | using tElt = uint32_t;
23 | #endif
24 | const tElt* p = (const tElt*)vec.data();
25 | const tElt* pEnd = p + ( vec.size() * sizeof( uint16_t ) / sizeof( tElt ) );
26 |
27 | uint64_t bits = _pc( *p );
28 | p++;
29 | while( p < pEnd )
30 | {
31 | bits += _pc( *p );
32 | p++;
33 | }
34 | return bits;
35 | }
36 | }
--------------------------------------------------------------------------------
/RawResults/Parsed.tsv:
--------------------------------------------------------------------------------
1 | SinCos
2 | desktop_32 desktop_64 laptop_32 laptop_64 tablet_32
3 | StdLib 404.756525666667 183.099744 461.678577666667 345.527862 1522.45437133333
4 | Lookup 224.922597 196.917353333333 283.571581666667 280.419573333333 964.874029
5 | GTE 185.077878666667 183.046837 241.261398 288.234519666667 1049.92096033333
6 | DX 146.966387333333 158.808315 182.368773333333 255.184913 801.150462666667
7 | DX_Est 129.837629666667 121.917091 163.068653 186.679042 618.519272666667
8 |
9 | BitCount
10 | desktop_32 desktop_64 laptop_32 laptop_64 tablet_32
11 | Lookup 240.437594 74.874501 284.062135666667 91.4256473333333 597.494531
12 | SSE2 35.058369 34.424449 57.0839126666667 49.550983 192.729353666667
13 | SSSE3 47.0303233333333 44.307335 69.6987886666667 68.1401126666667 296.187269
14 | POPCNT 46.8496296666667 23.1343083333333 58.7766053333333 30.4421453333333 184.878611333333
15 |
--------------------------------------------------------------------------------
/Test/stdafx.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "targetver.h"
3 |
4 | #include
5 | #include
6 |
7 | #include
8 | #define _USE_MATH_DEFINES
9 | #include
10 | #include
11 | #include
12 |
13 | #include
14 | using std::vector;
15 |
16 | #include
17 | using std::array;
18 |
19 | #include
20 | typedef std::chrono::high_resolution_clock stopwatch;
21 |
22 | #ifdef _WIN32
23 | #include
24 | #define WIN32_LEAN_AND_MEAN
25 | #include
26 | #endif
27 |
// Returns a millisecond tick value truncated to 32 bits.
// With _WIN32 defined this is GetTickCount(); otherwise it is built from
// clock_gettime( CLOCK_REALTIME ): nanoseconds / 1000000 plus seconds * 1000.
inline uint32_t getTickCount()
{
#ifdef _WIN32
	return GetTickCount();
#else
	struct timespec now;
	clock_gettime( CLOCK_REALTIME, &now );
	// Sub-second part first, then add whole seconds; the unsigned value wraps around.
	unsigned milliseconds = now.tv_nsec / 1000000;
	milliseconds += now.tv_sec * 1000;
	return milliseconds;
#endif
}
41 |
42 | inline void resetRand()
43 | {
44 | srand( getTickCount() );
45 | }
--------------------------------------------------------------------------------
/RawResults/laptop_32.txt:
--------------------------------------------------------------------------------
1 | SinCos
2 | 1 0.000331 / 0.000091 457.499649
3 | 1 -0.000006 / 0.000265 464.936045
4 | 1 0.000216 / 0.000202 462.600039
5 | 2 -0.000353 / 0.000094 282.346982
6 | 2 -0.000003 / 0.000185 282.180539
7 | 2 0.000293 / -0.000104 286.187224
8 | 3 0.000628 / -0.000044 240.207853
9 | 3 0.000173 / 0.000198 240.642925
10 | 3 -0.000336 / -0.000260 242.933416
11 | 4 0.000349 / 0.000232 183.415030
12 | 4 0.000075 / 0.000633 179.753280
13 | 4 0.000352 / -0.000016 183.938010
14 | 5 0.000067 / -0.000190 160.336992
15 | 5 -0.000266 / 0.000151 168.412387
16 | 5 0.000115 / -0.000147 160.456580
17 | BitCount
18 | 1 750005609 263.460813
19 | 1 750005461 268.941390
20 | 1 750006700 319.784204
21 | 2 749997104 57.007905
22 | 2 749998576 57.073947
23 | 2 750001899 57.169886
24 | 3 749990732 69.994489
25 | 3 750008066 69.566557
26 | 3 749994005 69.535320
27 | 4 This CPU doesn't support XOP instruction set.0.000000
28 | 4 This CPU doesn't support XOP instruction set.0.000000
29 | 4 This CPU doesn't support XOP instruction set.0.000000
30 | 5 749995957 58.769168
31 | 5 750007034 58.758905
32 | 5 750003094 58.801743
33 |
--------------------------------------------------------------------------------
/RawResults/laptop_64.txt:
--------------------------------------------------------------------------------
1 | SinCos
2 | 1 -0.000087 / 0.000315 338.650739
3 | 1 -0.000218 / 0.000161 348.276331
4 | 1 -0.000228 / -0.000103 349.656516
5 | 2 0.037992 / -0.000081 285.009627
6 | 2 0.037861 / -0.000156 280.632127
7 | 2 0.038111 / -0.000174 275.616966
8 | 3 0.000152 / -0.000035 268.059197
9 | 3 -0.000075 / -0.000456 273.469715
10 | 3 -0.000150 / 0.000081 323.174647
11 | 4 0.000159 / -0.000192 248.901720
12 | 4 0.000113 / 0.000009 255.936064
13 | 4 -0.000143 / 0.000206 260.716955
14 | 5 0.000180 / 0.000084 188.482845
15 | 5 0.000038 / -0.000145 191.554236
16 | 5 0.000019 / 0.000190 180.000045
17 | BitCount
18 | 1 749983487 91.259502
19 | 1 749989718 91.397386
20 | 1 750000247 91.620054
21 | 2 749996617 49.734383
22 | 2 749995530 49.489403
23 | 2 749996927 49.429163
24 | 3 750012749 65.660273
25 | 3 750000511 73.860166
26 | 3 749991648 64.899899
27 | 4 This CPU doesn't support XOP instruction set.0.000000
28 | 4 This CPU doesn't support XOP instruction set.0.000000
29 | 4 This CPU doesn't support XOP instruction set.0.000000
30 | 5 749990728 30.728178
31 | 5 749996849 30.154328
32 | 5 749982555 30.443930
33 |
--------------------------------------------------------------------------------
/RawResults/desktop_32.txt:
--------------------------------------------------------------------------------
1 | SinCos
2 | 1 0.000183 / 0.000024 404.772739
3 | 1 0.000014 / 0.000351 404.965059
4 | 1 -0.000211 / -0.000226 404.531779
5 | 2 0.000122 / 0.000149 225.209424
6 | 2 -0.000095 / 0.000403 225.211664
7 | 2 -0.000121 / -0.000078 224.346703
8 | 3 -0.000146 / 0.000479 184.801719
9 | 3 0.000012 / 0.000415 185.113718
10 | 3 0.000102 / -0.000369 185.318199
11 | 4 -0.000041 / -0.000243 147.260254
12 | 4 0.000016 / 0.000089 146.934494
13 | 4 -0.000207 / -0.000020 146.704414
14 | 5 0.000009 / 0.000110 130.457043
15 | 5 0.000217 / -0.000466 130.508883
16 | 5 0.000253 / 0.000013 128.546963
17 | BitCount
18 | 1 749995608 230.947348
19 | 1 750002573 257.653925
20 | 1 749994654 232.711509
21 | 2 750015525 35.056022
22 | 2 749994490 35.063383
23 | 2 750003652 35.055702
24 | 3 750002897 46.917470
25 | 3 749987174 47.343070
26 | 3 749992602 46.830430
27 | 4 This CPU doesn't support XOP instruction set.0.000000
28 | 4 This CPU doesn't support XOP instruction set.0.000000
29 | 4 This CPU doesn't support XOP instruction set.0.000000
30 | 5 749978439 47.672031
31 | 5 750001358 46.298909
32 | 5 750013174 46.577949
33 |
--------------------------------------------------------------------------------
/RawResults/desktop_64.txt:
--------------------------------------------------------------------------------
1 | SinCos
2 | 1 -0.000108 / 0.000542 182.713397
3 | 1 -0.000241 / -0.000410 182.296117
4 | 1 -0.000193 / -0.000140 184.289718
5 | 2 0.037735 / -0.000581 197.624127
6 | 2 0.038014 / -0.000103 197.432127
7 | 2 0.037950 / 0.000446 195.695806
8 | 3 0.000164 / -0.000187 182.557557
9 | 3 0.000094 / -0.000511 183.252917
10 | 3 0.000387 / 0.000001 183.330037
11 | 4 -0.000488 / 0.000444 158.287461
12 | 4 0.000104 / -0.000164 160.676903
13 | 4 0.000164 / -0.000182 157.460581
14 | 5 -0.000037 / 0.000055 122.187278
15 | 5 -0.000200 / 0.000086 121.219917
16 | 5 -0.000116 / -0.000263 122.344078
17 | BitCount
18 | 1 750018142 75.096048
19 | 1 749985960 74.529967
20 | 1 749995042 74.997488
21 | 2 750002267 34.381462
22 | 2 750003782 34.414422
23 | 2 750002314 34.477463
24 | 3 750004016 44.472028
25 | 3 749991643 44.144029
26 | 3 750015061 44.305948
27 | 4 This CPU doesn't support XOP instruction set.0.000000
28 | 4 This CPU doesn't support XOP instruction set.0.000000
29 | 4 This CPU doesn't support XOP instruction set.0.000000
30 | 5 750023961 22.786255
31 | 5 749998210 23.233295
32 | 5 750000563 23.383375
33 |
--------------------------------------------------------------------------------
/RawResults/tablet_32.txt:
--------------------------------------------------------------------------------
1 | SinCos
2 | 1 -0.000134 / -0.000098 1567.576618
3 | 1 -0.000204 / -0.000125 1492.351882
4 | 1 0.000310 / -0.000196 1507.434614
5 | 2 -0.000009 / 0.000008 997.259779
6 | 2 0.000061 / 0.000159 899.369089
7 | 2 0.000183 / 0.000105 997.993219
8 | 3 0.000042 / 0.000321 1051.851454
9 | 3 -0.000120 / 0.000155 1035.472339
10 | 3 0.000279 / 0.000181 1062.439088
11 | 4 -0.000158 / -0.000164 797.009412
12 | 4 0.000155 / 0.000139 793.296905
13 | 4 -0.000175 / 0.000232 813.145071
14 | 5 -0.000433 / 0.000279 610.483187
15 | 5 -0.000309 / 0.000145 628.213980
16 | 5 0.000007 / 0.000118 616.860651
17 | BitCount
18 | 1 749995519 605.079546
19 | 1 749997515 580.843800
20 | 1 749992402 606.560247
21 | 2 750009716 163.519279
22 | 2 749978422 173.830434
23 | 2 749996628 240.838348
24 | 3 750016545 338.547023
25 | 3 749997743 284.511380
26 | 3 750001774 265.503404
27 | 4 This CPU doesn't support XOP instruction set.0.000000
28 | 4 This CPU doesn't support XOP instruction set.0.000000
29 | 4 This CPU doesn't support XOP instruction set.0.000000
30 | 5 750000321 184.418068
31 | 5 749995447 186.380305
32 | 5 749993234 183.837461
33 |
--------------------------------------------------------------------------------
/SinCosPolyPrecision/SinCosPolyPrecision.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hh;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | Header Files
20 |
21 |
22 | Header Files
23 |
24 |
25 |
26 |
27 | Source Files
28 |
29 |
30 | Source Files
31 |
32 |
33 |
--------------------------------------------------------------------------------
/Test/BitCount/BitCountLookup.cpp:
--------------------------------------------------------------------------------
1 | #include "stdafx.h"
2 | #include "BitCount.h"
3 |
4 | namespace nsBitCnt
5 | {
6 | // http://stackoverflow.com/a/21455308/126995
7 | static uint8_t BYTE_BIT_COUNTS[ 256 ] =
8 | {
9 | 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
10 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
11 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
12 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
13 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
14 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
15 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
16 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
17 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
18 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
19 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
20 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
21 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
22 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
23 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
24 | 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
25 | };
26 |
27 | uint64_t testLookup( const alignedVector& vec )
28 | {
29 | const uint8_t* p = (const uint8_t*)vec.data();
30 | const uint8_t* const pEnd = p + vec.size() * sizeof( uint16_t );
31 |
32 | uint64_t res = BYTE_BIT_COUNTS[ *p ];
33 | p++;
34 |
35 | for( ; p < pEnd; p++ )
36 | res += BYTE_BIT_COUNTS[ *p ];
37 | return res;
38 | }
39 | }
--------------------------------------------------------------------------------
/ParseResults/Properties/AssemblyInfo.cs:
--------------------------------------------------------------------------------
1 | using System.Reflection;
2 | using System.Runtime.CompilerServices;
3 | using System.Runtime.InteropServices;
4 |
5 | // General Information about an assembly is controlled through the following
6 | // set of attributes. Change these attribute values to modify the information
7 | // associated with an assembly.
8 | [assembly: AssemblyTitle( "ParseResults" )]
9 | [assembly: AssemblyDescription( "" )]
10 | [assembly: AssemblyConfiguration( "" )]
11 | [assembly: AssemblyCompany( "" )]
12 | [assembly: AssemblyProduct( "ParseResults" )]
13 | [assembly: AssemblyCopyright( "Copyright © 2016" )]
14 | [assembly: AssemblyTrademark( "" )]
15 | [assembly: AssemblyCulture( "" )]
16 |
17 | // Setting ComVisible to false makes the types in this assembly not visible
18 | // to COM components. If you need to access a type in this assembly from
19 | // COM, set the ComVisible attribute to true on that type.
20 | [assembly: ComVisible( false )]
21 |
22 | // The following GUID is for the ID of the typelib if this project is exposed to COM
23 | [assembly: Guid( "4b4bf54c-38cd-438e-823d-727f0d77d1d0" )]
24 |
25 | // Version information for an assembly consists of the following four values:
26 | //
27 | // Major Version
28 | // Minor Version
29 | // Build Number
30 | // Revision
31 | //
32 | // You can specify all the values or you can default the Build and Revision Numbers
33 | // by using the '*' as shown below:
34 | // [assembly: AssemblyVersion("1.0.*")]
35 | [assembly: AssemblyVersion( "1.0.0.0" )]
36 | [assembly: AssemblyFileVersion( "1.0.0.0" )]
37 |
--------------------------------------------------------------------------------
/SinCosPolyPrecision/SinCosPolyPrecision.cpp:
--------------------------------------------------------------------------------
1 | #include "stdafx.h"
2 | #include "../Test/SinCos/SinCosLinInt.hpp"
3 |
// Accumulates the absolute values of the samples passed to add() and
// reports their mean and maximum.
class Stat
{
	double m_sum;
	double m_max;
	int m_count;

public:
	Stat() : m_sum( 0 ), m_max( 0 ), m_count( 0 ) {}

	// Records one sample; only its magnitude is kept.
	void add( double v )
	{
		// fabs always takes a double. An unqualified abs() may resolve to
		// int abs( int ), which would truncate errors below 1 to zero.
		v = fabs( v );
		m_count++;
		m_sum += v;
		m_max = std::max( m_max, v );
	}

	// Writes "<title>\t<mean>\t<max>\n" to stdout in exponent notation with
	// DBL_DECIMAL_DIG digits after the decimal point.
	// The mean is printed as 0 when no samples were added.
	void print( const char* title ) const
	{
		const int digs = DBL_DECIMAL_DIG;
		const double mean = ( m_count > 0 ) ? ( m_sum / m_count ) : 0.0;
		printf( "%s\t%.*e\t%.*e\n",
			title,
			digs, mean,
			digs, m_max );
	}
};
30 |
31 | int main()
32 | {
33 | using namespace DirectX;
34 | const int nTests = 1000;
35 |
36 | Stat sinHi, sinLo, cosHi, cosLo, sinLinInt, cosLinInt;
37 |
38 | const LinInt linInt;
39 |
40 | for( int i = 0; i < nTests; i++ )
41 | {
42 | float angle = float( M_PI * i / double( nTests ) );
43 |
44 | float sinPrecise = sinf( angle );
45 | float cosPrecise = cosf( angle );
46 |
47 | float s, c;
48 | XMScalarSinCos( &s, &c, angle );
49 | sinHi.add( s - sinPrecise );
50 | cosHi.add( c - cosPrecise );
51 |
52 | XMScalarSinCosEst( &s, &c, angle );
53 | sinLo.add( s - sinPrecise );
54 | cosLo.add( c - cosPrecise );
55 |
56 | linInt.sinCos( angle, s, c );
57 | sinLinInt.add( s - sinPrecise );
58 | cosLinInt.add( c - cosPrecise );
59 | }
60 |
61 | sinHi.print( "High-degree sine" );
62 | cosHi.print( "High-degree cosine" );
63 | sinLo.print( "Low-degree sine" );
64 | cosLo.print( "Low-degree cosine" );
65 | sinLinInt.print( "LinInt sine" );
66 | cosLinInt.print( "LinInt cosine" );
67 |
68 | return 0;
69 | }
--------------------------------------------------------------------------------
/Test/LookupTables.cpp:
--------------------------------------------------------------------------------
1 | #include "stdafx.h"
2 | #include "SinCos.h"
3 | #include "BitCount/BitCount.h"
4 |
// Converts the measured duration to the local `ms` type, prints its count with "%f"
// followed by a newline, and returns 0 so callers can use it as an exit code.
// NOTE(review): the template arguments of std::chrono::duration are not visible here;
// presumably a floating-point millisecond duration - confirm.
int result( stopwatch::duration dr )
{
typedef std::chrono::duration ms;
ms d = dr;
printf( "%f\n", d.count() );
return 0;
}
12 |
// Handles the "sincos" command.
// argc / argv are the arguments that remain after the command name.
// The first one, if present, is parsed with atoi as the algorithm number; without it the number is -1.
// Prints the number followed by a tab, then runs the matching benchmark and returns result()'s value.
// For any other number it prints the list of valid values and returns -1.
int sinCos( int argc, char** argv )
{
int a = -1;
if( argc > 0 )
a = atoi( *argv );
printf( "%i\t", a );

using namespace nsSinCos;
// NOTE(review): every case below calls testSinCos() with no visible template argument;
// presumably each case selects a different eAlgo value, in the order listed in the help text - confirm.
switch( a )
{
case 1:
return result( testSinCos() );
case 2:
return result( testSinCos() );
case 3:
return result( testSinCos() );
case 4:
return result( testSinCos() );
case 5:
return result( testSinCos() );
}

// Reached only for an unrecognized algorithm number.
printf( "Unknown algorithm.\nPossible values: 1 = C standard library, 2 = lookup table, 3 = GTEngine high-degree poly, 4 = DirectX high-degree poly, 5 = DirectX low-degree poly" );
return -1;
}
38 |
39 | int bitCnt( int argc, char** argv )
40 | {
41 | if( argc <= 0 )
42 | {
43 | nsBitCnt::test( -1 );
44 | return 1;
45 | }
46 |
47 | int a = atoi( *argv );
48 | return result( nsBitCnt::test( a ) );
49 | }
50 |
51 | int main( int argc, char** argv )
52 | {
53 | if( argc <= 1 || argc > 3 )
54 | {
55 | printf( "Usage: LookupTables \n" );
56 | return 1;
57 | }
58 |
59 | if( 0 == _strcmpi( argv[ 1 ], "sincos" ) )
60 | {
61 | return sinCos( argc - 2, argv + 2 );
62 | }
63 |
64 | if( 0 == _strcmpi( argv[ 1 ], "bitcount" ) )
65 | {
66 | return bitCnt( argc - 2, argv + 2 );
67 | }
68 |
69 |
70 | printf( "E_NOTIMPL\n" );
71 | return 1;
72 | }
--------------------------------------------------------------------------------
/Test/SinCos/SinCosLinInt.hpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
#define _PI (float)( M_PI )
#define _2PI (float)( M_PI * 2 )
#define _PIDIV2 (float)( M_PI / 2 )
#define _1DIV2PI (float)( 1.0 / ( M_PI * 2 ) )

// Sine/cosine approximation by linear interpolation in a precomputed table.
// The table holds size + 1 samples of sin and cos, evenly spaced over [-PI/2, +PI/2].
class LinInt
{
	static const size_t size = 256;
	// Signed copies of the table geometry. All index arithmetic is done in int,
	// because mixing int with the unsigned `size` makes negative offsets wrap around.
	static const int halfSize = int( size / 2 );
	static const int lastSegment = int( size ) - 1;
	// Two tables for sin and cos, from -PI/2 to +PI/2, interleaved for cache friendliness:
	// element 2*i is the sine and element 2*i+1 the cosine of sample i, for i in [0, size].
	std::array<float, ( size + 1 ) * 2> lookupTable;
	// Converts an angle in radians to a (fractional) table position relative to the middle sample.
	const float indexMul;

	// value should be from -PI/2 to +PI/2
	inline void lookup( float value, float& sin, float& cos ) const
	{
		// Calculate index + coefficients for linear interpolation
		value *= indexMul;
		int i1 = int( floor( value ) ) + halfSize;
		// Keep the segment inside the table. At exactly +PI/2, or after rounding just
		// below -PI/2, the raw index would select a segment that does not exist.
		if( i1 < 0 )
			i1 = 0;
		else if( i1 > lastSegment )
			i1 = lastSegment;
		// Fractional position inside the selected segment; 1 at the far end of the last segment.
		value -= float( i1 - halfSize );
		const float b = 1.0f - value;

		// Interpolate both sin + cos using same coefficients.
		const float* entries = lookupTable.data() + ( i1 << 1 );
		sin = entries[ 0 ] * b + entries[ 2 ] * value;
		cos = entries[ 1 ] * b + entries[ 3 ] * value;
	}
public:
	// Fills the table with the sine and cosine of PI * ( i - size / 2 ) / size for i in [0, size].
	LinInt() : indexMul( float( size / M_PI ) )
	{
		for( int i = 0; i <= int( size ); i++ )
		{
			// The offset from the middle sample must be signed: it is negative for the lower half.
			const double val = M_PI * ( ( i - halfSize ) / double( size ) );
			lookupTable[ i * 2 ] = float( sin( val ) );
			lookupTable[ i * 2 + 1 ] = float( cos( val ) );
		}
	}

	// Computes approximations of the sine and cosine of Value (radians).
	// Value is first reduced to [-PI, PI] and then folded into the table range [-PI/2, PI/2].
	// The results are written to `sin` and `cos`.
	inline void sinCos( float Value, float& sin, float& cos ) const
	{
		// Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
		float quotient = _1DIV2PI * Value;
		if( Value >= 0.0f )
		{
			quotient = (float)( (int)( quotient + 0.5f ) );
		}
		else
		{
			quotient = (float)( (int)( quotient - 0.5f ) );
		}
		float y = Value - _2PI * quotient;

		// Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
		float sign;
		if( y > _PIDIV2 )
		{
			y = _PI - y;
			sign = -1.0f;
		}
		else if( y < -_PIDIV2 )
		{
			y = -_PI - y;
			sign = -1.0f;
		}
		else
		{
			sign = +1.0f;
		}
		// Interpolate both
		float cosNoSign;
		lookup( y, sin, cosNoSign );
		// Restore cos sign
		cos = cosNoSign * sign;
	}
};
--------------------------------------------------------------------------------
/Test/LookupTables.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
6 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
7 |
8 |
9 | {949c2519-00bd-4a39-9f34-5af9561dc569}
10 |
11 |
12 | {ee34c638-c5f4-47f4-9863-fdfdab6f36c8}
13 |
14 |
15 |
16 |
17 |
18 |
19 | BitCount
20 |
21 |
22 | BitCount
23 |
24 |
25 | SinCos
26 |
27 |
28 | SinCos
29 |
30 |
31 | SinCos
32 |
33 |
34 | SinCos
35 |
36 |
37 | SinCos
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 | BitCount
46 |
47 |
48 | BitCount
49 |
50 |
51 | BitCount
52 |
53 |
54 | BitCount
55 |
56 |
57 | BitCount
58 |
59 |
60 | SinCos
61 |
62 |
63 |
--------------------------------------------------------------------------------
/Test/SinCos/SinCos.cpp:
--------------------------------------------------------------------------------
1 | #include "stdafx.h"
2 | #include "SinCos.h"
3 | #include
4 | #include
5 | #include "SinCosLinInt.hpp"
6 |
7 | using namespace nsSinCos;
8 |
// Reference implementation: forwards to the C runtime's sinf and cosf.
struct StdLib
{
	// Writes the sine of `val` to `s` and the cosine of `val` to `c`.
	void sinCos( float val, float& s, float& c ) const
	{
		const float sine = sinf( val );
		const float cosine = cosf( val );
		s = sine;
		c = cosine;
	}
};
17 |
18 |
19 | #ifdef _WIN32
20 | struct DX
21 | {
22 | inline void sinCos( float val, float& s, float& c ) const
23 | {
24 | DirectX::XMScalarSinCos( &s, &c, val );
25 | }
26 | };
27 | struct DxLow
28 | {
29 | inline void sinCos( float val, float& s, float& c ) const
30 | {
31 | DirectX::XMScalarSinCosEst( &s, &c, val );
32 | }
33 | };
34 | #endif
35 |
// Adapter that computes sine and cosine with the gte estimate classes,
// using DegreeRR<11> for the sine and DegreeRR<10> for the cosine.
// NOTE(review): SinEstimate / CosEstimate appear without template arguments here;
// presumably they are class templates instantiated for float - confirm.
struct Gte
{
// Writes the sine estimate of `val` to `s` and the cosine estimate to `c`.
inline void sinCos( float val, float& s, float& c ) const
{
s = gte::SinEstimate::DegreeRR<11>( val );
c = gte::CosEstimate::DegreeRR<10>( val );
}
};
44 |
45 |
// Times one algorithm over all values in `src`.
// Calls algo.sinCos for every element and accumulates the sine and cosine results.
// Only the loop between the two stopwatch::now() calls is timed.
// Prints the average sine and cosine as "%f / %f\t", then returns the elapsed time.
// NOTE(review): the template parameter list and the element type of `vector` are not visible here;
// presumably `Algo` is a type parameter and the vector holds float - confirm.
template
stopwatch::duration testImpl( const vector& src, const Algo& algo )
{
// Running sums of the results; they are printed after the loop.
float rs = 0, rc = 0;
float s, c;
auto start = stopwatch::now();
for( float i : src )
{
algo.sinCos( i, s, c );
rs += s;
rc += c;
}
auto stop = stopwatch::now();
auto duration = stop - start;

const double avgSin = double( rs ) / double( src.size() );
const double avgCos = double( rc ) / double( src.size() );
printf( "%f / %f\t", avgSin, avgCos );
return duration;
}
66 |
// Prepares the input for one benchmark run and executes it.
// Reseeds rand(), fills a vector of testSize elements with float( 2.0 * rand() * M_PI / RAND_MAX ),
// default-constructs the algorithm object and returns the duration measured by the two-argument testImpl.
// NOTE(review): the template parameter list and the element type of `vector` are not visible here;
// presumably the algorithm type is the template parameter named `algo` - confirm.
template
static stopwatch::duration testImpl()
{
resetRand();

// static const size_t testSize = 10 * 1000;
static const size_t testSize = 10 * 1000 * 1000;

vector src;
src.resize( testSize );
for( float& f : src )
f = float( 2.0 * rand() * M_PI / RAND_MAX );

// const StdLib algo;
// const DX algo;
const algo algo;
return testImpl( src, algo );
}
85 |
// Explicit specializations of testSinCos; each one forwards to testImpl().
// Three are always compiled and two more only when _WIN32 is defined.
// NOTE(review): the template arguments of testSinCos and testImpl are not visible in any of the
// specializations below; presumably each pairs one eAlgo value with one algorithm struct - confirm.
namespace nsSinCos
{
template<>
stopwatch::duration testSinCos()
{
return testImpl();
}

template<>
stopwatch::duration testSinCos()
{
return testImpl();
}

template<>
stopwatch::duration testSinCos()
{
return testImpl();
}

#ifdef _WIN32
// The specializations in this section are compiled only when _WIN32 is defined.
template<>
stopwatch::duration testSinCos()
{
return testImpl();
}

template<>
stopwatch::duration testSinCos()
{
return testImpl();
}
#endif
}
--------------------------------------------------------------------------------
/ParseResults/ParseResults.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | AnyCPU
7 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}
8 | Exe
9 | Properties
10 | ParseResults
11 | ParseResults
12 | v4.5.2
13 | 512
14 | true
15 |
16 |
17 | AnyCPU
18 | true
19 | full
20 | false
21 | bin\Debug\
22 | DEBUG;TRACE
23 | prompt
24 | 4
25 |
26 |
27 | AnyCPU
28 | pdbonly
29 | true
30 | bin\Release\
31 | TRACE
32 | prompt
33 | 4
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
60 |
--------------------------------------------------------------------------------
/Test/BitCount/BitCount.cpp:
--------------------------------------------------------------------------------
1 | #include "stdafx.h"
2 | #include "BitCount.h"
3 | #include "align.h"
4 |
// static const size_t szArr = 10000;
// Number of elements in the benchmark array; the static_assert below sizes them as uint16_t.
static const size_t szArr = 100000000; // 100M elements, ~190 MB RAM

// Compile-time check that the array's byte size is a multiple of 16.
static_assert( 0 == ( szArr * sizeof( uint16_t ) % 16 ), "Invalid array length: must be a multiple of 16 bytes, because SSE" );
9 |
10 | namespace nsBitCnt
11 | {
12 | using fnTest = uint64_t( * )( const alignedVector& );
13 |
14 | static inline stopwatch::duration measure( const alignedVector& vec, fnTest test )
15 | {
16 | const auto start = stopwatch::now();
17 | const uint64_t bits = test( vec );
18 | const auto stop = stopwatch::now();
19 | printf( "%llu\t", bits );
20 | return stop - start;
21 | }
22 |
23 | static bool hasSsse3()
24 | {
25 | int regs[ 4 ];
26 | __cpuid( regs, 1 );
27 | const int ecx = regs[ 2 ];
28 | return 0 != ( ecx & ( 1 << 9 ) );
29 | }
30 |
31 | static bool hasXop()
32 | {
33 | int regs[ 4 ];
34 | __cpuid( regs, 0 );
35 |
36 | static const int idAmd[ 3 ] =
37 | {
38 | 0x68747541, 0x444d4163, 0x69746e65
39 | };
40 |
41 | if( 0 != memcmp( regs + 1, idAmd, 12 ) )
42 | return false;
43 |
44 | __cpuid( regs, 0x80000001 );
45 | const int ecx = regs[ 2 ];
46 | return 0 != ( ecx & ( 1 << 11 ) );
47 | }
48 |
49 | static bool hasBuiltin()
50 | {
51 | int regs[ 4 ];
52 | __cpuid( regs, 1 );
53 | const int ecx = regs[ 2 ];
54 | return 0 != ( ecx & ( 1 << 23 ) );
55 | }
56 |
57 | static inline fnTest testFunc( int nAlgo )
58 | {
59 | switch( nAlgo )
60 | {
61 | case 1:
62 | return &testLookup;
63 | case 2:
64 | return &testSse2;
65 | case 3:
66 | if( !hasSsse3() )
67 | {
68 | printf( "This CPU doesn't support SSSE3 instruction set." );
69 | return nullptr;
70 | }
71 | return &testSsse3;
72 | case 4:
73 | if( !hasXop() )
74 | {
75 | printf( "This CPU doesn't support XOP instruction set." );
76 | return nullptr;
77 | }
78 | return &testXop;
79 | case 5:
80 | if( !hasBuiltin() )
81 | {
82 | printf( "This CPU doesn't support POPCNT instruction." );
83 | return nullptr;
84 | }
85 | return &testBuiltin;
86 | default:
87 | break;
88 | }
89 | printf( "Unknown algorithm.\nPossible values: 1 = lookup table, 2 = SSE2, 3 = SSSE3, 4 = XOP, 5 = POPCNT\n" );
90 | return nullptr;
91 | }
92 |
93 | stopwatch::duration test( int nAlgo )
94 | {
95 | printf( "%i\t", nAlgo );
96 |
97 | const fnTest fn = testFunc( nAlgo );
98 | if( nullptr == fn )
99 | return stopwatch::duration();
100 |
101 | alignedVector vec;
102 | vec.resize( szArr );
103 |
104 | resetRand();
105 | for( uint16_t& i : vec )
106 | i = rand();
107 |
108 | return measure( vec, fn );
109 | }
110 | }
--------------------------------------------------------------------------------
/Test/BitCount/BitCountSse.cpp:
--------------------------------------------------------------------------------
1 | #include "stdafx.h"
2 | #include "BitCount.h"
3 |
4 | namespace nsBitCnt
5 | {
// 0x77 keeps the low three bits of every nibble: after a 1-bit right shift it discards
// the bit that crossed over from the neighbouring nibble or byte.
static const __m128i popcount_mask1 = _mm_set1_epi8( 0x77 );
// 0x0F keeps the low nibble of every byte.
static const __m128i popcount_mask2 = _mm_set1_epi8( 0x0F );

/// Counts the set bits of each of the 16 bytes in x using SSE2 only.
/// @return A vector whose every byte holds the bit count (0..8) of the matching input byte.
static inline __m128i popcnt8_sse2( __m128i x )
{
	// Per nibble: v - (v >> 1) - (v >> 2) - (v >> 3) equals the number of set bits in v.
	__m128i shifted = x;
	__m128i nibbleCounts = x;
	for( int step = 0; step < 3; step++ )
	{
		shifted = _mm_and_si128( popcount_mask1, _mm_srli_epi64( shifted, 1 ) );
		nibbleCounts = _mm_sub_epi8( nibbleCounts, shifted );
	}

	// Add the high nibble's count onto the low nibble's, then drop the leftover high nibble.
	const __m128i folded = _mm_add_epi8( nibbleCounts, _mm_srli_epi16( nibbleCounts, 4 ) );
	return _mm_and_si128( popcount_mask2, folded );
}
25 |
/// Counts the set bits of each of the 16 bytes in n, using the SSSE3 byte shuffle as a
/// 16-entry table lookup on the low and the high nibble of every byte.
static inline __m128i popcnt8_ssse3( __m128i n )
{
	static const __m128i nibbleMask = _mm_set1_epi8( 0x0F );
	// bitsInNibble[ v ] = number of set bits in the 4-bit value v
	static const __m128i bitsInNibble = _mm_setr_epi8( 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 );

	const __m128i lowNibbles = _mm_and_si128( n, nibbleMask );
	const __m128i highNibbles = _mm_and_si128( _mm_srli_epi16( n, 4 ), nibbleMask );

	const __m128i lowCounts = _mm_shuffle_epi8( bitsInNibble, lowNibbles );
	const __m128i highCounts = _mm_shuffle_epi8( bitsInNibble, highNibbles );
	return _mm_add_epi8( lowCounts, highCounts );
}
35 |
36 | static inline __m128i popcount8_xop( __m128i n )
37 | {
38 | static const __m128i popcount_mask = _mm_set1_epi8( 0x0F );
39 | static const __m128i popcount_table = _mm_setr_epi8( 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 );
40 | static const __m128i popcount_shift = _mm_set1_epi8( -4 );
41 |
42 | const __m128i pcnt0 = _mm_perm_epi8( popcount_table, popcount_table, _mm_and_si128( n, popcount_mask ) );
43 | const __m128i pcnt1 = _mm_perm_epi8( popcount_table, popcount_table, _mm_shl_epi8( n, popcount_shift ) );
44 | return _mm_add_epi8( pcnt0, pcnt1 );
45 | }
46 |
47 | template
48 | static inline uint64_t countBitsImpl( const alignedVector& vec, pcnt8 pc8 )
49 | {
50 | const __m128i* p = ( const __m128i* )vec.data();
51 | const __m128i* pEnd = p + ( vec.size() * sizeof( uint16_t ) / sizeof( __m128i ) );
52 |
53 | const __m128i zero = _mm_setzero_si128();
54 | __m128i res = zero;
55 | for( ; p < pEnd; p++ )
56 | {
57 | const __m128i cnt8 = pc8( *p );
58 | const __m128i cnt64 = _mm_sad_epu8( cnt8, zero );
59 | res = _mm_add_epi64( res, cnt64 );
60 | }
61 | return res.m128i_u64[ 0 ] + res.m128i_u64[ 1 ];
62 | }
63 |
64 | uint64_t testSse2( const alignedVector& vec )
65 | {
66 | return countBitsImpl( vec, &popcnt8_sse2 );
67 | }
68 | uint64_t testSsse3( const alignedVector& vec )
69 | {
70 | return countBitsImpl( vec, &popcnt8_ssse3 );
71 | }
72 | uint64_t testXop( const alignedVector& vec )
73 | {
74 | return countBitsImpl( vec, &popcount8_xop );
75 | }
76 | }
--------------------------------------------------------------------------------
/Test/SinCos/GTEngineDEF.h:
--------------------------------------------------------------------------------
1 | // David Eberly, Geometric Tools, Redmond WA 98052
2 | // Copyright (c) 1998-2016
3 | // Distributed under the Boost Software License, Version 1.0.
4 | // http://www.boost.org/LICENSE_1_0.txt
5 | // http://www.geometrictools.com/License/Boost/LICENSE_1_0.txt
6 | // File Version: 3.0.0 (2016/06/19)
7 |
8 | #pragma once
9 |
10 | //----------------------------------------------------------------------------
11 | // The platform specification.
12 | //
13 | // __MSWINDOWS__ : Microsoft Windows (WIN32 or WIN64)
14 | // __APPLE__ : Macintosh OS X
15 | // __LINUX__ : Linux or Cygwin
16 | //----------------------------------------------------------------------------
17 |
18 | #if !defined(__LINUX__) && (defined(WIN32) || defined(_WIN64))
19 | #define __MSWINDOWS__
20 |
21 | #if !defined(_MSC_VER)
22 | #error Microsoft Visual Studio 2013 or later is required.
23 | #endif
24 |
25 | // MSVC 6 is version 12.0
26 | // MSVC 7.0 is version 13.0 (MSVS 2002)
27 | // MSVC 7.1 is version 13.1 (MSVS 2003)
28 | // MSVC 8.0 is version 14.0 (MSVS 2005)
29 | // MSVC 9.0 is version 15.0 (MSVS 2008)
30 | // MSVC 10.0 is version 16.0 (MSVS 2010)
31 | // MSVC 11.0 is version 17.0 (MSVS 2012)
32 | // MSVC 12.0 is version 18.0 (MSVS 2013)
33 | // MSVC 14.0 is version 19.0 (MSVS 2015)
34 | // Currently, projects are provided only for MSVC 12.0 and 14.0.
35 | #if _MSC_VER < 1800
36 | #error Microsoft Visual Studio 2013 or later is required.
37 | #endif
38 |
39 | // Debug build values (choose_your_value is 0, 1, or 2)
40 | // 0: Disables checked iterators and disables iterator debugging.
41 | // 1: Enables checked iterators and disables iterator debugging.
42 | // 2: (default) Enables iterator debugging; checked iterators are not relevant.
43 | //
44 | // Release build values (choose_your_value is 0 or 1)
45 | // 0: (default) Disables checked iterators.
46 | // 1: Enables checked iterators; iterator debugging is not relevant.
47 | //
48 | // #define _ITERATOR_DEBUG_LEVEL choose_your_value
49 |
50 | #endif // WIN32 or _WIN64
51 |
52 | // TODO: Windows DLL configurations have not yet been added to the project,
53 | // but these defines are required to support them (when we do add them).
54 | //
55 | // Add GTE_EXPORT to project preprocessor options for dynamic library
56 | // configurations to export their symbols.
57 | #if defined(GTE_EXPORT)
58 | // For the dynamic library configurations.
59 | #define GTE_IMPEXP __declspec(dllexport)
60 | #else
61 | // For a client of the dynamic library or for the static library
62 | // configurations.
63 | #define GTE_IMPEXP
64 | #endif
65 |
66 | // Expose exactly one of these.
67 | #define GTE_USE_ROW_MAJOR
68 | //#define GTE_USE_COL_MAJOR
69 |
70 | // Expose exactly one of these.
71 | #define GTE_USE_MAT_VEC
72 | //#define GTE_USE_VEC_MAT
73 |
74 | #if (defined(GTE_USE_ROW_MAJOR) && defined(GTE_USE_COL_MAJOR)) || (!defined(GTE_USE_ROW_MAJOR) && !defined(GTE_USE_COL_MAJOR))
75 | #error Exactly one storage order must be specified.
76 | #endif
77 |
78 | #if (defined(GTE_USE_MAT_VEC) && defined(GTE_USE_VEC_MAT)) || (!defined(GTE_USE_MAT_VEC) && !defined(GTE_USE_VEC_MAT))
79 | #error Exactly one multiplication convention must be specified.
80 | #endif
81 |
--------------------------------------------------------------------------------
/LookupTables.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio 14
4 | VisualStudioVersion = 14.0.25420.1
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LookupTables", "Test\LookupTables.vcxproj", "{476AD73F-6B56-4B38-B499-1FC642BB9AEC}"
7 | EndProject
8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ParseResults", "ParseResults\ParseResults.csproj", "{4B4BF54C-38CD-438E-823D-727F0D77D1D0}"
9 | EndProject
10 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SinCosPolyPrecision", "SinCosPolyPrecision\SinCosPolyPrecision.vcxproj", "{B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}"
11 | EndProject
12 | Global
13 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
14 | Debug|Any CPU = Debug|Any CPU
15 | Debug|x64 = Debug|x64
16 | Debug|x86 = Debug|x86
17 | Release|Any CPU = Release|Any CPU
18 | Release|x64 = Release|x64
19 | Release|x86 = Release|x86
20 | EndGlobalSection
21 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
22 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Debug|Any CPU.ActiveCfg = Debug|Win32
23 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Debug|x64.ActiveCfg = Debug|x64
24 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Debug|x64.Build.0 = Debug|x64
25 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Debug|x86.ActiveCfg = Debug|Win32
26 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Debug|x86.Build.0 = Debug|Win32
27 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Release|Any CPU.ActiveCfg = Release|Win32
28 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Release|x64.ActiveCfg = Release|x64
29 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Release|x64.Build.0 = Release|x64
30 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Release|x86.ActiveCfg = Release|Win32
31 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}.Release|x86.Build.0 = Release|Win32
32 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
33 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Debug|Any CPU.Build.0 = Debug|Any CPU
34 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Debug|x64.ActiveCfg = Debug|Any CPU
35 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Debug|x64.Build.0 = Debug|Any CPU
36 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Debug|x86.ActiveCfg = Debug|Any CPU
37 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Debug|x86.Build.0 = Debug|Any CPU
38 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Release|Any CPU.ActiveCfg = Release|Any CPU
39 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Release|Any CPU.Build.0 = Release|Any CPU
40 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Release|x64.ActiveCfg = Release|Any CPU
41 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Release|x64.Build.0 = Release|Any CPU
42 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Release|x86.ActiveCfg = Release|Any CPU
43 | {4B4BF54C-38CD-438E-823D-727F0D77D1D0}.Release|x86.Build.0 = Release|Any CPU
44 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Debug|Any CPU.ActiveCfg = Debug|Win32
45 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Debug|x64.ActiveCfg = Debug|x64
46 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Debug|x64.Build.0 = Debug|x64
47 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Debug|x86.ActiveCfg = Debug|Win32
48 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Debug|x86.Build.0 = Debug|Win32
49 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Release|Any CPU.ActiveCfg = Release|Win32
50 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Release|x64.ActiveCfg = Release|x64
51 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Release|x64.Build.0 = Release|x64
52 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Release|x86.ActiveCfg = Release|Win32
53 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}.Release|x86.Build.0 = Release|Win32
54 | EndGlobalSection
55 | GlobalSection(SolutionProperties) = preSolution
56 | HideSolutionNode = FALSE
57 | EndGlobalSection
58 | EndGlobal
59 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # A Case Against Lookup Tables
In many places on the internet, you’ll find the advice “to improve performance of your code, use a pre-calculated lookup table”.
My point is that now, in 2016, in 80% of cases, that advice is terribly wrong.
I’ve written a couple of benchmarks to demonstrate.
**The first test**, SinCos, calculates sine + cosine of random angles.
[Inspired by a question on stackoverflow](http://stackoverflow.com/q/31814105/126995).
I’ve implemented the following algorithms:
1. Standard C runtime library, i.e. sinf + cosf functions.
2. A lookup table, the length is 256 values, with linear interpolation between the values, and optimized for this test case (the test calculates both sin and cosine, so the table holds the sin and cos values in adjacent memory addresses).
3. Minimax high-degree (11 for sine, 10 for cosine) polynomial approximation, as implemented in the [Geometric Tools](https://www.geometrictools.com/Source/Mathematics.html) library.
4. The same polynomial approximation (11 for sine, 10 for cosine), as implemented in the Microsoft DirectX SDK in [XMScalarSinCos](https://msdn.microsoft.com/en-us/library/windows/desktop/microsoft.directx_sdk.scalar.xmscalarsincos%28v=vs.85%29.aspx). It is slightly faster than the previous one because it is optimized for the simultaneous sin+cos case I have in my test.
5. Lower-degree polynomial approximation (7 for sine, 6 for cosine), as implemented in Microsoft DirectX SDK by [XMScalarSinCosEst](https://msdn.microsoft.com/en-us/library/windows/desktop/microsoft.directx_sdk.scalar.xmscalarsincosest(v=vs.85).aspx)
**The second test**, BitCount, calculates count of set bits in the array of random 16-bit values.
[Inspired by a google interview question](http://www.gwan.com/blog/20160405.html), where an interviewer expected the incorrect “lookup table” answer.
In the question there were merely 10000 short values. On today's hardware, that data set is too small for a meaningful benchmark. So my array is larger, with 100 million short values, taking about 190 MB RAM.
I’ve implemented the following algorithms:
1. 256 bytes lookup table.
2. SSE2 manually vectorized.
3. SSSE3 manually vectorized.
4. XOP manually vectorized (untested because I don't have the hardware).
5. POPCNT instruction.
The algorithms 2-4 are [from the SO](http://stackoverflow.com/a/17355341/126995).
## Hardware
Currently, I happen to have three PCs on my desk.
* A desktop with [i5-4460](http://ark.intel.com/products/80817/Intel-Core-i5-4460-Processor-6M-Cache-up-to-3_40-GHz), 16GB RAM, Windows 10 x64
* A laptop with [i5-2410m](http://ark.intel.com/products/52224/Intel-Core-i5-2410M-Processor-3M-Cache-up-to-2_90-GHz), 8GB RAM, Windows 8.1 x64.
* A tablet with Atom [Z3735](http://ark.intel.com/products/80275/Intel-Atom-Processor-Z3735G-2M-Cache-up-to-1_83-GHz), 1GB RAM, Windows 10 x86.
## Test Results
Here are the absolute values in milliseconds:

Here are the values in percent, relative to the lookup table performance:

You see the pattern here?
In 100% of my test cases, using a lookup table is not good for performance. And in some cases the difference is **huge**, like 7 times slower than another implementation.
## Final Words
The people who wrote those classic programming books were using very old computers. Since those classic books were written, CPU computation speed has improved by orders of magnitude compared to RAM latency. Even the highly sophisticated multi-level caches only help to some extent. That's why, on modern hardware, implementing a lookup table to save CPU time is very often a bad idea.
Sure, there are cases when a lookup table indeed improves performance, such as when the content is really hard to compute, [rainbow tables](https://en.wikipedia.org/wiki/Rainbow_table) being the extreme example.
But please, stop using lookup tables for trivial things like trigonometry or bit counting. When running on modern hardware, a CPU computes stuff much faster than you think it does, and accesses memory much slower than you think it does.
Also, please stop writing and reading those deprecated programming books that teach people to use lookup tables to optimize performance.
## Bonus Chapter: Polynomial Approximation Precision
You might be wondering “polynomial approximation sounds scary, how precise is that thing?”
It’s very precise.
Even the low-degree approximation is more precise than my 256-values lookup table.
Here’s the data:

The app that calculates those errors is also in this repository, SinCosPolyPrecision subfolder.
Not only is a lookup table for sine and cosine slower, it’s less precise as well.
--------------------------------------------------------------------------------
/Test/SinCos/Mathematics/GteSinEstimate.h:
--------------------------------------------------------------------------------
1 | // David Eberly, Geometric Tools, Redmond WA 98052
2 | // Copyright (c) 1998-2016
3 | // Distributed under the Boost Software License, Version 1.0.
4 | // http://www.boost.org/LICENSE_1_0.txt
5 | // http://www.geometrictools.com/License/Boost/LICENSE_1_0.txt
6 | // File Version: 3.0.0 (2016/06/19)
7 |
8 | #pragma once
9 |
10 | #include
11 |
12 | // Minimax polynomial approximations to sin(x). The polynomial p(x) of
13 | // degree D has only odd-power terms, is required to have linear term x,
14 | // and p(pi/2) = sin(pi/2) = 1. It minimizes the quantity
15 | // maximum{|sin(x) - p(x)| : x in [-pi/2,pi/2]} over all polynomials of
16 | // degree D subject to the constraints mentioned.
17 |
18 | namespace gte
19 | {
20 |
21 | template
22 | class SinEstimate
23 | {
24 | public:
25 | // The input constraint is x in [-pi/2,pi/2]. For example,
26 | // float x; // in [-pi/2,pi/2]
27 | // float result = SinEstimate::Degree<3>(x);
28 | template
29 | inline static Real Degree(Real x);
30 |
31 | // The input x can be any real number. Range reduction is used to
32 | // generate a value y in [-pi/2,pi/2] for which sin(y) = sin(x).
33 | // For example,
34 | // float x; // x any real number
35 | // float result = SinEstimate::DegreeRR<3>(x);
36 | template
37 | inline static Real DegreeRR(Real x);
38 |
39 | private:
40 | // Metaprogramming and private implementation to allow specialization of
41 | // a template member function.
42 | template struct degree {};
43 | inline static Real Evaluate(degree<3>, Real x);
44 | inline static Real Evaluate(degree<5>, Real x);
45 | inline static Real Evaluate(degree<7>, Real x);
46 | inline static Real Evaluate(degree<9>, Real x);
47 | inline static Real Evaluate(degree<11>, Real x);
48 |
49 | // Support for range reduction.
50 | inline static Real Reduce(Real x);
51 | };
52 |
53 |
54 | template
55 | template
56 | inline Real SinEstimate::Degree(Real x)
57 | {
58 | return Evaluate(degree(), x);
59 | }
60 |
61 | template
62 | template
63 | inline Real SinEstimate::DegreeRR(Real x)
64 | {
65 | return Degree(Reduce(x));
66 | }
67 |
68 | template
69 | inline Real SinEstimate::Evaluate(degree<3>, Real x)
70 | {
71 | Real xsqr = x * x;
72 | Real poly;
73 | poly = (Real)GTE_C_SIN_DEG3_C1;
74 | poly = (Real)GTE_C_SIN_DEG3_C0 + poly * xsqr;
75 | poly = poly * x;
76 | return poly;
77 | }
78 |
79 | template
80 | inline Real SinEstimate::Evaluate(degree<5>, Real x)
81 | {
82 | Real xsqr = x * x;
83 | Real poly;
84 | poly = (Real)GTE_C_SIN_DEG5_C2;
85 | poly = (Real)GTE_C_SIN_DEG5_C1 + poly * xsqr;
86 | poly = (Real)GTE_C_SIN_DEG5_C0 + poly * xsqr;
87 | poly = poly * x;
88 | return poly;
89 | }
90 |
91 | template
92 | inline Real SinEstimate::Evaluate(degree<7>, Real x)
93 | {
94 | Real xsqr = x * x;
95 | Real poly;
96 | poly = (Real)GTE_C_SIN_DEG7_C3;
97 | poly = (Real)GTE_C_SIN_DEG7_C2 + poly * xsqr;
98 | poly = (Real)GTE_C_SIN_DEG7_C1 + poly * xsqr;
99 | poly = (Real)GTE_C_SIN_DEG7_C0 + poly * xsqr;
100 | poly = poly * x;
101 | return poly;
102 | }
103 |
104 | template
105 | inline Real SinEstimate::Evaluate(degree<9>, Real x)
106 | {
107 | Real xsqr = x * x;
108 | Real poly;
109 | poly = (Real)GTE_C_SIN_DEG9_C4;
110 | poly = (Real)GTE_C_SIN_DEG9_C3 + poly * xsqr;
111 | poly = (Real)GTE_C_SIN_DEG9_C2 + poly * xsqr;
112 | poly = (Real)GTE_C_SIN_DEG9_C1 + poly * xsqr;
113 | poly = (Real)GTE_C_SIN_DEG9_C0 + poly * xsqr;
114 | poly = poly * x;
115 | return poly;
116 | }
117 |
118 | template
119 | inline Real SinEstimate::Evaluate(degree<11>, Real x)
120 | {
121 | Real xsqr = x * x;
122 | Real poly;
123 | poly = (Real)GTE_C_SIN_DEG11_C5;
124 | poly = (Real)GTE_C_SIN_DEG11_C4 + poly * xsqr;
125 | poly = (Real)GTE_C_SIN_DEG11_C3 + poly * xsqr;
126 | poly = (Real)GTE_C_SIN_DEG11_C2 + poly * xsqr;
127 | poly = (Real)GTE_C_SIN_DEG11_C1 + poly * xsqr;
128 | poly = (Real)GTE_C_SIN_DEG11_C0 + poly * xsqr;
129 | poly = poly * x;
130 | return poly;
131 | }
132 |
133 | template
134 | inline Real SinEstimate::Reduce(Real x)
135 | {
136 | // Map x to y in [-pi,pi], x = 2*pi*quotient + remainder.
137 | Real quotient = (Real)GTE_C_INV_TWO_PI * x;
138 | if (x >= (Real)0)
139 | {
140 | quotient = (Real)((int)(quotient + (Real)0.5));
141 | }
142 | else
143 | {
144 | quotient = (Real)((int)(quotient - (Real)0.5));
145 | }
146 | Real y = x - (Real)GTE_C_TWO_PI * quotient;
147 |
148 | // Map y to [-pi/2,pi/2] with sin(y) = sin(x).
149 | if (y > (Real)GTE_C_HALF_PI)
150 | {
151 | y = (Real)GTE_C_PI - y;
152 | }
153 | else if (y < (Real)-GTE_C_HALF_PI)
154 | {
155 | y = (Real)-GTE_C_PI - y;
156 | }
157 | return y;
158 | }
159 |
160 |
161 | }
162 |
--------------------------------------------------------------------------------
/Test/SinCos/Mathematics/GteCosEstimate.h:
--------------------------------------------------------------------------------
1 | // David Eberly, Geometric Tools, Redmond WA 98052
2 | // Copyright (c) 1998-2016
3 | // Distributed under the Boost Software License, Version 1.0.
4 | // http://www.boost.org/LICENSE_1_0.txt
5 | // http://www.geometrictools.com/License/Boost/LICENSE_1_0.txt
6 | // File Version: 3.0.0 (2016/06/19)
7 |
8 | #pragma once
9 |
10 | #include
11 |
12 | // Minimax polynomial approximations to cos(x). The polynomial p(x) of
13 | // degree D has only even-power terms, is required to have constant term 1,
14 | // and p(pi/2) = cos(pi/2) = 0. It minimizes the quantity
15 | // maximum{|cos(x) - p(x)| : x in [-pi/2,pi/2]} over all polynomials of
16 | // degree D subject to the constraints mentioned.
17 |
18 | namespace gte
19 | {
20 |
21 | template
22 | class CosEstimate
23 | {
24 | public:
25 | // The input constraint is x in [-pi/2,pi/2]. For example,
26 | // float x; // in [-pi/2,pi/2]
27 | // float result = CosEstimate::Degree<4>(x);
28 | template
29 | inline static Real Degree(Real x);
30 |
31 | // The input x can be any real number. Range reduction is used to
32 | // generate a value y in [-pi/2,pi/2] and a sign s for which
33 | // cos(y) = s*cos(x). For example,
34 | // float x; // x any real number
35 | // float result = CosEstimate::DegreeRR<3>(x);
36 | template
37 | inline static Real DegreeRR(Real x);
38 |
39 | private:
40 | // Metaprogramming and private implementation to allow specialization of
41 | // a template member function.
42 | template struct degree {};
43 | inline static Real Evaluate(degree<2>, Real x);
44 | inline static Real Evaluate(degree<4>, Real x);
45 | inline static Real Evaluate(degree<6>, Real x);
46 | inline static Real Evaluate(degree<8>, Real x);
47 | inline static Real Evaluate(degree<10>, Real x);
48 |
49 | // Support for range reduction.
50 | inline static void Reduce(Real x, Real& y, Real& sign);
51 | };
52 |
53 |
54 | template
55 | template
56 | inline Real CosEstimate::Degree(Real x)
57 | {
58 | return Evaluate(degree(), x);
59 | }
60 |
61 | template
62 | template
63 | inline Real CosEstimate::DegreeRR(Real x)
64 | {
65 | Real y, sign;
66 | Reduce(x, y, sign);
67 | Real poly = sign * Degree(y);
68 | return poly;
69 | }
70 |
71 | template
72 | inline Real CosEstimate::Evaluate(degree<2>, Real x)
73 | {
74 | Real xsqr = x * x;
75 | Real poly;
76 | poly = (Real)GTE_C_COS_DEG2_C1;
77 | poly = (Real)GTE_C_COS_DEG2_C0 + poly * xsqr;
78 | return poly;
79 | }
80 |
81 | template
82 | inline Real CosEstimate::Evaluate(degree<4>, Real x)
83 | {
84 | Real xsqr = x * x;
85 | Real poly;
86 | poly = (Real)GTE_C_COS_DEG4_C2;
87 | poly = (Real)GTE_C_COS_DEG4_C1 + poly * xsqr;
88 | poly = (Real)GTE_C_COS_DEG4_C0 + poly * xsqr;
89 | return poly;
90 | }
91 |
92 | template
93 | inline Real CosEstimate::Evaluate(degree<6>, Real x)
94 | {
95 | Real xsqr = x * x;
96 | Real poly;
97 | poly = (Real)GTE_C_COS_DEG6_C3;
98 | poly = (Real)GTE_C_COS_DEG6_C2 + poly * xsqr;
99 | poly = (Real)GTE_C_COS_DEG6_C1 + poly * xsqr;
100 | poly = (Real)GTE_C_COS_DEG6_C0 + poly * xsqr;
101 | return poly;
102 | }
103 |
104 | template
105 | inline Real CosEstimate::Evaluate(degree<8>, Real x)
106 | {
107 | Real xsqr = x * x;
108 | Real poly;
109 | poly = (Real)GTE_C_COS_DEG8_C4;
110 | poly = (Real)GTE_C_COS_DEG8_C3 + poly * xsqr;
111 | poly = (Real)GTE_C_COS_DEG8_C2 + poly * xsqr;
112 | poly = (Real)GTE_C_COS_DEG8_C1 + poly * xsqr;
113 | poly = (Real)GTE_C_COS_DEG8_C0 + poly * xsqr;
114 | return poly;
115 | }
116 |
117 | template
118 | inline Real CosEstimate::Evaluate(degree<10>, Real x)
119 | {
120 | Real xsqr = x * x;
121 | Real poly;
122 | poly = (Real)GTE_C_COS_DEG10_C5;
123 | poly = (Real)GTE_C_COS_DEG10_C4 + poly * xsqr;
124 | poly = (Real)GTE_C_COS_DEG10_C3 + poly * xsqr;
125 | poly = (Real)GTE_C_COS_DEG10_C2 + poly * xsqr;
126 | poly = (Real)GTE_C_COS_DEG10_C1 + poly * xsqr;
127 | poly = (Real)GTE_C_COS_DEG10_C0 + poly * xsqr;
128 | return poly;
129 | }
130 |
131 | template
132 | inline void CosEstimate::Reduce(Real x, Real& y, Real& sign)
133 | {
134 | // Map x to y in [-pi,pi], x = 2*pi*quotient + remainder.
135 | Real quotient = (Real)GTE_C_INV_TWO_PI * x;
136 | if (x >= (Real)0)
137 | {
138 | quotient = (Real)((int)(quotient + (Real)0.5));
139 | }
140 | else
141 | {
142 | quotient = (Real)((int)(quotient - (Real)0.5));
143 | }
144 | y = x - (Real)GTE_C_TWO_PI * quotient;
145 |
146 | // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x).
147 | if (y > (Real)GTE_C_HALF_PI)
148 | {
149 | y = (Real)GTE_C_PI - y;
150 | sign = (Real)-1;
151 | }
152 | else if (y < (Real)-GTE_C_HALF_PI)
153 | {
154 | y = (Real)-GTE_C_PI - y;
155 | sign = (Real)-1;
156 | }
157 | else
158 | {
159 | sign = (Real)1;
160 | }
161 | }
162 |
163 |
164 | }
165 |
--------------------------------------------------------------------------------
/Test/BitCount/align.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 |
4 | // http://stackoverflow.com/a/12942652/126995
5 |
6 | enum class Alignment: size_t
7 | {
8 | Normal = sizeof( void* ),
9 | SSE = 16,
10 | AVX = 32,
11 | };
12 |
13 | namespace detail
14 | {
15 | void* allocate_aligned_memory( size_t align, size_t size ) noexcept;
16 | void deallocate_aligned_memory( void* ptr ) noexcept;
17 | }
18 |
19 | template
20 | class AlignedAllocator;
21 |
22 | template
23 | class AlignedAllocator
24 | {
25 | public:
26 | typedef void* pointer;
27 | typedef const void* const_pointer;
28 | typedef void value_type;
29 |
30 | template struct rebind { typedef AlignedAllocator other; };
31 | };
32 |
33 | template
34 | class AlignedAllocator
35 | {
36 | public:
37 | typedef T value_type;
38 | typedef T* pointer;
39 | typedef const T* const_pointer;
40 | typedef T& reference;
41 | typedef const T& const_reference;
42 | typedef size_t size_type;
43 | typedef ptrdiff_t difference_type;
44 |
45 | typedef std::true_type propagate_on_container_move_assignment;
46 |
47 | template
48 | struct rebind { typedef AlignedAllocator other; };
49 |
50 | public:
51 | AlignedAllocator() noexcept
52 | {}
53 |
54 | template
55 | AlignedAllocator( const AlignedAllocator& ) noexcept
56 | {}
57 |
58 | size_type max_size() const noexcept
59 | {
60 | return ( size_type( ~0 ) - size_type( Align ) ) / sizeof( T );
61 | }
62 |
63 | pointer address( reference x ) const noexcept
64 | {
65 | return std::addressof( x );
66 | }
67 |
68 | const_pointer address( const_reference x ) const noexcept
69 | {
70 | return std::addressof( x );
71 | }
72 |
73 | pointer allocate( size_type n, typename AlignedAllocator::const_pointer = 0 )
74 | {
75 | const size_type alignment = static_cast( Align );
76 | void* ptr = detail::allocate_aligned_memory( alignment, n * sizeof( T ) );
77 | if( ptr == nullptr ) {
78 | throw std::bad_alloc();
79 | }
80 |
81 | return reinterpret_cast( ptr );
82 | }
83 |
84 | void deallocate( pointer p, size_type ) noexcept
85 | {
86 | return detail::deallocate_aligned_memory( p );
87 | }
88 |
89 | template
90 | void construct( U* p, Args&&... args )
91 | {
92 | ::new( reinterpret_cast( p ) ) U( std::forward( args )... );
93 | }
94 |
95 | void destroy( pointer p )
96 | {
97 | p->~T();
98 | }
99 | };
100 |
101 | template
102 | class AlignedAllocator
103 | {
104 | public:
105 | typedef T value_type;
106 | typedef const T* pointer;
107 | typedef const T* const_pointer;
108 | typedef const T& reference;
109 | typedef const T& const_reference;
110 | typedef size_t size_type;
111 | typedef ptrdiff_t difference_type;
112 |
113 | typedef std::true_type propagate_on_container_move_assignment;
114 |
115 | template
116 | struct rebind { typedef AlignedAllocator other; };
117 |
118 | public:
119 | AlignedAllocator() noexcept
120 | {}
121 |
122 | template
123 | AlignedAllocator( const AlignedAllocator& ) noexcept
124 | {}
125 |
126 | size_type max_size() const noexcept
127 | {
128 | return ( size_type( ~0 ) - size_type( Align ) ) / sizeof( T );
129 | }
130 |
131 | const_pointer address( const_reference x ) const noexcept
132 | {
133 | return std::addressof( x );
134 | }
135 |
136 | pointer allocate( size_type n, typename AlignedAllocator::const_pointer = 0 )
137 | {
138 | const size_type alignment = static_cast( Align );
139 | void* ptr = detail::allocate_aligned_memory( alignment, n * sizeof( T ) );
140 | if( ptr == nullptr ) {
141 | throw std::bad_alloc();
142 | }
143 |
144 | return reinterpret_cast( ptr );
145 | }
146 |
147 | void deallocate( pointer p, size_type ) noexcept
148 | {
149 | return detail::deallocate_aligned_memory( p );
150 | }
151 |
152 | template
153 | void construct( U* p, Args&&... args )
154 | {
155 | ::new( reinterpret_cast( p ) ) U( std::forward( args )... );
156 | }
157 |
158 | void destroy( pointer p )
159 | {
160 | p->~T();
161 | }
162 | };
163 |
164 | template
165 | inline bool operator== ( const AlignedAllocator&, const AlignedAllocator& ) noexcept
166 | {
167 | return TAlign == UAlign;
168 | }
169 |
170 | template
171 | inline bool operator!= ( const AlignedAllocator&, const AlignedAllocator& ) noexcept
172 | {
173 | return TAlign != UAlign;
174 | }
175 |
176 | template
177 | using alignedVector = std::vector>;
178 |
179 | namespace detail
180 | {
181 | static const Alignment default_alignment = Alignment::SSE;
182 |
183 | // template T* allocate()
184 | template T* allocate( _Types&&... _Args )
185 | {
186 | std::unique_ptr hold( static_cast( allocate_aligned_memory( default_alignment, sizeof( T ) ) ), deallocate_aligned_memory );
187 | ::new ( hold.get() ) T( _STD forward<_Types>( _Args )... );
188 | return static_cast( hold.release() );
189 | }
190 |
191 | template
192 | void deallocate( void* p )
193 | {
194 | static_cast( p )->~T();
195 | deallocate_aligned_memory( p );
196 | }
197 | }
--------------------------------------------------------------------------------
/ParseResults/Program.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.IO;
6 | using System.Threading;
7 | using System.Globalization;
8 |
namespace ParseResults
{
	using kvp = KeyValuePair<int, double>;
	using dictI2D = Dictionary<int, double>;

	/// <summary>Reads the raw benchmark logs, averages repeated measurements, and writes absolute and relative result tables as tab-separated text.</summary>
	static class Program
	{
		const string strFolder = @"C:\Z\Fun\LookupTables\RawResults";
		const string strOutput = @"C:\Z\Fun\LookupTables\RawResults\Parsed.tsv";
		const string strOutputRel = @"C:\Z\Fun\LookupTables\RawResults\Relative.tsv";

		/// <summary>Test ID</summary>
		enum eTest : byte
		{
			SinCos = 1,
			BitCount = 2,
		}

		/// <summary>Pack test + algorithm into an int: test in bits 8 and up, algorithm in the low 8 bits.</summary>
		static int key( eTest et, int algo )
		{
			return ( (int)et << 8 ) | algo;
		}

		/// <summary>Unpack test from integer key</summary>
		static eTest test( this int k )
		{
			return (eTest)( k >> 8 );
		}

		/// <summary>Unpack algorithm from integer key</summary>
		static int algo( this int k )
		{
			return k & 255;
		}

		/// <summary>Algorithms names for SinCos test</summary>
		static readonly Dictionary<int, string> s_algoSinCos = new Dictionary<int, string>()
		{
			{ 1, "StdLib" },
			{ 2, "Lookup" },
			{ 3, "GTE" },
			{ 4, "DX" },
			{ 5, "DX_Est" },
		};

		/// <summary>Algorithms names for BitCount test</summary>
		static readonly Dictionary<int, string> s_algoCnt = new Dictionary<int, string>()
		{
			{ 1, "Lookup" },
			{ 2, "SSE2" },
			{ 3, "SSSE3" },
			{ 4, "XOP" },
			{ 5, "POPCNT" },
		};

		/// <summary>Read all lines from the file, lazily, decoding it as ASCII.</summary>
		static IEnumerable<string> getLines( string fileName )
		{
			return File.ReadLines( fileName, Encoding.ASCII );
		}

		/// <summary>Case-insensitive string compare</summary>
		static bool isEqual( this string s, string s2 )
		{
			return string.Equals( s, s2, StringComparison.OrdinalIgnoreCase );
		}

		/// <summary>Parse lines in the result file.</summary>
		/// <remarks>
		/// A line equal to "SinCos" or "BitCount" selects the test for the lines that follow.
		/// Data lines are tab-separated: field 0 is the algorithm id, field 2 is the measurement.
		/// Lines that do not match this shape are skipped.
		/// </remarks>
		static IEnumerable<kvp> parse( string fileName )
		{
			eTest? test = null;
			foreach( string line in getLines( fileName ) )
			{
				if( line.isEqual( "SinCos" ) )
				{
					test = eTest.SinCos;
					continue;
				}
				if( line.isEqual( "BitCount" ) )
				{
					test = eTest.BitCount;
					continue;
				}
				if( !test.HasValue )
					continue;
				string[] fields = line.Split( '\t' );
				if( fields.Length < 3 )
					continue;
				int algo;
				if( !int.TryParse( fields[ 0 ], NumberStyles.Integer, CultureInfo.InvariantCulture, out algo ) )
					continue;
				// key() only has 8 bits for the algorithm; anything else would corrupt the packed test id.
				if( algo < 0 || algo > 255 )
					continue;
				double measure;
				// Parse culture-independently so the result does not rely on Main() having reset the thread culture.
				if( !double.TryParse( fields[ 2 ], NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out measure ) )
					continue;
				yield return new kvp( key( test.Value, algo ), measure );
			}
		}

		/// <summary>The log file contains several measurements per algorithm; group them by key and calculate the average of each group.</summary>
		static IEnumerable<kvp> average( this IEnumerable<kvp> lines )
		{
			return lines
				.GroupBy( t => t.Key )
				.Select( g => new kvp( g.Key, g.Average( t => t.Value ) ) );
		}

		/// <summary>Read all *.txt files in the directory, parse, calculate tests average.</summary>
		/// <param name="dir">Directory to scan.</param>
		/// <returns>Per-file results, keyed by the file name without extension.</returns>
		static Dictionary<string, dictI2D> readFiles( string dir )
		{
			// Use the argument: the previous code always scanned strFolder regardless of what was passed in.
			string[] files = Directory.GetFiles( dir, "*.txt" );
			Dictionary<string, dictI2D> res = new Dictionary<string, dictI2D>( files.Length );

			foreach( string f in files )
			{
				string name = Path.GetFileNameWithoutExtension( f );
				res[ name ] = parse( f ).average().ToDictionary( k => k.Key, k => k.Value );
			}
			return res;
		}

		/// <summary>Get the sorted union of all test+algorithm keys present in any column of this dataset.</summary>
		static int[] tableLines( this Dictionary<string, dictI2D> data )
		{
			HashSet<int> hs = new HashSet<int>( data.Values.SelectMany( c => c.Keys ) );
			return hs.OrderBy( k => k ).ToArray();
		}

		/// <summary>Write the fields as a single tab-separated line.</summary>
		static void printFields( this TextWriter tw, IEnumerable<string> fields )
		{
			tw.WriteLine( String.Join( "\t", fields ) );
		}

		/// <summary>Write one table: a title line, a header line with the column names, then one line per row key.</summary>
		/// <param name="algoNames">Display names by algorithm id; an id without a name is printed as its number.</param>
		static void printTable( TextWriter tw, string title, Dictionary<string, dictI2D> data, IEnumerable<int> rows, string[] columns, Dictionary<int, string> algoNames )
		{
			tw.WriteLine( title );

			List<string> fields = new List<string>( columns.Length + 1 );
			fields.Add( "" );
			fields.AddRange( columns );
			tw.printFields( fields );

			foreach( int r in rows )
			{
				fields.Clear();

				// Do not abort the whole report because a log contains an algorithm id we have no name for.
				string name;
				if( !algoNames.TryGetValue( r.algo(), out name ) )
					name = r.algo().ToString( CultureInfo.InvariantCulture );
				fields.Add( name );

				foreach( string c in columns )
				{
					double val;
					if( data[ c ].TryGetValue( r, out val ) )
						fields.Add( val.ToString( CultureInfo.InvariantCulture ) );
					else
						fields.Add( string.Empty );
				}
				tw.printFields( fields );
			}
		}

		/// <summary>Write the SinCos and BitCount tables to the destination file, columns sorted by name.</summary>
		static void print( Dictionary<string, dictI2D> data, string dest )
		{
			int[] lines = data.tableLines();
			string[] columns = data.Keys.OrderBy( k => k ).ToArray();

			using( StreamWriter sw = new StreamWriter( dest ) )
			{
				printTable( sw, "SinCos", data, lines.Where( l => l.test() == eTest.SinCos ), columns, s_algoSinCos );
				sw.WriteLine();
				printTable( sw, "BitCount", data, lines.Where( l => l.test() == eTest.BitCount ), columns, s_algoCnt );
			}
		}

		/// <summary>Rescale, in place, every value of the given test to a percentage of the baseline algorithm's value.</summary>
		/// <remarks>
		/// When the column has no usable baseline (the entry is missing or zero), the test's entries are removed
		/// so the relative table shows empty cells instead of failing or printing absolute numbers as percentages.
		/// </remarks>
		static void calcRel( dictI2D column, eTest test, int algo )
		{
			// Snapshot the keys first: the dictionary is modified inside the loop.
			int[] keys = column.Keys.Where( k => k.test() == test ).ToArray();

			double rel;
			bool hasBaseline = column.TryGetValue( key( test, algo ), out rel ) && rel != 0.0;

			foreach( int k in keys )
			{
				if( hasBaseline )
					column[ k ] = 100.0 * column[ k ] / rel;
				else
					column.Remove( k );
			}
		}

		/// <summary>Entry point. With no arguments the hard-coded folder and output paths are used; an optional first argument overrides the folder, and Parsed.tsv / Relative.tsv are then written into it.</summary>
		static void Main( string[] args )
		{
			// Reset culture to en-us, to match the output of C++ code
			CultureInfo english = new CultureInfo( "en-US" );
			CultureInfo.DefaultThreadCurrentCulture = english;
			CultureInfo.DefaultThreadCurrentUICulture = english;
			Thread.CurrentThread.CurrentCulture = english;
			Thread.CurrentThread.CurrentUICulture = english;

			bool customFolder = args != null && args.Length > 0;
			string folder = customFolder ? args[ 0 ] : strFolder;
			string output = customFolder ? Path.Combine( folder, "Parsed.tsv" ) : strOutput;
			string outputRel = customFolder ? Path.Combine( folder, "Relative.tsv" ) : strOutputRel;

			// Read, parse, average
			Dictionary<string, dictI2D> data = readFiles( folder );

			// Print raw results
			print( data, output );

			// Calculate relative values
			foreach( var d in data.Values )
			{
				calcRel( d, eTest.BitCount, 1 );
				calcRel( d, eTest.SinCos, 2 );
			}

			// Print relative results
			print( data, outputRel );
		}
	}
}
--------------------------------------------------------------------------------
/SinCosPolyPrecision/SinCosPolyPrecision.vcxproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | Win32
7 |
8 |
9 | Release
10 | Win32
11 |
12 |
13 | Debug
14 | x64
15 |
16 |
17 | Release
18 | x64
19 |
20 |
21 |
22 | {B9673A7E-A233-42FB-8A0C-B3AAA3B59C58}
23 | Win32Proj
24 | SinCosPolyPrecision
25 | 8.1
26 |
27 |
28 |
29 | Application
30 | true
31 | v140
32 | Unicode
33 |
34 |
35 | Application
36 | false
37 | v140
38 | true
39 | Unicode
40 |
41 |
42 | Application
43 | true
44 | v140
45 | Unicode
46 |
47 |
48 | Application
49 | false
50 | v140
51 | true
52 | Unicode
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 | true
74 | $(Platform)\$(Configuration)\
75 | $(Platform)\$(Configuration)\
76 |
77 |
78 | true
79 | $(Platform)\$(Configuration)\
80 |
81 |
82 | false
83 | $(Platform)\$(Configuration)\
84 | $(Platform)\$(Configuration)\
85 |
86 |
87 | false
88 | $(Platform)\$(Configuration)\
89 |
90 |
91 |
92 | Use
93 | Level3
94 | Disabled
95 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
96 | true
97 |
98 |
99 | Console
100 | true
101 |
102 |
103 |
104 |
105 | Use
106 | Level3
107 | Disabled
108 | _DEBUG;_CONSOLE;%(PreprocessorDefinitions)
109 | true
110 |
111 |
112 | Console
113 | true
114 |
115 |
116 |
117 |
118 | Level3
119 | Use
120 | MaxSpeed
121 | true
122 | true
123 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
124 | true
125 |
126 |
127 | Console
128 | true
129 | true
130 | true
131 |
132 |
133 |
134 |
135 | Level3
136 | Use
137 | MaxSpeed
138 | true
139 | true
140 | NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
141 | true
142 |
143 |
144 | Console
145 | true
146 | true
147 | true
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 | Create
158 | Create
159 | Create
160 | Create
161 |
162 |
163 |
164 |
165 |
166 |
--------------------------------------------------------------------------------
/Test/LookupTables.vcxproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | Win32
7 |
8 |
9 | Release
10 | Win32
11 |
12 |
13 | Debug
14 | x64
15 |
16 |
17 | Release
18 | x64
19 |
20 |
21 |
22 | {476AD73F-6B56-4B38-B499-1FC642BB9AEC}
23 | Win32Proj
24 | LookupTables
25 | 8.1
26 |
27 |
28 |
29 | Application
30 | true
31 | v140
32 | Unicode
33 |
34 |
35 | Application
36 | false
37 | v140
38 | true
39 | Unicode
40 |
41 |
42 | Application
43 | true
44 | v140
45 | Unicode
46 |
47 |
48 | Application
49 | false
50 | v140
51 | true
52 | Unicode
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 | true
74 | $(ProjectDir);$(ProjectDir)SinCos\;$(IncludePath)
75 | $(Platform)\$(Configuration)\
76 | $(Platform)\$(Configuration)\
77 |
78 |
79 | true
80 | $(ProjectDir);$(ProjectDir)SinCos\;$(IncludePath)
81 | $(Platform)\$(Configuration)\
82 |
83 |
84 | false
85 | $(ProjectDir);$(ProjectDir)SinCos\;$(IncludePath)
86 | $(Platform)\$(Configuration)\
87 | $(Platform)\$(Configuration)\
88 |
89 |
90 | false
91 | $(ProjectDir);$(ProjectDir)SinCos\;$(IncludePath)
92 | $(Platform)\$(Configuration)\
93 |
94 |
95 |
96 | Use
97 | Level3
98 | Disabled
99 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
100 | true
101 |
102 |
103 | Console
104 | true
105 |
106 |
107 |
108 |
109 | Use
110 | Level3
111 | Disabled
112 | _DEBUG;_CONSOLE;%(PreprocessorDefinitions)
113 | true
114 |
115 |
116 | Console
117 | true
118 |
119 |
120 |
121 |
122 | Level3
123 | Use
124 | MaxSpeed
125 | true
126 | true
127 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
128 | true
129 | MultiThreaded
130 | Speed
131 | StreamingSIMDExtensions2
132 |
133 |
134 | Console
135 | true
136 | true
137 | true
138 |
139 |
140 |
141 |
142 | Level3
143 | Use
144 | MaxSpeed
145 | true
146 | true
147 | NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
148 | true
149 | MultiThreaded
150 | Speed
151 | StreamingSIMDExtensions2
152 |
153 |
154 | Console
155 | true
156 | true
157 | true
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 | Create
182 | Create
183 | Create
184 | Create
185 |
186 |
187 |
188 |
189 |
190 |
--------------------------------------------------------------------------------
/Test/SinCos/Mathematics/GteConstants.h:
--------------------------------------------------------------------------------
1 | // David Eberly, Geometric Tools, Redmond WA 98052
2 | // Copyright (c) 1998-2016
3 | // Distributed under the Boost Software License, Version 1.0.
4 | // http://www.boost.org/LICENSE_1_0.txt
5 | // http://www.geometrictools.com/License/Boost/LICENSE_1_0.txt
6 | // File Version: 3.0.0 (2016/06/19)
7 |
8 | #pragma once
9 |
10 | #include
11 |
12 | // This file is for sharing of constants among the CPU, SSE2, and GPU. The
13 | // hard-coded numbers lead to correctly rounded approximations of the
14 | // constants when using 'float' or 'double'.
15 |
16 | // Constants involving pi.
17 | #define GTE_C_PI 3.1415926535897931
18 | #define GTE_C_HALF_PI 1.5707963267948966
19 | #define GTE_C_QUARTER_PI 0.7853981633974483
20 | #define GTE_C_TWO_PI 6.2831853071795862
21 | #define GTE_C_INV_PI 0.3183098861837907
22 | #define GTE_C_INV_TWO_PI 0.1591549430918953
23 | #define GTE_C_INV_HALF_PI 0.6366197723675813
24 |
25 | // Conversions between degrees and radians.
26 | #define GTE_C_DEG_TO_RAD 0.0174532925199433
27 | #define GTE_C_RAD_TO_DEG 57.295779513082321
28 |
29 | // Common constants.
30 | #define GTE_C_SQRT_2 1.4142135623730951
31 | #define GTE_C_INV_SQRT_2 0.7071067811865475
32 | #define GTE_C_LN_2 0.6931471805599453
33 | #define GTE_C_INV_LN_2 1.4426950408889634
34 | #define GTE_C_LN_10 2.3025850929940459
35 | #define GTE_C_INV_LN_10 0.43429448190325176
36 |
37 | // Constants for minimax polynomial approximations to sqrt(x).
38 | // The algorithm minimizes the maximum absolute error on [1,2].
39 | #define GTE_C_SQRT_DEG1_C0 +1.0
40 | #define GTE_C_SQRT_DEG1_C1 +4.1421356237309505e-01
41 | #define GTE_C_SQRT_DEG1_MAX_ERROR 1.7766952966368793e-2
42 |
43 | #define GTE_C_SQRT_DEG2_C0 +1.0
44 | #define GTE_C_SQRT_DEG2_C1 +4.8563183076125260e-01
45 | #define GTE_C_SQRT_DEG2_C2 -7.1418268388157458e-02
46 | #define GTE_C_SQRT_DEG2_MAX_ERROR 1.1795695163108744e-3
47 |
48 | #define GTE_C_SQRT_DEG3_C0 +1.0
49 | #define GTE_C_SQRT_DEG3_C1 +4.9750045320242231e-01
50 | #define GTE_C_SQRT_DEG3_C2 -1.0787308044477850e-01
51 | #define GTE_C_SQRT_DEG3_C3 +2.4586189615451115e-02
52 | #define GTE_C_SQRT_DEG3_MAX_ERROR 1.1309620116468910e-4
53 |
54 | #define GTE_C_SQRT_DEG4_C0 +1.0
55 | #define GTE_C_SQRT_DEG4_C1 +4.9955939832918816e-01
56 | #define GTE_C_SQRT_DEG4_C2 -1.2024066151943025e-01
57 | #define GTE_C_SQRT_DEG4_C3 +4.5461507257698486e-02
58 | #define GTE_C_SQRT_DEG4_C4 -1.0566681694362146e-02
59 | #define GTE_C_SQRT_DEG4_MAX_ERROR 1.2741170151556180e-5
60 |
61 | #define GTE_C_SQRT_DEG5_C0 +1.0
62 | #define GTE_C_SQRT_DEG5_C1 +4.9992197660031912e-01
63 | #define GTE_C_SQRT_DEG5_C2 -1.2378506719245053e-01
64 | #define GTE_C_SQRT_DEG5_C3 +5.6122776972699739e-02
65 | #define GTE_C_SQRT_DEG5_C4 -2.3128836281145482e-02
66 | #define GTE_C_SQRT_DEG5_C5 +5.0827122737047148e-03
67 | #define GTE_C_SQRT_DEG5_MAX_ERROR 1.5725568940708201e-6
68 |
69 | #define GTE_C_SQRT_DEG6_C0 +1.0
70 | #define GTE_C_SQRT_DEG6_C1 +4.9998616695784914e-01
71 | #define GTE_C_SQRT_DEG6_C2 -1.2470733323278438e-01
72 | #define GTE_C_SQRT_DEG6_C3 +6.0388587356982271e-02
73 | #define GTE_C_SQRT_DEG6_C4 -3.1692053551807930e-02
74 | #define GTE_C_SQRT_DEG6_C5 +1.2856590305148075e-02
75 | #define GTE_C_SQRT_DEG6_C6 -2.6183954624343642e-03
76 | #define GTE_C_SQRT_DEG6_MAX_ERROR 2.0584155535630089e-7
77 |
78 | #define GTE_C_SQRT_DEG7_C0 +1.0
79 | #define GTE_C_SQRT_DEG7_C1 +4.9999754817809228e-01
80 | #define GTE_C_SQRT_DEG7_C2 -1.2493243476353655e-01
81 | #define GTE_C_SQRT_DEG7_C3 +6.1859954146370910e-02
82 | #define GTE_C_SQRT_DEG7_C4 -3.6091595023208356e-02
83 | #define GTE_C_SQRT_DEG7_C5 +1.9483946523450868e-02
84 | #define GTE_C_SQRT_DEG7_C6 -7.5166134568007692e-03
85 | #define GTE_C_SQRT_DEG7_C7 +1.4127567687864939e-03
86 | #define GTE_C_SQRT_DEG7_MAX_ERROR 2.8072302919734948e-8
87 |
88 | #define GTE_C_SQRT_DEG8_C0 +1.0
89 | #define GTE_C_SQRT_DEG8_C1 +4.9999956583056759e-01
90 | #define GTE_C_SQRT_DEG8_C2 -1.2498490369914350e-01
91 | #define GTE_C_SQRT_DEG8_C3 +6.2318494667579216e-02
92 | #define GTE_C_SQRT_DEG8_C4 -3.7982961896432244e-02
93 | #define GTE_C_SQRT_DEG8_C5 +2.3642612312869460e-02
94 | #define GTE_C_SQRT_DEG8_C6 -1.2529377587270574e-02
95 | #define GTE_C_SQRT_DEG8_C7 +4.5382426960713929e-03
96 | #define GTE_C_SQRT_DEG8_C8 -7.8810995273670414e-04
97 | #define GTE_C_SQRT_DEG8_MAX_ERROR 3.9460605685825989e-9
98 |
99 | // Constants for minimax polynomial approximations to 1/sqrt(x).
100 | // The algorithm minimizes the maximum absolute error on [1,2].
101 | #define GTE_C_INVSQRT_DEG1_C0 +1.0
102 | #define GTE_C_INVSQRT_DEG1_C1 -2.9289321881345254e-01
103 | #define GTE_C_INVSQRT_DEG1_MAX_ERROR 3.7814314552701983e-2
104 |
105 | #define GTE_C_INVSQRT_DEG2_C0 +1.0
106 | #define GTE_C_INVSQRT_DEG2_C1 -4.4539812104566801e-01
107 | #define GTE_C_INVSQRT_DEG2_C2 +1.5250490223221547e-01
108 | #define GTE_C_INVSQRT_DEG2_MAX_ERROR 4.1953446330581234e-3
109 |
110 | #define GTE_C_INVSQRT_DEG3_C0 +1.0
111 | #define GTE_C_INVSQRT_DEG3_C1 -4.8703230993068791e-01
112 | #define GTE_C_INVSQRT_DEG3_C2 +2.8163710486669835e-01
113 | #define GTE_C_INVSQRT_DEG3_C3 -8.7498013749463421e-02
114 | #define GTE_C_INVSQRT_DEG3_MAX_ERROR 5.6307702007266786e-4
115 |
116 | #define GTE_C_INVSQRT_DEG4_C0 +1.0
117 | #define GTE_C_INVSQRT_DEG4_C1 -4.9710061558048779e-01
118 | #define GTE_C_INVSQRT_DEG4_C2 +3.4266247597676802e-01
119 | #define GTE_C_INVSQRT_DEG4_C3 -1.9106356536293490e-01
120 | #define GTE_C_INVSQRT_DEG4_C4 +5.2608486153198797e-02
121 | #define GTE_C_INVSQRT_DEG4_MAX_ERROR 8.1513919987605266e-5
122 |
123 | #define GTE_C_INVSQRT_DEG5_C0 +1.0
124 | #define GTE_C_INVSQRT_DEG5_C1 -4.9937760586004143e-01
125 | #define GTE_C_INVSQRT_DEG5_C2 +3.6508741295133973e-01
126 | #define GTE_C_INVSQRT_DEG5_C3 -2.5884890281853501e-01
127 | #define GTE_C_INVSQRT_DEG5_C4 +1.3275782221320753e-01
128 | #define GTE_C_INVSQRT_DEG5_C5 -3.2511945299404488e-02
129 | #define GTE_C_INVSQRT_DEG5_MAX_ERROR 1.2289367475583346e-5
130 |
131 | #define GTE_C_INVSQRT_DEG6_C0 +1.0
132 | #define GTE_C_INVSQRT_DEG6_C1 -4.9987029229547453e-01
133 | #define GTE_C_INVSQRT_DEG6_C2 +3.7220923604495226e-01
134 | #define GTE_C_INVSQRT_DEG6_C3 -2.9193067713256937e-01
135 | #define GTE_C_INVSQRT_DEG6_C4 +1.9937605991094642e-01
136 | #define GTE_C_INVSQRT_DEG6_C5 -9.3135712130901993e-02
137 | #define GTE_C_INVSQRT_DEG6_C6 +2.0458166789566690e-02
138 | #define GTE_C_INVSQRT_DEG6_MAX_ERROR 1.9001451223750465e-6
139 |
140 | #define GTE_C_INVSQRT_DEG7_C0 +1.0
141 | #define GTE_C_INVSQRT_DEG7_C1 -4.9997357250704977e-01
142 | #define GTE_C_INVSQRT_DEG7_C2 +3.7426216884998809e-01
143 | #define GTE_C_INVSQRT_DEG7_C3 -3.0539882498248971e-01
144 | #define GTE_C_INVSQRT_DEG7_C4 +2.3976005607005391e-01
145 | #define GTE_C_INVSQRT_DEG7_C5 -1.5410326351684489e-01
146 | #define GTE_C_INVSQRT_DEG7_C6 +6.5598809723041995e-02
147 | #define GTE_C_INVSQRT_DEG7_C7 -1.3038592450470787e-02
148 | #define GTE_C_INVSQRT_DEG7_MAX_ERROR 2.9887724993168940e-7
149 |
150 | #define GTE_C_INVSQRT_DEG8_C0 +1.0
151 | #define GTE_C_INVSQRT_DEG8_C1 -4.9999471066120371e-01
152 | #define GTE_C_INVSQRT_DEG8_C2 +3.7481415745794067e-01
153 | #define GTE_C_INVSQRT_DEG8_C3 -3.1023804387422160e-01
154 | #define GTE_C_INVSQRT_DEG8_C4 +2.5977002682930106e-01
155 | #define GTE_C_INVSQRT_DEG8_C5 -1.9818790717727097e-01
156 | #define GTE_C_INVSQRT_DEG8_C6 +1.1882414252613671e-01
157 | #define GTE_C_INVSQRT_DEG8_C7 -4.6270038088550791e-02
158 | #define GTE_C_INVSQRT_DEG8_C8 +8.3891541755747312e-03
159 | #define GTE_C_INVSQRT_DEG8_MAX_ERROR 4.7596926146947771e-8
160 |
161 | // Constants for minimax polynomial approximations to sin(x).
162 | // The algorithm minimizes the maximum absolute error on [-pi/2,pi/2].
163 | #define GTE_C_SIN_DEG3_C0 +1.0
164 | #define GTE_C_SIN_DEG3_C1 -1.4727245910375519e-01
165 | #define GTE_C_SIN_DEG3_MAX_ERROR 1.3481903639145865e-2
166 |
167 | #define GTE_C_SIN_DEG5_C0 +1.0
168 | #define GTE_C_SIN_DEG5_C1 -1.6600599923812209e-01
169 | #define GTE_C_SIN_DEG5_C2 +7.5924178409012000e-03
170 | #define GTE_C_SIN_DEG5_MAX_ERROR 1.4001209384639779e-4
171 |
172 | #define GTE_C_SIN_DEG7_C0 +1.0
173 | #define GTE_C_SIN_DEG7_C1 -1.6665578084732124e-01
174 | #define GTE_C_SIN_DEG7_C2 +8.3109378830028557e-03
175 | #define GTE_C_SIN_DEG7_C3 -1.8447486103462252e-04
176 | #define GTE_C_SIN_DEG7_MAX_ERROR 1.0205878936686563e-6
177 |
178 | #define GTE_C_SIN_DEG9_C0 +1.0
179 | #define GTE_C_SIN_DEG9_C1 -1.6666656235308897e-01
180 | #define GTE_C_SIN_DEG9_C2 +8.3329962509886002e-03
181 | #define GTE_C_SIN_DEG9_C3 -1.9805100675274190e-04
182 | #define GTE_C_SIN_DEG9_C4 +2.5967200279475300e-06
183 | #define GTE_C_SIN_DEG9_MAX_ERROR 5.2010746265374053e-9
184 |
185 | #define GTE_C_SIN_DEG11_C0 +1.0
186 | #define GTE_C_SIN_DEG11_C1 -1.6666666601721269e-01
187 | #define GTE_C_SIN_DEG11_C2 +8.3333303183525942e-03
188 | #define GTE_C_SIN_DEG11_C3 -1.9840782426250314e-04
189 | #define GTE_C_SIN_DEG11_C4 +2.7521557770526783e-06
190 | #define GTE_C_SIN_DEG11_C5 -2.3828544692960918e-08
191 | #define GTE_C_SIN_DEG11_MAX_ERROR 1.9295870457014530e-11
192 |
193 | // Constants for minimax polynomial approximations to cos(x).
194 | // The algorithm minimizes the maximum absolute error on [-pi/2,pi/2].
195 | #define GTE_C_COS_DEG2_C0 +1.0
196 | #define GTE_C_COS_DEG2_C1 -4.0528473456935105e-01
197 | #define GTE_C_COS_DEG2_MAX_ERROR 5.4870946878404048e-2
198 |
199 | #define GTE_C_COS_DEG4_C0 +1.0
200 | #define GTE_C_COS_DEG4_C1 -4.9607181958647262e-01
201 | #define GTE_C_COS_DEG4_C2 +3.6794619653489236e-02
202 | #define GTE_C_COS_DEG4_MAX_ERROR 9.1879932449712154e-4
203 |
204 | #define GTE_C_COS_DEG6_C0 +1.0
205 | #define GTE_C_COS_DEG6_C1 -4.9992746217057404e-01
206 | #define GTE_C_COS_DEG6_C2 +4.1493920348353308e-02
207 | #define GTE_C_COS_DEG6_C3 -1.2712435011987822e-03
208 | #define GTE_C_COS_DEG6_MAX_ERROR 9.2028470133065365e-6
209 |
210 | #define GTE_C_COS_DEG8_C0 +1.0
211 | #define GTE_C_COS_DEG8_C1 -4.9999925121358291e-01
212 | #define GTE_C_COS_DEG8_C2 +4.1663780117805693e-02
213 | #define GTE_C_COS_DEG8_C3 -1.3854239405310942e-03
214 | #define GTE_C_COS_DEG8_C4 +2.3154171575501259e-05
215 | #define GTE_C_COS_DEG8_MAX_ERROR 5.9804533020235695e-8
216 |
217 | #define GTE_C_COS_DEG10_C0 +1.0
218 | #define GTE_C_COS_DEG10_C1 -4.9999999508695869e-01
219 | #define GTE_C_COS_DEG10_C2 +4.1666638865338612e-02
220 | #define GTE_C_COS_DEG10_C3 -1.3888377661039897e-03
221 | #define GTE_C_COS_DEG10_C4 +2.4760495088926859e-05
222 | #define GTE_C_COS_DEG10_C5 -2.6051615464872668e-07
223 | #define GTE_C_COS_DEG10_MAX_ERROR 2.7006769043325107e-10
224 |
225 | // Constants for minimax polynomial approximations to tan(x).
226 | // The algorithm minimizes the maximum absolute error on [-pi/4,pi/4].
227 | #define GTE_C_TAN_DEG3_C0 1.0
228 | #define GTE_C_TAN_DEG3_C1 4.4295926544736286e-01
229 | #define GTE_C_TAN_DEG3_MAX_ERROR 1.1661892256204731e-2
230 |
231 | #define GTE_C_TAN_DEG5_C0 1.0
232 | #define GTE_C_TAN_DEG5_C1 3.1401320403542421e-01
233 | #define GTE_C_TAN_DEG5_C2 2.0903948109240345e-01
234 | #define GTE_C_TAN_DEG5_MAX_ERROR 5.8431854390143118e-4
235 |
236 | #define GTE_C_TAN_DEG7_C0 1.0
237 | #define GTE_C_TAN_DEG7_C1 3.3607213284422555e-01
238 | #define GTE_C_TAN_DEG7_C2 1.1261037305184907e-01
239 | #define GTE_C_TAN_DEG7_C3 9.8352099470524479e-02
240 | #define GTE_C_TAN_DEG7_MAX_ERROR 3.5418688397723108e-5
241 |
242 | #define GTE_C_TAN_DEG9_C0 1.0
243 | #define GTE_C_TAN_DEG9_C1 3.3299232843941784e-01
244 | #define GTE_C_TAN_DEG9_C2 1.3747843432474838e-01
245 | #define GTE_C_TAN_DEG9_C3 3.7696344813028304e-02
246 | #define GTE_C_TAN_DEG9_C4 4.6097377279281204e-02
247 | #define GTE_C_TAN_DEG9_MAX_ERROR 2.2988173242199927e-6
248 |
249 | #define GTE_C_TAN_DEG11_C0 1.0
250 | #define GTE_C_TAN_DEG11_C1 3.3337224456224224e-01
251 | #define GTE_C_TAN_DEG11_C2 1.3264516053824593e-01
252 | #define GTE_C_TAN_DEG11_C3 5.8145237645931047e-02
253 | #define GTE_C_TAN_DEG11_C4 1.0732193237572574e-02
254 | #define GTE_C_TAN_DEG11_C5 2.1558456793513869e-02
255 | #define GTE_C_TAN_DEG11_MAX_ERROR 1.5426257940140409e-7
256 |
257 | #define GTE_C_TAN_DEG13_C0 1.0
258 | #define GTE_C_TAN_DEG13_C1 3.3332916426394554e-01
259 | #define GTE_C_TAN_DEG13_C2 1.3343404625112498e-01
260 | #define GTE_C_TAN_DEG13_C3 5.3104565343119248e-02
261 | #define GTE_C_TAN_DEG13_C4 2.5355038312682154e-02
262 | #define GTE_C_TAN_DEG13_C5 1.8253255966556026e-03
263 | #define GTE_C_TAN_DEG13_C6 1.0069407176615641e-02
264 | #define GTE_C_TAN_DEG13_MAX_ERROR 1.0550264249037378e-8
265 |
266 | // Constants for minimax polynomial approximations to acos(x), where the
267 | // approximation is of the form acos(x) = sqrt(1 - x)*p(x) with p(x) a
268 | // polynomial. The algorithm minimizes the maximum error
269 | // |acos(x)/sqrt(1-x) - p(x)| on [0,1]. At the same time we get an
270 | // approximation for asin(x) = pi/2 - acos(x).
271 | #define GTE_C_ACOS_DEG1_C0 +1.5707963267948966
272 | #define GTE_C_ACOS_DEG1_C1 -1.5658276442180141e-01
273 | #define GTE_C_ACOS_DEG1_MAX_ERROR 1.1659002803738105e-2
274 |
275 | #define GTE_C_ACOS_DEG2_C0 +1.5707963267948966
276 | #define GTE_C_ACOS_DEG2_C1 -2.0347053865798365e-01
277 | #define GTE_C_ACOS_DEG2_C2 +4.6887774236182234e-02
278 | #define GTE_C_ACOS_DEG2_MAX_ERROR 9.0311602490029258e-4
279 |
280 | #define GTE_C_ACOS_DEG3_C0 +1.5707963267948966
281 | #define GTE_C_ACOS_DEG3_C1 -2.1253291899190285e-01
282 | #define GTE_C_ACOS_DEG3_C2 +7.4773789639484223e-02
283 | #define GTE_C_ACOS_DEG3_C3 -1.8823635069382449e-02
284 | #define GTE_C_ACOS_DEG3_MAX_ERROR 9.3066396954288172e-5
285 |
286 | #define GTE_C_ACOS_DEG4_C0 +1.5707963267948966
287 | #define GTE_C_ACOS_DEG4_C1 -2.1422258835275865e-01
288 | #define GTE_C_ACOS_DEG4_C2 +8.4936675142844198e-02
289 | #define GTE_C_ACOS_DEG4_C3 -3.5991475120957794e-02
290 | #define GTE_C_ACOS_DEG4_C4 +8.6946239090712751e-03
291 | #define GTE_C_ACOS_DEG4_MAX_ERROR 1.0930595804481413e-5
292 |
// Degree-5 acos polynomial coefficients, followed by the maximum error of the fit.
// MAX_ERROR needs the 'e' exponent marker: written as "1.38...-6" it is a subtraction
// that evaluates to about -4.61.
#define GTE_C_ACOS_DEG5_C0 +1.5707963267948966
#define GTE_C_ACOS_DEG5_C1 -2.1453292139805524e-01
#define GTE_C_ACOS_DEG5_C2 +8.7973089282889383e-02
#define GTE_C_ACOS_DEG5_C3 -4.5130266382166440e-02
#define GTE_C_ACOS_DEG5_C4 +1.9467466687281387e-02
#define GTE_C_ACOS_DEG5_C5 -4.3601326117634898e-03
#define GTE_C_ACOS_DEG5_MAX_ERROR 1.3861070257241426e-6
300 |
301 | #define GTE_C_ACOS_DEG6_C0 +1.5707963267948966
302 | #define GTE_C_ACOS_DEG6_C1 -2.1458939285677325e-01
303 | #define GTE_C_ACOS_DEG6_C2 +8.8784960563641491e-02
304 | #define GTE_C_ACOS_DEG6_C3 -4.8887131453156485e-02
305 | #define GTE_C_ACOS_DEG6_C4 +2.7011519960012720e-02
306 | #define GTE_C_ACOS_DEG6_C5 -1.1210537323478320e-02
307 | #define GTE_C_ACOS_DEG6_C6 +2.3078166879102469e-03
308 | #define GTE_C_ACOS_DEG6_MAX_ERROR 1.8491291330427484e-7
309 |
310 | #define GTE_C_ACOS_DEG7_C0 +1.5707963267948966
311 | #define GTE_C_ACOS_DEG7_C1 -2.1459960076929829e-01
312 | #define GTE_C_ACOS_DEG7_C2 +8.8986946573346160e-02
313 | #define GTE_C_ACOS_DEG7_C3 -5.0207843052845647e-02
314 | #define GTE_C_ACOS_DEG7_C4 +3.0961594977611639e-02
315 | #define GTE_C_ACOS_DEG7_C5 -1.7162031184398074e-02
316 | #define GTE_C_ACOS_DEG7_C6 +6.7072304676685235e-03
317 | #define GTE_C_ACOS_DEG7_C7 -1.2690614339589956e-03
318 | #define GTE_C_ACOS_DEG7_MAX_ERROR 2.5574620927948377e-8
319 |
320 | #define GTE_C_ACOS_DEG8_C0 +1.5707963267948966
321 | #define GTE_C_ACOS_DEG8_C1 -2.1460143648688035e-01
322 | #define GTE_C_ACOS_DEG8_C2 +8.9034700107934128e-02
323 | #define GTE_C_ACOS_DEG8_C3 -5.0625279962389413e-02
324 | #define GTE_C_ACOS_DEG8_C4 +3.2683762943179318e-02
325 | #define GTE_C_ACOS_DEG8_C5 -2.0949278766238422e-02
326 | #define GTE_C_ACOS_DEG8_C6 +1.1272900916992512e-02
327 | #define GTE_C_ACOS_DEG8_C7 -4.1160981058965262e-03
328 | #define GTE_C_ACOS_DEG8_C8 +7.1796493341480527e-04
329 | #define GTE_C_ACOS_DEG8_MAX_ERROR 3.6340015129032732e-9
330 |
331 | // Constants for minimax polynomial approximations to atan(x).
332 | // The algorithm minimizes the maximum absolute error on [-1,1].
333 | #define GTE_C_ATAN_DEG3_C0 +1.0
334 | #define GTE_C_ATAN_DEG3_C1 -2.1460183660255172e-01
335 | #define GTE_C_ATAN_DEG3_MAX_ERROR 1.5970326392614240e-2
336 |
337 | #define GTE_C_ATAN_DEG5_C0 +1.0
338 | #define GTE_C_ATAN_DEG5_C1 -3.0189478312144946e-01
339 | #define GTE_C_ATAN_DEG5_C2 +8.7292946518897740e-02
340 | #define GTE_C_ATAN_DEG5_MAX_ERROR 1.3509832247372636e-3
341 |
342 | #define GTE_C_ATAN_DEG7_C0 +1.0
343 | #define GTE_C_ATAN_DEG7_C1 -3.2570157599356531e-01
344 | #define GTE_C_ATAN_DEG7_C2 +1.5342994884206673e-01
345 | #define GTE_C_ATAN_DEG7_C3 -4.2330209451053591e-02
346 | #define GTE_C_ATAN_DEG7_MAX_ERROR 1.5051227215514412e-4
347 |
348 | #define GTE_C_ATAN_DEG9_C0 +1.0
349 | #define GTE_C_ATAN_DEG9_C1 -3.3157878236439586e-01
350 | #define GTE_C_ATAN_DEG9_C2 +1.8383034738018011e-01
351 | #define GTE_C_ATAN_DEG9_C3 -8.9253037587244677e-02
352 | #define GTE_C_ATAN_DEG9_C4 +2.2399635968909593e-02
353 | #define GTE_C_ATAN_DEG9_MAX_ERROR 1.8921598624582064e-5
354 |
355 | #define GTE_C_ATAN_DEG11_C0 +1.0
356 | #define GTE_C_ATAN_DEG11_C1 -3.3294527685374087e-01
357 | #define GTE_C_ATAN_DEG11_C2 +1.9498657165383548e-01
358 | #define GTE_C_ATAN_DEG11_C3 -1.1921576270475498e-01
359 | #define GTE_C_ATAN_DEG11_C4 +5.5063351366968050e-02
360 | #define GTE_C_ATAN_DEG11_C5 -1.2490720064867844e-02
361 | #define GTE_C_ATAN_DEG11_MAX_ERROR 2.5477724974187765e-6
362 |
363 | #define GTE_C_ATAN_DEG13_C0 +1.0
364 | #define GTE_C_ATAN_DEG13_C1 -3.3324998579202170e-01
365 | #define GTE_C_ATAN_DEG13_C2 +1.9856563505717162e-01
366 | #define GTE_C_ATAN_DEG13_C3 -1.3374657325451267e-01
367 | #define GTE_C_ATAN_DEG13_C4 +8.1675882859940430e-02
368 | #define GTE_C_ATAN_DEG13_C5 -3.5059680836411644e-02
369 | #define GTE_C_ATAN_DEG13_C6 +7.2128853633444123e-03
370 | #define GTE_C_ATAN_DEG13_MAX_ERROR 3.5859104691865484e-7
371 |
372 | // Constants for minimax polynomial approximations to exp2(x) = 2^x.
373 | // The algorithm minimizes the maximum absolute error on [0,1].
374 | #define GTE_C_EXP2_DEG1_C0 1.0
375 | #define GTE_C_EXP2_DEG1_C1 1.0
376 | #define GTE_C_EXP2_DEG1_MAX_ERROR 8.6071332055934313e-2
377 |
378 | #define GTE_C_EXP2_DEG2_C0 1.0
379 | #define GTE_C_EXP2_DEG2_C1 6.5571332605741528e-01
380 | #define GTE_C_EXP2_DEG2_C2 3.4428667394258472e-01
381 | #define GTE_C_EXP2_DEG2_MAX_ERROR 3.8132476831060358e-3
382 |
383 | #define GTE_C_EXP2_DEG3_C0 1.0
384 | #define GTE_C_EXP2_DEG3_C1 6.9589012084456225e-01
385 | #define GTE_C_EXP2_DEG3_C2 2.2486494900110188e-01
386 | #define GTE_C_EXP2_DEG3_C3 7.9244930154334980e-02
387 | #define GTE_C_EXP2_DEG3_MAX_ERROR 1.4694877755186408e-4
388 |
389 | #define GTE_C_EXP2_DEG4_C0 1.0
390 | #define GTE_C_EXP2_DEG4_C1 6.9300392358459195e-01
391 | #define GTE_C_EXP2_DEG4_C2 2.4154981722455560e-01
392 | #define GTE_C_EXP2_DEG4_C3 5.1744260331489045e-02
393 | #define GTE_C_EXP2_DEG4_C4 1.3701998859367848e-02
394 | #define GTE_C_EXP2_DEG4_MAX_ERROR 4.7617792624521371e-6
395 |
396 | #define GTE_C_EXP2_DEG5_C0 1.0
397 | #define GTE_C_EXP2_DEG5_C1 6.9315298010274962e-01
398 | #define GTE_C_EXP2_DEG5_C2 2.4014712313022102e-01
399 | #define GTE_C_EXP2_DEG5_C3 5.5855296413199085e-02
400 | #define GTE_C_EXP2_DEG5_C4 8.9477503096873079e-03
401 | #define GTE_C_EXP2_DEG5_C5 1.8968500441332026e-03
402 | #define GTE_C_EXP2_DEG5_MAX_ERROR 1.3162098333463490e-7
403 |
404 | #define GTE_C_EXP2_DEG6_C0 1.0
405 | #define GTE_C_EXP2_DEG6_C1 6.9314698914837525e-01
406 | #define GTE_C_EXP2_DEG6_C2 2.4023013440952923e-01
407 | #define GTE_C_EXP2_DEG6_C3 5.5481276898206033e-02
408 | #define GTE_C_EXP2_DEG6_C4 9.6838443037086108e-03
409 | #define GTE_C_EXP2_DEG6_C5 1.2388324048515642e-03
410 | #define GTE_C_EXP2_DEG6_C6 2.1892283501756538e-04
411 | #define GTE_C_EXP2_DEG6_MAX_ERROR 3.1589168225654163e-9
412 |
413 | #define GTE_C_EXP2_DEG7_C0 1.0
414 | #define GTE_C_EXP2_DEG7_C1 6.9314718588750690e-01
415 | #define GTE_C_EXP2_DEG7_C2 2.4022637363165700e-01
416 | #define GTE_C_EXP2_DEG7_C3 5.5505235570535660e-02
417 | #define GTE_C_EXP2_DEG7_C4 9.6136265387940512e-03
418 | #define GTE_C_EXP2_DEG7_C5 1.3429234504656051e-03
419 | #define GTE_C_EXP2_DEG7_C6 1.4299202757683815e-04
420 | #define GTE_C_EXP2_DEG7_C7 2.1662892777385423e-05
421 | #define GTE_C_EXP2_DEG7_MAX_ERROR 6.6864513925679603e-11
422 |
423 | // Constants for minimax polynomial approximations to log2(x).
424 | // The algorithm minimizes the maximum absolute error on [1,2].
425 | // The polynomials all have constant term zero.
426 | #define GTE_C_LOG2_DEG1_C1 +1.0
427 | #define GTE_C_LOG2_DEG1_MAX_ERROR 8.6071332055934202e-2
428 |
429 | #define GTE_C_LOG2_DEG2_C1 +1.3465553856377803
430 | #define GTE_C_LOG2_DEG2_C2 -3.4655538563778032e-01
431 | #define GTE_C_LOG2_DEG2_MAX_ERROR 7.6362868906658110e-3
432 |
433 | #define GTE_C_LOG2_DEG3_C1 +1.4228653756681227
434 | #define GTE_C_LOG2_DEG3_C2 -5.8208556916449616e-01
435 | #define GTE_C_LOG2_DEG3_C3 +1.5922019349637218e-01
436 | #define GTE_C_LOG2_DEG3_MAX_ERROR 8.7902902652883808e-4
437 |
438 | #define GTE_C_LOG2_DEG4_C1 +1.4387257478171547
439 | #define GTE_C_LOG2_DEG4_C2 -6.7778401359918661e-01
440 | #define GTE_C_LOG2_DEG4_C3 +3.2118898377713379e-01
441 | #define GTE_C_LOG2_DEG4_C4 -8.2130717995088531e-02
442 | #define GTE_C_LOG2_DEG4_MAX_ERROR 1.1318551355360418e-4
443 |
444 | #define GTE_C_LOG2_DEG5_C1 +1.4419170408633741
445 | #define GTE_C_LOG2_DEG5_C2 -7.0909645927612530e-01
446 | #define GTE_C_LOG2_DEG5_C3 +4.1560609399164150e-01
447 | #define GTE_C_LOG2_DEG5_C4 -1.9357573729558908e-01
448 | #define GTE_C_LOG2_DEG5_C5 +4.5149061716699634e-02
449 | #define GTE_C_LOG2_DEG5_MAX_ERROR 1.5521274478735858e-5
450 |
451 | #define GTE_C_LOG2_DEG6_C1 +1.4425449435950917
452 | #define GTE_C_LOG2_DEG6_C2 -7.1814525675038965e-01
453 | #define GTE_C_LOG2_DEG6_C3 +4.5754919692564044e-01
454 | #define GTE_C_LOG2_DEG6_C4 -2.7790534462849337e-01
455 | #define GTE_C_LOG2_DEG6_C5 +1.2179791068763279e-01
456 | #define GTE_C_LOG2_DEG6_C6 -2.5841449829670182e-02
457 | #define GTE_C_LOG2_DEG6_MAX_ERROR 2.2162051216689793e-6
458 |
459 | #define GTE_C_LOG2_DEG7_C1 +1.4426664401536078
460 | #define GTE_C_LOG2_DEG7_C2 -7.2055423726162360e-01
461 | #define GTE_C_LOG2_DEG7_C3 +4.7332419162501083e-01
462 | #define GTE_C_LOG2_DEG7_C4 -3.2514018752954144e-01
463 | #define GTE_C_LOG2_DEG7_C5 +1.9302965529095673e-01
464 | #define GTE_C_LOG2_DEG7_C6 -7.8534970641157997e-02
465 | #define GTE_C_LOG2_DEG7_C7 +1.5209108363023915e-02
466 | #define GTE_C_LOG2_DEG7_MAX_ERROR 3.2546531700261561e-7
467 |
468 | #define GTE_C_LOG2_DEG8_C1 +1.4426896453621882
469 | #define GTE_C_LOG2_DEG8_C2 -7.2115893912535967e-01
470 | #define GTE_C_LOG2_DEG8_C3 +4.7861716616785088e-01
471 | #define GTE_C_LOG2_DEG8_C4 -3.4699935395019565e-01
472 | #define GTE_C_LOG2_DEG8_C5 +2.4114048765477492e-01
473 | #define GTE_C_LOG2_DEG8_C6 -1.3657398692885181e-01
474 | #define GTE_C_LOG2_DEG8_C7 +5.1421382871922106e-02
475 | #define GTE_C_LOG2_DEG8_C8 -9.1364020499895560e-03
476 | #define GTE_C_LOG2_DEG8_MAX_ERROR 4.8796219218050219e-8
477 |
--------------------------------------------------------------------------------