├── .appveyor.yml ├── .travis.yml ├── README.md ├── example ├── addons.make └── src │ ├── main.cpp │ ├── ofApp.cpp │ └── ofApp.h ├── libs └── half │ └── include │ └── half.hpp └── src ├── ofxTextureRecorder.cpp └── ofxTextureRecorder.h /.appveyor.yml: -------------------------------------------------------------------------------- 1 | version: 1.0.{build} 2 | os: Visual Studio 2015 RC 3 | 4 | environment: 5 | global: 6 | APPVEYOR_OS_NAME: windows 7 | matrix: 8 | #MSYS2 Building 9 | - platform: x86 10 | BUILDER: MSYS2 11 | 12 | #VisualStudio Building 13 | - platform: x86 14 | BUILDER : VS 15 | BITS: 32 16 | - platform: x64 17 | BUILDER : VS 18 | BITS: 64 19 | 20 | configuration: Debug 21 | shallow_clone: true 22 | clone_depth: 10 23 | init: 24 | - set MSYS2_PATH=c:\msys64 25 | - set CHERE_INVOKING=1 26 | - if "%BUILDER%_%PLATFORM%"=="MSYS2_x86" set MSYSTEM=MINGW32 27 | - if "%BUILDER%_%PLATFORM%"=="MSYS2_x64" set MSYSTEM=MINGW64 28 | - if "%BUILDER%"=="VS" set PATH=C:\Program Files (x86)\MSBuild\14.0\Bin;%PATH% 29 | 30 | install: 31 | - cd .. 32 | - git clone --depth=1 --branch=master https://github.com/openframeworks/openFrameworks 33 | - call openFrameworks\scripts\ci\addons\install.cmd 34 | 35 | build_script: 36 | - cd %OF_PATH% 37 | - scripts\ci\addons\build.cmd 38 | 39 | 40 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # This file allows testing your addon using travis CI servers. To use it you'll need to 2 | # create an account at travis-ci.org and enable your addon there. 3 | # 4 | # By default it will test linux 64bit and osx against the master and stable OF branches. 5 | # Other platforms can be enabled by uncommenting the corresponding sections. 
6 | # 7 | # If any extra install is needed to use the addon it can be included in the corresponding 8 | # install script in: 9 | # 10 | # scripts/ci/$TARGET/install.sh 11 | # 12 | 13 | 14 | language: c++ 15 | compiler: gcc 16 | sudo: true 17 | matrix: 18 | include: 19 | # fully specify builds, include can't dynamically expand matrix entries 20 | # relative order of sudo and env is important so that addons: is recognized 21 | 22 | # Linux 64bit, OF master 23 | - os: linux 24 | dist: trusty 25 | sudo: required 26 | env: TARGET="linux64" OF_BRANCH="master" 27 | 28 | # Linux 64bit, OF stable: Not supported yet 29 | # - os: linux 30 | # dist: trusty 31 | # sudo: required 32 | # env: TARGET="linux64" OF_BRANCH="stable" 33 | 34 | # OSX, OF master 35 | - os: osx 36 | osx_image: xcode8 37 | compiler: clang 38 | env: TARGET="osx" OF_BRANCH="master" 39 | 40 | # OSX, OF stable: Not supported yet 41 | # - os: osx 42 | # osx_image: xcode8 43 | # compiler: clang 44 | # env: TARGET="osx" OF_BRANCH="stable" 45 | 46 | # Linux ARM6, OF master: Uncomment following lines to enable 47 | # - os: linux 48 | # sudo: required 49 | # dist: trusty 50 | # env: TARGET="linuxarmv6l" OF_BRANCH="master" 51 | 52 | 53 | # Linux ARM6, OF stable: Not supported yet 54 | # - os: linux 55 | # sudo: required 56 | # dist: trusty 57 | # env: TARGET="linuxarmv6l" OF_BRANCH="stable" 58 | 59 | # Linux ARM7, OF master: Uncomment following lines to enable 60 | # - os: linux 61 | # sudo: false 62 | # env: TARGET="linuxarmv7l" OF_BRANCH="master" 63 | # cache: 64 | # directories: 65 | # - ~/rpi2_toolchain 66 | # - ~/firmware-master 67 | # - ~/archlinux 68 | 69 | # Linux ARM7, OF stable: Not supported yet 70 | # - os: linux 71 | # sudo: false 72 | # env: TARGET="linuxarmv7l" OF_BRANCH="stable" 73 | # cache: 74 | # directories: 75 | # - ~/rpi2_toolchain 76 | # - ~/firmware-master 77 | # - ~/archlinux 78 | 79 | 80 | # Emscripten, OF master: Uncomment following lines to enable 81 | # - os: linux 82 | # sudo: false 83 | 
# env: TARGET="emscripten" OF_BRANCH="master" 84 | # addons: 85 | # apt: 86 | # sources: 87 | # - ubuntu-toolchain-r-test 88 | # packages: 89 | # - libstdc++6 90 | 91 | 92 | # Emscripten, OF stable: Not supported yet 93 | # - os: linux 94 | # sudo: false 95 | # env: TARGET="emscripten" OF_BRANCH="stable" 96 | # addons: 97 | # apt: 98 | # sources: 99 | # - ubuntu-toolchain-r-test 100 | # packages: 101 | # - libstdc++6 102 | 103 | 104 | # iOS, OF master: Not supported yet 105 | # - os: osx 106 | # osx_image: xcode8 107 | # compiler: clang 108 | # env: TARGET="ios" OF_BRANCH="master" 109 | 110 | 111 | # iOS, OF stable: Not supported yet 112 | # - os: osx 113 | # osx_image: xcode8 114 | # compiler: clang 115 | # env: TARGET="ios" OF_BRANCH="stable" 116 | 117 | 118 | # tvOS, OF master: Not supported yet 119 | # - os: osx 120 | # osx_image: xcode8 121 | # compiler: clang 122 | # env: TARGET="tvos" OF_BRANCH="master" 123 | 124 | 125 | # tvOS, OF stable: Not supported yet 126 | # - os: osx 127 | # osx_image: xcode8 128 | # compiler: clang 129 | # env: TARGET="tvos" OF_BRANCH="stable" 130 | 131 | 132 | # Android armv7, OF master: Uncomment following lines to enable 133 | # - os: linux 134 | # sudo: false 135 | # env: TARGET="android" OPT="armv7" OF_BRANCH="master" 136 | # cache: 137 | # directories: 138 | # - ~/android-ndk-r12b 139 | 140 | 141 | # Android armv7, OF stable: Not supported yet 142 | # - os: linux 143 | # sudo: false 144 | # env: TARGET="android" OPT="armv7" OF_BRANCH="stable" 145 | # cache: 146 | # directories: 147 | # - ~/android-ndk-r12b 148 | 149 | 150 | # Android x86, OF master: Uncomment following lines to enable 151 | # - os: linux 152 | # sudo: false 153 | # env: TARGET="android" OPT="x86" OF_BRANCH="master" 154 | # cache: 155 | # directories: 156 | # - ~/android-ndk-r12b 157 | 158 | 159 | # Android x86, OF stable: Not supported yet 160 | # - os: linux 161 | # sudo: false 162 | # env: TARGET="android" OPT="x86" OF_BRANCH="stable" 163 | # cache: 164 | # 
directories: 165 | # - ~/android-ndk-r12b 166 | 167 | 168 | # Exclude the default build that would otherwise be generated 169 | # see https://github.com/travis-ci/travis-ci/issues/1228 170 | exclude: 171 | - compiler: gcc 172 | 173 | install: 174 | - cd ~ 175 | - git clone --depth=1 --branch=$OF_BRANCH https://github.com/openframeworks/openFrameworks 176 | - cd openFrameworks 177 | - scripts/ci/addons/install.sh 178 | 179 | script: 180 | - scripts/ci/addons/build.sh 181 | 182 | git: 183 | depth: 10 184 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ofxTextureRecorder 2 | 3 | [![Build status linux / osx](https://travis-ci.org/arturoc/ofxTextureRecorder.svg?branch=master)](https://travis-ci.org/arturoc/ofxTextureRecorder) 4 | [![Build status windows](https://ci.appveyor.com/api/projects/status/7p3nnfjb6d1xlnni/branch/master?svg=true)](https://ci.appveyor.com/project/arturoc/ofxtexturerecorder/branch/master) 5 | 6 | Fast recording of textures to disk. You can draw to an fbo to record the full screen. 7 | 8 | It uses a PBO to download from the graphics card without blocking the main thread and several threads for encoding as fast as possible to the chosen format. 9 | 10 | The number of threads is the hardware concurrency value (usually number of cores * 2) - 2, so there are at least 2 cores free for other tasks. 
11 |
--------------------------------------------------------------------------------
/example/addons.make:
--------------------------------------------------------------------------------
1 | ofxTextureRecorder
2 |
--------------------------------------------------------------------------------
/example/src/main.cpp:
--------------------------------------------------------------------------------
1 | #include "ofMain.h"
2 | #include "ofApp.h"
3 |
4 | //========================================================================
5 | // Entry point: creates the window / GL context and starts the example app.
6 | int main( ){
7 |     // set up the GL context; the mode can be OF_WINDOW or OF_FULLSCREEN,
8 |     // the first two arguments are the window width and height
9 |     ofSetupOpenGL(1024,768,OF_WINDOW);
10 |
11 |     // this kicks off the running of the app
12 |     ofRunApp(new ofApp());
13 |
14 | }
15 |
--------------------------------------------------------------------------------
/example/src/ofApp.cpp:
--------------------------------------------------------------------------------
1 | #include "ofApp.h"
2 |
3 | //--------------------------------------------------------------
4 | // Allocates the fbo at the current window size and configures the
5 | // recorder for the fbo's texture, selecting JPEG as the image format.
6 | void ofApp::setup(){
7 |     fbo.allocate(ofGetWidth(), ofGetHeight(), GL_RGB);
8 |
9 |     ofxTextureRecorder::Settings settings(fbo.getTexture());
10 |     settings.imageFormat = OF_IMAGE_FORMAT_JPEG;
11 |     settings.numThreads = 12;
12 |     // NOTE(review): 9000000000 needs more than 32 bits -- confirm that the
13 |     // type of maxMemoryUsage is wide enough on 32-bit builds (x86 is part
14 |     // of the CI matrix).
15 |     settings.maxMemoryUsage = 9000000000;
16 |     recorder.setup(settings);
17 | }
18 |
19 | //--------------------------------------------------------------
20 | // Renders one frame into the fbo, hands the texture to the recorder and
21 | // exits once the circle has travelled past the right edge of the window.
22 | void ofApp::update(){
23 |     fbo.begin();
24 |     ofClear(0,255);
25 |     // x advances with the frame number; y is the vertical centre of the
26 |     // window, so it has to be derived from the height, not the width
27 |     ofDrawCircle(ofGetFrameNum(), ofGetHeight()/2, 50);
28 |     fbo.end();
29 |     // the very first frame (frame number 0) is not recorded
30 |     if(ofGetFrameNum()>0){
31 |         recorder.save(fbo.getTexture());
32 |     }
33 |     // stop once the frame counter exceeds the window width plus 50
34 |     if(ofGetFrameNum() > ofGetWidth() + 50){
35 |         ofExit(0);
36 |     }
37 | }
38 |
39 | //--------------------------------------------------------------
40 | // Shows the fbo on screen and prints the recorder's average timings
41 | // every 60 frames.
42 | void ofApp::draw(){
43 |     fbo.draw(0,0);
44 |
45 |     if(ofGetFrameNum()%60==0){
46 |         // std:: is spelled out so this file does not depend on a
47 |         // using-directive coming from the framework headers; the stream
48 |         // is flushed once, after the last line
49 |         std::cout << ofGetFrameRate() << '\n'
50 |                   << "texture copy: " << recorder.getAvgTimeTextureCopy() << '\n'
51 |                   << "gpu download: " << recorder.getAvgTimeGpuDownload() << '\n'
52 |                   << "image encoding: " << recorder.getAvgTimeEncode() << '\n'
53 |                   << "file save: " << recorder.getAvgTimeSave() << std::endl;
54 |     }
55 |
56 | }
57 |
58 | // The remaining event handlers are intentionally empty: the example does
59 | // not react to keyboard, mouse, window, message or drag events.
60 |
61 | //--------------------------------------------------------------
62 | void ofApp::keyPressed(int key){
63 |
64 | }
65 |
66 | //--------------------------------------------------------------
67 | void ofApp::keyReleased(int key){
68 |
69 | }
70 |
71 | //--------------------------------------------------------------
72 | void ofApp::mouseMoved(int x, int y ){
73 |
74 | }
75 |
76 | //--------------------------------------------------------------
77 | void ofApp::mouseDragged(int x, int y, int button){
78 |
79 | }
80 |
81 | //--------------------------------------------------------------
82 | void ofApp::mousePressed(int x, int y, int button){
83 |
84 | }
85 |
86 | //--------------------------------------------------------------
87 | void ofApp::mouseReleased(int x, int y, int button){
88 |
89 | }
90 |
91 | //--------------------------------------------------------------
92 | void ofApp::mouseEntered(int x, int y){
93 |
94 | }
95 |
96 | //--------------------------------------------------------------
97 | void ofApp::mouseExited(int x, int y){
98 |
99 | }
100 |
101 | //--------------------------------------------------------------
102 | void ofApp::windowResized(int w, int h){
103 |
104 | }
105 |
106 | //--------------------------------------------------------------
107 | void ofApp::gotMessage(ofMessage msg){
108 |
109 | }
110 |
111 | //--------------------------------------------------------------
112 | void ofApp::dragEvent(ofDragInfo dragInfo){
113 |
114 | }
115 |
--------------------------------------------------------------------------------
/example/src/ofApp.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "ofMain.h"
4 | #include "ofxTextureRecorder.h"
5 |
6 | // Example application: draws a moving circle into an fbo and records the
7 | // fbo's texture with ofxTextureRecorder.
8 | class ofApp : public ofBaseApp{
9 |
10 |     public:
11 |         // application life cycle
12 |         void setup() override;
13 |         void update() override;
14 |         void draw() override;
15 |
16 |         // event handlers (all empty in this example)
17 |         void keyPressed(int key) override;
18 |         void keyReleased(int key) override;
19 |         void mouseMoved(int x, int y ) override;
20 |         void mouseDragged(int x, int y, int button) override;
21 |         void mousePressed(int x, int y, int button) override;
22 |         void mouseReleased(int x, int y, int button) override;
23 |         void mouseEntered(int x, int y) override;
24 |         void mouseExited(int x, int y) override;
25 |         void windowResized(int w, int h) override;
26 |         void dragEvent(ofDragInfo dragInfo) override;
27 |         void gotMessage(ofMessage msg) override;
28 |
29 |         ofxTextureRecorder recorder; // saves the fbo's texture each frame
30 |         ofFbo fbo;                   // off-screen target the scene is drawn into
31 | };
32 |
--------------------------------------------------------------------------------
/libs/half/include/half.hpp:
--------------------------------------------------------------------------------
1 | // half - IEEE 754-based half-precision floating point library.
2 | //
3 | // Copyright (c) 2012-2017 Christian Rau
4 | //
5 | // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
6 | // files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
7 | // modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
8 | // Software is furnished to do so, subject to the following conditions:
9 | //
10 | // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
11 | //
12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
13 | // WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR 14 | // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 15 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 16 | 17 | // Version 1.12.0 18 | 19 | /// \file 20 | /// Main header file for half precision functionality. 21 | 22 | #ifndef HALF_HALF_HPP 23 | #define HALF_HALF_HPP 24 | 25 | /// Combined gcc version number. 26 | #define HALF_GNUC_VERSION (__GNUC__*100+__GNUC_MINOR__) 27 | 28 | //check C++11 language features 29 | #if defined(__clang__) //clang 30 | #if __has_feature(cxx_static_assert) && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) 31 | #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 32 | #endif 33 | #if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR) 34 | #define HALF_ENABLE_CPP11_CONSTEXPR 1 35 | #endif 36 | #if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT) 37 | #define HALF_ENABLE_CPP11_NOEXCEPT 1 38 | #endif 39 | #if __has_feature(cxx_user_literals) && !defined(HALF_ENABLE_CPP11_USER_LITERALS) 40 | #define HALF_ENABLE_CPP11_USER_LITERALS 1 41 | #endif 42 | #if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && !defined(HALF_ENABLE_CPP11_LONG_LONG) 43 | #define HALF_ENABLE_CPP11_LONG_LONG 1 44 | #endif 45 | /*#elif defined(__INTEL_COMPILER) //Intel C++ 46 | #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) ???????? 47 | #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 48 | #endif 49 | #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) ???????? 50 | #define HALF_ENABLE_CPP11_CONSTEXPR 1 51 | #endif 52 | #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) ???????? 53 | #define HALF_ENABLE_CPP11_NOEXCEPT 1 54 | #endif 55 | #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_LONG_LONG) ???????? 
56 | #define HALF_ENABLE_CPP11_LONG_LONG 1 57 | #endif*/ 58 | #elif defined(__GNUC__) //gcc 59 | #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L 60 | #if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) 61 | #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 62 | #endif 63 | #if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) 64 | #define HALF_ENABLE_CPP11_CONSTEXPR 1 65 | #endif 66 | #if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) 67 | #define HALF_ENABLE_CPP11_NOEXCEPT 1 68 | #endif 69 | #if HALF_GNUC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) 70 | #define HALF_ENABLE_CPP11_USER_LITERALS 1 71 | #endif 72 | #if !defined(HALF_ENABLE_CPP11_LONG_LONG) 73 | #define HALF_ENABLE_CPP11_LONG_LONG 1 74 | #endif 75 | #endif 76 | #elif defined(_MSC_VER) //Visual C++ 77 | #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) 78 | #define HALF_ENABLE_CPP11_CONSTEXPR 1 79 | #endif 80 | #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) 81 | #define HALF_ENABLE_CPP11_NOEXCEPT 1 82 | #endif 83 | #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) 84 | #define HALF_ENABLE_CPP11_USER_LITERALS 1 85 | #endif 86 | #if _MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) 87 | #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 88 | #endif 89 | #if _MSC_VER >= 1310 && !defined(HALF_ENABLE_CPP11_LONG_LONG) 90 | #define HALF_ENABLE_CPP11_LONG_LONG 1 91 | #endif 92 | #define HALF_POP_WARNINGS 1 93 | #pragma warning(push) 94 | #pragma warning(disable : 4099 4127 4146) //struct vs class, constant in if, negative unsigned 95 | #endif 96 | 97 | //check C++11 library features 98 | #include 99 | #if defined(_LIBCPP_VERSION) //libc++ 100 | #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 101 | #ifndef HALF_ENABLE_CPP11_TYPE_TRAITS 102 | #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 103 | #endif 104 | #ifndef HALF_ENABLE_CPP11_CSTDINT 105 | #define 
HALF_ENABLE_CPP11_CSTDINT 1 106 | #endif 107 | #ifndef HALF_ENABLE_CPP11_CMATH 108 | #define HALF_ENABLE_CPP11_CMATH 1 109 | #endif 110 | #ifndef HALF_ENABLE_CPP11_HASH 111 | #define HALF_ENABLE_CPP11_HASH 1 112 | #endif 113 | #endif 114 | #elif defined(__GLIBCXX__) //libstdc++ 115 | #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 116 | #ifdef __clang__ 117 | #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) 118 | #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 119 | #endif 120 | #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT) 121 | #define HALF_ENABLE_CPP11_CSTDINT 1 122 | #endif 123 | #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH) 124 | #define HALF_ENABLE_CPP11_CMATH 1 125 | #endif 126 | #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH) 127 | #define HALF_ENABLE_CPP11_HASH 1 128 | #endif 129 | #else 130 | #if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT) 131 | #define HALF_ENABLE_CPP11_CSTDINT 1 132 | #endif 133 | #if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH) 134 | #define HALF_ENABLE_CPP11_CMATH 1 135 | #endif 136 | #if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH) 137 | #define HALF_ENABLE_CPP11_HASH 1 138 | #endif 139 | #endif 140 | #endif 141 | #elif defined(_CPPLIB_VER) //Dinkumware/Visual C++ 142 | #if _CPPLIB_VER >= 520 143 | #ifndef HALF_ENABLE_CPP11_TYPE_TRAITS 144 | #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 145 | #endif 146 | #ifndef HALF_ENABLE_CPP11_CSTDINT 147 | #define HALF_ENABLE_CPP11_CSTDINT 1 148 | #endif 149 | #ifndef HALF_ENABLE_CPP11_HASH 150 | #define HALF_ENABLE_CPP11_HASH 1 151 | #endif 152 | #endif 153 | #if _CPPLIB_VER >= 610 154 | #ifndef HALF_ENABLE_CPP11_CMATH 155 | #define HALF_ENABLE_CPP11_CMATH 1 156 | #endif 157 | #endif 158 | #endif 159 | #undef HALF_GNUC_VERSION 160 | 161 | //support constexpr 162 | #if HALF_ENABLE_CPP11_CONSTEXPR 163 | #define HALF_CONSTEXPR constexpr 164 | #define 
HALF_CONSTEXPR_CONST constexpr 165 | #else 166 | #define HALF_CONSTEXPR 167 | #define HALF_CONSTEXPR_CONST const 168 | #endif 169 | 170 | //support noexcept 171 | #if HALF_ENABLE_CPP11_NOEXCEPT 172 | #define HALF_NOEXCEPT noexcept 173 | #define HALF_NOTHROW noexcept 174 | #else 175 | #define HALF_NOEXCEPT 176 | #define HALF_NOTHROW throw() 177 | #endif 178 | 179 | #include 180 | #include 181 | #include 182 | #include 183 | #include 184 | #include 185 | #if HALF_ENABLE_CPP11_TYPE_TRAITS 186 | #include 187 | #endif 188 | #if HALF_ENABLE_CPP11_CSTDINT 189 | #include 190 | #endif 191 | #if HALF_ENABLE_CPP11_HASH 192 | #include 193 | #endif 194 | 195 | 196 | /// Default rounding mode. 197 | /// This specifies the rounding mode used for all conversions between [half](\ref half_float::half)s and `float`s as well as 198 | /// for the half_cast() if not specifying a rounding mode explicitly. It can be redefined (before including half.hpp) to one 199 | /// of the standard rounding modes using their respective constants or the equivalent values of `std::float_round_style`: 200 | /// 201 | /// `std::float_round_style` | value | rounding 202 | /// ---------------------------------|-------|------------------------- 203 | /// `std::round_indeterminate` | -1 | fastest (default) 204 | /// `std::round_toward_zero` | 0 | toward zero 205 | /// `std::round_to_nearest` | 1 | to nearest 206 | /// `std::round_toward_infinity` | 2 | toward positive infinity 207 | /// `std::round_toward_neg_infinity` | 3 | toward negative infinity 208 | /// 209 | /// By default this is set to `-1` (`std::round_indeterminate`), which uses truncation (round toward zero, but with overflows 210 | /// set to infinity) and is the fastest rounding mode possible. It can even be set to `std::numeric_limits::round_style` 211 | /// to synchronize the rounding mode with that of the underlying single-precision implementation. 
212 | #ifndef HALF_ROUND_STYLE 213 | #define HALF_ROUND_STYLE -1 // = std::round_indeterminate 214 | #endif 215 | 216 | /// Tie-breaking behaviour for round to nearest. 217 | /// This specifies if ties in round to nearest should be resolved by rounding to the nearest even value. By default this is 218 | /// defined to `0` resulting in the faster but slightly more biased behaviour of rounding away from zero in half-way cases (and 219 | /// thus equal to the round() function), but can be redefined to `1` (before including half.hpp) if more IEEE-conformant 220 | /// behaviour is needed. 221 | #ifndef HALF_ROUND_TIES_TO_EVEN 222 | #define HALF_ROUND_TIES_TO_EVEN 0 // ties away from zero 223 | #endif 224 | 225 | /// Value signaling overflow. 226 | /// In correspondence with `HUGE_VAL[F|L]` from `` this symbol expands to a positive value signaling the overflow of an 227 | /// operation, in particular it just evaluates to positive infinity. 228 | #define HUGE_VALH std::numeric_limits::infinity() 229 | 230 | /// Fast half-precision fma function. 231 | /// This symbol is only defined if the fma() function generally executes as fast as, or faster than, a separate 232 | /// half-precision multiplication followed by an addition. Due to the internal single-precision implementation of all 233 | /// arithmetic operations, this is in fact always the case. 234 | #define FP_FAST_FMAH 1 235 | 236 | #ifndef FP_ILOGB0 237 | #define FP_ILOGB0 INT_MIN 238 | #endif 239 | #ifndef FP_ILOGBNAN 240 | #define FP_ILOGBNAN INT_MAX 241 | #endif 242 | #ifndef FP_SUBNORMAL 243 | #define FP_SUBNORMAL 0 244 | #endif 245 | #ifndef FP_ZERO 246 | #define FP_ZERO 1 247 | #endif 248 | #ifndef FP_NAN 249 | #define FP_NAN 2 250 | #endif 251 | #ifndef FP_INFINITE 252 | #define FP_INFINITE 3 253 | #endif 254 | #ifndef FP_NORMAL 255 | #define FP_NORMAL 4 256 | #endif 257 | 258 | 259 | /// Main namespace for half precision functionality. 
260 | /// This namespace contains all the functionality provided by the library. 261 | namespace half_float 262 | { 263 | class half; 264 | 265 | #if HALF_ENABLE_CPP11_USER_LITERALS 266 | /// Library-defined half-precision literals. 267 | /// Import this namespace to enable half-precision floating point literals: 268 | /// ~~~~{.cpp} 269 | /// using namespace half_float::literal; 270 | /// half_float::half h = 4.2_h; 271 | /// ~~~~ 272 | namespace literal 273 | { 274 | half operator""_h(long double); 275 | } 276 | #endif 277 | 278 | /// \internal 279 | /// \brief Implementation details. 280 | namespace detail 281 | { 282 | #if HALF_ENABLE_CPP11_TYPE_TRAITS 283 | /// Conditional type. 284 | template struct conditional : std::conditional {}; 285 | 286 | /// Helper for tag dispatching. 287 | template struct bool_type : std::integral_constant {}; 288 | using std::true_type; 289 | using std::false_type; 290 | 291 | /// Type traits for floating point types. 292 | template struct is_float : std::is_floating_point {}; 293 | #else 294 | /// Conditional type. 295 | template struct conditional { typedef T type; }; 296 | template struct conditional { typedef F type; }; 297 | 298 | /// Helper for tag dispatching. 299 | template struct bool_type {}; 300 | typedef bool_type true_type; 301 | typedef bool_type false_type; 302 | 303 | /// Type traits for floating point types. 304 | template struct is_float : false_type {}; 305 | template struct is_float : is_float {}; 306 | template struct is_float : is_float {}; 307 | template struct is_float : is_float {}; 308 | template<> struct is_float : true_type {}; 309 | template<> struct is_float : true_type {}; 310 | template<> struct is_float : true_type {}; 311 | #endif 312 | 313 | /// Type traits for floating point bits. 
314 | template struct bits { typedef unsigned char type; }; 315 | template struct bits : bits {}; 316 | template struct bits : bits {}; 317 | template struct bits : bits {}; 318 | 319 | #if HALF_ENABLE_CPP11_CSTDINT 320 | /// Unsigned integer of (at least) 16 bits width. 321 | typedef std::uint_least16_t uint16; 322 | 323 | /// Unsigned integer of (at least) 32 bits width. 324 | template<> struct bits { typedef std::uint_least32_t type; }; 325 | 326 | /// Unsigned integer of (at least) 64 bits width. 327 | template<> struct bits { typedef std::uint_least64_t type; }; 328 | #else 329 | /// Unsigned integer of (at least) 16 bits width. 330 | typedef unsigned short uint16; 331 | 332 | /// Unsigned integer of (at least) 32 bits width. 333 | template<> struct bits : conditional::digits>=32,unsigned int,unsigned long> {}; 334 | 335 | #if HALF_ENABLE_CPP11_LONG_LONG 336 | /// Unsigned integer of (at least) 64 bits width. 337 | template<> struct bits : conditional::digits>=64,unsigned long,unsigned long long> {}; 338 | #else 339 | /// Unsigned integer of (at least) 64 bits width. 340 | template<> struct bits { typedef unsigned long type; }; 341 | #endif 342 | #endif 343 | 344 | /// Tag type for binary construction. 345 | struct binary_t {}; 346 | 347 | /// Tag for binary construction. 348 | HALF_CONSTEXPR_CONST binary_t binary = binary_t(); 349 | 350 | /// Temporary half-precision expression. 351 | /// This class represents a half-precision expression which just stores a single-precision value internally. 352 | struct expr 353 | { 354 | /// Conversion constructor. 355 | /// \param f single-precision value to convert 356 | explicit HALF_CONSTEXPR expr(float f) HALF_NOEXCEPT : value_(f) {} 357 | 358 | /// Conversion to single-precision. 359 | /// \return single precision value representing expression value 360 | HALF_CONSTEXPR operator float() const HALF_NOEXCEPT { return value_; } 361 | 362 | private: 363 | /// Internal expression value stored in single-precision. 
364 | float value_; 365 | }; 366 | 367 | /// SFINAE helper for generic half-precision functions. 368 | /// This class template has to be specialized for each valid combination of argument types to provide a corresponding 369 | /// `type` member equivalent to \a T. 370 | /// \tparam T type to return 371 | template struct enable {}; 372 | template struct enable { typedef T type; }; 373 | template struct enable { typedef T type; }; 374 | template struct enable { typedef T type; }; 375 | template struct enable { typedef T type; }; 376 | template struct enable { typedef T type; }; 377 | template struct enable { typedef T type; }; 378 | template struct enable { typedef T type; }; 379 | template struct enable { typedef T type; }; 380 | template struct enable { typedef T type; }; 381 | template struct enable { typedef T type; }; 382 | template struct enable { typedef T type; }; 383 | template struct enable { typedef T type; }; 384 | template struct enable { typedef T type; }; 385 | template struct enable { typedef T type; }; 386 | 387 | /// Return type for specialized generic 2-argument half-precision functions. 388 | /// This class template has to be specialized for each valid combination of argument types to provide a corresponding 389 | /// `type` member denoting the appropriate return type. 390 | /// \tparam T first argument type 391 | /// \tparam U second argument type 392 | template struct result : enable {}; 393 | template<> struct result { typedef half type; }; 394 | 395 | /// \name Classification helpers 396 | /// \{ 397 | 398 | /// Check for infinity. 
399 | /// \tparam T argument type (builtin floating point type) 400 | /// \param arg value to query 401 | /// \retval true if positive or negative infinity 402 | /// \retval false else 403 | template bool builtin_isinf(T arg) 404 | { 405 | #if HALF_ENABLE_CPP11_CMATH 406 | return std::isinf(arg); 407 | #elif defined(_MSC_VER) 408 | return !::_finite(static_cast(arg)) && !::_isnan(static_cast(arg)); // neither finite nor NaN leaves only the infinities 409 | #else 410 | return arg == std::numeric_limits::infinity() || arg == -std::numeric_limits::infinity(); // generic fallback: compare against both infinities 411 | #endif 412 | } 413 | 414 | /// Check for NaN. 415 | /// \tparam T argument type (builtin floating point type) 416 | /// \param arg value to query 417 | /// \retval true if not a number 418 | /// \retval false else 419 | template bool builtin_isnan(T arg) 420 | { 421 | #if HALF_ENABLE_CPP11_CMATH 422 | return std::isnan(arg); 423 | #elif defined(_MSC_VER) 424 | return ::_isnan(static_cast(arg)) != 0; 425 | #else 426 | return arg != arg; // generic fallback: a NaN compares unequal to itself 427 | #endif 428 | } 429 | 430 | /// Check sign. 431 | /// \tparam T argument type (builtin floating point type) 432 | /// \param arg value to query 433 | /// \retval true if signbit set 434 | /// \retval false else 435 | template bool builtin_signbit(T arg) 436 | { 437 | #if HALF_ENABLE_CPP11_CMATH 438 | return std::signbit(arg); 439 | #else 440 | return arg < T() || (arg == T() && T(1)/arg < T()); // fallback: for a zero argument the sign is taken from the sign of 1/arg 441 | #endif 442 | } 443 | 444 | /// \} 445 | /// \name Conversion 446 | /// \{ 447 | 448 | /// Convert IEEE single-precision to half-precision. 449 | /// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). 450 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding 451 | /// \param value single-precision value 452 | /// \return binary representation of half-precision value 453 | template uint16 float2half_impl(float value, true_type) 454 | { 455 | typedef bits::type uint32; 456 | uint32 bits;// = *reinterpret_cast(&value); //violating strict aliasing! 
457 | std::memcpy(&bits, &value, sizeof(float)); 458 | /* uint16 hbits = (bits>>16) & 0x8000; 459 | bits &= 0x7FFFFFFF; 460 | int exp = bits >> 23; 461 | if(exp == 255) 462 | return hbits | 0x7C00 | (0x3FF&-static_cast((bits&0x7FFFFF)!=0)); 463 | if(exp > 142) 464 | { 465 | if(R == std::round_toward_infinity) 466 | return hbits | 0x7C00 - (hbits>>15); 467 | if(R == std::round_toward_neg_infinity) 468 | return hbits | 0x7BFF + (hbits>>15); 469 | return hbits | 0x7BFF + (R!=std::round_toward_zero); 470 | } 471 | int g, s; 472 | if(exp > 112) 473 | { 474 | g = (bits>>12) & 1; 475 | s = (bits&0xFFF) != 0; 476 | hbits |= ((exp-112)<<10) | ((bits>>13)&0x3FF); 477 | } 478 | else if(exp > 101) 479 | { 480 | int i = 125 - exp; 481 | bits = (bits&0x7FFFFF) | 0x800000; 482 | g = (bits>>i) & 1; 483 | s = (bits&((1L<> (i+1); 485 | } 486 | else 487 | { 488 | g = 0; 489 | s = bits != 0; 490 | } 491 | if(R == std::round_to_nearest) 492 | #if HALF_ROUND_TIES_TO_EVEN 493 | hbits += g & (s|hbits); 494 | #else 495 | hbits += g; 496 | #endif 497 | else if(R == std::round_toward_infinity) 498 | hbits += ~(hbits>>15) & (s|g); 499 | else if(R == std::round_toward_neg_infinity) 500 | hbits += (hbits>>15) & (g|s); 501 | */ static const uint16 base_table[512] = { 502 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 503 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 504 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 505 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 506 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 507 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 508 | 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 509 | 0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00, 510 | 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00, 511 | 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 512 | 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 513 | 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 514 | 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 515 | 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 516 | 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 517 | 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 518 | 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 519 | 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 520 | 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 521 | 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 522 | 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 523 | 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 524 | 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 525 | 0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, 526 | 0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFC00, 527 | 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 528 | 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 529 | 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 530 | 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 531 | 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 532 | 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 533 | 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00 }; 534 | static const unsigned char shift_table[512] = { 535 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 536 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 537 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 538 | 24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 539 | 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 540 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 541 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 542 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13, 543 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 544 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 545 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 546 | 24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 547 | 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 548 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 549 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 550 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13 }; 551 | uint16 hbits = base_table[bits>>23] + static_cast((bits&0x7FFFFF)>>shift_table[bits>>23]); 552 | if(R == std::round_to_nearest) 553 | hbits += (((bits&0x7FFFFF)>>(shift_table[bits>>23]-1))|(((bits>>23)&0xFF)==102)) & 
((hbits&0x7C00)!=0x7C00) 554 | #if HALF_ROUND_TIES_TO_EVEN 555 | & (((((static_cast(1)<<(shift_table[bits>>23]-1))-1)&bits)!=0)|hbits) 556 | #endif 557 | ; 558 | else if(R == std::round_toward_zero) 559 | hbits -= ((hbits&0x7FFF)==0x7C00) & ~shift_table[bits>>23]; 560 | else if(R == std::round_toward_infinity) 561 | hbits += ((((bits&0x7FFFFF&((static_cast(1)<<(shift_table[bits>>23]))-1))!=0)|(((bits>>23)<=102)& 562 | ((bits>>23)!=0)))&(hbits<0x7C00)) - ((hbits==0xFC00)&((bits>>23)!=511)); 563 | else if(R == std::round_toward_neg_infinity) 564 | hbits += ((((bits&0x7FFFFF&((static_cast(1)<<(shift_table[bits>>23]))-1))!=0)|(((bits>>23)<=358)& 565 | ((bits>>23)!=256)))&(hbits<0xFC00)&(hbits>>15)) - ((hbits==0x7C00)&((bits>>23)!=255)); 566 | return hbits; 567 | } 568 | 569 | /// Convert IEEE double-precision to half-precision. 570 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding 571 | /// \param value double-precision value 572 | /// \return binary representation of half-precision value 573 | template uint16 float2half_impl(double value, true_type) 574 | { 575 | typedef bits::type uint32; 576 | typedef bits::type uint64; 577 | uint64 bits;// = *reinterpret_cast(&value); //violating strict aliasing! 
578 | std::memcpy(&bits, &value, sizeof(double)); 579 | uint32 hi = bits >> 32, lo = bits & 0xFFFFFFFF; 580 | uint16 hbits = (hi>>16) & 0x8000; 581 | hi &= 0x7FFFFFFF; 582 | int exp = hi >> 20; 583 | if(exp == 2047) 584 | return hbits | 0x7C00 | (0x3FF&-static_cast((bits&0xFFFFFFFFFFFFF)!=0)); 585 | if(exp > 1038) 586 | { 587 | if(R == std::round_toward_infinity) 588 | return hbits | 0x7C00 - (hbits>>15); 589 | if(R == std::round_toward_neg_infinity) 590 | return hbits | 0x7BFF + (hbits>>15); 591 | return hbits | 0x7BFF + (R!=std::round_toward_zero); 592 | } 593 | int g, s = lo != 0; 594 | if(exp > 1008) 595 | { 596 | g = (hi>>9) & 1; 597 | s |= (hi&0x1FF) != 0; 598 | hbits |= ((exp-1008)<<10) | ((hi>>10)&0x3FF); 599 | } 600 | else if(exp > 997) 601 | { 602 | int i = 1018 - exp; 603 | hi = (hi&0xFFFFF) | 0x100000; 604 | g = (hi>>i) & 1; 605 | s |= (hi&((1L<> (i+1); 607 | } 608 | else 609 | { 610 | g = 0; 611 | s |= hi != 0; 612 | } 613 | if(R == std::round_to_nearest) 614 | #if HALF_ROUND_TIES_TO_EVEN 615 | hbits += g & (s|hbits); 616 | #else 617 | hbits += g; 618 | #endif 619 | else if(R == std::round_toward_infinity) 620 | hbits += ~(hbits>>15) & (s|g); 621 | else if(R == std::round_toward_neg_infinity) 622 | hbits += (hbits>>15) & (g|s); 623 | return hbits; 624 | } 625 | 626 | /// Convert non-IEEE floating point to half-precision. 627 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding 628 | /// \tparam T source type (builtin floating point type) 629 | /// \param value floating point value 630 | /// \return binary representation of half-precision value 631 | template uint16 float2half_impl(T value, ...) 
632 | { 633 | uint16 hbits = static_cast(builtin_signbit(value)) << 15; 634 | if(value == T()) 635 | return hbits; 636 | if(builtin_isnan(value)) 637 | return hbits | 0x7FFF; 638 | if(builtin_isinf(value)) 639 | return hbits | 0x7C00; 640 | int exp; 641 | std::frexp(value, &exp); 642 | if(exp > 16) 643 | { 644 | if(R == std::round_toward_infinity) 645 | return hbits | 0x7C00 - (hbits>>15); 646 | else if(R == std::round_toward_neg_infinity) 647 | return hbits | 0x7BFF + (hbits>>15); 648 | return hbits | 0x7BFF + (R!=std::round_toward_zero); 649 | } 650 | if(exp < -13) 651 | value = std::ldexp(value, 24); 652 | else 653 | { 654 | value = std::ldexp(value, 11-exp); 655 | hbits |= ((exp+13)<<10); 656 | } 657 | T ival, frac = std::modf(value, &ival); 658 | hbits += static_cast(std::abs(static_cast(ival))); 659 | if(R == std::round_to_nearest) 660 | { 661 | frac = std::abs(frac); 662 | #if HALF_ROUND_TIES_TO_EVEN 663 | hbits += (frac>T(0.5)) | ((frac==T(0.5))&hbits); 664 | #else 665 | hbits += frac >= T(0.5); 666 | #endif 667 | } 668 | else if(R == std::round_toward_infinity) 669 | hbits += frac > T(); 670 | else if(R == std::round_toward_neg_infinity) 671 | hbits += frac < T(); 672 | return hbits; 673 | } 674 | 675 | /// Convert floating point to half-precision. 676 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding 677 | /// \tparam T source type (builtin floating point type) 678 | /// \param value floating point value 679 | /// \return binary representation of half-precision value 680 | template uint16 float2half(T value) 681 | { 682 | return float2half_impl(value, bool_type::is_iec559&&sizeof(typename bits::type)==sizeof(T)>()); 683 | } 684 | 685 | /// Convert integer to half-precision floating point. 
686 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding 687 | /// \tparam S `true` if value negative, `false` else 688 | /// \tparam T type to convert (builtin integer type) 689 | /// \param value non-negative integral value 690 | /// \return binary representation of half-precision value 691 | template uint16 int2half_impl(T value) 692 | { 693 | #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS 694 | static_assert(std::is_integral::value, "int to half conversion only supports builtin integer types"); 695 | #endif 696 | if(S) 697 | value = -value; 698 | uint16 bits = S << 15; 699 | if(value > 0xFFFF) 700 | { 701 | if(R == std::round_toward_infinity) 702 | bits |= 0x7C00 - S; 703 | else if(R == std::round_toward_neg_infinity) 704 | bits |= 0x7BFF + S; 705 | else 706 | bits |= 0x7BFF + (R!=std::round_toward_zero); 707 | } 708 | else if(value) 709 | { 710 | unsigned int m = value, exp = 24; 711 | for(; m<0x400; m<<=1,--exp) ; 712 | for(; m>0x7FF; m>>=1,++exp) ; 713 | bits |= (exp<<10) + m; 714 | if(exp > 24) 715 | { 716 | if(R == std::round_to_nearest) 717 | bits += (value>>(exp-25)) & 1 718 | #if HALF_ROUND_TIES_TO_EVEN 719 | & (((((1<<(exp-25))-1)&value)!=0)|bits) 720 | #endif 721 | ; 722 | else if(R == std::round_toward_infinity) 723 | bits += ((value&((1<<(exp-24))-1))!=0) & !S; 724 | else if(R == std::round_toward_neg_infinity) 725 | bits += ((value&((1<<(exp-24))-1))!=0) & S; 726 | } 727 | } 728 | return bits; 729 | } 730 | 731 | /// Convert integer to half-precision floating point. 732 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding 733 | /// \tparam T type to convert (builtin integer type) 734 | /// \param value integral value 735 | /// \return binary representation of half-precision value 736 | template uint16 int2half(T value) 737 | { 738 | return (value<0) ? 
int2half_impl(value) : int2half_impl(value); 739 | } 740 | 741 | /// Convert half-precision to IEEE single-precision. 742 | /// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). 743 | /// \param value binary representation of half-precision value 744 | /// \return single-precision value 745 | inline float half2float_impl(uint16 value, float, true_type) 746 | { 747 | typedef bits::type uint32; 748 | /* uint32 bits = static_cast(value&0x8000) << 16; 749 | int abs = value & 0x7FFF; 750 | if(abs) 751 | { 752 | bits |= 0x38000000 << static_cast(abs>=0x7C00); 753 | for(; abs<0x400; abs<<=1,bits-=0x800000) ; 754 | bits += static_cast(abs) << 13; 755 | } 756 | */ static const uint32 mantissa_table[2048] = { 757 | 0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000, 758 | 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000, 759 | 0x36000000, 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, 760 | 0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, 0x36700000, 0x36740000, 0x36780000, 0x367C0000, 761 | 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, 0x369C0000, 0x369E0000, 762 | 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000, 763 | 0x36C00000, 0x36C20000, 0x36C40000, 
0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, 764 | 0x36E00000, 0x36E20000, 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000, 765 | 0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000, 766 | 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000, 767 | 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000, 768 | 0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, 769 | 0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, 770 | 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, 0x375E0000, 0x375F0000, 771 | 0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000, 772 | 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, 773 | 0x37800000, 0x37808000, 0x37810000, 0x37818000, 
0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, 774 | 0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000, 775 | 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000, 776 | 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000, 777 | 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, 778 | 0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, 779 | 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, 0x37B70000, 0x37B78000, 780 | 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, 781 | 0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, 782 | 0x37C80000, 0x37C88000, 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000, 783 | 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, 0x37D20000, 
0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000, 784 | 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000, 785 | 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000, 786 | 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, 787 | 0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, 788 | 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, 0x37FF0000, 0x37FF8000, 789 | 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, 790 | 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000, 791 | 0x38080000, 0x38084000, 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000, 792 | 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000, 793 | 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, 
0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, 0x3813C000, 794 | 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000, 795 | 0x38180000, 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, 796 | 0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, 797 | 0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, 0x38238000, 0x3823C000, 798 | 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, 799 | 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, 800 | 0x382C0000, 0x382C4000, 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000, 801 | 0x38300000, 0x38304000, 0x38308000, 0x3830C000, 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000, 802 | 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, 0x3837C000, 803 | 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 
0x3839C000, 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000, 804 | 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, 805 | 0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, 0x38430000, 0x38434000, 0x38438000, 0x3843C000, 806 | 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, 0x38478000, 0x3847C000, 807 | 0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, 808 | 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, 809 | 0x38500000, 0x38504000, 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000, 810 | 0x38540000, 0x38544000, 0x38548000, 0x3854C000, 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000, 811 | 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000, 812 | 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000, 813 | 0x38600000, 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 
0x38620000, 0x38624000, 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, 814 | 0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, 0x38670000, 0x38674000, 0x38678000, 0x3867C000, 815 | 0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, 0x386B8000, 0x386BC000, 816 | 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, 817 | 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000, 818 | 0x38740000, 0x38744000, 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000, 819 | 0x38780000, 0x38784000, 0x38788000, 0x3878C000, 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000, 820 | 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000, 821 | 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000, 822 | 0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, 823 | 0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 
0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, 824 | 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, 0x3807C000, 0x3807E000, 825 | 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, 826 | 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, 827 | 0x380C0000, 0x380C2000, 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000, 828 | 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000, 829 | 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000, 830 | 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000, 831 | 0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, 832 | 0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, 833 | 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 
0x38194000, 0x38196000, 0x38198000, 0x3819A000, 0x3819C000, 0x3819E000, 834 | 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, 835 | 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, 836 | 0x381E0000, 0x381E2000, 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000, 837 | 0x38200000, 0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000, 838 | 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000, 839 | 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000, 840 | 0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, 841 | 0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, 842 | 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, 0x382BC000, 0x382BE000, 843 | 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 
0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, 844 | 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, 845 | 0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000, 846 | 0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000, 847 | 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000, 848 | 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000, 849 | 0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, 850 | 0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, 851 | 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, 0x383DC000, 0x383DE000, 852 | 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, 853 | 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, 
0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, 854 | 0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000, 855 | 0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000, 856 | 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000, 857 | 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000, 858 | 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, 859 | 0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, 860 | 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, 0x384FC000, 0x384FE000, 861 | 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, 862 | 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, 863 | 0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 
0x3855A000, 0x3855C000, 0x3855E000, 864 | 0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000, 865 | 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000, 866 | 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000, 867 | 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, 868 | 0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, 869 | 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, 0x3861C000, 0x3861E000, 870 | 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, 871 | 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, 872 | 0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000, 873 | 0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, 0x3869A000, 
0x3869C000, 0x3869E000, 874 | 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000, 875 | 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000, 876 | 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, 877 | 0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, 878 | 0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, 0x3873C000, 0x3873E000, 879 | 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, 880 | 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, 881 | 0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000, 882 | 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000, 883 | 0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, 
0x387DE000, 884 | 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000 }; 885 | static const uint32 exponent_table[64] = { 886 | 0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000, 887 | 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000, 888 | 0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, 889 | 0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000 }; 890 | static const unsigned short offset_table[64] = { 891 | 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 892 | 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 }; 893 | uint32 bits = mantissa_table[offset_table[value>>10]+(value&0x3FF)] + exponent_table[value>>10]; 894 | // return *reinterpret_cast(&bits); //violating strict aliasing! 895 | float out; 896 | std::memcpy(&out, &bits, sizeof(float)); 897 | return out; 898 | } 899 | 900 | /// Convert half-precision to IEEE double-precision. 
901 | /// \param value binary representation of half-precision value 902 | /// \return double-precision value 903 | inline double half2float_impl(uint16 value, double, true_type) 904 | { 905 | typedef bits::type uint32; 906 | typedef bits::type uint64; 907 | uint32 hi = static_cast(value&0x8000) << 16; 908 | int abs = value & 0x7FFF; 909 | if(abs) 910 | { 911 | hi |= 0x3F000000 << static_cast(abs>=0x7C00); 912 | for(; abs<0x400; abs<<=1,hi-=0x100000) ; 913 | hi += static_cast(abs) << 10; 914 | } 915 | uint64 bits = static_cast(hi) << 32; 916 | // return *reinterpret_cast(&bits); //violating strict aliasing! 917 | double out; 918 | std::memcpy(&out, &bits, sizeof(double)); 919 | return out; 920 | } 921 | 922 | /// Convert half-precision to non-IEEE floating point. 923 | /// \tparam T type to convert to (builtin integer type) 924 | /// \param value binary representation of half-precision value 925 | /// \return floating point value 926 | template T half2float_impl(uint16 value, T, ...) 927 | { 928 | T out; 929 | int abs = value & 0x7FFF; 930 | if(abs > 0x7C00) 931 | out = std::numeric_limits::has_quiet_NaN ? std::numeric_limits::quiet_NaN() : T(); 932 | else if(abs == 0x7C00) 933 | out = std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : std::numeric_limits::max(); 934 | else if(abs > 0x3FF) 935 | out = std::ldexp(static_cast((abs&0x3FF)|0x400), (abs>>10)-25); 936 | else 937 | out = std::ldexp(static_cast(abs), -24); 938 | return (value&0x8000) ? -out : out; 939 | } 940 | 941 | /// Convert half-precision to floating point. 942 | /// \tparam T type to convert to (builtin integer type) 943 | /// \param value binary representation of half-precision value 944 | /// \return floating point value 945 | template T half2float(uint16 value) 946 | { 947 | return half2float_impl(value, T(), bool_type::is_iec559&&sizeof(typename bits::type)==sizeof(T)>()); 948 | } 949 | 950 | /// Convert half-precision floating point to integer. 
951 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding 952 | /// \tparam E `true` for round to even, `false` for round away from zero 953 | /// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign bits) 954 | /// \param value binary representation of half-precision value 955 | /// \return integral value 956 | template T half2int_impl(uint16 value) 957 | { 958 | #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS 959 | static_assert(std::is_integral::value, "half to int conversion only supports builtin integer types"); 960 | #endif 961 | unsigned int e = value & 0x7FFF; 962 | if(e >= 0x7C00) 963 | return (value&0x8000) ? std::numeric_limits::min() : std::numeric_limits::max(); 964 | if(e < 0x3800) 965 | { 966 | if(R == std::round_toward_infinity) 967 | return T(~(value>>15)&(e!=0)); 968 | else if(R == std::round_toward_neg_infinity) 969 | return -T(value>0x8000); 970 | return T(); 971 | } 972 | unsigned int m = (value&0x3FF) | 0x400; 973 | e >>= 10; 974 | if(e < 25) 975 | { 976 | if(R == std::round_to_nearest) 977 | m += (1<<(24-e)) - (~(m>>(25-e))&E); 978 | else if(R == std::round_toward_infinity) 979 | m += ((value>>15)-1) & ((1<<(25-e))-1U); 980 | else if(R == std::round_toward_neg_infinity) 981 | m += -(value>>15) & ((1<<(25-e))-1U); 982 | m >>= 25 - e; 983 | } 984 | else 985 | m <<= e - 25; 986 | return (value&0x8000) ? -static_cast(m) : static_cast(m); 987 | } 988 | 989 | /// Convert half-precision floating point to integer. 
990 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding 991 | /// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign bits) 992 | /// \param value binary representation of half-precision value 993 | /// \return integral value 994 | template T half2int(uint16 value) { return half2int_impl(value); } 995 | 996 | /// Convert half-precision floating point to integer using round-to-nearest-away-from-zero. 997 | /// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign bits) 998 | /// \param value binary representation of half-precision value 999 | /// \return integral value 1000 | template T half2int_up(uint16 value) { return half2int_impl(value); } 1001 | 1002 | /// Round half-precision number to nearest integer value. 1003 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding 1004 | /// \tparam E `true` for round to even, `false` for round away from zero 1005 | /// \param value binary representation of half-precision value 1006 | /// \return half-precision bits for nearest integral value 1007 | template uint16 round_half_impl(uint16 value) 1008 | { 1009 | unsigned int e = value & 0x7FFF; 1010 | uint16 result = value; 1011 | if(e < 0x3C00) 1012 | { 1013 | result &= 0x8000; 1014 | if(R == std::round_to_nearest) 1015 | result |= 0x3C00U & -(e>=(0x3800+E)); 1016 | else if(R == std::round_toward_infinity) 1017 | result |= 0x3C00U & -(~(value>>15)&(e!=0)); 1018 | else if(R == std::round_toward_neg_infinity) 1019 | result |= 0x3C00U & -(value>0x8000); 1020 | } 1021 | else if(e < 0x6400) 1022 | { 1023 | e = 25 - (e>>10); 1024 | unsigned int mask = (1<>e)&E); 1027 | else if(R == std::round_toward_infinity) 1028 | result += mask & ((value>>15)-1); 1029 | else if(R == std::round_toward_neg_infinity) 1030 | result += mask & -(value>>15); 1031 | result &= ~mask; 1032 | } 1033 | return result; 1034 | } 
1035 | 1036 | /// Round half-precision number to nearest integer value. 1037 | /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding 1038 | /// \param value binary representation of half-precision value 1039 | /// \return half-precision bits for nearest integral value 1040 | template uint16 round_half(uint16 value) { return round_half_impl(value); } 1041 | 1042 | /// Round half-precision number to nearest integer value using round-to-nearest-away-from-zero. 1043 | /// \param value binary representation of half-precision value 1044 | /// \return half-precision bits for nearest integral value 1045 | inline uint16 round_half_up(uint16 value) { return round_half_impl(value); } 1046 | /// \} 1047 | 1048 | struct functions; 1049 | template struct unary_specialized; 1050 | template struct binary_specialized; 1051 | template struct half_caster; 1052 | } 1053 | 1054 | /// Half-precision floating point type. 1055 | /// This class implements an IEEE-conformant half-precision floating point type with the usual arithmetic operators and 1056 | /// conversions. It is implicitly convertible to single-precision floating point, which makes artihmetic expressions and 1057 | /// functions with mixed-type operands to be of the most precise operand type. Additionally all arithmetic operations 1058 | /// (and many mathematical functions) are carried out in single-precision internally. All conversions from single- to 1059 | /// half-precision are done using the library's default rounding mode, but temporary results inside chained arithmetic 1060 | /// expressions are kept in single-precision as long as possible (while of course still maintaining a strong half-precision type). 1061 | /// 1062 | /// According to the C++98/03 definition, the half type is not a POD type. 
But according to C++11's less strict and 1063 | /// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which 1064 | /// means it can be standard-conformantly copied using raw binary copies. But in this context some more words about the 1065 | /// actual size of the type. Although the half is representing an IEEE 16-bit type, it does not neccessarily have to be of 1066 | /// exactly 16-bits size. But on any reasonable implementation the actual binary representation of this type will most 1067 | /// probably not ivolve any additional "magic" or padding beyond the simple binary representation of the underlying 16-bit 1068 | /// IEEE number, even if not strictly guaranteed by the standard. But even then it only has an actual size of 16 bits if 1069 | /// your C++ implementation supports an unsigned integer type of exactly 16 bits width. But this should be the case on 1070 | /// nearly any reasonable platform. 1071 | /// 1072 | /// So if your C++ implementation is not totally exotic or imposes special alignment requirements, it is a reasonable 1073 | /// assumption that the data of a half is just comprised of the 2 bytes of the underlying IEEE representation. 1074 | class half 1075 | { 1076 | friend struct detail::functions; 1077 | friend struct detail::unary_specialized; 1078 | friend struct detail::binary_specialized; 1079 | template friend struct detail::half_caster; 1080 | friend class std::numeric_limits; 1081 | #if HALF_ENABLE_CPP11_HASH 1082 | friend struct std::hash; 1083 | #endif 1084 | #if HALF_ENABLE_CPP11_USER_LITERALS 1085 | friend half literal::operator""_h(long double); 1086 | #endif 1087 | 1088 | public: 1089 | /// Default constructor. 1090 | /// This initializes the half to 0. Although this does not match the builtin types' default-initialization semantics 1091 | /// and may be less efficient than no initialization, it is needed to provide proper value-initialization semantics. 
1092 | HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {} 1093 | 1094 | /// Copy constructor. 1095 | /// \tparam T type of concrete half expression 1096 | /// \param rhs half expression to copy from 1097 | half(detail::expr rhs) : data_(detail::float2half(static_cast(rhs))) {} 1098 | 1099 | /// Conversion constructor. 1100 | /// \param rhs float to convert 1101 | explicit half(float rhs) : data_(detail::float2half(rhs)) {} 1102 | 1103 | /// Conversion to single-precision. 1104 | /// \return single precision value representing expression value 1105 | operator float() const { return detail::half2float(data_); } 1106 | 1107 | /// Assignment operator. 1108 | /// \tparam T type of concrete half expression 1109 | /// \param rhs half expression to copy from 1110 | /// \return reference to this half 1111 | half& operator=(detail::expr rhs) { return *this = static_cast(rhs); } 1112 | 1113 | /// Arithmetic assignment. 1114 | /// \tparam T type of concrete half expression 1115 | /// \param rhs half expression to add 1116 | /// \return reference to this half 1117 | template typename detail::enable::type operator+=(T rhs) { return *this += static_cast(rhs); } 1118 | 1119 | /// Arithmetic assignment. 1120 | /// \tparam T type of concrete half expression 1121 | /// \param rhs half expression to subtract 1122 | /// \return reference to this half 1123 | template typename detail::enable::type operator-=(T rhs) { return *this -= static_cast(rhs); } 1124 | 1125 | /// Arithmetic assignment. 1126 | /// \tparam T type of concrete half expression 1127 | /// \param rhs half expression to multiply with 1128 | /// \return reference to this half 1129 | template typename detail::enable::type operator*=(T rhs) { return *this *= static_cast(rhs); } 1130 | 1131 | /// Arithmetic assignment. 
1132 | /// \tparam T type of concrete half expression 1133 | /// \param rhs half expression to divide by 1134 | /// \return reference to this half 1135 | template typename detail::enable::type operator/=(T rhs) { return *this /= static_cast(rhs); } 1136 | 1137 | /// Assignment operator. 1138 | /// \param rhs single-precision value to copy from 1139 | /// \return reference to this half 1140 | half& operator=(float rhs) { data_ = detail::float2half(rhs); return *this; } 1141 | 1142 | /// Arithmetic assignment. 1143 | /// \param rhs single-precision value to add 1144 | /// \return reference to this half 1145 | half& operator+=(float rhs) { data_ = detail::float2half(detail::half2float(data_)+rhs); return *this; } 1146 | 1147 | /// Arithmetic assignment. 1148 | /// \param rhs single-precision value to subtract 1149 | /// \return reference to this half 1150 | half& operator-=(float rhs) { data_ = detail::float2half(detail::half2float(data_)-rhs); return *this; } 1151 | 1152 | /// Arithmetic assignment. 1153 | /// \param rhs single-precision value to multiply with 1154 | /// \return reference to this half 1155 | half& operator*=(float rhs) { data_ = detail::float2half(detail::half2float(data_)*rhs); return *this; } 1156 | 1157 | /// Arithmetic assignment. 1158 | /// \param rhs single-precision value to divide by 1159 | /// \return reference to this half 1160 | half& operator/=(float rhs) { data_ = detail::float2half(detail::half2float(data_)/rhs); return *this; } 1161 | 1162 | /// Prefix increment. 1163 | /// \return incremented half value 1164 | half& operator++() { return *this += 1.0f; } 1165 | 1166 | /// Prefix decrement. 1167 | /// \return decremented half value 1168 | half& operator--() { return *this -= 1.0f; } 1169 | 1170 | /// Postfix increment. 1171 | /// \return non-incremented half value 1172 | half operator++(int) { half out(*this); ++*this; return out; } 1173 | 1174 | /// Postfix decrement. 
1175 | /// \return non-decremented half value 1176 | half operator--(int) { half out(*this); --*this; return out; } 1177 | 1178 | private: 1179 | /// Rounding mode to use 1180 | static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE); 1181 | 1182 | /// Constructor. 1183 | /// \param bits binary representation to set half to 1184 | HALF_CONSTEXPR half(detail::binary_t, detail::uint16 bits) HALF_NOEXCEPT : data_(bits) {} 1185 | 1186 | /// Internal binary representation 1187 | detail::uint16 data_; 1188 | }; 1189 | 1190 | #if HALF_ENABLE_CPP11_USER_LITERALS 1191 | namespace literal 1192 | { 1193 | /// Half literal. 1194 | /// While this returns an actual half-precision value, half literals can unfortunately not be constant expressions due 1195 | /// to rather involved conversions. 1196 | /// \param value literal value 1197 | /// \return half with given value (if representable) 1198 | inline half operator""_h(long double value) { return half(detail::binary, detail::float2half(value)); } 1199 | } 1200 | #endif 1201 | 1202 | namespace detail 1203 | { 1204 | /// Wrapper implementing unspecialized half-precision functions. 1205 | struct functions 1206 | { 1207 | /// Addition implementation. 1208 | /// \param x first operand 1209 | /// \param y second operand 1210 | /// \return Half-precision sum stored in single-precision 1211 | static expr plus(float x, float y) { return expr(x+y); } 1212 | 1213 | /// Subtraction implementation. 1214 | /// \param x first operand 1215 | /// \param y second operand 1216 | /// \return Half-precision difference stored in single-precision 1217 | static expr minus(float x, float y) { return expr(x-y); } 1218 | 1219 | /// Multiplication implementation. 1220 | /// \param x first operand 1221 | /// \param y second operand 1222 | /// \return Half-precision product stored in single-precision 1223 | static expr multiplies(float x, float y) { return expr(x*y); } 1224 | 1225 | /// Division implementation. 
1226 | /// \param x first operand 1227 | /// \param y second operand 1228 | /// \return Half-precision quotient stored in single-precision 1229 | static expr divides(float x, float y) { return expr(x/y); } 1230 | 1231 | /// Output implementation. 1232 | /// \param out stream to write to 1233 | /// \param arg value to write 1234 | /// \return reference to stream 1235 | template static std::basic_ostream& write(std::basic_ostream &out, float arg) { return out << arg; } 1236 | 1237 | /// Input implementation. 1238 | /// \param in stream to read from 1239 | /// \param arg half to read into 1240 | /// \return reference to stream 1241 | template static std::basic_istream& read(std::basic_istream &in, half &arg) 1242 | { 1243 | float f; 1244 | if(in >> f) 1245 | arg = f; 1246 | return in; 1247 | } 1248 | 1249 | /// Modulo implementation. 1250 | /// \param x first operand 1251 | /// \param y second operand 1252 | /// \return Half-precision division remainder stored in single-precision 1253 | static expr fmod(float x, float y) { return expr(std::fmod(x, y)); } 1254 | 1255 | /// Remainder implementation. 1256 | /// \param x first operand 1257 | /// \param y second operand 1258 | /// \return Half-precision division remainder stored in single-precision 1259 | static expr remainder(float x, float y) 1260 | { 1261 | #if HALF_ENABLE_CPP11_CMATH 1262 | return expr(std::remainder(x, y)); 1263 | #else 1264 | if(builtin_isnan(x) || builtin_isnan(y)) 1265 | return expr(std::numeric_limits::quiet_NaN()); 1266 | float ax = std::fabs(x), ay = std::fabs(y); 1267 | if(ax >= 65536.0f || ay < std::ldexp(1.0f, -24)) 1268 | return expr(std::numeric_limits::quiet_NaN()); 1269 | if(ay >= 65536.0f) 1270 | return expr(x); 1271 | if(ax == ay) 1272 | return expr(builtin_signbit(x) ? -0.0f : 0.0f); 1273 | ax = std::fmod(ax, ay+ay); 1274 | float y2 = 0.5f * ay; 1275 | if(ax > y2) 1276 | { 1277 | ax -= ay; 1278 | if(ax >= y2) 1279 | ax -= ay; 1280 | } 1281 | return expr(builtin_signbit(x) ? 
-ax : ax); 1282 | #endif 1283 | } 1284 | 1285 | /// Remainder implementation. 1286 | /// \param x first operand 1287 | /// \param y second operand 1288 | /// \param quo address to store quotient bits at 1289 | /// \return Half-precision division remainder stored in single-precision 1290 | static expr remquo(float x, float y, int *quo) 1291 | { 1292 | #if HALF_ENABLE_CPP11_CMATH 1293 | return expr(std::remquo(x, y, quo)); 1294 | #else 1295 | if(builtin_isnan(x) || builtin_isnan(y)) 1296 | return expr(std::numeric_limits::quiet_NaN()); 1297 | bool sign = builtin_signbit(x), qsign = static_cast(sign^builtin_signbit(y)); 1298 | float ax = std::fabs(x), ay = std::fabs(y); 1299 | if(ax >= 65536.0f || ay < std::ldexp(1.0f, -24)) 1300 | return expr(std::numeric_limits::quiet_NaN()); 1301 | if(ay >= 65536.0f) 1302 | return expr(x); 1303 | if(ax == ay) 1304 | return *quo = qsign ? -1 : 1, expr(sign ? -0.0f : 0.0f); 1305 | ax = std::fmod(ax, 8.0f*ay); 1306 | int cquo = 0; 1307 | if(ax >= 4.0f * ay) 1308 | { 1309 | ax -= 4.0f * ay; 1310 | cquo += 4; 1311 | } 1312 | if(ax >= 2.0f * ay) 1313 | { 1314 | ax -= 2.0f * ay; 1315 | cquo += 2; 1316 | } 1317 | float y2 = 0.5f * ay; 1318 | if(ax > y2) 1319 | { 1320 | ax -= ay; 1321 | ++cquo; 1322 | if(ax >= y2) 1323 | { 1324 | ax -= ay; 1325 | ++cquo; 1326 | } 1327 | } 1328 | return *quo = qsign ? -cquo : cquo, expr(sign ? -ax : ax); 1329 | #endif 1330 | } 1331 | 1332 | /// Positive difference implementation. 1333 | /// \param x first operand 1334 | /// \param y second operand 1335 | /// \return Positive difference stored in single-precision 1336 | static expr fdim(float x, float y) 1337 | { 1338 | #if HALF_ENABLE_CPP11_CMATH 1339 | return expr(std::fdim(x, y)); 1340 | #else 1341 | return expr((x<=y) ? 0.0f : (x-y)); 1342 | #endif 1343 | } 1344 | 1345 | /// Fused multiply-add implementation. 
1346 | /// \param x first operand 1347 | /// \param y second operand 1348 | /// \param z third operand 1349 | /// \return \a x * \a y + \a z stored in single-precision 1350 | static expr fma(float x, float y, float z) 1351 | { 1352 | #if HALF_ENABLE_CPP11_CMATH && defined(FP_FAST_FMAF) 1353 | return expr(std::fma(x, y, z)); 1354 | #else 1355 | return expr(x*y+z); 1356 | #endif 1357 | } 1358 | 1359 | /// Get NaN. 1360 | /// \return Half-precision quiet NaN 1361 | static half nanh() { return half(binary, 0x7FFF); } 1362 | 1363 | /// Exponential implementation. 1364 | /// \param arg function argument 1365 | /// \return function value stored in single-preicision 1366 | static expr exp(float arg) { return expr(std::exp(arg)); } 1367 | 1368 | /// Exponential implementation. 1369 | /// \param arg function argument 1370 | /// \return function value stored in single-preicision 1371 | static expr expm1(float arg) 1372 | { 1373 | #if HALF_ENABLE_CPP11_CMATH 1374 | return expr(std::expm1(arg)); 1375 | #else 1376 | return expr(static_cast(std::exp(static_cast(arg))-1.0)); 1377 | #endif 1378 | } 1379 | 1380 | /// Binary exponential implementation. 1381 | /// \param arg function argument 1382 | /// \return function value stored in single-preicision 1383 | static expr exp2(float arg) 1384 | { 1385 | #if HALF_ENABLE_CPP11_CMATH 1386 | return expr(std::exp2(arg)); 1387 | #else 1388 | return expr(static_cast(std::exp(arg*0.69314718055994530941723212145818))); 1389 | #endif 1390 | } 1391 | 1392 | /// Logarithm implementation. 1393 | /// \param arg function argument 1394 | /// \return function value stored in single-preicision 1395 | static expr log(float arg) { return expr(std::log(arg)); } 1396 | 1397 | /// Common logarithm implementation. 1398 | /// \param arg function argument 1399 | /// \return function value stored in single-preicision 1400 | static expr log10(float arg) { return expr(std::log10(arg)); } 1401 | 1402 | /// Logarithm implementation. 
1403 | /// \param arg function argument 1404 | /// \return function value stored in single-preicision 1405 | static expr log1p(float arg) 1406 | { 1407 | #if HALF_ENABLE_CPP11_CMATH 1408 | return expr(std::log1p(arg)); 1409 | #else 1410 | return expr(static_cast(std::log(1.0+arg))); 1411 | #endif 1412 | } 1413 | 1414 | /// Binary logarithm implementation. 1415 | /// \param arg function argument 1416 | /// \return function value stored in single-preicision 1417 | static expr log2(float arg) 1418 | { 1419 | #if HALF_ENABLE_CPP11_CMATH 1420 | return expr(std::log2(arg)); 1421 | #else 1422 | return expr(static_cast(std::log(static_cast(arg))*1.4426950408889634073599246810019)); 1423 | #endif 1424 | } 1425 | 1426 | /// Square root implementation. 1427 | /// \param arg function argument 1428 | /// \return function value stored in single-preicision 1429 | static expr sqrt(float arg) { return expr(std::sqrt(arg)); } 1430 | 1431 | /// Cubic root implementation. 1432 | /// \param arg function argument 1433 | /// \return function value stored in single-preicision 1434 | static expr cbrt(float arg) 1435 | { 1436 | #if HALF_ENABLE_CPP11_CMATH 1437 | return expr(std::cbrt(arg)); 1438 | #else 1439 | if(builtin_isnan(arg) || builtin_isinf(arg)) 1440 | return expr(arg); 1441 | return expr(builtin_signbit(arg) ? -static_cast(std::pow(-static_cast(arg), 1.0/3.0)) : 1442 | static_cast(std::pow(static_cast(arg), 1.0/3.0))); 1443 | #endif 1444 | } 1445 | 1446 | /// Hypotenuse implementation. 1447 | /// \param x first argument 1448 | /// \param y second argument 1449 | /// \return function value stored in single-preicision 1450 | static expr hypot(float x, float y) 1451 | { 1452 | #if HALF_ENABLE_CPP11_CMATH 1453 | return expr(std::hypot(x, y)); 1454 | #else 1455 | return expr((builtin_isinf(x) || builtin_isinf(y)) ? std::numeric_limits::infinity() : 1456 | static_cast(std::sqrt(static_cast(x)*x+static_cast(y)*y))); 1457 | #endif 1458 | } 1459 | 1460 | /// Power implementation. 
1461 | /// \param base value to exponentiate 1462 | /// \param exp power to expontiate to 1463 | /// \return function value stored in single-preicision 1464 | static expr pow(float base, float exp) { return expr(std::pow(base, exp)); } 1465 | 1466 | /// Sine implementation. 1467 | /// \param arg function argument 1468 | /// \return function value stored in single-preicision 1469 | static expr sin(float arg) { return expr(std::sin(arg)); } 1470 | 1471 | /// Cosine implementation. 1472 | /// \param arg function argument 1473 | /// \return function value stored in single-preicision 1474 | static expr cos(float arg) { return expr(std::cos(arg)); } 1475 | 1476 | /// Tan implementation. 1477 | /// \param arg function argument 1478 | /// \return function value stored in single-preicision 1479 | static expr tan(float arg) { return expr(std::tan(arg)); } 1480 | 1481 | /// Arc sine implementation. 1482 | /// \param arg function argument 1483 | /// \return function value stored in single-preicision 1484 | static expr asin(float arg) { return expr(std::asin(arg)); } 1485 | 1486 | /// Arc cosine implementation. 1487 | /// \param arg function argument 1488 | /// \return function value stored in single-preicision 1489 | static expr acos(float arg) { return expr(std::acos(arg)); } 1490 | 1491 | /// Arc tangent implementation. 1492 | /// \param arg function argument 1493 | /// \return function value stored in single-preicision 1494 | static expr atan(float arg) { return expr(std::atan(arg)); } 1495 | 1496 | /// Arc tangent implementation. 1497 | /// \param x first argument 1498 | /// \param y second argument 1499 | /// \return function value stored in single-preicision 1500 | static expr atan2(float x, float y) { return expr(std::atan2(x, y)); } 1501 | 1502 | /// Hyperbolic sine implementation. 
1503 | /// \param arg function argument 1504 | /// \return function value stored in single-preicision 1505 | static expr sinh(float arg) { return expr(std::sinh(arg)); } 1506 | 1507 | /// Hyperbolic cosine implementation. 1508 | /// \param arg function argument 1509 | /// \return function value stored in single-preicision 1510 | static expr cosh(float arg) { return expr(std::cosh(arg)); } 1511 | 1512 | /// Hyperbolic tangent implementation. 1513 | /// \param arg function argument 1514 | /// \return function value stored in single-preicision 1515 | static expr tanh(float arg) { return expr(std::tanh(arg)); } 1516 | 1517 | /// Hyperbolic area sine implementation. 1518 | /// \param arg function argument 1519 | /// \return function value stored in single-preicision 1520 | static expr asinh(float arg) 1521 | { 1522 | #if HALF_ENABLE_CPP11_CMATH 1523 | return expr(std::asinh(arg)); 1524 | #else 1525 | return expr((arg==-std::numeric_limits::infinity()) ? arg : static_cast(std::log(arg+std::sqrt(arg*arg+1.0)))); 1526 | #endif 1527 | } 1528 | 1529 | /// Hyperbolic area cosine implementation. 1530 | /// \param arg function argument 1531 | /// \return function value stored in single-preicision 1532 | static expr acosh(float arg) 1533 | { 1534 | #if HALF_ENABLE_CPP11_CMATH 1535 | return expr(std::acosh(arg)); 1536 | #else 1537 | return expr((arg<-1.0f) ? std::numeric_limits::quiet_NaN() : static_cast(std::log(arg+std::sqrt(arg*arg-1.0)))); 1538 | #endif 1539 | } 1540 | 1541 | /// Hyperbolic area tangent implementation. 1542 | /// \param arg function argument 1543 | /// \return function value stored in single-preicision 1544 | static expr atanh(float arg) 1545 | { 1546 | #if HALF_ENABLE_CPP11_CMATH 1547 | return expr(std::atanh(arg)); 1548 | #else 1549 | return expr(static_cast(0.5*std::log((1.0+arg)/(1.0-arg)))); 1550 | #endif 1551 | } 1552 | 1553 | /// Error function implementation. 
1554 | /// \param arg function argument 1555 | /// \return function value stored in single-preicision 1556 | static expr erf(float arg) 1557 | { 1558 | #if HALF_ENABLE_CPP11_CMATH 1559 | return expr(std::erf(arg)); 1560 | #else 1561 | return expr(static_cast(erf(static_cast(arg)))); 1562 | #endif 1563 | } 1564 | 1565 | /// Complementary implementation. 1566 | /// \param arg function argument 1567 | /// \return function value stored in single-preicision 1568 | static expr erfc(float arg) 1569 | { 1570 | #if HALF_ENABLE_CPP11_CMATH 1571 | return expr(std::erfc(arg)); 1572 | #else 1573 | return expr(static_cast(1.0-erf(static_cast(arg)))); 1574 | #endif 1575 | } 1576 | 1577 | /// Gamma logarithm implementation. 1578 | /// \param arg function argument 1579 | /// \return function value stored in single-preicision 1580 | static expr lgamma(float arg) 1581 | { 1582 | #if HALF_ENABLE_CPP11_CMATH 1583 | return expr(std::lgamma(arg)); 1584 | #else 1585 | if(builtin_isinf(arg)) 1586 | return expr(std::numeric_limits::infinity()); 1587 | if(arg < 0.0f) 1588 | { 1589 | float i, f = std::modf(-arg, &i); 1590 | if(f == 0.0f) 1591 | return expr(std::numeric_limits::infinity()); 1592 | return expr(static_cast(1.1447298858494001741434273513531- 1593 | std::log(std::abs(std::sin(3.1415926535897932384626433832795*f)))-lgamma(1.0-arg))); 1594 | } 1595 | return expr(static_cast(lgamma(static_cast(arg)))); 1596 | #endif 1597 | } 1598 | 1599 | /// Gamma implementation. 1600 | /// \param arg function argument 1601 | /// \return function value stored in single-preicision 1602 | static expr tgamma(float arg) 1603 | { 1604 | #if HALF_ENABLE_CPP11_CMATH 1605 | return expr(std::tgamma(arg)); 1606 | #else 1607 | if(arg == 0.0f) 1608 | return builtin_signbit(arg) ? 
expr(-std::numeric_limits::infinity()) : expr(std::numeric_limits::infinity()); 1609 | if(arg < 0.0f) 1610 | { 1611 | float i, f = std::modf(-arg, &i); 1612 | if(f == 0.0f) 1613 | return expr(std::numeric_limits::quiet_NaN()); 1614 | double value = 3.1415926535897932384626433832795 / (std::sin(3.1415926535897932384626433832795*f)*std::exp(lgamma(1.0-arg))); 1615 | return expr(static_cast((std::fmod(i, 2.0f)==0.0f) ? -value : value)); 1616 | } 1617 | if(builtin_isinf(arg)) 1618 | return expr(arg); 1619 | return expr(static_cast(std::exp(lgamma(static_cast(arg))))); 1620 | #endif 1621 | } 1622 | 1623 | /// Floor implementation. 1624 | /// \param arg value to round 1625 | /// \return rounded value 1626 | static half floor(half arg) { return half(binary, round_half(arg.data_)); } 1627 | 1628 | /// Ceiling implementation. 1629 | /// \param arg value to round 1630 | /// \return rounded value 1631 | static half ceil(half arg) { return half(binary, round_half(arg.data_)); } 1632 | 1633 | /// Truncation implementation. 1634 | /// \param arg value to round 1635 | /// \return rounded value 1636 | static half trunc(half arg) { return half(binary, round_half(arg.data_)); } 1637 | 1638 | /// Nearest integer implementation. 1639 | /// \param arg value to round 1640 | /// \return rounded value 1641 | static half round(half arg) { return half(binary, round_half_up(arg.data_)); } 1642 | 1643 | /// Nearest integer implementation. 1644 | /// \param arg value to round 1645 | /// \return rounded value 1646 | static long lround(half arg) { return detail::half2int_up(arg.data_); } 1647 | 1648 | /// Nearest integer implementation. 1649 | /// \param arg value to round 1650 | /// \return rounded value 1651 | static half rint(half arg) { return half(binary, round_half(arg.data_)); } 1652 | 1653 | /// Nearest integer implementation. 
1654 | /// \param arg value to round 1655 | /// \return rounded value 1656 | static long lrint(half arg) { return detail::half2int(arg.data_); } 1657 | 1658 | #if HALF_ENABLE_CPP11_LONG_LONG 1659 | /// Nearest integer implementation. 1660 | /// \param arg value to round 1661 | /// \return rounded value 1662 | static long long llround(half arg) { return detail::half2int_up(arg.data_); } 1663 | 1664 | /// Nearest integer implementation. 1665 | /// \param arg value to round 1666 | /// \return rounded value 1667 | static long long llrint(half arg) { return detail::half2int(arg.data_); } 1668 | #endif 1669 | 1670 | /// Decompression implementation. 1671 | /// \param arg number to decompress 1672 | /// \param exp address to store exponent at 1673 | /// \return normalized significant 1674 | static half frexp(half arg, int *exp) 1675 | { 1676 | int m = arg.data_ & 0x7FFF, e = -14; 1677 | if(m >= 0x7C00 || !m) 1678 | return *exp = 0, arg; 1679 | for(; m<0x400; m<<=1,--e) ; 1680 | return *exp = e+(m>>10), half(binary, (arg.data_&0x8000)|0x3800|(m&0x3FF)); 1681 | } 1682 | 1683 | /// Decompression implementation. 1684 | /// \param arg number to decompress 1685 | /// \param iptr address to store integer part at 1686 | /// \return fractional part 1687 | static half modf(half arg, half *iptr) 1688 | { 1689 | unsigned int e = arg.data_ & 0x7FFF; 1690 | if(e >= 0x6400) 1691 | return *iptr = arg, half(binary, arg.data_&(0x8000U|-(e>0x7C00))); 1692 | if(e < 0x3C00) 1693 | return iptr->data_ = arg.data_ & 0x8000, arg; 1694 | e >>= 10; 1695 | unsigned int mask = (1<<(25-e)) - 1, m = arg.data_ & mask; 1696 | iptr->data_ = arg.data_ & ~mask; 1697 | if(!m) 1698 | return half(binary, arg.data_&0x8000); 1699 | for(; m<0x400; m<<=1,--e) ; 1700 | return half(binary, static_cast((arg.data_&0x8000)|(e<<10)|(m&0x3FF))); 1701 | } 1702 | 1703 | /// Scaling implementation. 
1704 | /// \param arg number to scale 1705 | /// \param exp power of two to scale by 1706 | /// \return scaled number 1707 | static half scalbln(half arg, long exp) 1708 | { 1709 | unsigned int m = arg.data_ & 0x7FFF; 1710 | if(m >= 0x7C00 || !m) 1711 | return arg; 1712 | for(; m<0x400; m<<=1,--exp) ; 1713 | exp += m >> 10; 1714 | uint16 value = arg.data_ & 0x8000; 1715 | if(exp > 30) 1716 | { 1717 | if(half::round_style == std::round_toward_zero) 1718 | value |= 0x7BFF; 1719 | else if(half::round_style == std::round_toward_infinity) 1720 | value |= 0x7C00 - (value>>15); 1721 | else if(half::round_style == std::round_toward_neg_infinity) 1722 | value |= 0x7BFF + (value>>15); 1723 | else 1724 | value |= 0x7C00; 1725 | } 1726 | else if(exp > 0) 1727 | value |= (exp<<10) | (m&0x3FF); 1728 | else if(exp > -11) 1729 | { 1730 | m = (m&0x3FF) | 0x400; 1731 | if(half::round_style == std::round_to_nearest) 1732 | { 1733 | m += 1 << -exp; 1734 | #if HALF_ROUND_TIES_TO_EVEN 1735 | m -= (m>>(1-exp)) & 1; 1736 | #endif 1737 | } 1738 | else if(half::round_style == std::round_toward_infinity) 1739 | m += ((value>>15)-1) & ((1<<(1-exp))-1U); 1740 | else if(half::round_style == std::round_toward_neg_infinity) 1741 | m += -(value>>15) & ((1<<(1-exp))-1U); 1742 | value |= m >> (1-exp); 1743 | } 1744 | else if(half::round_style == std::round_toward_infinity) 1745 | value -= (value>>15) - 1; 1746 | else if(half::round_style == std::round_toward_neg_infinity) 1747 | value += value >> 15; 1748 | return half(binary, value); 1749 | } 1750 | 1751 | /// Exponent implementation. 
1752 | /// \param arg number to query 1753 | /// \return floating point exponent 1754 | static int ilogb(half arg) 1755 | { 1756 | int abs = arg.data_ & 0x7FFF; 1757 | if(!abs) 1758 | return FP_ILOGB0; 1759 | if(abs < 0x7C00) 1760 | { 1761 | int exp = (abs>>10) - 15; 1762 | if(abs < 0x400) 1763 | for(; abs<0x200; abs<<=1,--exp) ; 1764 | return exp; 1765 | } 1766 | if(abs > 0x7C00) 1767 | return FP_ILOGBNAN; 1768 | return INT_MAX; 1769 | } 1770 | 1771 | /// Exponent implementation. 1772 | /// \param arg number to query 1773 | /// \return floating point exponent 1774 | static half logb(half arg) 1775 | { 1776 | int abs = arg.data_ & 0x7FFF; 1777 | if(!abs) 1778 | return half(binary, 0xFC00); 1779 | if(abs < 0x7C00) 1780 | { 1781 | int exp = (abs>>10) - 15; 1782 | if(abs < 0x400) 1783 | for(; abs<0x200; abs<<=1,--exp) ; 1784 | uint16 bits = (exp<0) << 15; 1785 | if(exp) 1786 | { 1787 | unsigned int m = std::abs(exp) << 6, e = 18; 1788 | for(; m<0x400; m<<=1,--e) ; 1789 | bits |= (e<<10) + m; 1790 | } 1791 | return half(binary, bits); 1792 | } 1793 | if(abs > 0x7C00) 1794 | return arg; 1795 | return half(binary, 0x7C00); 1796 | } 1797 | 1798 | /// Enumeration implementation. 1799 | /// \param from number to increase/decrease 1800 | /// \param to direction to enumerate into 1801 | /// \return next representable number 1802 | static half nextafter(half from, half to) 1803 | { 1804 | uint16 fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF; 1805 | if(fabs > 0x7C00) 1806 | return from; 1807 | if(tabs > 0x7C00 || from.data_ == to.data_ || !(fabs|tabs)) 1808 | return to; 1809 | if(!fabs) 1810 | return half(binary, (to.data_&0x8000)+1); 1811 | bool lt = ((fabs==from.data_) ? static_cast(fabs) : -static_cast(fabs)) < 1812 | ((tabs==to.data_) ? static_cast(tabs) : -static_cast(tabs)); 1813 | return half(binary, from.data_+(((from.data_>>15)^static_cast(lt))<<1)-1); 1814 | } 1815 | 1816 | /// Enumeration implementation. 
1817 | /// \param from number to increase/decrease 1818 | /// \param to direction to enumerate into 1819 | /// \return next representable number 1820 | static half nexttoward(half from, long double to) 1821 | { 1822 | if(isnan(from)) 1823 | return from; 1824 | long double lfrom = static_cast(from); 1825 | if(builtin_isnan(to) || lfrom == to) 1826 | return half(static_cast(to)); 1827 | if(!(from.data_&0x7FFF)) 1828 | return half(binary, (static_cast(builtin_signbit(to))<<15)+1); 1829 | return half(binary, from.data_+(((from.data_>>15)^static_cast(lfrom0x3FF) ? ((abs>=0x7C00) ? ((abs>0x7C00) ? FP_NAN : FP_INFINITE) : FP_NORMAL) :FP_SUBNORMAL) : FP_ZERO; 1846 | } 1847 | 1848 | /// Classification implementation. 1849 | /// \param arg value to classify 1850 | /// \retval true if finite number 1851 | /// \retval false else 1852 | static bool isfinite(half arg) { return (arg.data_&0x7C00) != 0x7C00; } 1853 | 1854 | /// Classification implementation. 1855 | /// \param arg value to classify 1856 | /// \retval true if infinite number 1857 | /// \retval false else 1858 | static bool isinf(half arg) { return (arg.data_&0x7FFF) == 0x7C00; } 1859 | 1860 | /// Classification implementation. 1861 | /// \param arg value to classify 1862 | /// \retval true if not a number 1863 | /// \retval false else 1864 | static bool isnan(half arg) { return (arg.data_&0x7FFF) > 0x7C00; } 1865 | 1866 | /// Classification implementation. 1867 | /// \param arg value to classify 1868 | /// \retval true if normal number 1869 | /// \retval false else 1870 | static bool isnormal(half arg) { return ((arg.data_&0x7C00)!=0) & ((arg.data_&0x7C00)!=0x7C00); } 1871 | 1872 | /// Sign bit implementation. 1873 | /// \param arg value to check 1874 | /// \retval true if signed 1875 | /// \retval false if unsigned 1876 | static bool signbit(half arg) { return (arg.data_&0x8000) != 0; } 1877 | 1878 | /// Comparison implementation. 
1879 | /// \param x first operand 1880 | /// \param y second operand 1881 | /// \retval true if operands equal 1882 | /// \retval false else 1883 | static bool isequal(half x, half y) { return (x.data_==y.data_ || !((x.data_|y.data_)&0x7FFF)) && !isnan(x); } 1884 | 1885 | /// Comparison implementation. 1886 | /// \param x first operand 1887 | /// \param y second operand 1888 | /// \retval true if operands not equal 1889 | /// \retval false else 1890 | static bool isnotequal(half x, half y) { return (x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF)) || isnan(x); } 1891 | 1892 | /// Comparison implementation. 1893 | /// \param x first operand 1894 | /// \param y second operand 1895 | /// \retval true if \a x > \a y 1896 | /// \retval false else 1897 | static bool isgreater(half x, half y) 1898 | { 1899 | int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; 1900 | return xabs<=0x7C00 && yabs<=0x7C00 && (((xabs==x.data_) ? xabs : -xabs) > ((yabs==y.data_) ? yabs : -yabs)); 1901 | } 1902 | 1903 | /// Comparison implementation. 1904 | /// \param x first operand 1905 | /// \param y second operand 1906 | /// \retval true if \a x >= \a y 1907 | /// \retval false else 1908 | static bool isgreaterequal(half x, half y) 1909 | { 1910 | int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; 1911 | return xabs<=0x7C00 && yabs<=0x7C00 && (((xabs==x.data_) ? xabs : -xabs) >= ((yabs==y.data_) ? yabs : -yabs)); 1912 | } 1913 | 1914 | /// Comparison implementation. 1915 | /// \param x first operand 1916 | /// \param y second operand 1917 | /// \retval true if \a x < \a y 1918 | /// \retval false else 1919 | static bool isless(half x, half y) 1920 | { 1921 | int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; 1922 | return xabs<=0x7C00 && yabs<=0x7C00 && (((xabs==x.data_) ? xabs : -xabs) < ((yabs==y.data_) ? yabs : -yabs)); 1923 | } 1924 | 1925 | /// Comparison implementation. 
1926 | /// \param x first operand 1927 | /// \param y second operand 1928 | /// \retval true if \a x <= \a y 1929 | /// \retval false else 1930 | static bool islessequal(half x, half y) 1931 | { 1932 | int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; 1933 | return xabs<=0x7C00 && yabs<=0x7C00 && (((xabs==x.data_) ? xabs : -xabs) <= ((yabs==y.data_) ? yabs : -yabs)); 1934 | } 1935 | 1936 | /// Comparison implementation. 1937 | /// \param x first operand 1938 | /// \param y second operand 1939 | /// \retval true if either \a x > \a y nor \a x < \a y 1940 | /// \retval false else 1941 | static bool islessgreater(half x, half y) 1942 | { 1943 | int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; 1944 | if(xabs > 0x7C00 || yabs > 0x7C00) 1945 | return false; 1946 | int a = (xabs==x.data_) ? xabs : -xabs, b = (yabs==y.data_) ? yabs : -yabs; 1947 | return a < b || a > b; 1948 | } 1949 | 1950 | /// Comparison implementation. 1951 | /// \param x first operand 1952 | /// \param y second operand 1953 | /// \retval true if operand unordered 1954 | /// \retval false else 1955 | static bool isunordered(half x, half y) { return isnan(x) || isnan(y); } 1956 | 1957 | private: 1958 | static double erf(double arg) 1959 | { 1960 | if(builtin_isinf(arg)) 1961 | return (arg<0.0) ? -1.0 : 1.0; 1962 | double x2 = arg * arg, ax2 = 0.147 * x2, value = std::sqrt(1.0-std::exp(-x2*(1.2732395447351626861510701069801+ax2)/(1.0+ax2))); 1963 | return builtin_signbit(arg) ? 
-value : value; 1964 | } 1965 | 1966 | static double lgamma(double arg) 1967 | { 1968 | double v = 1.0; 1969 | for(; arg<8.0; ++arg) v *= arg; 1970 | double w = 1.0 / (arg*arg); 1971 | return (((((((-0.02955065359477124183006535947712*w+0.00641025641025641025641025641026)*w+ 1972 | -0.00191752691752691752691752691753)*w+8.4175084175084175084175084175084e-4)*w+ 1973 | -5.952380952380952380952380952381e-4)*w+7.9365079365079365079365079365079e-4)*w+ 1974 | -0.00277777777777777777777777777778)*w+0.08333333333333333333333333333333)/arg + 1975 | 0.91893853320467274178032973640562 - std::log(v) - arg + (arg-0.5) * std::log(arg); 1976 | } 1977 | }; 1978 | 1979 | /// Wrapper for unary half-precision functions needing specialization for individual argument types. 1980 | /// \tparam T argument type 1981 | template struct unary_specialized 1982 | { 1983 | /// Negation implementation. 1984 | /// \param arg value to negate 1985 | /// \return negated value 1986 | static HALF_CONSTEXPR half negate(half arg) { return half(binary, arg.data_^0x8000); } 1987 | 1988 | /// Absolute value implementation. 1989 | /// \param arg function argument 1990 | /// \return absolute value 1991 | static half fabs(half arg) { return half(binary, arg.data_&0x7FFF); } 1992 | }; 1993 | template<> struct unary_specialized 1994 | { 1995 | static HALF_CONSTEXPR expr negate(float arg) { return expr(-arg); } 1996 | static expr fabs(float arg) { return expr(std::fabs(arg)); } 1997 | }; 1998 | 1999 | /// Wrapper for binary half-precision functions needing specialization for individual argument types. 2000 | /// \tparam T first argument type 2001 | /// \tparam U first argument type 2002 | template struct binary_specialized 2003 | { 2004 | /// Minimum implementation. 
2005 | /// \param x first operand 2006 | /// \param y second operand 2007 | /// \return minimum value 2008 | static expr fmin(float x, float y) 2009 | { 2010 | #if HALF_ENABLE_CPP11_CMATH 2011 | return expr(std::fmin(x, y)); 2012 | #else 2013 | if(builtin_isnan(x)) 2014 | return expr(y); 2015 | if(builtin_isnan(y)) 2016 | return expr(x); 2017 | return expr(std::min(x, y)); 2018 | #endif 2019 | } 2020 | 2021 | /// Maximum implementation. 2022 | /// \param x first operand 2023 | /// \param y second operand 2024 | /// \return maximum value 2025 | static expr fmax(float x, float y) 2026 | { 2027 | #if HALF_ENABLE_CPP11_CMATH 2028 | return expr(std::fmax(x, y)); 2029 | #else 2030 | if(builtin_isnan(x)) 2031 | return expr(y); 2032 | if(builtin_isnan(y)) 2033 | return expr(x); 2034 | return expr(std::max(x, y)); 2035 | #endif 2036 | } 2037 | }; 2038 | template<> struct binary_specialized 2039 | { 2040 | static half fmin(half x, half y) 2041 | { 2042 | int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; 2043 | if(xabs > 0x7C00) 2044 | return y; 2045 | if(yabs > 0x7C00) 2046 | return x; 2047 | return (((xabs==x.data_) ? xabs : -xabs) > ((yabs==y.data_) ? yabs : -yabs)) ? y : x; 2048 | } 2049 | static half fmax(half x, half y) 2050 | { 2051 | int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; 2052 | if(xabs > 0x7C00) 2053 | return y; 2054 | if(yabs > 0x7C00) 2055 | return x; 2056 | return (((xabs==x.data_) ? xabs : -xabs) < ((yabs==y.data_) ? yabs : -yabs)) ? y : x; 2057 | } 2058 | }; 2059 | 2060 | /// Helper class for half casts. 2061 | /// This class template has to be specialized for all valid cast argument to define an appropriate static `cast` member 2062 | /// function and a corresponding `type` member denoting its return type. 
2063 | /// \tparam T destination type 2064 | /// \tparam U source type 2065 | /// \tparam R rounding mode to use 2066 | template struct half_caster {}; 2067 | template struct half_caster 2068 | { 2069 | #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS 2070 | static_assert(std::is_arithmetic::value, "half_cast from non-arithmetic type unsupported"); 2071 | #endif 2072 | 2073 | static half cast(U arg) { return cast_impl(arg, is_float()); }; 2074 | 2075 | private: 2076 | static half cast_impl(U arg, true_type) { return half(binary, float2half(arg)); } 2077 | static half cast_impl(U arg, false_type) { return half(binary, int2half(arg)); } 2078 | }; 2079 | template struct half_caster 2080 | { 2081 | #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS 2082 | static_assert(std::is_arithmetic::value, "half_cast to non-arithmetic type unsupported"); 2083 | #endif 2084 | 2085 | static T cast(half arg) { return cast_impl(arg, is_float()); } 2086 | 2087 | private: 2088 | static T cast_impl(half arg, true_type) { return half2float(arg.data_); } 2089 | static T cast_impl(half arg, false_type) { return half2int(arg.data_); } 2090 | }; 2091 | template struct half_caster 2092 | { 2093 | #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS 2094 | static_assert(std::is_arithmetic::value, "half_cast to non-arithmetic type unsupported"); 2095 | #endif 2096 | 2097 | static T cast(expr arg) { return cast_impl(arg, is_float()); } 2098 | 2099 | private: 2100 | static T cast_impl(float arg, true_type) { return static_cast(arg); } 2101 | static T cast_impl(half arg, false_type) { return half2int(arg.data_); } 2102 | }; 2103 | template struct half_caster 2104 | { 2105 | static half cast(half arg) { return arg; } 2106 | }; 2107 | template struct half_caster : half_caster {}; 2108 | 2109 | /// \name Comparison operators 2110 | /// \{ 2111 | 2112 | /// Comparison for equality. 
2113 | /// \param x first operand 2114 | /// \param y second operand 2115 | /// \retval true if operands equal 2116 | /// \retval false else 2117 | template typename enable::type operator==(T x, U y) { return functions::isequal(x, y); } 2118 | 2119 | /// Comparison for inequality. 2120 | /// \param x first operand 2121 | /// \param y second operand 2122 | /// \retval true if operands not equal 2123 | /// \retval false else 2124 | template typename enable::type operator!=(T x, U y) { return functions::isnotequal(x, y); } 2125 | 2126 | /// Comparison for less than. 2127 | /// \param x first operand 2128 | /// \param y second operand 2129 | /// \retval true if \a x less than \a y 2130 | /// \retval false else 2131 | template typename enable::type operator<(T x, U y) { return functions::isless(x, y); } 2132 | 2133 | /// Comparison for greater than. 2134 | /// \param x first operand 2135 | /// \param y second operand 2136 | /// \retval true if \a x greater than \a y 2137 | /// \retval false else 2138 | template typename enable::type operator>(T x, U y) { return functions::isgreater(x, y); } 2139 | 2140 | /// Comparison for less equal. 2141 | /// \param x first operand 2142 | /// \param y second operand 2143 | /// \retval true if \a x less equal \a y 2144 | /// \retval false else 2145 | template typename enable::type operator<=(T x, U y) { return functions::islessequal(x, y); } 2146 | 2147 | /// Comparison for greater equal. 2148 | /// \param x first operand 2149 | /// \param y second operand 2150 | /// \retval true if \a x greater equal \a y 2151 | /// \retval false else 2152 | template typename enable::type operator>=(T x, U y) { return functions::isgreaterequal(x, y); } 2153 | 2154 | /// \} 2155 | /// \name Arithmetic operators 2156 | /// \{ 2157 | 2158 | /// Add halfs. 
2159 | /// \param x left operand 2160 | /// \param y right operand 2161 | /// \return sum of half expressions 2162 | template typename enable::type operator+(T x, U y) { return functions::plus(x, y); } 2163 | 2164 | /// Subtract halfs. 2165 | /// \param x left operand 2166 | /// \param y right operand 2167 | /// \return difference of half expressions 2168 | template typename enable::type operator-(T x, U y) { return functions::minus(x, y); } 2169 | 2170 | /// Multiply halfs. 2171 | /// \param x left operand 2172 | /// \param y right operand 2173 | /// \return product of half expressions 2174 | template typename enable::type operator*(T x, U y) { return functions::multiplies(x, y); } 2175 | 2176 | /// Divide halfs. 2177 | /// \param x left operand 2178 | /// \param y right operand 2179 | /// \return quotient of half expressions 2180 | template typename enable::type operator/(T x, U y) { return functions::divides(x, y); } 2181 | 2182 | /// Identity. 2183 | /// \param arg operand 2184 | /// \return uncahnged operand 2185 | template HALF_CONSTEXPR typename enable::type operator+(T arg) { return arg; } 2186 | 2187 | /// Negation. 2188 | /// \param arg operand 2189 | /// \return negated operand 2190 | template HALF_CONSTEXPR typename enable::type operator-(T arg) { return unary_specialized::negate(arg); } 2191 | 2192 | /// \} 2193 | /// \name Input and output 2194 | /// \{ 2195 | 2196 | /// Output operator. 2197 | /// \param out output stream to write into 2198 | /// \param arg half expression to write 2199 | /// \return reference to output stream 2200 | template typename enable&,T>::type 2201 | operator<<(std::basic_ostream &out, T arg) { return functions::write(out, arg); } 2202 | 2203 | /// Input operator. 
2204 | /// \param in input stream to read from 2205 | /// \param arg half to read into 2206 | /// \return reference to input stream 2207 | template std::basic_istream& 2208 | operator>>(std::basic_istream &in, half &arg) { return functions::read(in, arg); } 2209 | 2210 | /// \} 2211 | /// \name Basic mathematical operations 2212 | /// \{ 2213 | 2214 | /// Absolute value. 2215 | /// \param arg operand 2216 | /// \return absolute value of \a arg 2217 | // template typename enable::type abs(T arg) { return unary_specialized::fabs(arg); } 2218 | inline half abs(half arg) { return unary_specialized::fabs(arg); } 2219 | inline expr abs(expr arg) { return unary_specialized::fabs(arg); } 2220 | 2221 | /// Absolute value. 2222 | /// \param arg operand 2223 | /// \return absolute value of \a arg 2224 | // template typename enable::type fabs(T arg) { return unary_specialized::fabs(arg); } 2225 | inline half fabs(half arg) { return unary_specialized::fabs(arg); } 2226 | inline expr fabs(expr arg) { return unary_specialized::fabs(arg); } 2227 | 2228 | /// Remainder of division. 2229 | /// \param x first operand 2230 | /// \param y second operand 2231 | /// \return remainder of floating point division. 2232 | // template typename enable::type fmod(T x, U y) { return functions::fmod(x, y); } 2233 | inline expr fmod(half x, half y) { return functions::fmod(x, y); } 2234 | inline expr fmod(half x, expr y) { return functions::fmod(x, y); } 2235 | inline expr fmod(expr x, half y) { return functions::fmod(x, y); } 2236 | inline expr fmod(expr x, expr y) { return functions::fmod(x, y); } 2237 | 2238 | /// Remainder of division. 2239 | /// \param x first operand 2240 | /// \param y second operand 2241 | /// \return remainder of floating point division. 
2242 | // template typename enable::type remainder(T x, U y) { return functions::remainder(x, y); } 2243 | inline expr remainder(half x, half y) { return functions::remainder(x, y); } 2244 | inline expr remainder(half x, expr y) { return functions::remainder(x, y); } 2245 | inline expr remainder(expr x, half y) { return functions::remainder(x, y); } 2246 | inline expr remainder(expr x, expr y) { return functions::remainder(x, y); } 2247 | 2248 | /// Remainder of division. 2249 | /// \param x first operand 2250 | /// \param y second operand 2251 | /// \param quo address to store some bits of quotient at 2252 | /// \return remainder of floating point division. 2253 | // template typename enable::type remquo(T x, U y, int *quo) { return functions::remquo(x, y, quo); } 2254 | inline expr remquo(half x, half y, int *quo) { return functions::remquo(x, y, quo); } 2255 | inline expr remquo(half x, expr y, int *quo) { return functions::remquo(x, y, quo); } 2256 | inline expr remquo(expr x, half y, int *quo) { return functions::remquo(x, y, quo); } 2257 | inline expr remquo(expr x, expr y, int *quo) { return functions::remquo(x, y, quo); } 2258 | 2259 | /// Fused multiply add. 2260 | /// \param x first operand 2261 | /// \param y second operand 2262 | /// \param z third operand 2263 | /// \return ( \a x * \a y ) + \a z rounded as one operation. 
2264 | // template typename enable::type fma(T x, U y, V z) { return functions::fma(x, y, z); } 2265 | inline expr fma(half x, half y, half z) { return functions::fma(x, y, z); } 2266 | inline expr fma(half x, half y, expr z) { return functions::fma(x, y, z); } 2267 | inline expr fma(half x, expr y, half z) { return functions::fma(x, y, z); } 2268 | inline expr fma(half x, expr y, expr z) { return functions::fma(x, y, z); } 2269 | inline expr fma(expr x, half y, half z) { return functions::fma(x, y, z); } 2270 | inline expr fma(expr x, half y, expr z) { return functions::fma(x, y, z); } 2271 | inline expr fma(expr x, expr y, half z) { return functions::fma(x, y, z); } 2272 | inline expr fma(expr x, expr y, expr z) { return functions::fma(x, y, z); } 2273 | 2274 | /// Maximum of half expressions. 2275 | /// \param x first operand 2276 | /// \param y second operand 2277 | /// \return maximum of operands 2278 | // template typename result::type fmax(T x, U y) { return binary_specialized::fmax(x, y); } 2279 | inline half fmax(half x, half y) { return binary_specialized::fmax(x, y); } 2280 | inline expr fmax(half x, expr y) { return binary_specialized::fmax(x, y); } 2281 | inline expr fmax(expr x, half y) { return binary_specialized::fmax(x, y); } 2282 | inline expr fmax(expr x, expr y) { return binary_specialized::fmax(x, y); } 2283 | 2284 | /// Minimum of half expressions. 2285 | /// \param x first operand 2286 | /// \param y second operand 2287 | /// \return minimum of operands 2288 | // template typename result::type fmin(T x, U y) { return binary_specialized::fmin(x, y); } 2289 | inline half fmin(half x, half y) { return binary_specialized::fmin(x, y); } 2290 | inline expr fmin(half x, expr y) { return binary_specialized::fmin(x, y); } 2291 | inline expr fmin(expr x, half y) { return binary_specialized::fmin(x, y); } 2292 | inline expr fmin(expr x, expr y) { return binary_specialized::fmin(x, y); } 2293 | 2294 | /// Positive difference. 
2295 | /// \param x first operand 2296 | /// \param y second operand 2297 | /// \return \a x - \a y or 0 if difference negative 2298 | // template typename enable::type fdim(T x, U y) { return functions::fdim(x, y); } 2299 | inline expr fdim(half x, half y) { return functions::fdim(x, y); } 2300 | inline expr fdim(half x, expr y) { return functions::fdim(x, y); } 2301 | inline expr fdim(expr x, half y) { return functions::fdim(x, y); } 2302 | inline expr fdim(expr x, expr y) { return functions::fdim(x, y); } 2303 | 2304 | /// Get NaN value. 2305 | /// \return quiet NaN 2306 | inline half nanh(const char*) { return functions::nanh(); } 2307 | 2308 | /// \} 2309 | /// \name Exponential functions 2310 | /// \{ 2311 | 2312 | /// Exponential function. 2313 | /// \param arg function argument 2314 | /// \return e raised to \a arg 2315 | // template typename enable::type exp(T arg) { return functions::exp(arg); } 2316 | inline expr exp(half arg) { return functions::exp(arg); } 2317 | inline expr exp(expr arg) { return functions::exp(arg); } 2318 | 2319 | /// Exponential minus one. 2320 | /// \param arg function argument 2321 | /// \return e raised to \a arg subtracted by 1 2322 | // template typename enable::type expm1(T arg) { return functions::expm1(arg); } 2323 | inline expr expm1(half arg) { return functions::expm1(arg); } 2324 | inline expr expm1(expr arg) { return functions::expm1(arg); } 2325 | 2326 | /// Binary exponential. 2327 | /// \param arg function argument 2328 | /// \return 2 raised to \a arg 2329 | // template typename enable::type exp2(T arg) { return functions::exp2(arg); } 2330 | inline expr exp2(half arg) { return functions::exp2(arg); } 2331 | inline expr exp2(expr arg) { return functions::exp2(arg); } 2332 | 2333 | /// Natural logorithm. 
2334 | /// \param arg function argument 2335 | /// \return logarithm of \a arg to base e 2336 | // template typename enable::type log(T arg) { return functions::log(arg); } 2337 | inline expr log(half arg) { return functions::log(arg); } 2338 | inline expr log(expr arg) { return functions::log(arg); } 2339 | 2340 | /// Common logarithm. 2341 | /// \param arg function argument 2342 | /// \return logarithm of \a arg to base 10 2343 | // template typename enable::type log10(T arg) { return functions::log10(arg); } 2344 | inline expr log10(half arg) { return functions::log10(arg); } 2345 | inline expr log10(expr arg) { return functions::log10(arg); } 2346 | 2347 | /// Natural logarithm of one plus argument. 2348 | /// \param arg function argument 2349 | /// \return logarithm of \a arg plus 1 to base e 2350 | // template typename enable::type log1p(T arg) { return functions::log1p(arg); } 2351 | inline expr log1p(half arg) { return functions::log1p(arg); } 2352 | inline expr log1p(expr arg) { return functions::log1p(arg); } 2353 | 2354 | /// Binary logarithm. 2355 | /// \param arg function argument 2356 | /// \return logarithm of \a arg to base 2 2357 | // template typename enable::type log2(T arg) { return functions::log2(arg); } 2358 | inline expr log2(half arg) { return functions::log2(arg); } 2359 | inline expr log2(expr arg) { return functions::log2(arg); } 2360 | 2361 | /// \} 2362 | /// \name Power functions 2363 | /// \{ 2364 | 2365 | /// Square root. 2366 | /// \param arg function argument 2367 | /// \return square root of \a arg 2368 | // template typename enable::type sqrt(T arg) { return functions::sqrt(arg); } 2369 | inline expr sqrt(half arg) { return functions::sqrt(arg); } 2370 | inline expr sqrt(expr arg) { return functions::sqrt(arg); } 2371 | 2372 | /// Cubic root.
2373 | /// \param arg function argument 2374 | /// \return cubic root of \a arg 2375 | // template typename enable::type cbrt(T arg) { return functions::cbrt(arg); } 2376 | inline expr cbrt(half arg) { return functions::cbrt(arg); } 2377 | inline expr cbrt(expr arg) { return functions::cbrt(arg); } 2378 | 2379 | /// Hypotenuse function. 2380 | /// \param x first argument 2381 | /// \param y second argument 2382 | /// \return square root of sum of squares without internal over- or underflows 2383 | // template typename enable::type hypot(T x, U y) { return functions::hypot(x, y); } 2384 | inline expr hypot(half x, half y) { return functions::hypot(x, y); } 2385 | inline expr hypot(half x, expr y) { return functions::hypot(x, y); } 2386 | inline expr hypot(expr x, half y) { return functions::hypot(x, y); } 2387 | inline expr hypot(expr x, expr y) { return functions::hypot(x, y); } 2388 | 2389 | /// Power function. 2390 | /// \param base first argument 2391 | /// \param exp second argument 2392 | /// \return \a base raised to \a exp 2393 | // template typename enable::type pow(T base, U exp) { return functions::pow(base, exp); } 2394 | inline expr pow(half base, half exp) { return functions::pow(base, exp); } 2395 | inline expr pow(half base, expr exp) { return functions::pow(base, exp); } 2396 | inline expr pow(expr base, half exp) { return functions::pow(base, exp); } 2397 | inline expr pow(expr base, expr exp) { return functions::pow(base, exp); } 2398 | 2399 | /// \} 2400 | /// \name Trigonometric functions 2401 | /// \{ 2402 | 2403 | /// Sine function. 2404 | /// \param arg function argument 2405 | /// \return sine value of \a arg 2406 | // template typename enable::type sin(T arg) { return functions::sin(arg); } 2407 | inline expr sin(half arg) { return functions::sin(arg); } 2408 | inline expr sin(expr arg) { return functions::sin(arg); } 2409 | 2410 | /// Cosine function. 
2411 | /// \param arg function argument 2412 | /// \return cosine value of \a arg 2413 | // template typename enable::type cos(T arg) { return functions::cos(arg); } 2414 | inline expr cos(half arg) { return functions::cos(arg); } 2415 | inline expr cos(expr arg) { return functions::cos(arg); } 2416 | 2417 | /// Tangent function. 2418 | /// \param arg function argument 2419 | /// \return tangent value of \a arg 2420 | // template typename enable::type tan(T arg) { return functions::tan(arg); } 2421 | inline expr tan(half arg) { return functions::tan(arg); } 2422 | inline expr tan(expr arg) { return functions::tan(arg); } 2423 | 2424 | /// Arc sine. 2425 | /// \param arg function argument 2426 | /// \return arc sine value of \a arg 2427 | // template typename enable::type asin(T arg) { return functions::asin(arg); } 2428 | inline expr asin(half arg) { return functions::asin(arg); } 2429 | inline expr asin(expr arg) { return functions::asin(arg); } 2430 | 2431 | /// Arc cosine function. 2432 | /// \param arg function argument 2433 | /// \return arc cosine value of \a arg 2434 | // template typename enable::type acos(T arg) { return functions::acos(arg); } 2435 | inline expr acos(half arg) { return functions::acos(arg); } 2436 | inline expr acos(expr arg) { return functions::acos(arg); } 2437 | 2438 | /// Arc tangent function. 2439 | /// \param arg function argument 2440 | /// \return arc tangent value of \a arg 2441 | // template typename enable::type atan(T arg) { return functions::atan(arg); } 2442 | inline expr atan(half arg) { return functions::atan(arg); } 2443 | inline expr atan(expr arg) { return functions::atan(arg); } 2444 | 2445 | /// Arc tangent function. 
2446 | /// \param x first argument 2447 | /// \param y second argument 2448 | /// \return arc tangent value 2449 | // template typename enable::type atan2(T x, U y) { return functions::atan2(x, y); } 2450 | inline expr atan2(half x, half y) { return functions::atan2(x, y); } 2451 | inline expr atan2(half x, expr y) { return functions::atan2(x, y); } 2452 | inline expr atan2(expr x, half y) { return functions::atan2(x, y); } 2453 | inline expr atan2(expr x, expr y) { return functions::atan2(x, y); } 2454 | 2455 | /// \} 2456 | /// \name Hyperbolic functions 2457 | /// \{ 2458 | 2459 | /// Hyperbolic sine. 2460 | /// \param arg function argument 2461 | /// \return hyperbolic sine value of \a arg 2462 | // template typename enable::type sinh(T arg) { return functions::sinh(arg); } 2463 | inline expr sinh(half arg) { return functions::sinh(arg); } 2464 | inline expr sinh(expr arg) { return functions::sinh(arg); } 2465 | 2466 | /// Hyperbolic cosine. 2467 | /// \param arg function argument 2468 | /// \return hyperbolic cosine value of \a arg 2469 | // template typename enable::type cosh(T arg) { return functions::cosh(arg); } 2470 | inline expr cosh(half arg) { return functions::cosh(arg); } 2471 | inline expr cosh(expr arg) { return functions::cosh(arg); } 2472 | 2473 | /// Hyperbolic tangent. 2474 | /// \param arg function argument 2475 | /// \return hyperbolic tangent value of \a arg 2476 | // template typename enable::type tanh(T arg) { return functions::tanh(arg); } 2477 | inline expr tanh(half arg) { return functions::tanh(arg); } 2478 | inline expr tanh(expr arg) { return functions::tanh(arg); } 2479 | 2480 | /// Hyperbolic area sine. 
2481 | /// \param arg function argument 2482 | /// \return area sine value of \a arg 2483 | // template typename enable::type asinh(T arg) { return functions::asinh(arg); } 2484 | inline expr asinh(half arg) { return functions::asinh(arg); } 2485 | inline expr asinh(expr arg) { return functions::asinh(arg); } 2486 | 2487 | /// Hyperbolic area cosine. 2488 | /// \param arg function argument 2489 | /// \return area cosine value of \a arg 2490 | // template typename enable::type acosh(T arg) { return functions::acosh(arg); } 2491 | inline expr acosh(half arg) { return functions::acosh(arg); } 2492 | inline expr acosh(expr arg) { return functions::acosh(arg); } 2493 | 2494 | /// Hyperbolic area tangent. 2495 | /// \param arg function argument 2496 | /// \return area tangent value of \a arg 2497 | // template typename enable::type atanh(T arg) { return functions::atanh(arg); } 2498 | inline expr atanh(half arg) { return functions::atanh(arg); } 2499 | inline expr atanh(expr arg) { return functions::atanh(arg); } 2500 | 2501 | /// \} 2502 | /// \name Error and gamma functions 2503 | /// \{ 2504 | 2505 | /// Error function. 2506 | /// \param arg function argument 2507 | /// \return error function value of \a arg 2508 | // template typename enable::type erf(T arg) { return functions::erf(arg); } 2509 | inline expr erf(half arg) { return functions::erf(arg); } 2510 | inline expr erf(expr arg) { return functions::erf(arg); } 2511 | 2512 | /// Complementary error function. 2513 | /// \param arg function argument 2514 | /// \return 1 minus error function value of \a arg 2515 | // template typename enable::type erfc(T arg) { return functions::erfc(arg); } 2516 | inline expr erfc(half arg) { return functions::erfc(arg); } 2517 | inline expr erfc(expr arg) { return functions::erfc(arg); } 2518 | 2519 | /// Natural logarithm of gamma function. 
2520 | /// \param arg function argument 2521 | /// \return natural logarithm of gamma function for \a arg 2522 | // template typename enable::type lgamma(T arg) { return functions::lgamma(arg); } 2523 | inline expr lgamma(half arg) { return functions::lgamma(arg); } 2524 | inline expr lgamma(expr arg) { return functions::lgamma(arg); } 2525 | 2526 | /// Gamma function. 2527 | /// \param arg function argument 2528 | /// \return gamma function value of \a arg 2529 | // template typename enable::type tgamma(T arg) { return functions::tgamma(arg); } 2530 | inline expr tgamma(half arg) { return functions::tgamma(arg); } 2531 | inline expr tgamma(expr arg) { return functions::tgamma(arg); } 2532 | 2533 | /// \} 2534 | /// \name Rounding 2535 | /// \{ 2536 | 2537 | /// Nearest integer not less than half value. 2538 | /// \param arg half to round 2539 | /// \return nearest integer not less than \a arg 2540 | // template typename enable::type ceil(T arg) { return functions::ceil(arg); } 2541 | inline half ceil(half arg) { return functions::ceil(arg); } 2542 | inline half ceil(expr arg) { return functions::ceil(arg); } 2543 | 2544 | /// Nearest integer not greater than half value. 2545 | /// \param arg half to round 2546 | /// \return nearest integer not greater than \a arg 2547 | // template typename enable::type floor(T arg) { return functions::floor(arg); } 2548 | inline half floor(half arg) { return functions::floor(arg); } 2549 | inline half floor(expr arg) { return functions::floor(arg); } 2550 | 2551 | /// Nearest integer not greater in magnitude than half value. 2552 | /// \param arg half to round 2553 | /// \return nearest integer not greater in magnitude than \a arg 2554 | // template typename enable::type trunc(T arg) { return functions::trunc(arg); } 2555 | inline half trunc(half arg) { return functions::trunc(arg); } 2556 | inline half trunc(expr arg) { return functions::trunc(arg); } 2557 | 2558 | /// Nearest integer.
2559 | /// \param arg half to round 2560 | /// \return nearest integer, rounded away from zero in half-way cases 2561 | // template typename enable::type round(T arg) { return functions::round(arg); } 2562 | inline half round(half arg) { return functions::round(arg); } 2563 | inline half round(expr arg) { return functions::round(arg); } 2564 | 2565 | /// Nearest integer. 2566 | /// \param arg half to round 2567 | /// \return nearest integer, rounded away from zero in half-way cases 2568 | // template typename enable::type lround(T arg) { return functions::lround(arg); } 2569 | inline long lround(half arg) { return functions::lround(arg); } 2570 | inline long lround(expr arg) { return functions::lround(arg); } 2571 | 2572 | /// Nearest integer using half's internal rounding mode. 2573 | /// \param arg half expression to round 2574 | /// \return nearest integer using default rounding mode 2575 | // template typename enable::type nearbyint(T arg) { return functions::nearbyint(arg); } 2576 | inline half nearbyint(half arg) { return functions::rint(arg); } 2577 | inline half nearbyint(expr arg) { return functions::rint(arg); } 2578 | 2579 | /// Nearest integer using half's internal rounding mode. 2580 | /// \param arg half expression to round 2581 | /// \return nearest integer using default rounding mode 2582 | // template typename enable::type rint(T arg) { return functions::rint(arg); } 2583 | inline half rint(half arg) { return functions::rint(arg); } 2584 | inline half rint(expr arg) { return functions::rint(arg); } 2585 | 2586 | /// Nearest integer using half's internal rounding mode. 
2587 | /// \param arg half expression to round 2588 | /// \return nearest integer using default rounding mode 2589 | // template typename enable::type lrint(T arg) { return functions::lrint(arg); } 2590 | inline long lrint(half arg) { return functions::lrint(arg); } 2591 | inline long lrint(expr arg) { return functions::lrint(arg); } 2592 | #if HALF_ENABLE_CPP11_LONG_LONG 2593 | /// Nearest integer. 2594 | /// \param arg half to round 2595 | /// \return nearest integer, rounded away from zero in half-way cases 2596 | // template typename enable::type llround(T arg) { return functions::llround(arg); } 2597 | inline long long llround(half arg) { return functions::llround(arg); } 2598 | inline long long llround(expr arg) { return functions::llround(arg); } 2599 | 2600 | /// Nearest integer using half's internal rounding mode. 2601 | /// \param arg half expression to round 2602 | /// \return nearest integer using default rounding mode 2603 | // template typename enable::type llrint(T arg) { return functions::llrint(arg); } 2604 | inline long long llrint(half arg) { return functions::llrint(arg); } 2605 | inline long long llrint(expr arg) { return functions::llrint(arg); } 2606 | #endif 2607 | 2608 | /// \} 2609 | /// \name Floating point manipulation 2610 | /// \{ 2611 | 2612 | /// Decompose floating point number. 2613 | /// \param arg number to decompose 2614 | /// \param exp address to store exponent at 2615 | /// \return significand in range [0.5, 1) 2616 | // template typename enable::type frexp(T arg, int *exp) { return functions::frexp(arg, exp); } 2617 | inline half frexp(half arg, int *exp) { return functions::frexp(arg, exp); } 2618 | inline half frexp(expr arg, int *exp) { return functions::frexp(arg, exp); } 2619 | 2620 | /// Multiply by power of two.
2621 | /// \param arg number to modify 2622 | /// \param exp power of two to multiply with 2623 | /// \return \a arg multiplied by 2 raised to \a exp 2624 | // template typename enable::type ldexp(T arg, int exp) { return functions::scalbln(arg, exp); } 2625 | inline half ldexp(half arg, int exp) { return functions::scalbln(arg, exp); } 2626 | inline half ldexp(expr arg, int exp) { return functions::scalbln(arg, exp); } 2627 | 2628 | /// Extract integer and fractional parts. 2629 | /// \param arg number to decompose 2630 | /// \param iptr address to store integer part at 2631 | /// \return fractional part 2632 | // template typename enable::type modf(T arg, half *iptr) { return functions::modf(arg, iptr); } 2633 | inline half modf(half arg, half *iptr) { return functions::modf(arg, iptr); } 2634 | inline half modf(expr arg, half *iptr) { return functions::modf(arg, iptr); } 2635 | 2636 | /// Multiply by power of two. 2637 | /// \param arg number to modify 2638 | /// \param exp power of two to multiply with 2639 | /// \return \a arg multiplied by 2 raised to \a exp 2640 | // template typename enable::type scalbn(T arg, int exp) { return functions::scalbln(arg, exp); } 2641 | inline half scalbn(half arg, int exp) { return functions::scalbln(arg, exp); } 2642 | inline half scalbn(expr arg, int exp) { return functions::scalbln(arg, exp); } 2643 | 2644 | /// Multiply by power of two. 2645 | /// \param arg number to modify 2646 | /// \param exp power of two to multiply with 2647 | /// \return \a arg multiplied by 2 raised to \a exp 2648 | // template typename enable::type scalbln(T arg, long exp) { return functions::scalbln(arg, exp); } 2649 | inline half scalbln(half arg, long exp) { return functions::scalbln(arg, exp); } 2650 | inline half scalbln(expr arg, long exp) { return functions::scalbln(arg, exp); } 2651 | 2652 | /// Extract exponent.
2653 | /// \param arg number to query 2654 | /// \return floating point exponent 2655 | /// \retval FP_ILOGB0 for zero 2656 | /// \retval FP_ILOGBNAN for NaN 2657 | /// \retval MAX_INT for infinity 2658 | // template typename enable::type ilogb(T arg) { return functions::ilogb(arg); } 2659 | inline int ilogb(half arg) { return functions::ilogb(arg); } 2660 | inline int ilogb(expr arg) { return functions::ilogb(arg); } 2661 | 2662 | /// Extract exponent. 2663 | /// \param arg number to query 2664 | /// \return floating point exponent 2665 | // template typename enable::type logb(T arg) { return functions::logb(arg); } 2666 | inline half logb(half arg) { return functions::logb(arg); } 2667 | inline half logb(expr arg) { return functions::logb(arg); } 2668 | 2669 | /// Next representable value. 2670 | /// \param from value to compute next representable value for 2671 | /// \param to direction towards which to compute next value 2672 | /// \return next representable value after \a from in direction towards \a to 2673 | // template typename enable::type nextafter(T from, U to) { return functions::nextafter(from, to); } 2674 | inline half nextafter(half from, half to) { return functions::nextafter(from, to); } 2675 | inline half nextafter(half from, expr to) { return functions::nextafter(from, to); } 2676 | inline half nextafter(expr from, half to) { return functions::nextafter(from, to); } 2677 | inline half nextafter(expr from, expr to) { return functions::nextafter(from, to); } 2678 | 2679 | /// Next representable value. 
2680 | /// \param from value to compute next representable value for 2681 | /// \param to direction towards which to compute next value 2682 | /// \return next representable value after \a from in direction towards \a to 2683 | // template typename enable::type nexttoward(T from, long double to) { return functions::nexttoward(from, to); } 2684 | inline half nexttoward(half from, long double to) { return functions::nexttoward(from, to); } 2685 | inline half nexttoward(expr from, long double to) { return functions::nexttoward(from, to); } 2686 | 2687 | /// Take sign. 2688 | /// \param x value to change sign for 2689 | /// \param y value to take sign from 2690 | /// \return value equal to \a x in magnitude and to \a y in sign 2691 | // template typename enable::type copysign(T x, U y) { return functions::copysign(x, y); } 2692 | inline half copysign(half x, half y) { return functions::copysign(x, y); } 2693 | inline half copysign(half x, expr y) { return functions::copysign(x, y); } 2694 | inline half copysign(expr x, half y) { return functions::copysign(x, y); } 2695 | inline half copysign(expr x, expr y) { return functions::copysign(x, y); } 2696 | 2697 | /// \} 2698 | /// \name Floating point classification 2699 | /// \{ 2700 | 2701 | 2702 | /// Classify floating point value. 2703 | /// \param arg number to classify 2704 | /// \retval FP_ZERO for positive and negative zero 2705 | /// \retval FP_SUBNORMAL for subnormal numbers 2706 | /// \retval FP_INFINITY for positive and negative infinity 2707 | /// \retval FP_NAN for NaNs 2708 | /// \retval FP_NORMAL for all other (normal) values 2709 | // template typename enable::type fpclassify(T arg) { return functions::fpclassify(arg); } 2710 | inline int fpclassify(half arg) { return functions::fpclassify(arg); } 2711 | inline int fpclassify(expr arg) { return functions::fpclassify(arg); } 2712 | 2713 | /// Check if finite number. 
2714 | /// \param arg number to check 2715 | /// \retval true if neither infinity nor NaN 2716 | /// \retval false else 2717 | // template typename enable::type isfinite(T arg) { return functions::isfinite(arg); } 2718 | inline bool isfinite(half arg) { return functions::isfinite(arg); } 2719 | inline bool isfinite(expr arg) { return functions::isfinite(arg); } 2720 | 2721 | /// Check for infinity. 2722 | /// \param arg number to check 2723 | /// \retval true for positive or negative infinity 2724 | /// \retval false else 2725 | // template typename enable::type isinf(T arg) { return functions::isinf(arg); } 2726 | inline bool isinf(half arg) { return functions::isinf(arg); } 2727 | inline bool isinf(expr arg) { return functions::isinf(arg); } 2728 | 2729 | /// Check for NaN. 2730 | /// \param arg number to check 2731 | /// \retval true for NaNs 2732 | /// \retval false else 2733 | // template typename enable::type isnan(T arg) { return functions::isnan(arg); } 2734 | inline bool isnan(half arg) { return functions::isnan(arg); } 2735 | inline bool isnan(expr arg) { return functions::isnan(arg); } 2736 | 2737 | /// Check if normal number. 2738 | /// \param arg number to check 2739 | /// \retval true if normal number 2740 | /// \retval false if either subnormal, zero, infinity or NaN 2741 | // template typename enable::type isnormal(T arg) { return functions::isnormal(arg); } 2742 | inline bool isnormal(half arg) { return functions::isnormal(arg); } 2743 | inline bool isnormal(expr arg) { return functions::isnormal(arg); } 2744 | 2745 | /// Check sign. 
2746 | /// \param arg number to check 2747 | /// \retval true for negative number 2748 | /// \retval false for positive number 2749 | // template typename enable::type signbit(T arg) { return functions::signbit(arg); } 2750 | inline bool signbit(half arg) { return functions::signbit(arg); } 2751 | inline bool signbit(expr arg) { return functions::signbit(arg); } 2752 | 2753 | /// \} 2754 | /// \name Comparison 2755 | /// \{ 2756 | 2757 | /// Comparison for greater than. 2758 | /// \param x first operand 2759 | /// \param y second operand 2760 | /// \retval true if \a x greater than \a y 2761 | /// \retval false else 2762 | // template typename enable::type isgreater(T x, U y) { return functions::isgreater(x, y); } 2763 | inline bool isgreater(half x, half y) { return functions::isgreater(x, y); } 2764 | inline bool isgreater(half x, expr y) { return functions::isgreater(x, y); } 2765 | inline bool isgreater(expr x, half y) { return functions::isgreater(x, y); } 2766 | inline bool isgreater(expr x, expr y) { return functions::isgreater(x, y); } 2767 | 2768 | /// Comparison for greater equal. 2769 | /// \param x first operand 2770 | /// \param y second operand 2771 | /// \retval true if \a x greater equal \a y 2772 | /// \retval false else 2773 | // template typename enable::type isgreaterequal(T x, U y) { return functions::isgreaterequal(x, y); } 2774 | inline bool isgreaterequal(half x, half y) { return functions::isgreaterequal(x, y); } 2775 | inline bool isgreaterequal(half x, expr y) { return functions::isgreaterequal(x, y); } 2776 | inline bool isgreaterequal(expr x, half y) { return functions::isgreaterequal(x, y); } 2777 | inline bool isgreaterequal(expr x, expr y) { return functions::isgreaterequal(x, y); } 2778 | 2779 | /// Comparison for less than. 
2780 | /// \param x first operand 2781 | /// \param y second operand 2782 | /// \retval true if \a x less than \a y 2783 | /// \retval false else 2784 | // template typename enable::type isless(T x, U y) { return functions::isless(x, y); } 2785 | inline bool isless(half x, half y) { return functions::isless(x, y); } 2786 | inline bool isless(half x, expr y) { return functions::isless(x, y); } 2787 | inline bool isless(expr x, half y) { return functions::isless(x, y); } 2788 | inline bool isless(expr x, expr y) { return functions::isless(x, y); } 2789 | 2790 | /// Comparison for less equal. 2791 | /// \param x first operand 2792 | /// \param y second operand 2793 | /// \retval true if \a x less equal \a y 2794 | /// \retval false else 2795 | // template typename enable::type islessequal(T x, U y) { return functions::islessequal(x, y); } 2796 | inline bool islessequal(half x, half y) { return functions::islessequal(x, y); } 2797 | inline bool islessequal(half x, expr y) { return functions::islessequal(x, y); } 2798 | inline bool islessequal(expr x, half y) { return functions::islessequal(x, y); } 2799 | inline bool islessequal(expr x, expr y) { return functions::islessequal(x, y); } 2800 | 2801 | /// Comparison for less or greater. 2802 | /// \param x first operand 2803 | /// \param y second operand 2804 | /// \retval true if either less or greater 2805 | /// \retval false else 2806 | // template typename enable::type islessgreater(T x, U y) { return functions::islessgreater(x, y); } 2807 | inline bool islessgreater(half x, half y) { return functions::islessgreater(x, y); } 2808 | inline bool islessgreater(half x, expr y) { return functions::islessgreater(x, y); } 2809 | inline bool islessgreater(expr x, half y) { return functions::islessgreater(x, y); } 2810 | inline bool islessgreater(expr x, expr y) { return functions::islessgreater(x, y); } 2811 | 2812 | /// Check if unordered.
2813 | /// \param x first operand 2814 | /// \param y second operand 2815 | /// \retval true if unordered (one or two NaN operands) 2816 | /// \retval false else 2817 | // template typename enable::type isunordered(T x, U y) { return functions::isunordered(x, y); } 2818 | inline bool isunordered(half x, half y) { return functions::isunordered(x, y); } 2819 | inline bool isunordered(half x, expr y) { return functions::isunordered(x, y); } 2820 | inline bool isunordered(expr x, half y) { return functions::isunordered(x, y); } 2821 | inline bool isunordered(expr x, expr y) { return functions::isunordered(x, y); } 2822 | 2823 | /// \name Casting 2824 | /// \{ 2825 | 2826 | /// Cast to or from half-precision floating point number. 2827 | /// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted 2828 | /// directly using the given rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. 2829 | /// It uses the default rounding mode. 2830 | /// 2831 | /// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types 2832 | /// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler 2833 | /// error and casting between [half](\ref half_float::half)s is just a no-op. 2834 | /// \tparam T destination type (half or built-in arithmetic type) 2835 | /// \tparam U source type (half or built-in arithmetic type) 2836 | /// \param arg value to cast 2837 | /// \return \a arg converted to destination type 2838 | template T half_cast(U arg) { return half_caster::cast(arg); } 2839 | 2840 | /// Cast to or from half-precision floating point number. 2841 | /// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted 2842 | /// directly using the given rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. 
2843 | /// 2844 | /// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types 2845 | /// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler 2846 | /// error and casting between [half](\ref half_float::half)s is just a no-op. 2847 | /// \tparam T destination type (half or built-in arithmetic type) 2848 | /// \tparam R rounding mode to use. 2849 | /// \tparam U source type (half or built-in arithmetic type) 2850 | /// \param arg value to cast 2851 | /// \return \a arg converted to destination type 2852 | template T half_cast(U arg) { return half_caster::cast(arg); } 2853 | /// \} 2854 | } 2855 | 2856 | using detail::operator==; 2857 | using detail::operator!=; 2858 | using detail::operator<; 2859 | using detail::operator>; 2860 | using detail::operator<=; 2861 | using detail::operator>=; 2862 | using detail::operator+; 2863 | using detail::operator-; 2864 | using detail::operator*; 2865 | using detail::operator/; 2866 | using detail::operator<<; 2867 | using detail::operator>>; 2868 | 2869 | using detail::abs; 2870 | using detail::fabs; 2871 | using detail::fmod; 2872 | using detail::remainder; 2873 | using detail::remquo; 2874 | using detail::fma; 2875 | using detail::fmax; 2876 | using detail::fmin; 2877 | using detail::fdim; 2878 | using detail::nanh; 2879 | using detail::exp; 2880 | using detail::expm1; 2881 | using detail::exp2; 2882 | using detail::log; 2883 | using detail::log10; 2884 | using detail::log1p; 2885 | using detail::log2; 2886 | using detail::sqrt; 2887 | using detail::cbrt; 2888 | using detail::hypot; 2889 | using detail::pow; 2890 | using detail::sin; 2891 | using detail::cos; 2892 | using detail::tan; 2893 | using detail::asin; 2894 | using detail::acos; 2895 | using detail::atan; 2896 | using detail::atan2; 2897 | using detail::sinh; 2898 | using detail::cosh; 2899 | using detail::tanh; 2900 | using detail::asinh; 2901 | using 
detail::acosh; 2902 | using detail::atanh; 2903 | using detail::erf; 2904 | using detail::erfc; 2905 | using detail::lgamma; 2906 | using detail::tgamma; 2907 | using detail::ceil; 2908 | using detail::floor; 2909 | using detail::trunc; 2910 | using detail::round; 2911 | using detail::lround; 2912 | using detail::nearbyint; 2913 | using detail::rint; 2914 | using detail::lrint; 2915 | #if HALF_ENABLE_CPP11_LONG_LONG 2916 | using detail::llround; 2917 | using detail::llrint; 2918 | #endif 2919 | using detail::frexp; 2920 | using detail::ldexp; 2921 | using detail::modf; 2922 | using detail::scalbn; 2923 | using detail::scalbln; 2924 | using detail::ilogb; 2925 | using detail::logb; 2926 | using detail::nextafter; 2927 | using detail::nexttoward; 2928 | using detail::copysign; 2929 | using detail::fpclassify; 2930 | using detail::isfinite; 2931 | using detail::isinf; 2932 | using detail::isnan; 2933 | using detail::isnormal; 2934 | using detail::signbit; 2935 | using detail::isgreater; 2936 | using detail::isgreaterequal; 2937 | using detail::isless; 2938 | using detail::islessequal; 2939 | using detail::islessgreater; 2940 | using detail::isunordered; 2941 | 2942 | using detail::half_cast; 2943 | } 2944 | 2945 | 2946 | /// Extensions to the C++ standard library. 2947 | namespace std 2948 | { 2949 | /// Numeric limits for half-precision floats. 2950 | /// Because of the underlying single-precision implementation of many operations, it inherits some properties from 2951 | /// `std::numeric_limits<float>`. 2952 | template<> class numeric_limits<half_float::half> : public numeric_limits<float> 2953 | { 2954 | public: 2955 | /// Supports signed values. 2956 | static HALF_CONSTEXPR_CONST bool is_signed = true; 2957 | 2958 | /// Is not exact. 2959 | static HALF_CONSTEXPR_CONST bool is_exact = false; 2960 | 2961 | /// Doesn't provide modulo arithmetic. 2962 | static HALF_CONSTEXPR_CONST bool is_modulo = false; 2963 | 2964 | /// IEEE conformant.
2965 | static HALF_CONSTEXPR_CONST bool is_iec559 = true; 2966 | 2967 | /// Supports infinity. 2968 | static HALF_CONSTEXPR_CONST bool has_infinity = true; 2969 | 2970 | /// Supports quiet NaNs. 2971 | static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true; 2972 | 2973 | /// Supports subnormal values. 2974 | static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present; 2975 | 2976 | /// Rounding mode. 2977 | /// Due to the mix of internal single-precision computations (using the rounding mode of the underlying 2978 | /// single-precision implementation) with the rounding mode of the single-to-half conversions, the actual rounding 2979 | /// mode might be `std::round_indeterminate` if the default half-precision rounding mode doesn't match the 2980 | /// single-precision rounding mode. 2981 | static HALF_CONSTEXPR_CONST float_round_style round_style = (std::numeric_limits<float>::round_style== 2982 | half_float::half::round_style) ? half_float::half::round_style : round_indeterminate; 2983 | 2984 | /// Significant digits. 2985 | static HALF_CONSTEXPR_CONST int digits = 11; 2986 | 2987 | /// Significant decimal digits. 2988 | static HALF_CONSTEXPR_CONST int digits10 = 3; 2989 | 2990 | /// Required decimal digits to represent all possible values. 2991 | static HALF_CONSTEXPR_CONST int max_digits10 = 5; 2992 | 2993 | /// Number base. 2994 | static HALF_CONSTEXPR_CONST int radix = 2; 2995 | 2996 | /// One more than smallest exponent. 2997 | static HALF_CONSTEXPR_CONST int min_exponent = -13; 2998 | 2999 | /// Smallest normalized representable power of 10. 3000 | static HALF_CONSTEXPR_CONST int min_exponent10 = -4; 3001 | 3002 | /// One more than largest exponent 3003 | static HALF_CONSTEXPR_CONST int max_exponent = 16; 3004 | 3005 | /// Largest finitely representable power of 10. 3006 | static HALF_CONSTEXPR_CONST int max_exponent10 = 4; 3007 | 3008 | /// Smallest positive normal value.
3009 | static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0400); } 3010 | 3011 | /// Smallest finite value. 3012 | static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0xFBFF); } 3013 | 3014 | /// Largest finite value. 3015 | static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7BFF); } 3016 | 3017 | /// Difference between one and next representable value. 3018 | static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x1400); } 3019 | 3020 | /// Maximum rounding error. 3021 | static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW 3022 | { return half_float::half(half_float::detail::binary, (round_style==std::round_to_nearest) ? 0x3800 : 0x3C00); } 3023 | 3024 | /// Positive infinity. 3025 | static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7C00); } 3026 | 3027 | /// Quiet NaN. 3028 | static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7FFF); } 3029 | 3030 | /// Signalling NaN. 3031 | static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7DFF); } 3032 | 3033 | /// Smallest positive subnormal value. 3034 | static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0001); } 3035 | }; 3036 | 3037 | #if HALF_ENABLE_CPP11_HASH 3038 | /// Hash function for half-precision floats. 3039 | /// This is only defined if C++11 `std::hash` is supported and enabled. 3040 | template<> struct hash //: unary_function 3041 | { 3042 | /// Type of function argument. 3043 | typedef half_float::half argument_type; 3044 | 3045 | /// Function return type. 
3046 | typedef size_t result_type; 3047 | 3048 | /// Compute hash function. 3049 | /// \param arg half to hash 3050 | /// \return hash value 3051 | result_type operator()(argument_type arg) const 3052 | { return hash()(static_cast(arg.data_)&-(arg.data_!=0x8000)); } 3053 | }; 3054 | #endif 3055 | } 3056 | 3057 | 3058 | #undef HALF_CONSTEXPR 3059 | #undef HALF_CONSTEXPR_CONST 3060 | #undef HALF_NOEXCEPT 3061 | #undef HALF_NOTHROW 3062 | #ifdef HALF_POP_WARNINGS 3063 | #pragma warning(pop) 3064 | #undef HALF_POP_WARNINGS 3065 | #endif 3066 | 3067 | #endif 3068 | -------------------------------------------------------------------------------- /src/ofxTextureRecorder.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * ofxTextureRecorder.cpp 3 | * 4 | * Created on: Oct 14, 2014 5 | * Author: arturo 6 | */ 7 | 8 | #include "ofxTextureRecorder.h" 9 | 10 | ofxTextureRecorder::~ofxTextureRecorder(){ 11 | if(!encodeThreads.empty()){ 12 | stopThreads(); 13 | } 14 | } 15 | 16 | void ofxTextureRecorder::setup(int w, int h){ 17 | setup(Settings(w,h)); 18 | } 19 | 20 | void ofxTextureRecorder::setup(const ofTexture & tex){ 21 | setup(Settings(tex)); 22 | } 23 | 24 | void ofxTextureRecorder::setup(const ofTextureData & texData){ 25 | setup(Settings(texData)); 26 | } 27 | 28 | void ofxTextureRecorder::setup(const Settings & settings){ 29 | if(!encodeThreads.empty()){ 30 | stopThreads(); 31 | } 32 | width = settings.w; 33 | height = settings.h; 34 | pixelFormat = settings.pixelFormat; 35 | imageFormat = settings.imageFormat; 36 | folderPath = settings.folderPath; 37 | glType = settings.glType; 38 | isVideo = false; 39 | maxMemoryUsage = settings.maxMemoryUsage; 40 | if (!folderPath.empty()) folderPath = ofFilePath::addTrailingSlash(folderPath); 41 | 42 | frame = 0; 43 | firstFrame = true; 44 | auto bufferSize = 0; 45 | switch(glType){ 46 | case GL_UNSIGNED_BYTE: 47 | bufferSize = ofPixels::bytesFromPixelFormat(width,height,pixelFormat); 
48 | break; 49 | case GL_SHORT: 50 | case GL_UNSIGNED_SHORT: 51 | bufferSize = ofShortPixels::bytesFromPixelFormat(width,height,pixelFormat); 52 | break; 53 | case GL_FLOAT: 54 | bufferSize = ofFloatPixels::bytesFromPixelFormat(width,height,pixelFormat); 55 | break; 56 | case GL_HALF_FLOAT: 57 | bufferSize = ofFloatPixels::bytesFromPixelFormat(width,height,pixelFormat)/2; 58 | break; 59 | } 60 | 61 | // number of gpu buffers to copy the texture 62 | size_t numGpuBuffers = 2; 63 | for(size_t i=0; i(GL_READ_ONLY); 218 | if(pixels){ 219 | if(isVideo){ 220 | Buffer buffer{front, "", pixels}; 221 | channel.send(buffer); 222 | }else{ 223 | std::ostringstream oss; 224 | oss << folderPath << ofToString(frame_, 5, '0') << "." << ofImageFormatExtension(imageFormat); 225 | Buffer buffer{front, oss.str(), pixels}; 226 | channel.send(buffer); 227 | } 228 | }else{ 229 | ofLogError(__FUNCTION__) << "Couldn't map buffer"; 230 | } 231 | } 232 | buffersCopying.push(back); 233 | }else{ 234 | auto back = buffersReady.front(); 235 | buffersReady.pop(); 236 | auto copyQuery = copyQueryReady.front(); 237 | copyQueryReady.pop(); 238 | glBeginQuery(GL_TIME_ELAPSED, copyQuery); 239 | tex.copyTo(pixelBuffers[back]); 240 | glEndQuery(GL_TIME_ELAPSED); 241 | buffersCopying.push(back); 242 | copyQueryCopying.push(copyQuery); 243 | } 244 | } 245 | 246 | void ofxTextureRecorder::stopThreads(){ 247 | channel.close(); 248 | channelReady.close(); 249 | 250 | switch(glType){ 251 | case GL_UNSIGNED_BYTE: 252 | while(!pixelsChannel.empty()){ 253 | ofSleepMillis(100); 254 | } 255 | pixelsChannel.close(); 256 | break; 257 | case GL_SHORT: 258 | case GL_UNSIGNED_SHORT: 259 | while(!shortPixelsChannel.empty()){ 260 | ofSleepMillis(100); 261 | } 262 | shortPixelsChannel.close(); 263 | break; 264 | case GL_FLOAT: 265 | while(!floatPixelsChannel.empty()){ 266 | ofSleepMillis(100); 267 | } 268 | floatPixelsChannel.close(); 269 | break; 270 | case GL_HALF_FLOAT: 271 | while(!halffloatPixelsChannel.empty()){ 272 
| ofSleepMillis(100); 273 | } 274 | halffloatPixelsChannel.close(); 275 | while(!floatPixelsChannel.empty()){ 276 | ofSleepMillis(100); 277 | } 278 | floatPixelsChannel.close(); 279 | for(auto &t: halfDecodingThreads){ 280 | t.join(); 281 | } 282 | break; 283 | } 284 | 285 | while(!encodedChannel.empty()){ 286 | ofSleepMillis(100); 287 | } 288 | encodedChannel.close(); 289 | for(auto &t: encodeThreads){ 290 | t.join(); 291 | } 292 | encodeThreads.clear(); 293 | downloadThread.join(); 294 | saveThread.join(); 295 | #if OFX_VIDEO_RECORDER 296 | if(isVideo){ 297 | videoRecorder.close(); 298 | } 299 | #endif 300 | #if OFX_HPVLIB 301 | if(isHPV){ 302 | hpvCreator.stop(); 303 | hpvProgressThread.join(); 304 | } 305 | #endif 306 | 307 | 308 | } 309 | 310 | void ofxTextureRecorder::createThreads(size_t numThreads){ 311 | for(size_t i=0; i pixels; 334 | pixels.resize(size); 335 | returnHalfFloatPixelsChannel.send(std::move(pixels)); 336 | halfFloatPoolSize += 1; 337 | } 338 | } 339 | } 340 | 341 | downloadThread = std::thread([&]{ 342 | Buffer buffer; 343 | size_t frameNum = 0; 344 | switch(glType){ 345 | case GL_UNSIGNED_BYTE:{ 346 | while(channel.receive(buffer)){ 347 | auto then = ofGetElapsedTimeMicros(); 348 | ofPixels pixels = getBuffer(); 349 | pixels.setFromPixels((unsigned char*)buffer.data, width, height, pixelFormat); 350 | channelReady.send(buffer.id); 351 | auto now = ofGetElapsedTimeMicros(); 352 | timeDownload = timeDownload * 0.9 + (now - then) * 0.1; 353 | pixelsChannel.send({frameNum++, buffer.path, std::move(pixels)}); 354 | } 355 | }break; 356 | case GL_SHORT: 357 | case GL_UNSIGNED_SHORT:{ 358 | while(channel.receive(buffer)){ 359 | auto then = ofGetElapsedTimeMicros(); 360 | ofShortPixels pixels = getShortBuffer(); 361 | pixels.setFromPixels((unsigned short*)buffer.data, width, height, pixelFormat); 362 | channelReady.send(buffer.id); 363 | auto now = ofGetElapsedTimeMicros(); 364 | timeDownload = timeDownload * 0.9 + (now - then) * 0.1; 365 | 
shortPixelsChannel.send({frameNum++, buffer.path, std::move(pixels)}); 366 | } 367 | }break; 368 | case GL_FLOAT:{ 369 | while(channel.receive(buffer)){ 370 | auto then = ofGetElapsedTimeMicros(); 371 | ofFloatPixels pixels = getFloatBuffer(); 372 | pixels.setFromPixels((float*)buffer.data, width, height, pixelFormat); 373 | channelReady.send(buffer.id); 374 | auto now = ofGetElapsedTimeMicros(); 375 | timeDownload = timeDownload * 0.9 + (now - then) * 0.1; 376 | floatPixelsChannel.send({frameNum++, buffer.path, std::move(pixels)}); 377 | } 378 | }break; 379 | case GL_HALF_FLOAT:{ 380 | while(channel.receive(buffer)){ 381 | auto then = ofGetElapsedTimeMicros(); 382 | std::vector halfdata = getHalfFloatBuffer(); 383 | auto halfptr = (half_float::half*)buffer.data; 384 | memcpy(halfdata.data(), halfptr, size * sizeof(half_float::half)); 385 | channelReady.send(buffer.id); 386 | auto now = ofGetElapsedTimeMicros(); 387 | timeDownload = timeDownload * 0.9 + (now - then) * 0.1; 388 | halffloatPixelsChannel.send({frameNum++, buffer.path, std::move(halfdata)}); 389 | } 390 | }break; 391 | } 392 | }); 393 | 394 | 395 | #if OFX_HPVLIB 396 | if(isHPV){ 397 | hpvProgressThread = std::thread([this]{ 398 | while(true){ 399 | HPV::HPVCompressionProgress progress; 400 | hpvProgress.wait_and_pop(progress); 401 | if(progress.state == HPV_CREATOR_STATE_ERROR){ 402 | std::cerr << "error: " << progress.done_item_name << std::endl; 403 | return 1; 404 | }else if(progress.state == HPV_CREATOR_STATE_DONE){ 405 | std::cout << "done compressing: " << progress.done_item_name << std::endl; 406 | return 0; 407 | }else{ 408 | std::cout << progress.done_items << " / " << progress.total_items << " " 409 | << std::filesystem::path(progress.done_item_name).filename() << " " 410 | << progress.compression_ratio << "%" <> halfdata; 421 | while(halffloatPixelsChannel.receive(halfdata)){ 422 | auto then = ofGetElapsedTimeMicros(); 423 | ofFloatPixels pixels = getFloatBuffer(); 424 | auto halfptr = 
halfdata.pixels.data(); 425 | for(auto & p: pixels){ 426 | p = *halfptr++; 427 | } 428 | returnHalfFloatPixelsChannel.send(std::move(halfdata.pixels)); 429 | auto now = ofGetElapsedTimeMicros(); 430 | halfDecodingTime = halfDecodingTime * 0.9 + (now - then) * 0.1; 431 | floatPixelsChannel.send({halfdata.id, halfdata.path, std::move(pixels)}); 432 | } 433 | })); 434 | } 435 | } 436 | 437 | 438 | ofLogNotice(__FUNCTION__) << "Initializing with " << numThreads << " encoding threads"; 439 | for(size_t i=0;i frame; 448 | while(pixelsChannel.receive(frame)){ 449 | auto then = ofGetElapsedTimeMicros(); 450 | rgb8Pixels = frame.pixels; 451 | rgb8Pixels.setNumChannels(3); 452 | videoRecorder.addFrame(rgb8Pixels); 453 | returnPixelsChannel.send(std::move(frame.pixels)); 454 | auto now = ofGetElapsedTimeMicros(); 455 | encodingTime = halfDecodingTime * 0.9 + (now - then) * 0.1; 456 | } 457 | }break; 458 | case GL_SHORT: 459 | case GL_UNSIGNED_SHORT:{ 460 | Frame frame; 461 | while(shortPixelsChannel.receive(frame)){ 462 | auto then = ofGetElapsedTimeMicros(); 463 | rgb8Pixels = frame.pixels; 464 | rgb8Pixels.setNumChannels(3); 465 | videoRecorder.addFrame(rgb8Pixels); 466 | returnShortPixelsChannel.send(std::move(frame.pixels)); 467 | auto now = ofGetElapsedTimeMicros(); 468 | encodingTime = halfDecodingTime * 0.9 + (now - then) * 0.1; 469 | } 470 | }break; 471 | case GL_FLOAT: 472 | case GL_HALF_FLOAT:{ 473 | Frame frame; 474 | while(floatPixelsChannel.receive(frame)){ 475 | auto then = ofGetElapsedTimeMicros(); 476 | rgb8Pixels =frame.pixels; 477 | rgb8Pixels.setNumChannels(3); 478 | videoRecorder.addFrame(rgb8Pixels); 479 | returnFloatPixelsChannel.send(std::move(frame.pixels)); 480 | auto now = ofGetElapsedTimeMicros(); 481 | encodingTime = halfDecodingTime * 0.9 + (now - then) * 0.1; 482 | } 483 | }break; 484 | } 485 | #endif 486 | 487 | }else if(isHPV){ 488 | 489 | #if OFX_HPVLIB 490 | ofPixels rgb8Pixels; 491 | HPV::HPVCompressionWorkItem item; 492 | switch(glType){ 
493 | case GL_UNSIGNED_BYTE:{ 494 | Frame frame; 495 | while(pixelsChannel.receive(frame)){ 496 | auto then = ofGetElapsedTimeMicros(); 497 | rgb8Pixels = frame.pixels; 498 | rgb8Pixels.setNumChannels(4); 499 | item.offset = frame.id; 500 | item.path = frame.path; 501 | hpvCreator.process_frame(rgb8Pixels.getData(), item); 502 | returnPixelsChannel.send(std::move(frame.pixels)); 503 | auto now = ofGetElapsedTimeMicros(); 504 | encodingTime = halfDecodingTime * 0.9 + (now - then) * 0.1; 505 | } 506 | }break; 507 | case GL_SHORT: 508 | case GL_UNSIGNED_SHORT:{ 509 | Frame frame; 510 | while(shortPixelsChannel.receive(frame)){ 511 | auto then = ofGetElapsedTimeMicros(); 512 | rgb8Pixels = frame.pixels; 513 | rgb8Pixels.setNumChannels(4); 514 | item.offset = frame.id; 515 | item.path = frame.path; 516 | hpvCreator.process_frame(rgb8Pixels.getData(), item); 517 | returnShortPixelsChannel.send(std::move(frame.pixels)); 518 | auto now = ofGetElapsedTimeMicros(); 519 | encodingTime = halfDecodingTime * 0.9 + (now - then) * 0.1; 520 | } 521 | }break; 522 | case GL_FLOAT: 523 | case GL_HALF_FLOAT:{ 524 | Frame frame; 525 | while(floatPixelsChannel.receive(frame)){ 526 | auto then = ofGetElapsedTimeMicros(); 527 | rgb8Pixels = frame.pixels; 528 | rgb8Pixels.setNumChannels(4); 529 | item.offset = frame.id; 530 | item.path = frame.path; 531 | hpvCreator.process_frame(rgb8Pixels.getData(), item); 532 | returnFloatPixelsChannel.send(std::move(frame.pixels)); 533 | auto now = ofGetElapsedTimeMicros(); 534 | encodingTime = halfDecodingTime * 0.9 + (now - then) * 0.1; 535 | } 536 | }break; 537 | } 538 | #endif 539 | }else{ 540 | switch(glType){ 541 | case GL_UNSIGNED_BYTE:{ 542 | Frame frame; 543 | ofBuffer buffer; 544 | while(pixelsChannel.receive(frame)){ 545 | auto then = ofGetElapsedTimeMicros(); 546 | buffer.clear(); 547 | ofSaveImage(frame.pixels, buffer, imageFormat, OF_IMAGE_QUALITY_BEST); 548 | returnPixelsChannel.send(std::move(frame.pixels)); 549 | auto now = 
ofGetElapsedTimeMicros(); 550 | encodingTime = halfDecodingTime * 0.9 + (now - then) * 0.1; 551 | encodedChannel.send(std::make_pair(frame.path, std::move(buffer))); 552 | } 553 | }break; 554 | case GL_SHORT: 555 | case GL_UNSIGNED_SHORT:{ 556 | Frame frame; 557 | ofBuffer buffer; 558 | while(shortPixelsChannel.receive(frame)){ 559 | auto then = ofGetElapsedTimeMicros(); 560 | buffer.clear(); 561 | ofSaveImage(frame.pixels, buffer, imageFormat, OF_IMAGE_QUALITY_BEST); 562 | returnShortPixelsChannel.send(std::move(frame.pixels)); 563 | auto now = ofGetElapsedTimeMicros(); 564 | encodingTime = halfDecodingTime * 0.9 + (now - then) * 0.1; 565 | encodedChannel.send(std::make_pair(frame.path, std::move(buffer))); 566 | } 567 | }break; 568 | case GL_FLOAT: 569 | case GL_HALF_FLOAT:{ 570 | Frame frame; 571 | ofBuffer buffer; 572 | while(floatPixelsChannel.receive(frame)){ 573 | auto then = ofGetElapsedTimeMicros(); 574 | buffer.clear(); 575 | ofSaveImage(frame.pixels, buffer, imageFormat, OF_IMAGE_QUALITY_BEST); 576 | returnFloatPixelsChannel.send(std::move(frame.pixels)); 577 | auto now = ofGetElapsedTimeMicros(); 578 | encodingTime = halfDecodingTime * 0.9 + (now - then) * 0.1; 579 | encodedChannel.send(std::make_pair(frame.path, std::move(buffer))); 580 | } 581 | }break; 582 | } 583 | } 584 | }); 585 | } 586 | saveThread = std::thread([&]{ 587 | std::pair encoded; 588 | while(encodedChannel.receive(encoded)){ 589 | auto then = ofGetElapsedTimeMicros(); 590 | ofFile file(encoded.first, ofFile::WriteOnly); 591 | file.writeFromBuffer(encoded.second); 592 | auto now = ofGetElapsedTimeMicros(); 593 | saveTime = halfDecodingTime * 0.9 + (now - then) * 0.1; 594 | } 595 | }); 596 | } 597 | 598 | ofPixels ofxTextureRecorder::getBuffer(){ 599 | ofPixels pixels; 600 | if(!returnPixelsChannel.tryReceive(pixels)){ 601 | if(poolSize * ofPixels::bytesFromPixelFormat(width, height, pixelFormat) < maxMemoryUsage){ 602 | pixels.allocate(width, height, pixelFormat); 603 | poolSize += 1; 
604 | }else{ 605 | returnPixelsChannel.receive(pixels); 606 | } 607 | } 608 | return pixels; 609 | }; 610 | 611 | ofShortPixels ofxTextureRecorder::getShortBuffer(){ 612 | ofShortPixels pixels; 613 | if(!returnShortPixelsChannel.tryReceive(pixels)){ 614 | if(shortPoolSize * ofShortPixels::bytesFromPixelFormat(width, height, pixelFormat) < maxMemoryUsage){ 615 | pixels.allocate(width, height, pixelFormat); 616 | shortPoolSize += 1; 617 | }else{ 618 | returnShortPixelsChannel.receive(pixels); 619 | } 620 | } 621 | return pixels; 622 | }; 623 | 624 | ofFloatPixels ofxTextureRecorder::getFloatBuffer(){ 625 | if(glType==GL_FLOAT){ 626 | ofFloatPixels pixels; 627 | if(!returnFloatPixelsChannel.tryReceive(pixels)){ 628 | if(floatPoolSize * ofFloatPixels::bytesFromPixelFormat(width, height, pixelFormat) < maxMemoryUsage){ 629 | pixels.allocate(width, height, pixelFormat); 630 | floatPoolSize += 1; 631 | }else{ 632 | returnFloatPixelsChannel.receive(pixels); 633 | } 634 | } 635 | return pixels; 636 | }else{ 637 | ofFloatPixels pixels; 638 | if(!returnFloatPixelsChannel.tryReceive(pixels)){ 639 | auto floatSize = floatPoolSize * ofFloatPixels::bytesFromPixelFormat(width, height, pixelFormat); 640 | auto halfFloatSize = halfFloatPoolSize * size * sizeof(half_float::half); 641 | if(halfFloatSize + floatSize < maxMemoryUsage){ 642 | pixels.allocate(width, height, pixelFormat); 643 | floatPoolSize += 1; 644 | }else{ 645 | returnFloatPixelsChannel.receive(pixels); 646 | } 647 | } 648 | return pixels; 649 | } 650 | }; 651 | 652 | std::vector ofxTextureRecorder::getHalfFloatBuffer(){ 653 | std::vector pixels; 654 | if(!returnHalfFloatPixelsChannel.tryReceive(pixels)){ 655 | auto floatSize = floatPoolSize * ofFloatPixels::bytesFromPixelFormat(width, height, pixelFormat); 656 | auto halfFloatSize = halfFloatPoolSize * size * sizeof(half_float::half); 657 | if(halfFloatSize + floatSize < maxMemoryUsage){ 658 | pixels.resize(size); 659 | halfFloatPoolSize += 1; 660 | }else{ 661 | 
returnHalfFloatPixelsChannel.receive(pixels); 662 | } 663 | } 664 | return pixels; 665 | }; 666 | 667 | uint64_t ofxTextureRecorder::getAvgTimeEncode() const{ 668 | return encodingTime; 669 | } 670 | 671 | uint64_t ofxTextureRecorder::getAvgTimeSave() const{ 672 | return saveTime; 673 | } 674 | 675 | uint64_t ofxTextureRecorder::getAvgTimeGpuDownload() const{ 676 | return timeDownload; 677 | } 678 | 679 | uint64_t ofxTextureRecorder::getAvgTimeTextureCopy() const{ 680 | return copyTextureTime; 681 | } 682 | 683 | ofxTextureRecorder::Settings::Settings(int w, int h) 684 | :w(w) 685 | ,h(h){} 686 | 687 | ofxTextureRecorder::Settings::Settings(const ofTexture & tex) 688 | :ofxTextureRecorder::Settings(tex.getTextureData()){} 689 | 690 | ofxTextureRecorder::Settings::Settings(const ofTextureData & texData) 691 | :w(texData.width) 692 | ,h(texData.height) 693 | ,textureInternalFormat(texData.glInternalFormat){ 694 | switch(ofGetImageTypeFromGLType(texData.glInternalFormat)){ 695 | case OF_IMAGE_COLOR: 696 | pixelFormat = OF_PIXELS_RGB; 697 | break; 698 | case OF_IMAGE_COLOR_ALPHA: 699 | pixelFormat = OF_PIXELS_RGBA; 700 | break; 701 | case OF_IMAGE_GRAYSCALE: 702 | pixelFormat = OF_PIXELS_GRAY; 703 | break; 704 | default: 705 | ofLogError("ofxTextureRecorder") << "Unsupported texture format"; 706 | } 707 | glType = ofGetGlTypeFromInternal(texData.glInternalFormat); 708 | 709 | } 710 | 711 | #if OFX_VIDEO_RECORDER 712 | ofxTextureRecorder::VideoSettings::VideoSettings(int w, int h, float fps) 713 | :w(w) 714 | ,h(h) 715 | ,fps(fps){} 716 | 717 | ofxTextureRecorder::VideoSettings::VideoSettings(const ofTexture & tex, float fps) 718 | :ofxTextureRecorder::VideoSettings(tex.getTextureData(),fps){} 719 | 720 | ofxTextureRecorder::VideoSettings::VideoSettings(const ofTextureData & texData, float fps) 721 | :w(texData.width) 722 | ,h(texData.height) 723 | ,fps(fps) 724 | ,textureInternalFormat(texData.glInternalFormat){ 725 | 
switch(ofGetImageTypeFromGLType(texData.glInternalFormat)){ 726 | case OF_IMAGE_COLOR: 727 | pixelFormat = OF_PIXELS_RGB; 728 | break; 729 | case OF_IMAGE_COLOR_ALPHA: 730 | pixelFormat = OF_PIXELS_RGBA; 731 | break; 732 | case OF_IMAGE_GRAYSCALE: 733 | pixelFormat = OF_PIXELS_GRAY; 734 | break; 735 | default: 736 | ofLogError("ofxTextureRecorder") << "Unsupported texture format"; 737 | } 738 | glType = ofGetGlTypeFromInternal(texData.glInternalFormat); 739 | 740 | } 741 | #endif 742 | -------------------------------------------------------------------------------- /src/ofxTextureRecorder.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ofxTextureRecorder.h 3 | * 4 | * Created on: Oct 14, 2014 5 | * Author: arturo 6 | */ 7 | #pragma once 8 | #include "ofMain.h" 9 | #include "half.hpp" 10 | #include 11 | 12 | #if OFX_VIDEO_RECORDER 13 | #include "ofxVideoRecorder.h" 14 | #endif 15 | #if OFX_HPVLIB 16 | #include "HPVCreator.hpp" 17 | #endif 18 | 19 | class ofxTextureRecorder{ 20 | public: 21 | ~ofxTextureRecorder(); 22 | struct Settings{ 23 | Settings(int w, int h); 24 | Settings(const ofTexture & tex); 25 | Settings(const ofTextureData & texData); 26 | 27 | int w; 28 | int h; 29 | GLenum textureInternalFormat = GL_RGB; 30 | ofImageFormat imageFormat = OF_IMAGE_FORMAT_PNG; 31 | string folderPath; 32 | /// number encoding threads, default == number of hw cores - 2 33 | size_t numThreads = std::max(1u, std::thread::hardware_concurrency() - 2); 34 | /// maximum RAM to use in bytes 35 | size_t maxMemoryUsage = 2000000000; 36 | 37 | private: 38 | ofPixelFormat pixelFormat; 39 | GLenum glType; 40 | friend class ofxTextureRecorder; 41 | }; 42 | 43 | #if OFX_VIDEO_RECORDER || OFX_HPVLIB 44 | struct VideoSettings{ 45 | VideoSettings(int w, int h, float fps); 46 | VideoSettings(const ofTexture & tex, float fps); 47 | VideoSettings(const ofTextureData & texData, float fps); 48 | 49 | int w; 50 | int h; 51 | float fps; 52 | string 
bitrate = "200k"; 53 | string videoCodec = "mp4"; 54 | string extrasettings = ""; 55 | 56 | GLenum textureInternalFormat = GL_RGB; 57 | string videoPath; 58 | /// maximum RAM to use in bytes 59 | size_t maxMemoryUsage = 2000000000; 60 | /// number encoding threads, default == number of hw cores - 2 61 | size_t numThreads = std::max(1u, std::thread::hardware_concurrency() - 2); 62 | 63 | private: 64 | ofPixelFormat pixelFormat; 65 | GLenum glType; 66 | friend class ofxTextureRecorder; 67 | }; 68 | void setup(const VideoSettings & settings); 69 | #endif 70 | 71 | void setup(int w, int h); 72 | void setup(const Settings & settings); 73 | void setup(const ofTexture & tex); 74 | void setup(const ofTextureData & texData); 75 | void save(const ofTexture & tex); 76 | void save(const ofTexture & tex, int frame); 77 | void stop(){ 78 | if(!encodeThreads.empty()){ 79 | stopThreads(); 80 | } 81 | } 82 | 83 | uint64_t getAvgTimeGpuDownload() const; 84 | uint64_t getAvgTimeEncode() const; 85 | uint64_t getAvgTimeSave() const; 86 | uint64_t getAvgTimeTextureCopy() const; 87 | private: 88 | void stopThreads(); 89 | void createThreads(size_t numThreads); 90 | ofPixels getBuffer(); 91 | ofShortPixels getShortBuffer(); 92 | ofFloatPixels getFloatBuffer(); 93 | std::vector getHalfFloatBuffer(); 94 | struct Buffer{ 95 | size_t id; 96 | std::string path; 97 | void * data; 98 | }; 99 | template 100 | struct Frame{ 101 | size_t id; 102 | std::string path; 103 | T pixels; 104 | }; 105 | 106 | ofThreadChannel channel; 107 | ofThreadChannel> pixelsChannel; 108 | size_t poolSize = 0; 109 | ofThreadChannel> shortPixelsChannel; 110 | size_t shortPoolSize = 0; 111 | ofThreadChannel> floatPixelsChannel; 112 | size_t floatPoolSize = 0; 113 | ofThreadChannel>> halffloatPixelsChannel; 114 | size_t halfFloatPoolSize = 0; 115 | ofThreadChannel returnPixelsChannel; 116 | ofThreadChannel returnShortPixelsChannel; 117 | ofThreadChannel returnFloatPixelsChannel; 118 | ofThreadChannel> 
returnHalfFloatPixelsChannel; 119 | ofThreadChannel> encodedChannel; 120 | ofThreadChannel channelReady; 121 | bool firstFrame = true; 122 | bool isVideo = false; 123 | bool isHPV = false; 124 | std::vector pixelBuffers; 125 | ofPixelFormat pixelFormat; 126 | ofImageFormat imageFormat; 127 | 128 | std::string folderPath; 129 | int width = 0; 130 | int height = 0; 131 | int frame = 0; 132 | GLenum glType = GL_UNSIGNED_BYTE; 133 | size_t size = 0; 134 | size_t maxMemoryUsage = 2000000000; 135 | std::condition_variable done; 136 | std::vector> waiting; 137 | std::thread saveThread; 138 | std::vector encodeThreads; 139 | std::vector halfDecodingThreads; 140 | std::thread downloadThread; 141 | std::queue buffersReady; 142 | std::queue buffersCopying; 143 | std::queue copyQueryReady; 144 | std::queue copyQueryCopying; 145 | uint64_t timeDownload = 0, halfDecodingTime = 0, encodingTime = 0, saveTime = 0, copyTextureTime = 0; 146 | 147 | 148 | #if OFX_VIDEO_RECORDER 149 | ofxVideoRecorder videoRecorder; 150 | #endif 151 | #if OFX_HPVLIB 152 | HPV::HPVCreator hpvCreator; 153 | ThreadSafe_Queue hpvProgress; 154 | std::thread hpvProgressThread; 155 | #endif 156 | }; 157 | --------------------------------------------------------------------------------